Skip to content

Commit

Permalink
Merge pull request #1082 from griffithlab/issue_1081
Browse files Browse the repository at this point in the history
Fix issue where method to determine matched wt result didn't return where appropriate
  • Loading branch information
susannasiebert committed Mar 13, 2024
2 parents ca9f2ba + 230d279 commit dd495dd
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 0 deletions.
1 change: 1 addition & 0 deletions pvactools/lib/output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(self, result, mt_position,
result['wt_epitope_position'] = int(baseline_best_match_position)
result['mutation_position'] = 'NA'
result['match_direction'] = 'left'
return

#If there is no previous result or the previous WT epitope was matched "from the left" we start by comparing to the baseline match
if previous_result is None or previous_result['match_direction'] == 'left':
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
allele peptide ic50 percentile mhcflurry_processing_score mhcflurry_presentation_score mhcflurry_presentation_percentile seq_num start
HLA-A*24:02 DTSRYDER 31264.66851907564 46.754125 0.0869760289788246 0.0043984437344503 62.74467391304348 187 1
HLA-A*24:02 TSRYDERP 33806.22726529197 86.33675000000002 0.0158451311290264 0.003173682025688 99.28660326086955 187 2
HLA-A*24:02 SRYDERPG 32955.94166770689 74.78250000000001 0.031451903283596 0.0034369717445758 99.28660326086955 187 3
HLA-A*24:02 RYDERPGP 27668.54284089453 20.141 0.0197256654500961 0.0039059341057787 99.28660326086955 187 4
HLA-A*24:02 YDERPGPS 32010.14530654057 56.84775000000001 0.0059497654438018 0.0032311945457766 99.28660326086955 187 5
HLA-A*24:02 DERPGPSP 32005.76502753196 56.84775000000001 0.3851186856627464 0.0122490124774301 18.61798913043478 187 6
HLA-A*24:02 ERPGPSPL 29062.45395607057 25.78325 0.0125743299722671 0.0036317943448696 99.28660326086955 187 7
HLA-A*24:02 RPGPSPLP 31379.21541747516 46.754125 0.0469118244946002 0.0038058568113157 99.28660326086955 187 8
HLA-A*24:02 DTSRYDER 31264.66851907564 46.754125 0.0869760289788246 0.0043984437344503 62.74467391304348 188 1
HLA-A*24:02 TSRYDERP 33806.22726529197 86.33675000000002 0.0158451311290264 0.003173682025688 99.28660326086955 188 2
HLA-A*24:02 SRYDERPG 32955.94166770689 74.78250000000001 0.031451903283596 0.0034369717445758 99.28660326086955 188 3
HLA-A*24:02 RYDERPGP 27668.54284089453 20.141 0.0197256654500961 0.0039059341057787 99.28660326086955 188 4
HLA-A*24:02 YDERPGPS 32010.14530654057 56.84775000000001 0.0059497654438018 0.0032311945457766 99.28660326086955 188 5
HLA-A*24:02 DERPGPSP 32005.76502753196 56.84775000000001 0.3851186856627464 0.0122490124774301 18.61798913043478 188 6
HLA-A*24:02 ERPGPSPL 29062.45395607057 25.78325 0.0125743299722671 0.0036317943448696 99.28660326086955 188 7
HLA-A*24:02 RPGPSPLP 31379.21541747516 46.754125 0.0469118244946002 0.0038058568113157 99.28660326086955 188 8
HLA-A*24:02 PGPSPLPH 29567.11179728288 29.901375 0.0039555989205837 0.0034648792128485 99.28660326086955 188 9
HLA-A*24:02 GPSPLPHP 32238.826831033744 62.53750000000001 0.0091095454990863 0.0032449602113947 99.28660326086955 188 10
HLA-A*24:02 PSPLPHPG 29379.043180397464 27.701625000000003 0.0016487352550029 0.0034581053211255 99.28660326086955 188 11
HLA-A*24:02 SPLPHPGP 31158.76639899322 46.754125 0.0541136674582958 0.0039304161323562 99.28660326086955 188 12
HLA-A*24:02 PLPHPGPS 30217.233477385817 35.31175 0.0016611069440841 0.0033653525925785 99.28660326086955 188 13
HLA-A*24:02 LPHPGPSP 30302.49188859721 35.31175 0.2167538255453109 0.0071564257627674 31.64317934782608 188 14
HLA-A*24:02 PHPGPSPL 20576.049372663783 8.502375 0.0041200742125511 0.0049231312981258 62.74467391304348 188 15
HLA-A*24:02 HPGPSPLP 30566.063840187377 38.64925 0.0734055526554584 0.0042857722816334 62.74467391304348 188 16
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
187:
- WT.4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
188:
- MT.4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
allele seq_num start end length peptide ic50 rank
HLA-A*24:02 187 1 8 8 DTSRYDER 40708.92 64
HLA-A*24:02 187 2 9 8 TSRYDERP 44150.12 92
HLA-A*24:02 187 3 10 8 SRYDERPG 44389.61 94
HLA-A*24:02 187 4 11 8 RYDERPGP 19088.05 14
HLA-A*24:02 187 5 12 8 YDERPGPS 42740.04 80
HLA-A*24:02 187 6 13 8 DERPGPSP 45360.65 99
HLA-A*24:02 187 7 14 8 ERPGPSPL 34610.27 37
HLA-A*24:02 187 8 15 8 RPGPSPLP 35752.13 41
HLA-A*24:02 188 1 8 8 DTSRYDER 40708.92 64
HLA-A*24:02 188 10 17 8 GPSPLPHP 44872.5 97
HLA-A*24:02 188 11 18 8 PSPLPHPG 40270.83 61
HLA-A*24:02 188 12 19 8 SPLPHPGP 43675.0 88
HLA-A*24:02 188 13 20 8 PLPHPGPS 41825.1 73
HLA-A*24:02 188 14 21 8 LPHPGPSP 45115.92 98
HLA-A*24:02 188 15 22 8 PHPGPSPL 28178.94 25
HLA-A*24:02 188 16 23 8 HPGPSPLP 42740.04 80
HLA-A*24:02 188 2 9 8 TSRYDERP 44150.12 92
HLA-A*24:02 188 3 10 8 SRYDERPG 44389.61 94
HLA-A*24:02 188 4 11 8 RYDERPGP 19088.05 14
HLA-A*24:02 188 5 12 8 YDERPGPS 42740.04 80
HLA-A*24:02 188 6 13 8 DERPGPSP 45360.65 99
HLA-A*24:02 188 7 14 8 ERPGPSPL 34610.27 37
HLA-A*24:02 188 8 15 8 RPGPSPLP 35752.13 41
HLA-A*24:02 188 9 16 8 PGPSPLPH 41375.0 69
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chromosome_name start stop reference variant gene_name transcript_name transcript_support_level transcript_length biotype amino_acid_change codon_change ensembl_gene_id hgvsc hgvsp wildtype_amino_acid_sequence frameshift_amino_acid_sequence fusion_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index protein_length_change fusion_read_support fusion_expression
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC SNRNP70 ENST00000598441.6 1 437 protein_coding R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH AGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC/AGGCCCGGCCCCTCCCCGCTTCCGCAC ENSG00000104852 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis MTQFLPPNLLALFAPRDPIPYLPPLEKLPHEKHHNQPYCGIAPYIREFEDPRDAPPPTRAETREERMERKRREKIERRQQEVETELKMWDPHNDPNAQGDAFKTLFVARVNYDTTESKLRREFEVYGPIKRIHMVYSKRSGKPRGYAFIEYEHERDMHSAYKHADGKKIDGRRVLVDVERGRTVKGWRPRRLGGGLGGTRRGGADVNIRHSGRDDTSRYDERPGPSPLPHRDRDRDRERERRERSRERDKERERRRSRSRDRRRRSRSRDKEERRRSRERSKDKDRDRKRRSSRSRERARRERERKEELRGGGGDMAEPSEAGDAPPDDGPPGELGPDGPDGPEEKGRDRDRERRRSHRSERERRRDRDRDRDRDREHKRGERGSERGRDEARGGGGGQDNGLEGLGNDSRDMYMESEGGDGYLAPENGYLMEAAPE inframe_del 222-230 NA 5.92318170493785 NA NA 541 0.642 NA NA 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH NA NA
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Chromosome Start Stop Reference Variant Transcript Transcript Support Level Transcript Length Biotype Ensembl Gene ID Variant Type Mutation Protein Position Gene Name HGVSc HGVSp HLA Allele Peptide Length Sub-peptide Position Mutation Position MT Epitope Seq WT Epitope Seq Best MT IC50 Score Method Best MT IC50 Score Corresponding WT IC50 Score Corresponding Fold Change Best MT Percentile Method Best MT Percentile Corresponding WT Percentile Tumor DNA Depth Tumor DNA VAF Tumor RNA Depth Tumor RNA VAF Normal Depth Normal VAF Gene Expression Transcript Expression Median MT IC50 Score Median WT IC50 Score Median Fold Change Median MT Percentile Median WT Percentile MHCflurryEL Processing WT Score MHCflurryEL Processing MT Score MHCflurryEL Presentation WT Score MHCflurryEL Presentation MT Score MHCflurryEL Presentation WT Percentile MHCflurryEL Presentation MT Percentile MHCflurry WT IC50 Score MHCflurry MT IC50 Score MHCflurry WT Percentile MHCflurry MT Percentile NetMHCcons WT IC50 Score NetMHCcons MT IC50 Score NetMHCcons WT Percentile NetMHCcons MT Percentile Index
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 9 NA PGPSPLPH NA MHCflurry 29567.112 NA NA MHCflurry 29.901 NA 541 0.642 NA NA NA NA 5.923 NA 35471.056 NA NA 69.0 NA NA 0.0039555989205837 NA 0.0034648792128485 NA 99.28660326086955 NA 29567.11179728288 NA 29.901375 NA 41375.0 NA 69.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 10 NA GPSPLPHP NA MHCflurry 32238.827 NA NA MHCflurry 62.538 NA 541 0.642 NA NA NA NA 5.923 NA 38555.663 NA NA 97.0 NA NA 0.0091095454990863 NA 0.0032449602113947 NA 99.28660326086955 NA 32238.826831033744 NA 62.53750000000001 NA 44872.5 NA 97.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 11 NA PSPLPHPG NA MHCflurry 29379.043 NA NA MHCflurry 27.702 NA 541 0.642 NA NA NA NA 5.923 NA 34824.937 NA NA 61.0 NA NA 0.0016487352550029 NA 0.0034581053211255 NA 99.28660326086955 NA 29379.043180397464 NA 27.701625000000003 NA 40270.83 NA 61.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 12 NA SPLPHPGP NA MHCflurry 31158.766 NA NA MHCflurry 46.754 NA 541 0.642 NA NA NA NA 5.923 NA 37416.883 NA NA 88.0 NA NA 0.0541136674582958 NA 0.0039304161323562 NA 99.28660326086955 NA 31158.76639899322 NA 46.754125 NA 43675.0 NA 88.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 13 NA PLPHPGPS NA MHCflurry 30217.233 NA NA MHCflurry 35.312 NA 541 0.642 NA NA NA NA 5.923 NA 36021.167 NA NA 73.0 NA NA 0.0016611069440841 NA 0.0033653525925785 NA 99.28660326086955 NA 30217.233477385817 NA 35.31175 NA 41825.1 NA 73.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 14 NA LPHPGPSP NA MHCflurry 30302.492 NA NA MHCflurry 31.643 NA 541 0.642 NA NA NA NA 5.923 NA 37709.206 NA NA 35.312 NA NA 0.2167538255453109 NA 0.0071564257627674 NA 31.64317934782608 NA 30302.49188859721 NA 35.31175 NA 45115.92 NA 98.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 15 NA PHPGPSPL NA MHCflurry 20576.049 NA NA MHCflurry 8.502 NA 541 0.642 NA NA NA NA 5.923 NA 24377.495 NA NA 25.0 NA NA 0.0041200742125511 NA 0.0049231312981258 NA 62.74467391304348 NA 20576.049372663783 NA 8.502375 NA 28178.94 NA 25.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
chr19 49107710 49107819 GAGGTAAGATTGGGCGACCGGTGTCCTGGGGTGGGGGGCGGTCACGGGGGGAGCCCAGCCACACAGGTCTGCCCACCTCATCCAGGCCCGGCCCCTCCCCGCTTCCGCAC GAGGCCCGGCCCCTCCCCGCTTCCGCAC ENST00000598441.6 1 437 protein_coding ENSG00000104852 inframe_del R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH 222-230 SNRNP70 ENST00000598441.6:c.665+2_666del ENSP00000472998.1:p.Ter223_Ter230delinsProGlyProSerProLeuProHis HLA-A*24:02 8 16 NA HPGPSPLP NA MHCflurry 30566.064 NA NA MHCflurry 38.649 NA 541 0.642 NA NA NA NA 5.923 NA 36653.052 NA NA 62.745 NA NA 0.0734055526554584 NA 0.0042857722816334 NA 62.74467391304348 NA 30566.063840187377 NA 38.64925 NA 42740.04 NA 80.0 4899.SNRNP70.ENST00000598441.6.inframe_del.222-230R*DWATGVLGWGAVTGGAQPHRSAHLIQARPLPASAX/RPGPSPLPH
26 changes: 26 additions & 0 deletions tests/test_output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,3 +415,29 @@ def test_parse_output_runs_and_produces_expected_output_for_empty_percentile_2(s
self.assertFalse(parser.execute())
expected_output_file = os.path.join(self.test_data_dir, "mhcflurry_no_percentile", "output_no_percentile.iedb.parsed.tsv")
self.assertTrue(compare(parse_output_output_file.name, expected_output_file))

def test_parse_output_runs_and_produces_expeceted_output_for_complex_inframe_insertion(self):
    """Parse the complex_inframe_insertion fixture and compare with the stored result.

    Feeds the MHCflurry and NetMHCcons prediction files, the variant TSV
    and the key file from the ``complex_inframe_insertion`` test-data
    directory through ``DefaultOutputParser`` and checks that the file it
    writes matches the checked-in ``output.iedb.parsed.tsv``.
    """
    fixture_dir = os.path.join(self.test_data_dir, "complex_inframe_insertion")
    parse_output_input_iedb_file = [
        os.path.join(fixture_dir, "input.MHCflurry.HLA-A*24:02.8.tsv"),
        os.path.join(fixture_dir, "input.netmhccons.HLA-A*24:02.8.tsv"),
    ]
    parse_output_input_tsv_file = os.path.join(fixture_dir, "input.tsv")
    parse_output_key_file = os.path.join(fixture_dir, "input.key")
    parse_output_output_file = tempfile.NamedTemporaryFile()

    parse_output_params = {
        'input_iedb_files' : parse_output_input_iedb_file,
        'input_tsv_file' : parse_output_input_tsv_file,
        'key_file' : parse_output_key_file,
        'output_file' : parse_output_output_file.name,
        'sample_name' : 'input',
        'flurry_state' : 'both',
    }
    parser = DefaultOutputParser(**parse_output_params)

    # execute() is expected to return a falsy value when it completes.
    self.assertFalse(parser.execute())

    # Compare against the committed fixture. The expected file must never be
    # regenerated from the actual output inside the test: doing so makes the
    # comparison below trivially true and rewrites test data on every run.
    expected_output_file = os.path.join(fixture_dir, "output.iedb.parsed.tsv")
    self.assertTrue(compare(parse_output_output_file.name, expected_output_file))

0 comments on commit dd495dd

Please sign in to comment.