Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into staging
Browse files Browse the repository at this point in the history
  • Loading branch information
susannasiebert committed Jan 18, 2022
2 parents b4caacd + 6e43147 commit 8faefb3
Show file tree
Hide file tree
Showing 13 changed files with 197 additions and 16 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -22,3 +22,5 @@ nytprof.out
__pycache__
build/
dist/
.DS_Store
docs/.DS_Store
2 changes: 1 addition & 1 deletion docs/conf.py
Expand Up @@ -69,7 +69,7 @@
# The short X.Y version.
version = '2.0'
# The full version, including alpha/beta/rc tags.
release = '2.0.5'
release = '2.0.6'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
13 changes: 3 additions & 10 deletions docs/index.rst
Expand Up @@ -60,16 +60,9 @@ New in release |release|

This is a bugfix release. It fixes the following problem(s):

- Some users have reported "Cannot open file" errors when running
NetMHCstabpan. This release adds a retry when this error in encountered.
- This release adds stricter checking to pVACbind for unsupported amino acids.
Sequences containing an unsupported amino acid will be skipped. The
following amino acids are supported: ``A``, ``R``, ``N``, ``D``, ``C``, ``E``,
``Q``, ``G``, ``H``, ``I``, ``L``, ``K``, ``M``, ``F``, ``P``, ``S``, ``T``,
``W``, ``Y``, ``V``.
- Some VEP predictions for supported variant types might not contain any
protein position information, rendering pVACseq unable to parse such
annotations. Annotations without protein position information will now be skipped.
- When running pVAcseq with a proximal variants VCF, proximal DNPs affecting
multiple amino acids were not handled correctly and would result in an error.
This issue has now been fixed.

New in version |version|
------------------------
Expand Down
8 changes: 8 additions & 0 deletions docs/releases/2_0.rst
Expand Up @@ -160,3 +160,11 @@ This is a bugfix release. It fixes the following problem(s):
protein position information, rendering pVACseq unable to parse such
annotations. Annotations without protein position information will now be skipped.

Version 2.0.6
-------------

This is a bugfix release. It fixes the following problem(s):

- When running pVAcseq with a proximal variants VCF, proximal DNPs affecting
multiple amino acids were not handled correctly and would result in an error.
This issue has now been fixed.
32 changes: 27 additions & 5 deletions pvactools/lib/fasta_generator.py
Expand Up @@ -111,20 +111,42 @@ def add_proximal_variants(self, somatic_variant_index, wildtype_subsequence, mut
codon_changes = [ item['codon_change'] for item in filtered_lines ]
proximal_variant_mutant_amino_acid = ProximalVariant.combine_conflicting_variants(codon_changes)

proximal_variant_position = int(protein_position) - 1 - mutation_offset
if proximal_variant_position <= 0 or proximal_variant_position >= len(wildtype_subsequence):
if '-' in protein_position:
proximal_variant_start_position = int(protein_position.split('-')[0]) - 1 - mutation_offset
else:
proximal_variant_start_position = int(protein_position) - 1 - mutation_offset
proximal_variant_end_position = proximal_variant_start_position + len(proximal_variant_mutant_amino_acid)

if proximal_variant_end_position <= 0 or proximal_variant_start_position >= len(wildtype_subsequence):
continue
if len(proximal_variant_wildtype_amino_acid) != len(proximal_variant_mutant_amino_acid):
print("Nearby variant is not a missense mutation. Skipping.")
continue
if wildtype_subsequence[proximal_variant_position] != proximal_variant_wildtype_amino_acid:

if proximal_variant_end_position > len(wildtype_subsequence):
#the DNP extends past the end of the wildtype_subsequence
missing_amino_acids_count = proximal_variant_end_position - len(wildtype_subsequence)
#remove proximal variant amino acids after wildtype subsquence end
proximal_variant_wildtype_amino_acid = proximal_variant_wildtype_amino_acid[:len(proximal_variant_wildtype_amino_acid) - missing_amino_acids_count]
proximal_variant_mutant_amino_acid = proximal_variant_mutant_amino_acid[:len(proximal_variant_mutant_amino_acid) - missing_amino_acids_count]

if proximal_variant_start_position < 0:
#DNP starts before the beginning of the wildtype subsequence
missing_amino_acids_count = abs(proximal_variant_start_position)
proximal_variant_start_position = 0
#remove proximal variant amino acids before wildtype subsequence start
proximal_variant_wildtype_amino_acid = proximal_variant_wildtype_amino_acid[missing_amino_acids_count:]
proximal_variant_mutant_amino_acid = proximal_variant_mutant_amino_acid[missing_amino_acids_count:]

if wildtype_subsequence[proximal_variant_start_position:proximal_variant_end_position] != proximal_variant_wildtype_amino_acid:
sys.exit(
"Error when processing proximal variant.\n" +
"The wildtype amino acid for variant %s with substring %s is different than expected.\n" % (somatic_variant_index, wildtype_subsequence) +
"Actual wildtype amino acid: %s\n" % wildtype_subsequence[proximal_variant_position] +
"Actual wildtype amino acid: %s\n" % wildtype_subsequence[proximal_variant_start_position:proximal_variant_end_position] +
"Wildtype amino acid of the proximal_variant: %s" % proximal_variant_wildtype_amino_acid
)
wildtype_subsequence_with_proximal_variants = wildtype_subsequence_with_proximal_variants[:proximal_variant_position] + proximal_variant_mutant_amino_acid + wildtype_subsequence_with_proximal_variants[proximal_variant_position+1:]

wildtype_subsequence_with_proximal_variants = wildtype_subsequence_with_proximal_variants[:proximal_variant_start_position] + proximal_variant_mutant_amino_acid + wildtype_subsequence_with_proximal_variants[proximal_variant_end_position:]
return wildtype_subsequence_with_proximal_variants

def execute(self):
Expand Down
Binary file removed pvactools/tools/pvacview/.DS_Store
Binary file not shown.
11 changes: 11 additions & 0 deletions tests/test_data/fasta_generator/proximal_dnp/input.tsv
@@ -0,0 +1,11 @@
chromosome_name start stop reference variant gene_name transcript_name transcript_support_level amino_acid_change codon_change ensembl_gene_id hgvsc hgvsp wildtype_amino_acid_sequence frameshift_amino_acid_sequence fusion_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index protein_length_change
chr16 28593449 28593450 AG GA SULT1A2 ENST00000335715 1 A/V gCT/gTC ENSG00000197165 ENST00000335715.9:c.491_492delinsTC ENSP00000338742.4:p.Ala164Val MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGMETLKNTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDLMVEHTSFKEMKKNPMTNYTTVRREFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAKKMAGCSLSFRSEL missense 164 NA 0.0534405615378929 NA NA 185 0.136 NA NA 1.SULT1A2.ENST00000335715.missense.164A/V
chr16 28593449 28593450 AG GA SULT1A2 ENST00000395630 5 A/V gCT/gTC ENSG00000197165 ENST00000395630.5:c.491_492delinsTC ENSP00000378992.1:p.Ala164Val MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGMETLKNTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDLMVEHTSFKEMKKNPMTNYTTVRREFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAKKMAGCSLSFRSEL missense 164 NA 0.0534405615378929 NA NA 185 0.136 NA NA 2.SULT1A2.ENST00000395630.missense.164A/V
chr16 28593449 28593450 AG GA SULT1A2 ENST00000526384 3 A/V gCT/gTC ENSG00000197165 ENST00000526384.1:c.491_492delinsTC ENSP00000435358.1:p.Ala164Val MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGMETLKNTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLF missense 164 NA 0.0534405615378929 NA NA 185 0.136 NA NA 3.SULT1A2.ENST00000526384.missense.164A/V
chr16 28593449 28593450 AG GA SULT1A2 ENST00000534108 2 GW/GR ggCTgg/ggTCgg ENSG00000197165 ENST00000534108.5:c.576_577delinsTC ENSP00000433075.1:p.Trp193Arg MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGVCVLGARGVEEDRAGASAHQTFPDPLLRDGDSEKHTSPTTPEDTPAPGSAPPDSVGSEGQGGLCCPQRKGCGGFLLPLLPHGQSVPSPWDLGKLPGEVHGWRSVLWVLVPARARVVGAEPHPPCSLPLL missense 192-193 NA 0.0534405615378929 NA NA 185 0.136 NA NA 4.SULT1A2.ENST00000534108.missense.192-193GW/GR
chr16 28593449 28593450 AG GA AC020765.6 ENST00000677940 NA A/V gCT/gTC ENSG00000288656 ENST00000677940.1:c.257_258delinsTC ENSP00000503077.1:p.Ala86Val MLAKLLCDQVVGAPIAVSAFYAGMSILQGKDDIFLDLKQKFWNTYMVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDLMVEHTSFKEMKKNPMTNYTTVRREFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAKKMAGC missense 86 NA 0.0 NA NA 185 0.136 NA NA 5.AC020765.6.ENST00000677940.missense.86A/V
chr16 28593472 28593473 T G SULT1A2 ENST00000335715 1 E/D gaA/gaC ENSG00000197165 ENST00000335715.9:c.468A>C ENSP00000338742.4:p.Glu156Asp MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGMETLKNTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDLMVEHTSFKEMKKNPMTNYTTVRREFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAKKMAGCSLSFRSEL missense 156 NA 0.0534405615378929 NA NA 195 0.121 0 NA 6.SULT1A2.ENST00000335715.missense.156E/D
chr16 28593472 28593473 T G SULT1A2 ENST00000395630 5 E/D gaA/gaC ENSG00000197165 ENST00000395630.5:c.468A>C ENSP00000378992.1:p.Glu156Asp MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGMETLKNTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDLMVEHTSFKEMKKNPMTNYTTVRREFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAKKMAGCSLSFRSEL missense 156 NA 0.0534405615378929 NA NA 195 0.121 0 NA 7.SULT1A2.ENST00000395630.missense.156E/D
chr16 28593472 28593473 T G SULT1A2 ENST00000526384 3 E/D gaA/gaC ENSG00000197165 ENST00000526384.1:c.468A>C ENSP00000435358.1:p.Glu156Asp MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGMETLKNTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLF missense 156 NA 0.0534405615378929 NA NA 195 0.121 0 NA 8.SULT1A2.ENST00000526384.missense.156E/D
chr16 28593472 28593473 T G SULT1A2 ENST00000534108 2 K/Q Aag/Cag ENSG00000197165 ENST00000534108.5:c.553A>C ENSP00000433075.1:p.Lys185Gln MELIQDISRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKVPGIPSGVCVLGARGVEEDRAGASAHQTFPDPLLRDGDSEKHTSPTTPEDTPAPGSAPPDSVGSEGQGGLCCPQRKGCGGFLLPLLPHGQSVPSPWDLGKLPGEVHGWRSVLWVLVPARARVVGAEPHPPCSLPLL missense 185 NA 0.0534405615378929 NA NA 195 0.121 0 NA 9.SULT1A2.ENST00000534108.missense.185K/Q
chr16 28593472 28593473 T G AC020765.6 ENST00000677940 NA E/D gaA/gaC ENSG00000288656 ENST00000677940.1:c.234A>C ENSP00000503077.1:p.Glu78Asp MLAKLLCDQVVGAPIAVSAFYAGMSILQGKDDIFLDLKQKFWNTYMVVYVARNAKDVAVSYYHFYHMAKVYPHPGTWESFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDLMVEHTSFKEMKKNPMTNYTTVRREFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAKKMAGC missense 78 NA 0.0 NA NA 195 0.121 0 NA 10.AC020765.6.ENST00000677940.missense.78E/D
16 changes: 16 additions & 0 deletions tests/test_data/fasta_generator/proximal_dnp/output.cut_off.fasta
@@ -0,0 +1,16 @@
>1
SFLEKFMAGEVSYGS
>2
SFLEKFMVGEVSYGS
>3
KLPGEVHGWRSVLWVL
>4
QLPGEVHGRRSVLWVL
>5
YPHPGTWESFLEKFM
>6
YPHPGTWDSFLEKFM
>7
PSPWDLGKLPGEVHG
>8
PSPWDLGQLPGEVHG
28 changes: 28 additions & 0 deletions tests/test_data/fasta_generator/proximal_dnp/output.cut_off.key
@@ -0,0 +1,28 @@
1:
- WT.1.SULT1A2.ENST00000335715.missense.164A/V
- WT.2.SULT1A2.ENST00000395630.missense.164A/V
- WT.3.SULT1A2.ENST00000526384.missense.164A/V
- WT.5.AC020765.6.ENST00000677940.missense.86A/V
2:
- MT.1.SULT1A2.ENST00000335715.missense.164A/V
- MT.2.SULT1A2.ENST00000395630.missense.164A/V
- MT.3.SULT1A2.ENST00000526384.missense.164A/V
- MT.5.AC020765.6.ENST00000677940.missense.86A/V
3:
- WT.4.SULT1A2.ENST00000534108.missense.192-193GW/GR
4:
- MT.4.SULT1A2.ENST00000534108.missense.192-193GW/GR
5:
- WT.6.SULT1A2.ENST00000335715.missense.156E/D
- WT.7.SULT1A2.ENST00000395630.missense.156E/D
- WT.8.SULT1A2.ENST00000526384.missense.156E/D
- WT.10.AC020765.6.ENST00000677940.missense.78E/D
6:
- MT.6.SULT1A2.ENST00000335715.missense.156E/D
- MT.7.SULT1A2.ENST00000395630.missense.156E/D
- MT.8.SULT1A2.ENST00000526384.missense.156E/D
- MT.10.AC020765.6.ENST00000677940.missense.78E/D
7:
- WT.9.SULT1A2.ENST00000534108.missense.185K/Q
8:
- MT.9.SULT1A2.ENST00000534108.missense.185K/Q
16 changes: 16 additions & 0 deletions tests/test_data/fasta_generator/proximal_dnp/output.fasta
@@ -0,0 +1,16 @@
>1
WESFLEKFMAGEVSYGSWY
>2
WDSFLEKFMVGEVSYGSWY
>3
LGKLPGEVHGWRSVLWVLVP
>4
LGQLPGEVHGRRSVLWVLVP
>5
KVYPHPGTWESFLEKFMAG
>6
KVYPHPGTWDSFLEKFMVG
>7
SVPSPWDLGKLPGEVHGWR
>8
SVPSPWDLGQLPGEVHGRR
28 changes: 28 additions & 0 deletions tests/test_data/fasta_generator/proximal_dnp/output.key
@@ -0,0 +1,28 @@
1:
- WT.1.SULT1A2.ENST00000335715.missense.164A/V
- WT.2.SULT1A2.ENST00000395630.missense.164A/V
- WT.3.SULT1A2.ENST00000526384.missense.164A/V
- WT.5.AC020765.6.ENST00000677940.missense.86A/V
2:
- MT.1.SULT1A2.ENST00000335715.missense.164A/V
- MT.2.SULT1A2.ENST00000395630.missense.164A/V
- MT.3.SULT1A2.ENST00000526384.missense.164A/V
- MT.5.AC020765.6.ENST00000677940.missense.86A/V
3:
- WT.4.SULT1A2.ENST00000534108.missense.192-193GW/GR
4:
- MT.4.SULT1A2.ENST00000534108.missense.192-193GW/GR
5:
- WT.6.SULT1A2.ENST00000335715.missense.156E/D
- WT.7.SULT1A2.ENST00000395630.missense.156E/D
- WT.8.SULT1A2.ENST00000526384.missense.156E/D
- WT.10.AC020765.6.ENST00000677940.missense.78E/D
6:
- MT.6.SULT1A2.ENST00000335715.missense.156E/D
- MT.7.SULT1A2.ENST00000395630.missense.156E/D
- MT.8.SULT1A2.ENST00000526384.missense.156E/D
- MT.10.AC020765.6.ENST00000677940.missense.78E/D
7:
- WT.9.SULT1A2.ENST00000534108.missense.185K/Q
8:
- MT.9.SULT1A2.ENST00000534108.missense.185K/Q
11 changes: 11 additions & 0 deletions tests/test_data/fasta_generator/proximal_dnp/proximal_variants.tsv
@@ -0,0 +1,11 @@
chromosome_name start stop reference variant amino_acid_change codon_change protein_position type main_somatic_variant
chr16 28593472 28593473 T G E/D gaA/gaC 156 somatic 1.SULT1A2.ENST00000335715.missense.164A/V
chr16 28593472 28593473 T G E/D gaA/gaC 156 somatic 2.SULT1A2.ENST00000395630.missense.164A/V
chr16 28593472 28593473 T G E/D gaA/gaC 156 somatic 3.SULT1A2.ENST00000526384.missense.164A/V
chr16 28593472 28593473 T G K/Q Aag/Cag 185 somatic 4.SULT1A2.ENST00000534108.missense.192-193GW/GR
chr16 28593472 28593473 T G E/D gaA/gaC 78 somatic 5.AC020765.6.ENST00000677940.missense.86A/V
chr16 28593449 28593450 AG GA A/V gCT/gTC 164 somatic 6.SULT1A2.ENST00000335715.missense.156E/D
chr16 28593449 28593450 AG GA A/V gCT/gTC 164 somatic 7.SULT1A2.ENST00000395630.missense.156E/D
chr16 28593449 28593450 AG GA A/V gCT/gTC 164 somatic 8.SULT1A2.ENST00000526384.missense.156E/D
chr16 28593449 28593450 AG GA GW/GR ggCTgg/ggTCgg 192-193 somatic 9.SULT1A2.ENST00000534108.missense.185K/Q
chr16 28593449 28593450 AG GA A/V gCT/gTC 86 somatic 10.AC020765.6.ENST00000677940.missense.78E/D
46 changes: 46 additions & 0 deletions tests/test_fasta_generator.py
Expand Up @@ -747,6 +747,52 @@ def test_multiple_proximal_variants_on_same_codon_results_in_stop_codon(self):
self.assertEqual(os.path.getsize(generate_fasta_output_file.name), 0)
self.assertEqual(os.path.getsize(generate_fasta_key_output_file.name), 0)

def test_proximal_dnp(self):
test_data_dir = os.path.join(self.test_data_dir, 'proximal_dnp')
generate_fasta_input_file = os.path.join(test_data_dir, 'input.tsv')
generate_fasta_proximal_variants_file = os.path.join(test_data_dir, 'proximal_variants.tsv')
generate_fasta_output_file = tempfile.NamedTemporaryFile()
generate_fasta_key_output_file = tempfile.NamedTemporaryFile()

generate_fasta_params = {
'input_file' : generate_fasta_input_file,
'epitope_length' : 10,
'flanking_sequence_length' : 9,
'output_file' : generate_fasta_output_file.name,
'output_key_file' : generate_fasta_key_output_file.name,
'proximal_variants_file' : generate_fasta_proximal_variants_file,
}
generator = FastaGenerator(**generate_fasta_params)

self.assertFalse(generator.execute())
expected_output_file = os.path.join(test_data_dir, 'output.fasta')
self.assertTrue(cmp(generate_fasta_output_file.name, expected_output_file))
expected_key_output_file = os.path.join(test_data_dir, 'output.key')
self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file))

def test_proximal_dnp_cut_off(self):
test_data_dir = os.path.join(self.test_data_dir, 'proximal_dnp')
generate_fasta_input_file = os.path.join(test_data_dir, 'input.tsv')
generate_fasta_proximal_variants_file = os.path.join(test_data_dir, 'proximal_variants.tsv')
generate_fasta_output_file = tempfile.NamedTemporaryFile()
generate_fasta_key_output_file = tempfile.NamedTemporaryFile()

generate_fasta_params = {
'input_file' : generate_fasta_input_file,
'epitope_length' : 8,
'flanking_sequence_length' : 7,
'output_file' : generate_fasta_output_file.name,
'output_key_file' : generate_fasta_key_output_file.name,
'proximal_variants_file' : generate_fasta_proximal_variants_file,
}
generator = FastaGenerator(**generate_fasta_params)

self.assertFalse(generator.execute())
expected_output_file = os.path.join(test_data_dir, 'output.cut_off.fasta')
self.assertTrue(cmp(generate_fasta_output_file.name, expected_output_file))
expected_key_output_file = os.path.join(test_data_dir, 'output.cut_off.key')
self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file))

def test_distance_from_start_works_as_expected(self):
generate_fasta_input_file = tempfile.NamedTemporaryFile()
generate_fasta_output_file = tempfile.NamedTemporaryFile()
Expand Down

0 comments on commit 8faefb3

Please sign in to comment.