Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/hotfix'
Browse files Browse the repository at this point in the history
  • Loading branch information
susannasiebert committed Mar 11, 2020
2 parents 878a208 + 1d96a18 commit 8ab7511
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
# The short X.Y version.
version = '1.5'
# The full version, including alpha/beta/rc tags.
release = '1.5.5'
release = '1.5.6'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
17 changes: 6 additions & 11 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,12 @@ New in release |release|

This is a hotfix release. It fixes the following issues:

- The ``pvacfuse run`` command would previously output a misleading warning
message if an AGFusion input directory didn't contain any processable fusion
entries. This warning message has been fixed.
- Between VEP versions, the Downstream protein sequence prediction for some
frameshift mutations was changed to now include a leading wildtype amino
acid. This potential difference in VEP-predicted Downstream protein
sequences was not accounted for and would result in frameshift mutation
protein prediction that would duplicate this leading wildtype amino acid.
This version updates our prediction pipeline to remove this duplicated amino
acid and output a fatal error if the Downstream protein sequence does not
contain the leading wildtype amino acid.
- The ``pvacbind run`` command would previously error out if one of the input
sequences contained an X stop codon. This update will remove the X amino
acid and the downstream sequence before further processing the remaining
protein sequence.
- A bug in the ``pvacfuse top_score_filter`` code would previously result
in an error when trying to run this command. This has now been fixed.

New in version |version|
------------------------
Expand Down
10 changes: 10 additions & 0 deletions docs/releases/1_5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,13 @@ This is a hotfix release. It fixes the following issues:
This version updates our prediction pipeline to remove this duplicated amino
acid and output a fatal error if the Downstream protein sequence does not
contain the leading wildtype amino acid.

1.5.6
-----

- The ``pvacbind run`` command would previously error out if one of the input
sequences contained an X stop codon. This update will remove the X amino
acid and the downstream sequence before further processing the remaining
protein sequence.
- A bug in the ``pvacfuse top_score_filter`` code would previously result
in an error when trying to run this command. This has now been fixed.
12 changes: 9 additions & 3 deletions lib/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,10 +503,16 @@ def uniquify_records(self, records):
count += 1
return (uniq_records, keys)

def create_per_length_fasta_and_process_stops(self, length):
    """Write the per-length FASTA file, truncating sequences at stop markers.

    Every record parsed from ``self.input_file`` has its sequence
    upper-cased and cut immediately before the first ``X`` or ``*``
    character (whichever occurs first). Records whose truncated sequence
    is still at least ``length`` residues long are written, with the
    truncated sequence, to ``self.fasta_basename(length)``; shorter
    records are dropped.

    :param length: minimum number of residues a truncated sequence must
        have in order to be kept.
    """
    stop_chars = set('X*')
    records = []
    for record in SeqIO.parse(self.input_file, "fasta"):
        # Upper-case first so that a lower-case 'x' is also treated as a stop.
        sequence = str(record.seq).upper()
        # Index of the first stop marker, or the full length if none is present.
        stop_index = next(
            (i for i, residue in enumerate(sequence) if residue in stop_chars),
            len(sequence),
        )
        sequence = sequence[:stop_index]
        if len(sequence) >= length:
            record.seq = Seq(sequence, IUPAC.protein)
            records.append(record)
    SeqIO.write(records, self.fasta_basename(length), "fasta")

Expand Down Expand Up @@ -702,7 +708,7 @@ def execute(self):

split_parsed_output_files = []
for length in self.epitope_lengths:
self.create_per_length_fasta(length)
self.create_per_length_fasta_and_process_stops(length)
chunks = self.split_fasta_file(length)
self.call_iedb(chunks, length)
split_parsed_output_files.extend(self.parse_outputs(chunks, length))
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@

setup(
name="pvactools",
version="1.5.5",
version="1.5.6",
packages=[
"tools",
"tools.pvacbind",
Expand Down
10 changes: 10 additions & 0 deletions tests/test_data/pvacbind/input_with_stops.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>1
LPLPPPPLLPLLLLLXGASGG
>2
LPLPPPPLLPLLP*LLLLGASGG
>3
DPASAAAAAAAAAAxAAAAVIPTVSTPPP
>4
DPASAAAAAXAAVIP*TVSTPPP
>5
VNSXATLSRTLLAAAGGSSLQ
8 changes: 8 additions & 0 deletions tests/test_data/pvacbind/output_with_stops.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>1
LPLPPPPLLPLLLLL
>2
LPLPPPPLLPLLP
>3
DPASAAAAAAAAAA
>4
DPASAAAAA
18 changes: 18 additions & 0 deletions tests/test_pvacbind.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,24 @@ def test_run_compiles(self):
))
self.assertTrue(compiled_run_path)

def test_process_stops(self):
    """Check that stop markers are handled when building the per-length FASTA.

    Runs ``create_per_length_fasta_and_process_stops(9)`` on
    ``input_with_stops.fasta`` and compares the generated ``Test.9.fa``
    against ``output_with_stops.fasta``.
    """
    # The context manager removes the temporary directory even when the
    # pipeline call or the assertion below raises.
    with tempfile.TemporaryDirectory(dir=self.test_data_directory) as output_dir:
        params = {
            'input_file': os.path.join(self.test_data_directory, "input_with_stops.fasta"),
            'input_file_type': 'fasta',
            'sample_name': 'Test',
            'alleles': ['HLA-G*01:09'],
            'prediction_algorithms': ['NetMHC'],
            'output_dir': output_dir,
            'epitope_lengths': [9],
        }
        pipeline = PvacbindPipeline(**params)
        pipeline.create_per_length_fasta_and_process_stops(9)
        output_file = os.path.join(output_dir, 'tmp', 'Test.9.fa')
        expected_file = os.path.join(self.test_data_directory, 'output_with_stops.fasta')
        # NOTE(review): `cmp` is presumably filecmp.cmp imported at module
        # level -- confirm against the file's imports.
        self.assertTrue(cmp(output_file, expected_file))

def test_pvacbind_pipeline(self):
with patch('requests.post', unittest.mock.Mock(side_effect = lambda url, data, files=None: make_response(
data,
Expand Down

0 comments on commit 8ab7511

Please sign in to comment.