Skip to content

Commit

Permalink
Merge pull request #143 from hammerlab/minimal-epitope-report
Browse files Browse the repository at this point in the history
Minimal epitope report
  • Loading branch information
julia326 committed Aug 3, 2017
2 parents 7726c7c + 4198cad commit 4386d26
Show file tree
Hide file tree
Showing 9 changed files with 239 additions and 38 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ xlsxwriter
xlrd
xvfbwrapper
future>=0.16.0 # needed by pylint
astropy
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
'xlsxwriter',
'xvfbwrapper',
'future>=0.16.0', # needed by pylint
'astropy',
],

long_description=readme,
Expand Down
2 changes: 1 addition & 1 deletion vaxrank/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.1"
__version__ = "0.6.0"
32 changes: 32 additions & 0 deletions vaxrank/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def new_run_arg_parser():
add_mhc_args(arg_parser)
add_vaccine_peptide_args(arg_parser)
add_output_args(arg_parser)
add_optional_output_args(arg_parser)
add_supplemental_report_args(arg_parser)
return arg_parser

Expand All @@ -73,10 +74,41 @@ def cached_run_arg_parser():
default="",
help="Path to JSON file containing results of vaccine peptide report")
add_output_args(arg_parser)
add_optional_output_args(arg_parser)
add_supplemental_report_args(arg_parser)
return arg_parser


# Lets the user specify whether they want to see particular sections in the report.
def add_optional_output_args(arg_parser):
manufacturability_args = arg_parser.add_mutually_exclusive_group(required=False)
manufacturability_args.add_argument(
"--include-manufacturability-in-report",
dest="manufacturability",
action="store_true")
manufacturability_args.add_argument(
"--no-manufacturability-in-report",
dest="manufacturability",
action="store_false")
arg_parser.set_defaults(manufacturability=True)

wt_epitope_args = arg_parser.add_mutually_exclusive_group(required=False)
wt_epitope_args.add_argument(
"--include-non-overlapping-epitopes-in-report",
dest="wt_epitopes",
action="store_true",
help="Set to true to include a report section for each vaccine peptide containing "
"strong binders that do not overlap the mutation")

wt_epitope_args.add_argument(
"--no-non-overlapping-epitopes-in-report",
dest="wt_epitopes",
action="store_false",
help="Set to false to exclude report information for each vaccine peptide about "
"strong binders that do not overlap the mutation")
arg_parser.set_defaults(wt_epitopes=True)


def add_output_args(arg_parser):
output_args_group = arg_parser.add_argument_group("Output options")

Expand Down
91 changes: 75 additions & 16 deletions vaxrank/epitope_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from __future__ import absolute_import, print_function, division
from collections import namedtuple, OrderedDict
import traceback
import logging
import os

Expand All @@ -31,6 +32,7 @@
"wt_peptide_sequence",
"length",
"ic50",
"wt_ic50",
"percentile_rank",
"prediction_method_name",
"overlaps_mutation",
Expand Down Expand Up @@ -173,58 +175,114 @@ def predict_epitopes(
mhctools_binding_predictions = mhc_predictor.predict_subsequences(
{protein_fragment.gene_name: protein_fragment.amino_acids})
except Exception as exc:
logger.error('MHC prediction errored for protein fragment %s, with exception text "%s"',
protein_fragment, exc)
logger.error(
'MHC prediction errored for protein fragment %s, with traceback: %s',
protein_fragment, traceback.format_exc())
return results

# compute the WT epitopes for each mutant fragment's epitopes; mutant -> WT
wt_peptides = {}
for binding_prediction in mhctools_binding_predictions:
peptide = binding_prediction.peptide
peptide_length = binding_prediction.length
peptide_start_offset = binding_prediction.offset
peptide_end_offset = peptide_start_offset + peptide_length

overlaps_mutation = protein_fragment.interval_overlaps_mutation(
start_offset=peptide_start_offset,
end_offset=peptide_end_offset)

if overlaps_mutation:
full_reference_protein_sequence = (
protein_fragment.predicted_effect().original_protein_sequence
)
global_epitope_start_pos = (
protein_fragment.global_start_pos() + peptide_start_offset
)
wt_peptide = full_reference_protein_sequence[
global_epitope_start_pos:global_epitope_start_pos + peptide_length]
wt_peptides[peptide] = wt_peptide

wt_predictions = []
try:
# filter to minimum peptide lengths
valid_wt_peptides = [
x for x in wt_peptides.values() if len(x) >= mhc_predictor.min_peptide_length
]
if len(valid_wt_peptides) > 0:
wt_predictions = mhc_predictor.predict_peptides(valid_wt_peptides)
except ValueError as err:
logger.error(
'MHC prediction for WT peptides errored, with traceback: %s',
traceback.format_exc())

wt_predictions_grouped = {}
# break it out: (peptide, allele) -> prediction
for wt_prediction in wt_predictions:
wt_predictions_grouped[(wt_prediction.peptide, wt_prediction.allele)] = wt_prediction

# convert from mhctools.BindingPrediction objects to EpitopePrediction
# which differs primarily by also having a boolean field
# 'overlaps_mutation' that indicates whether the epitope overlaps
# mutant amino acids or both sides of a deletion
num_total = 0
num_occurs_in_reference = 0
num_low_scoring = 0
for binding_prediction in mhctools_binding_predictions:
num_total += 1
peptide = binding_prediction.peptide
peptide_length = binding_prediction.length
peptide_start_offset = binding_prediction.offset
peptide_end_offset = binding_prediction.offset + binding_prediction.length
peptide_end_offset = peptide_start_offset + peptide_length

overlaps_mutation = protein_fragment.interval_overlaps_mutation(
start_offset=peptide_start_offset,
end_offset=peptide_end_offset)

peptide = binding_prediction.peptide
occurs_in_reference = index_contains_kmer(fm, peptide)
if occurs_in_reference:
logger.debug('Peptide %s occurs in reference', peptide)
num_occurs_in_reference += 1
overlaps_mutation = protein_fragment.interval_overlaps_mutation(
start_offset=peptide_start_offset,
end_offset=peptide_end_offset)

# compute WT epitope sequence, if this epitope overlaps the mutation
if overlaps_mutation:
full_reference_protein_sequence = (
protein_fragment.predicted_effect().original_protein_sequence
)
global_epitope_start_pos = protein_fragment.global_start_pos() + peptide_start_offset
wt_peptide = full_reference_protein_sequence[
global_epitope_start_pos:global_epitope_start_pos + binding_prediction.length]
wt_peptide = wt_peptides[peptide]
wt_prediction = wt_predictions_grouped.get((wt_peptide, binding_prediction.allele))
wt_ic50 = None
if wt_prediction is None:
# this can happen in a stop-loss variant: do we want to check that here?
if len(wt_peptide) < mhc_predictor.min_peptide_length:
logger.info(
'No prediction for too-short WT epitope %s: possible stop-loss variant',
wt_peptide)
else:
wt_ic50 = wt_prediction.value

else:
wt_peptide = peptide
wt_ic50 = binding_prediction.value

epitope_prediction = EpitopePrediction(
allele=binding_prediction.allele,
peptide_sequence=peptide,
wt_peptide_sequence=wt_peptide,
length=len(binding_prediction.peptide),
length=len(peptide),
ic50=binding_prediction.value,
wt_ic50=wt_ic50,
percentile_rank=binding_prediction.percentile_rank,
prediction_method_name=binding_prediction.prediction_method_name,
overlaps_mutation=overlaps_mutation,
source_sequence=protein_fragment.amino_acids,
offset=binding_prediction.offset,
offset=peptide_start_offset,
occurs_in_reference=occurs_in_reference)
if epitope_prediction.logistic_epitope_score() >= min_epitope_score:
key = (epitope_prediction.peptide_sequence, epitope_prediction.allele)
results[key] = epitope_prediction
else:
num_low_scoring += 1

logger.info('%d out of %d peptides occur in reference', num_occurs_in_reference, num_total)
logger.info('%d total peptides: %d occur in reference, %d failed score threshold',
num_total, num_occurs_in_reference, num_low_scoring)
return results

def slice_epitope_predictions(
Expand All @@ -242,6 +300,7 @@ def slice_epitope_predictions(
wt_peptide_sequence=p.wt_peptide_sequence,
length=p.length,
ic50=p.ic50,
wt_ic50=p.wt_ic50,
percentile_rank=p.percentile_rank,
prediction_method_name=p.prediction_method_name,
overlaps_mutation=p.overlaps_mutation,
Expand Down
39 changes: 34 additions & 5 deletions vaxrank/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import sys
import tempfile

from astropy.io import ascii as asc
import jinja2
import pandas as pd
import pdfkit
Expand Down Expand Up @@ -79,6 +80,9 @@ def __init__(
'reviewers': reviewers.split(',') if reviewers else [],
'final_review': final_review,
'input_json_file': input_json_file,
# these report sections are optional
'include_manufacturability': args_for_report['manufacturability'],
'include_wt_epitopes': args_for_report['wt_epitopes'],
}

# map from peptide objects to their COSMIC IDs if they exist
Expand Down Expand Up @@ -213,10 +217,12 @@ def _epitope_data(self, epitope_prediction):
"""
epitope_data = OrderedDict([
('Sequence', epitope_prediction.peptide_sequence),
('IC50', epitope_prediction.ic50),
('Normalized binding score', round(
('IC50', '%.2f nM' % epitope_prediction.ic50),
('Score', round(
epitope_prediction.logistic_epitope_score(), 4)),
('Allele', epitope_prediction.allele),
('Allele', epitope_prediction.allele.replace('HLA-', '')),
('WT sequence', epitope_prediction.wt_peptide_sequence),
('WT IC50', '%.2f nM' % epitope_prediction.wt_ic50),
])
return epitope_data

Expand Down Expand Up @@ -301,18 +307,41 @@ def compute_template_data(self):
manufacturability_data = self._manufacturability_data(vaccine_peptide)

epitopes = []
wt_epitopes = []
sorted_epitope_predictions = sorted(
vaccine_peptide.epitope_predictions, key=attrgetter('ic50'))
for epitope_prediction in sorted_epitope_predictions:
epitope_data = self._epitope_data(epitope_prediction)
# mutant vs WT epitope data
if epitope_prediction.overlaps_mutation:
epitope_data = self._epitope_data(epitope_prediction)
epitopes.append(epitope_data)

else:
# only care about allele, ic50, sequence (other info is redundant)
key_list = ['Allele', 'IC50', 'Sequence']
wt_epitopes.append({key: epitope_data[key] for key in key_list})


# hack: make a nicely-formatted fixed width table for epitopes, used in ASCII report
with tempfile.TemporaryFile(mode='r+') as temp:
asc.write(epitopes, temp, format='fixed_width_two_line', delimiter_pad=' ')
temp.seek(0)
ascii_epitopes = temp.read()

ascii_wt_epitopes = None
if len(wt_epitopes) > 0:
with tempfile.TemporaryFile(mode='r+') as temp:
asc.write(
wt_epitopes, temp, format='fixed_width_two_line', delimiter_pad=' ')
temp.seek(0)
ascii_wt_epitopes = temp.read()
peptide_dict = {
'header_display_data': header_display_data,
'peptide_data': peptide_data,
'manufacturability_data': manufacturability_data,
'epitopes': epitopes,
'ascii_epitopes': ascii_epitopes,
'wt_epitopes': wt_epitopes,
'ascii_wt_epitopes': ascii_wt_epitopes,
}
peptides.append(peptide_dict)

Expand Down
53 changes: 45 additions & 8 deletions vaxrank/templates/stylesheet.css
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ col.patient-info-column-one {
width: 40%;
}

col.patient-info-column-two {

}

/* Command-line args */
table.args {
width: 90%;
Expand All @@ -57,10 +53,6 @@ col.args-column-one {
width: 40%;
}

col.args-column-two {

}

/* Variants */
ol.main {
padding-left: 1.2em;
Expand Down Expand Up @@ -113,13 +105,28 @@ td.variant-head {
}

/* Peptides */
h4.peptides {
page-break-before: always;
}

div.wt-epitopes {
page-break-after: always;
page-break-inside: avoid;
margin: 2em 0 2em 2em;
}

table.wt-epitopes {
font-size: 80%;
}

ol.peptides {
padding-left: 0;
list-style-position: inside;
}

li.peptide {
page-break-inside: avoid;
page-break-before: always;
}

table.peptide {
Expand Down Expand Up @@ -161,6 +168,36 @@ col.peptide-data-column-two {
width: 30%;
}

col.epitope-data-column-one {
width: 16%;
}

col.epitope-data-column-two {
width: 16%;
}

col.epitope-data-column-three {
width: 16%;
}

col.epitope-data-column-four {
width: 16%;
}

col.epitope-data-column-five {
width: 16%;
}

col.epitope-data-column-six {
width: 16%;
}

table.epitope-inner {
font-size: 80%;
width: 100%;
border: none;
}

/* Signatures */
table.signature {
width: 70%;
Expand Down

0 comments on commit 4386d26

Please sign in to comment.