Merge pull request #143 from hammerlab/minimal-epitope-report

Minimal epitope report
openvax · Aug 3, 2017 · 4386d26 · 4386d26
2 parents 7726c7c + 4198cad
commit 4386d26
Show file tree

Hide file tree

Showing 9 changed files with 239 additions and 38 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -13,3 +13,4 @@ xlsxwriter
 xlrd
 xvfbwrapper
 future>=0.16.0  # needed by pylint
+astropy
diff --git a/setup.py b/setup.py
@@ -82,6 +82,7 @@
             'xlsxwriter',
             'xvfbwrapper',
             'future>=0.16.0',  # needed by pylint
+            'astropy',
         ],
 
         long_description=readme,

diff --git a/vaxrank/__init__.py b/vaxrank/__init__.py
@@ -1 +1 @@
-__version__ = "0.5.1"
+__version__ = "0.6.0"
diff --git a/vaxrank/cli.py b/vaxrank/cli.py
@@ -57,6 +57,7 @@ def new_run_arg_parser():
     add_mhc_args(arg_parser)
     add_vaccine_peptide_args(arg_parser)
     add_output_args(arg_parser)
+    add_optional_output_args(arg_parser)
     add_supplemental_report_args(arg_parser)
     return arg_parser
 
@@ -73,10 +74,41 @@ def cached_run_arg_parser():
         default="",
         help="Path to JSON file containing results of vaccine peptide report")
     add_output_args(arg_parser)
+    add_optional_output_args(arg_parser)
     add_supplemental_report_args(arg_parser)
     return arg_parser
 
 
+# Lets the user specify whether they want to see particular sections in the report.
+def add_optional_output_args(arg_parser):
+    manufacturability_args = arg_parser.add_mutually_exclusive_group(required=False)
+    manufacturability_args.add_argument(
+        "--include-manufacturability-in-report",
+        dest="manufacturability",
+        action="store_true")
+    manufacturability_args.add_argument(
+        "--no-manufacturability-in-report",
+        dest="manufacturability",
+        action="store_false")
+    arg_parser.set_defaults(manufacturability=True)
+
+    wt_epitope_args = arg_parser.add_mutually_exclusive_group(required=False)
+    wt_epitope_args.add_argument(
+        "--include-non-overlapping-epitopes-in-report",
+        dest="wt_epitopes",
+        action="store_true",
+        help="Set to true to include a report section for each vaccine peptide containing "
+             "strong binders that do not overlap the mutation")
+
+    wt_epitope_args.add_argument(
+        "--no-non-overlapping-epitopes-in-report",
+        dest="wt_epitopes",
+        action="store_false",
+        help="Set to false to exclude report information for each vaccine peptide about "
+             "strong binders that do not overlap the mutation")
+    arg_parser.set_defaults(wt_epitopes=True)
+
+
 def add_output_args(arg_parser):
     output_args_group = arg_parser.add_argument_group("Output options")
 

diff --git a/vaxrank/epitope_prediction.py b/vaxrank/epitope_prediction.py
@@ -14,6 +14,7 @@
 
 from __future__ import absolute_import, print_function, division
 from collections import namedtuple, OrderedDict
+import traceback
 import logging
 import os
 
@@ -31,6 +32,7 @@
     "wt_peptide_sequence",
     "length",
     "ic50",
+    "wt_ic50",
     "percentile_rank",
     "prediction_method_name",
     "overlaps_mutation",
@@ -173,58 +175,114 @@ def predict_epitopes(
         mhctools_binding_predictions = mhc_predictor.predict_subsequences(
             {protein_fragment.gene_name: protein_fragment.amino_acids})
     except Exception as exc:
-        logger.error('MHC prediction errored for protein fragment %s, with exception text "%s"',
-            protein_fragment, exc)
+        logger.error(
+            'MHC prediction errored for protein fragment %s, with traceback: %s',
+            protein_fragment, traceback.format_exc())
         return results
 
+    # compute the WT epitopes for each mutant fragment's epitopes; mutant -> WT
+    wt_peptides = {}
+    for binding_prediction in mhctools_binding_predictions:
+        peptide = binding_prediction.peptide
+        peptide_length = binding_prediction.length
+        peptide_start_offset = binding_prediction.offset
+        peptide_end_offset = peptide_start_offset + peptide_length
+
+        overlaps_mutation = protein_fragment.interval_overlaps_mutation(
+            start_offset=peptide_start_offset,
+            end_offset=peptide_end_offset)
+
+        if overlaps_mutation:
+            full_reference_protein_sequence = (
+                protein_fragment.predicted_effect().original_protein_sequence
+            )
+            global_epitope_start_pos = (
+                protein_fragment.global_start_pos() + peptide_start_offset
+            )
+            wt_peptide = full_reference_protein_sequence[
+                global_epitope_start_pos:global_epitope_start_pos + peptide_length]
+            wt_peptides[peptide] = wt_peptide
+
+    wt_predictions = []
+    try:
+        # filter to minimum peptide lengths
+        valid_wt_peptides = [
+            x for x in wt_peptides.values() if len(x) >= mhc_predictor.min_peptide_length
+        ]
+        if len(valid_wt_peptides) > 0:
+            wt_predictions = mhc_predictor.predict_peptides(valid_wt_peptides)
+    except ValueError as err:
+        logger.error(
+            'MHC prediction for WT peptides errored, with traceback: %s',
+            traceback.format_exc())
+
+    wt_predictions_grouped = {}
+    # break it out: (peptide, allele) -> prediction
+    for wt_prediction in wt_predictions:
+        wt_predictions_grouped[(wt_prediction.peptide, wt_prediction.allele)] = wt_prediction
+
     # convert from mhctools.BindingPrediction objects to EpitopePrediction
     # which differs primarily by also having a boolean field
     # 'overlaps_mutation' that indicates whether the epitope overlaps
     # mutant amino acids or both sides of a deletion
     num_total = 0
     num_occurs_in_reference = 0
+    num_low_scoring = 0
     for binding_prediction in mhctools_binding_predictions:
         num_total += 1
+        peptide = binding_prediction.peptide
+        peptide_length = binding_prediction.length
         peptide_start_offset = binding_prediction.offset
-        peptide_end_offset = binding_prediction.offset + binding_prediction.length
+        peptide_end_offset = peptide_start_offset + peptide_length
+
+        overlaps_mutation = protein_fragment.interval_overlaps_mutation(
+            start_offset=peptide_start_offset,
+            end_offset=peptide_end_offset)
 
-        peptide = binding_prediction.peptide
         occurs_in_reference = index_contains_kmer(fm, peptide)
         if occurs_in_reference:
             logger.debug('Peptide %s occurs in reference', peptide)
             num_occurs_in_reference += 1
-        overlaps_mutation = protein_fragment.interval_overlaps_mutation(
-            start_offset=peptide_start_offset,
-            end_offset=peptide_end_offset)
 
         # compute WT epitope sequence, if this epitope overlaps the mutation
         if overlaps_mutation:
-            full_reference_protein_sequence = (
-                protein_fragment.predicted_effect().original_protein_sequence
-            )
-            global_epitope_start_pos = protein_fragment.global_start_pos() + peptide_start_offset
-            wt_peptide = full_reference_protein_sequence[
-                global_epitope_start_pos:global_epitope_start_pos + binding_prediction.length]
+            wt_peptide = wt_peptides[peptide]
+            wt_prediction = wt_predictions_grouped.get((wt_peptide, binding_prediction.allele))
+            wt_ic50 = None
+            if wt_prediction is None:
+                # this can happen in a stop-loss variant: do we want to check that here?
+                if len(wt_peptide) < mhc_predictor.min_peptide_length:
+                    logger.info(
+                        'No prediction for too-short WT epitope %s: possible stop-loss variant',
+                        wt_peptide)
+            else:
+                wt_ic50 = wt_prediction.value
+
         else:
             wt_peptide = peptide
+            wt_ic50 = binding_prediction.value
 
         epitope_prediction = EpitopePrediction(
                 allele=binding_prediction.allele,
                 peptide_sequence=peptide,
                 wt_peptide_sequence=wt_peptide,
-                length=len(binding_prediction.peptide),
+                length=len(peptide),
                 ic50=binding_prediction.value,
+                wt_ic50=wt_ic50,
                 percentile_rank=binding_prediction.percentile_rank,
                 prediction_method_name=binding_prediction.prediction_method_name,
                 overlaps_mutation=overlaps_mutation,
                 source_sequence=protein_fragment.amino_acids,
-                offset=binding_prediction.offset,
+                offset=peptide_start_offset,
                 occurs_in_reference=occurs_in_reference)
         if epitope_prediction.logistic_epitope_score() >= min_epitope_score:
             key = (epitope_prediction.peptide_sequence, epitope_prediction.allele)
             results[key] = epitope_prediction
+        else:
+            num_low_scoring += 1
 
-    logger.info('%d out of %d peptides occur in reference', num_occurs_in_reference, num_total)
+    logger.info('%d total peptides: %d occur in reference, %d failed score threshold',
+        num_total, num_occurs_in_reference, num_low_scoring)
     return results
 
 def slice_epitope_predictions(
@@ -242,6 +300,7 @@ def slice_epitope_predictions(
             wt_peptide_sequence=p.wt_peptide_sequence,
             length=p.length,
             ic50=p.ic50,
+            wt_ic50=p.wt_ic50,
             percentile_rank=p.percentile_rank,
             prediction_method_name=p.prediction_method_name,
             overlaps_mutation=p.overlaps_mutation,

diff --git a/vaxrank/report.py b/vaxrank/report.py
@@ -21,6 +21,7 @@
 import sys
 import tempfile
 
+from astropy.io import ascii as asc
 import jinja2
 import pandas as pd
 import pdfkit
@@ -79,6 +80,9 @@ def __init__(
             'reviewers': reviewers.split(',') if reviewers else [],
             'final_review': final_review,
             'input_json_file': input_json_file,
+            # these report sections are optional
+            'include_manufacturability': args_for_report['manufacturability'],
+            'include_wt_epitopes': args_for_report['wt_epitopes'],
         }
 
         # map from peptide objects to their COSMIC IDs if they exist
@@ -213,10 +217,12 @@ def _epitope_data(self, epitope_prediction):
         """
         epitope_data = OrderedDict([
             ('Sequence', epitope_prediction.peptide_sequence),
-            ('IC50', epitope_prediction.ic50),
-            ('Normalized binding score', round(
+            ('IC50', '%.2f nM' % epitope_prediction.ic50),
+            ('Score', round(
                 epitope_prediction.logistic_epitope_score(), 4)),
-            ('Allele', epitope_prediction.allele),
+            ('Allele', epitope_prediction.allele.replace('HLA-', '')),
+            ('WT sequence', epitope_prediction.wt_peptide_sequence),
+            ('WT IC50', '%.2f nM' % epitope_prediction.wt_ic50),
         ])
         return epitope_data
 
@@ -301,18 +307,41 @@ def compute_template_data(self):
                 manufacturability_data = self._manufacturability_data(vaccine_peptide)
 
                 epitopes = []
+                wt_epitopes = []
                 sorted_epitope_predictions = sorted(
                     vaccine_peptide.epitope_predictions, key=attrgetter('ic50'))
                 for epitope_prediction in sorted_epitope_predictions:
+                    epitope_data = self._epitope_data(epitope_prediction)
+                    # mutant vs WT epitope data
                     if epitope_prediction.overlaps_mutation:
-                        epitope_data = self._epitope_data(epitope_prediction)
                         epitopes.append(epitope_data)
-
+                    else:
+                        # only care about allele, ic50, sequence (other info is redundant)
+                        key_list = ['Allele', 'IC50', 'Sequence']
+                        wt_epitopes.append({key: epitope_data[key] for key in key_list})
+
+
+                # hack: make a nicely-formatted fixed width table for epitopes, used in ASCII report
+                with tempfile.TemporaryFile(mode='r+') as temp:
+                    asc.write(epitopes, temp, format='fixed_width_two_line', delimiter_pad=' ')
+                    temp.seek(0)
+                    ascii_epitopes = temp.read()
+
+                ascii_wt_epitopes = None
+                if len(wt_epitopes) > 0:
+                    with tempfile.TemporaryFile(mode='r+') as temp:
+                        asc.write(
+                            wt_epitopes, temp, format='fixed_width_two_line', delimiter_pad=' ')
+                        temp.seek(0)
+                        ascii_wt_epitopes = temp.read()
                 peptide_dict = {
                     'header_display_data': header_display_data,
                     'peptide_data': peptide_data,
                     'manufacturability_data': manufacturability_data,
                     'epitopes': epitopes,
+                    'ascii_epitopes': ascii_epitopes,
+                    'wt_epitopes': wt_epitopes,
+                    'ascii_wt_epitopes': ascii_wt_epitopes,
                 }
                 peptides.append(peptide_dict)
 

diff --git a/vaxrank/templates/stylesheet.css b/vaxrank/templates/stylesheet.css
@@ -44,10 +44,6 @@ col.patient-info-column-one {
 	width: 40%;
 }
 
-col.patient-info-column-two {
-
-}
-
 /* Command-line args */
 table.args {
 	width: 90%;
@@ -57,10 +53,6 @@ col.args-column-one {
 	width: 40%;
 }
 
-col.args-column-two {
-
-}
-
 /* Variants */
 ol.main {
 	padding-left: 1.2em;
@@ -113,13 +105,28 @@ td.variant-head {
 }
 
 /* Peptides */
+h4.peptides {
+	page-break-before: always;
+}
+
+div.wt-epitopes {
+	page-break-after: always;
+	page-break-inside: avoid;
+	margin: 2em 0 2em 2em;
+}
+
+table.wt-epitopes {
+	font-size: 80%;
+}
+
 ol.peptides {
 	padding-left: 0;
 	list-style-position: inside;
 }
 
 li.peptide {
 	page-break-inside: avoid;
+	page-break-before: always;
 }
 
 table.peptide {
@@ -161,6 +168,36 @@ col.peptide-data-column-two {
 	width: 30%;
 }
 
+col.epitope-data-column-one {
+	width: 16%;
+}
+
+col.epitope-data-column-two {
+	width: 16%;
+}
+
+col.epitope-data-column-three {
+	width: 16%;
+}
+
+col.epitope-data-column-four {
+	width: 16%;
+}
+
+col.epitope-data-column-five {
+	width: 16%;
+}
+
+col.epitope-data-column-six {
+	width: 16%;
+}
+
+table.epitope-inner {
+	font-size: 80%;
+	width: 100%;
+	border: none;
+}
+
 /* Signatures */
 table.signature {
 	width: 70%;