Skip to content

Commit

Permalink
Merge dfe6f88 into 8f08993
Browse files Browse the repository at this point in the history
  • Loading branch information
timodonnell committed Feb 15, 2017
2 parents 8f08993 + dfe6f88 commit 2840b44
Show file tree
Hide file tree
Showing 20 changed files with 1,936 additions and 6 deletions.
5 changes: 5 additions & 0 deletions mhcflurry/antigen_presentation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Prediction of antigen presentation

This submodule contains predictors for naturally presented MHC ligands. These predictors are typically trained on peptides eluted from cell surfaces and identified with mass-spec. The models combine MHC binding affinity with cleavage prediction and the level of expression of transcripts containing the given peptide.

This is a work in progress and not ready for production use.
10 changes: 10 additions & 0 deletions mhcflurry/antigen_presentation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .presentation_model import PresentationModel
from .percent_rank_transform import PercentRankTransform
from . import presentation_component_models, decoy_strategies

# Public names of the antigen_presentation package (what
# ``from ... import *`` exports); each is imported at the top of this module.
__all__ = [
"PresentationModel",
"PercentRankTransform",
"presentation_component_models",
"decoy_strategies",
]
9 changes: 9 additions & 0 deletions mhcflurry/antigen_presentation/decoy_strategies/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .decoy_strategy import DecoyStrategy
from .same_transcripts_as_hits import SameTranscriptsAsHits
from .uniform_random import UniformRandom

# Public names of the decoy_strategies package (what
# ``from ... import *`` exports); each is imported at the top of this module.
__all__ = [
"DecoyStrategy",
"SameTranscriptsAsHits",
"UniformRandom",
]
57 changes: 57 additions & 0 deletions mhcflurry/antigen_presentation/decoy_strategies/decoy_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pandas


class DecoyStrategy(object):
    """
    A mechanism for selecting decoys (non-hit peptides) given hits (
    peptides detected via mass-spec).

    Subclasses should override either decoys() or decoys_for_experiment().
    Whichever one is not overridden is implemented using the other.
    Calling either method on a class that overrides neither raises
    NotImplementedError.
    """

    def __init__(self):
        pass

    def decoys(self, hits_df):
        """
        Given a df of hits with columns 'experiment_name' and 'peptide',
        return a df with the same structure giving decoys.

        Subclasses should override either this or decoys_for_experiment()

        Parameters
        ------------
        hits_df : pandas.DataFrame
            Non-empty, with columns 'experiment_name' and 'peptide'.

        Returns
        ------------
        pandas.DataFrame with columns 'peptide' and 'experiment_name',
        one row per decoy, with a fresh integer index.
        """
        assert 'experiment_name' in hits_df.columns
        assert 'peptide' in hits_df.columns
        assert len(hits_df) > 0

        dfs = []
        for (experiment_name, sub_df) in hits_df.groupby("experiment_name"):
            decoys = self.decoys_for_experiment(
                experiment_name,
                sub_df.peptide.values)
            df = pandas.DataFrame({
                'peptide': decoys,
            })
            df["experiment_name"] = experiment_name
            dfs.append(df)
        return pandas.concat(dfs, ignore_index=True)

    def decoys_for_experiment(self, experiment_name, hit_list):
        """
        Return decoys for a single experiment.

        Parameters
        ------------
        experiment_name : string

        hit_list : list of string
            List of hits

        Returns
        ------------
        Whatever the subclass's decoys() returns for a one-experiment
        frame built from hit_list.

        Raises
        ------------
        NotImplementedError
            If the subclass overrides neither decoys() nor
            decoys_for_experiment().
        """
        # Prevent infinite recursion. The comparison has to be made on the
        # class-level functions: `self.decoys` builds a new bound-method
        # object on every access, so an identity check against it can never
        # match. `__func__` unwraps Python 2 unbound methods.
        own_impl = type(self).decoys
        base_impl = DecoyStrategy.decoys
        if (getattr(own_impl, "__func__", own_impl) is
                getattr(base_impl, "__func__", base_impl)):
            raise NotImplementedError(
                "%s must override decoys() or decoys_for_experiment()"
                % type(self).__name__)

        hits_df = pandas.DataFrame({'peptide': hit_list})
        hits_df["experiment_name"] = experiment_name
        return self.decoys(hits_df)
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import numpy

from .decoy_strategy import DecoyStrategy


class SameTranscriptsAsHits(DecoyStrategy):
    """
    Decoy strategy that selects decoys from the same transcripts the
    hits come from. The transcript for each hit is taken to be the
    transcript containing the hit with the highest expression for
    the given experiment.

    Parameters
    ------------
    experiment_to_expression_group : dict of string -> string
        Maps experiment names to expression groups.

    peptides_and_transcripts: pandas.DataFrame
        Must have columns 'peptide' and 'transcript', index unimportant.

    peptide_to_expression_group_to_transcript : pandas.DataFrame
        Indexed by peptides, columns are expression groups. Values
        give transcripts to use.

    decoys_per_hit : int
    """
    def __init__(
            self,
            experiment_to_expression_group,
            peptides_and_transcripts,
            peptide_to_expression_group_to_transcript,
            decoys_per_hit=10):
        DecoyStrategy.__init__(self)
        assert decoys_per_hit > 0
        self.experiment_to_expression_group = experiment_to_expression_group
        self.peptides_and_transcripts = peptides_and_transcripts
        self.peptide_to_expression_group_to_transcript = (
            peptide_to_expression_group_to_transcript)
        self.decoys_per_hit = decoys_per_hit

    def decoys_for_experiment(self, experiment_name, hit_list):
        """
        Sample decoys_per_hit * len(hit_list) decoys, with replacement,
        from the non-hit peptides that share a transcript with some hit.

        Parameters
        ------------
        experiment_name : string
            Must be a key of experiment_to_expression_group.

        hit_list : list of string
            Non-empty list of hit peptides.

        Returns
        ------------
        numpy.array of peptides
        """
        assert len(hit_list) > 0, "No hits for %s" % experiment_name
        expression_group = self.experiment_to_expression_group[experiment_name]

        # DataFrame.ix no longer exists in current pandas. A boolean mask on
        # the index is used rather than .loc[hit_list, ...] so that hits
        # absent from the table are skipped instead of raising KeyError
        # (.ix silently produced NaN rows for them).
        transcript_table = self.peptide_to_expression_group_to_transcript
        transcripts = transcript_table.loc[
            transcript_table.index.isin(hit_list), expression_group
        ]
        assert len(transcripts) > 0, experiment_name

        candidates = self.peptides_and_transcripts
        on_hit_transcript = candidates.transcript.isin(transcripts)
        is_hit = candidates.peptide.isin(hit_list)
        universe = candidates.loc[on_hit_transcript & ~is_hit].peptide.values
        assert len(universe) > 0, experiment_name

        return numpy.random.choice(
            universe,
            replace=True,
            size=self.decoys_per_hit * len(hit_list))
21 changes: 21 additions & 0 deletions mhcflurry/antigen_presentation/decoy_strategies/uniform_random.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import numpy

from .decoy_strategy import DecoyStrategy


class UniformRandom(DecoyStrategy):
    """
    Decoy strategy that selects decoys randomly from a provided universe
    of peptides.

    Parameters
    ------------
    all_peptides : iterable of string
        Universe of peptides to draw decoys from. Stored as a set.

    decoys_per_hit : int
        Number of decoys drawn for each hit.
    """
    def __init__(self, all_peptides, decoys_per_hit=999):
        DecoyStrategy.__init__(self)
        self.all_peptides = set(all_peptides)
        self.decoys_per_hit = decoys_per_hit

    def decoys_for_experiment(self, experiment_name, hit_list):
        """
        Sample decoys_per_hit * len(hit_list) peptides, with replacement,
        from the universe minus the hits.

        Parameters
        ------------
        experiment_name : string
            Unused; every experiment draws from the same universe.

        hit_list : list of string
            Hits to exclude from the pool.

        Returns
        ------------
        numpy.array of peptides
        """
        decoy_pool = self.all_peptides.difference(hit_list)
        # Set iteration order depends on string hashing, which Python 3
        # randomizes per process. Sorting fixes the pool order so that a
        # seeded numpy RNG yields the same decoys on every run.
        return numpy.random.choice(
            sorted(decoy_pool),
            replace=True,
            size=self.decoys_per_hit * len(hit_list))
39 changes: 39 additions & 0 deletions mhcflurry/antigen_presentation/percent_rank_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import numpy


class PercentRankTransform(object):
    """
    Convert arbitrary numeric values to percent ranks via a histogram of
    reference values.
    """

    def __init__(self, n_bins=1e5):
        self.n_bins = int(n_bins)
        # Both stay None until fit() is called.
        self.cdf = None
        self.bin_edges = None

    def fit(self, values):
        """
        Establish percentiles from the given reference values. May only be
        called once per instance.

        After fitting, `cdf` holds two leading zeros, then the cumulative
        percentage of reference values per histogram bin, then a trailing
        100.
        """
        assert self.cdf is None
        assert self.bin_edges is None
        assert len(values) > 0

        (counts, self.bin_edges) = numpy.histogram(values, bins=self.n_bins)
        percent_per_bin = counts * 100.0 / numpy.sum(counts)

        # Start from all-NaN so the final check catches any unfilled slot.
        self.cdf = numpy.ones(len(counts) + 3) * numpy.nan
        self.cdf[-1] = 100.0
        self.cdf[1] = 0.0
        self.cdf[0] = 0.0
        numpy.cumsum(percent_per_bin, out=self.cdf[2:-1])
        assert not numpy.isnan(self.cdf).any()

    def transform(self, values):
        """
        Return percent ranks (range [0, 100]) for the given values.

        Each value is looked up by the position at which it would be
        inserted among the fitted bin edges. Requires a prior fit().
        """
        assert self.cdf is not None
        assert self.bin_edges is not None

        positions = self.bin_edges.searchsorted(values)
        percent_ranks = self.cdf[positions]
        assert len(percent_ranks) == len(values)
        return percent_ranks
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from .presentation_component_model import PresentationComponentModel
from .expression import Expression
from .mhcflurry_released import MHCflurryReleased
from .mhcflurry_trained_on_hits import MHCflurryTrainedOnHits
from .fixed_affinity_predictions import FixedAffinityPredictions
from .fixed_per_peptide_quantity import FixedPerPeptideQuantity
from .fixed_per_peptide_and_transcript_quantity import (
FixedPerPeptideAndTranscriptQuantity)

# Public names of the presentation_component_models package (what
# ``from ... import *`` exports); each is imported at the top of this module.
__all__ = [
"PresentationComponentModel",
"Expression",
"MHCflurryReleased",
"MHCflurryTrainedOnHits",
"FixedAffinityPredictions",
"FixedPerPeptideQuantity",
"FixedPerPeptideAndTranscriptQuantity",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from .presentation_component_model import PresentationComponentModel

from ...common import assert_no_null


class Expression(PresentationComponentModel):
    """
    Model input for transcript expression.

    Parameters
    ------------
    experiment_to_expression_group : dict of string -> string
        Maps experiment names to expression groups.

    expression_values : pandas.DataFrame
        Columns should be expression groups. Indices should be peptide.

    **kwargs
        Passed through to PresentationComponentModel.
    """

    def __init__(
            self, experiment_to_expression_group, expression_values, **kwargs):
        PresentationComponentModel.__init__(self, **kwargs)
        # Every expression group referenced by an experiment must have a
        # column of values.
        assert all(
            group in expression_values.columns
            for group in experiment_to_expression_group.values())

        assert_no_null(experiment_to_expression_group)

        self.experiment_to_expression_group = experiment_to_expression_group
        self.expression_values = expression_values

    def column_names(self):
        """Names of the columns this component produces."""
        return ["expression"]

    def requires_fitting(self):
        """This component is a fixed lookup; nothing to fit."""
        return False

    def predict_for_experiment(self, experiment_name, peptides):
        """
        Look up expression for the given peptides in the experiment's
        expression group.

        Parameters
        ------------
        experiment_name : string
            Must be a key of experiment_to_expression_group.

        peptides : list of string

        Returns
        ------------
        dict with key "expression" mapping to an array aligned with
        `peptides`. Peptides missing from expression_values yield NaN.
        """
        expression_group = self.experiment_to_expression_group[experiment_name]
        # DataFrame.ix no longer exists in current pandas. reindex() keeps
        # the result aligned with `peptides` and yields NaN for unknown
        # peptides, where .loc with a label list would raise KeyError.
        group_values = self.expression_values[expression_group]
        return {
            "expression": group_values.reindex(peptides).values
        }
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from .presentation_component_model import PresentationComponentModel

from ...common import assert_no_null


class FixedAffinityPredictions(PresentationComponentModel):
    """
    Presentation component that serves precomputed affinity predictions,
    taking the minimum across an experiment's alleles.

    Parameters
    ------------
    experiment_to_alleles : dict: string -> string list
        Normalized allele names for each experiment.

    panel : pandas.Panel, or dict of string -> pandas.DataFrame
        Dimensions should be:
            - "value", "percentile_rank" (IC50 and percent rank)
            - peptide (string)
            - allele (string)
        pandas.Panel is not available in current pandas; a dict mapping
        "value" and "percentile_rank" to peptide x allele DataFrames is
        accepted as an equivalent.

    name : string
        Prefix for the output column names.

    **kwargs
        Passed through to PresentationComponentModel.
    """

    def __init__(
            self,
            experiment_to_alleles,
            panel,
            name='precomputed',
            **kwargs):
        PresentationComponentModel.__init__(self, **kwargs)
        self.experiment_to_alleles = experiment_to_alleles
        # On a Panel `.items` is the axis of item names; on a dict it is a
        # method, so the keys are taken instead.
        item_names = panel.keys() if isinstance(panel, dict) else panel.items
        for key in item_names:
            assert_no_null(panel[key])
        self.panel = panel
        self.name = name

    def column_names(self):
        """Names of the columns this component produces."""
        return [
            "%s_affinity" % self.name,
            "%s_percentile_rank" % self.name
        ]

    def requires_fitting(self):
        """Predictions are precomputed; nothing to fit."""
        return False

    def _min_across_alleles(self, item, alleles, peptides):
        """
        Per-peptide minimum of panel[item] over `alleles`, as an array
        aligned with `peptides`.
        """
        # Item access works for both a Panel and a dict. reindex() replaces
        # the removed .ix indexer and yields NaN for peptides not in the
        # panel, where .loc with a label list would raise KeyError.
        per_peptide_min = self.panel[item][alleles].min(axis=1)
        return per_peptide_min.reindex(peptides).values

    def predict_min_across_alleles(self, alleles, peptides):
        """
        Return the lowest affinity value and lowest percentile rank across
        the given alleles for each peptide.

        Parameters
        ------------
        alleles : list of string

        peptides : list of string

        Returns
        ------------
        dict mapping "<name>_affinity" and "<name>_percentile_rank" to
        arrays aligned with `peptides`.
        """
        return {
            ("%s_affinity" % self.name): self._min_across_alleles(
                "value", alleles, peptides),
            ("%s_percentile_rank" % self.name): self._min_across_alleles(
                "percentile_rank", alleles, peptides),
        }

    def predict_for_experiment(self, experiment_name, peptides):
        """
        Predict for the alleles registered for `experiment_name`; see
        predict_min_across_alleles for the return value.
        """
        alleles = self.experiment_to_alleles[experiment_name]
        return self.predict_min_across_alleles(alleles, peptides)
Loading

0 comments on commit 2840b44

Please sign in to comment.