-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
20 changed files
with
1,936 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Prediction of antigen presentation | ||
|
||
This submodule contains predictors for naturally presented MHC ligands. These predictors are typically trained on peptides eluted from cell surfaces and identified with mass-spec. The models combine MHC binding affinity with cleavage prediction and the level of expression of transcripts containing the given peptide. | ||
|
||
This is a work in progress and not ready for production use. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from .presentation_model import PresentationModel | ||
from .percent_rank_transform import PercentRankTransform | ||
from . import presentation_component_models, decoy_strategies | ||
|
||
# Public names of this package, i.e. what a star-import of it exposes.
__all__ = [ | ||
"PresentationModel", | ||
"PercentRankTransform", | ||
"presentation_component_models", | ||
"decoy_strategies", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from .decoy_strategy import DecoyStrategy | ||
from .same_transcripts_as_hits import SameTranscriptsAsHits | ||
from .uniform_random import UniformRandom | ||
|
||
# Public names of this package, i.e. what a star-import of it exposes.
__all__ = [ | ||
"DecoyStrategy", | ||
"SameTranscriptsAsHits", | ||
"UniformRandom", | ||
] |
57 changes: 57 additions & 0 deletions
57
mhcflurry/antigen_presentation/decoy_strategies/decoy_strategy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import pandas | ||
|
||
|
||
class DecoyStrategy(object):
    """
    A mechanism for selecting decoys (non-hit peptides) given hits (
    peptides detected via mass-spec).

    Subclasses should override either decoys() or decoys_for_experiment().
    Whichever one is not overridden is implemented using the other.
    """

    def __init__(self):
        pass

    def decoys(self, hits_df):
        """
        Given a df of hits with columns 'experiment_name' and 'peptide',
        return a df with the same structure giving decoys.

        Subclasses should override either this or decoys_for_experiment()

        Parameters
        ------------
        hits_df : pandas.DataFrame
            Must be non-empty and have columns 'experiment_name' and
            'peptide'.

        Returns
        ------------
        pandas.DataFrame
            Columns 'peptide' and 'experiment_name', one row per decoy,
            with a fresh integer index.
        """
        assert 'experiment_name' in hits_df.columns
        assert 'peptide' in hits_df.columns
        assert len(hits_df) > 0
        grouped = hits_df.groupby("experiment_name")
        dfs = []
        for (experiment_name, sub_df) in grouped:
            decoys = self.decoys_for_experiment(
                experiment_name,
                sub_df.peptide.values)
            df = pandas.DataFrame({
                'peptide': decoys,
            })
            df["experiment_name"] = experiment_name
            dfs.append(df)
        return pandas.concat(dfs, ignore_index=True)

    def decoys_for_experiment(self, experiment_name, hit_list):
        """
        Return decoys for a single experiment.

        Parameters
        ------------
        experiment_name : string

        hit_list : list of string
            List of hits

        Raises
        ------------
        NotImplementedError
            If neither decoys() nor decoys_for_experiment() has been
            overridden.
        """
        # Prevent infinite recursion: this default implementation delegates
        # to decoys(), whose default implementation delegates back here.
        # Bound methods are created afresh on every attribute access, so an
        # identity test on `self.decoys` itself can never detect the base
        # implementation; compare the underlying function objects instead.
        # (Plain functions have no __func__, hence the getattr defaults.)
        base_impl = getattr(
            DecoyStrategy.decoys, "__func__", DecoyStrategy.decoys)
        if getattr(self.decoys, "__func__", None) is base_impl:
            raise NotImplementedError(
                "Subclasses of DecoyStrategy must override either decoys() "
                "or decoys_for_experiment()")

        hits_df = pandas.DataFrame({'peptide': hit_list})
        hits_df["experiment_name"] = experiment_name
        return self.decoys(hits_df)
58 changes: 58 additions & 0 deletions
58
mhcflurry/antigen_presentation/decoy_strategies/same_transcripts_as_hits.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import numpy | ||
|
||
from .decoy_strategy import DecoyStrategy | ||
|
||
|
||
class SameTranscriptsAsHits(DecoyStrategy):
    """
    Decoy strategy that selects decoys from the same transcripts the
    hits come from. The transcript for each hit is taken to be the
    transcript containing the hit with the highest expression for
    the given experiment.

    Parameters
    ------------
    experiment_to_expression_group : dict of string -> string
        Maps experiment names to expression groups.

    peptides_and_transcripts: pandas.DataFrame
        Must have columns 'peptide' and 'transcript', index unimportant.

    peptide_to_expression_group_to_transcript : pandas.DataFrame
        Indexed by peptides, columns are expression groups. Values
        give transcripts to use.

    decoys_per_hit : int
    """
    def __init__(
            self,
            experiment_to_expression_group,
            peptides_and_transcripts,
            peptide_to_expression_group_to_transcript,
            decoys_per_hit=10):
        DecoyStrategy.__init__(self)
        assert decoys_per_hit > 0
        self.experiment_to_expression_group = experiment_to_expression_group
        self.peptides_and_transcripts = peptides_and_transcripts
        self.peptide_to_expression_group_to_transcript = (
            peptide_to_expression_group_to_transcript)
        self.decoys_per_hit = decoys_per_hit

    def decoys_for_experiment(self, experiment_name, hit_list):
        """
        Sample decoys_per_hit * len(hit_list) decoys, with replacement,
        from the non-hit peptides that share a transcript with any hit.
        """
        assert len(hit_list) > 0, "No hits for %s" % experiment_name
        expression_group = self.experiment_to_expression_group[experiment_name]

        # Label-based lookup. `.loc` replaces the `.ix` indexer, which was
        # deprecated in pandas 0.20 and removed in pandas 1.0.
        transcripts = self.peptide_to_expression_group_to_transcript.loc[
            hit_list, expression_group
        ]
        assert len(transcripts) > 0, experiment_name

        # Candidate decoys: peptides on one of the hits' transcripts that
        # are not themselves hits. Boolean-mask selection via `.loc`.
        candidates = self.peptides_and_transcripts
        universe = candidates.loc[
            candidates.transcript.isin(transcripts) &
            (~ candidates.peptide.isin(hit_list))
        ].peptide.values
        assert len(universe) > 0, experiment_name

        return numpy.random.choice(
            universe,
            replace=True,
            size=self.decoys_per_hit * len(hit_list))
21 changes: 21 additions & 0 deletions
21
mhcflurry/antigen_presentation/decoy_strategies/uniform_random.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import numpy | ||
|
||
from .decoy_strategy import DecoyStrategy | ||
|
||
|
||
class UniformRandom(DecoyStrategy):
    """
    Decoy strategy that draws decoys uniformly at random (with
    replacement) from a caller-supplied universe of peptides, excluding
    the hits themselves.
    """
    def __init__(self, all_peptides, decoys_per_hit=999):
        DecoyStrategy.__init__(self)
        # Stored as a set so hits can be removed with a set difference.
        self.all_peptides = set(all_peptides)
        self.decoys_per_hit = decoys_per_hit

    def decoys_for_experiment(self, experiment_name, hit_list):
        """
        Return decoys_per_hit * len(hit_list) peptides sampled from the
        universe minus the given hits. experiment_name is not used.
        """
        candidates = list(self.all_peptides - set(hit_list))
        n_decoys = self.decoys_per_hit * len(hit_list)
        return numpy.random.choice(candidates, replace=True, size=n_decoys)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import numpy | ||
|
||
|
||
class PercentRankTransform(object):
    """
    Map arbitrary numeric values onto percent ranks using a histogram
    of reference values.
    """

    def __init__(self, n_bins=1e5):
        self.n_bins = int(n_bins)
        # Both are populated by fit(); None means "not fitted yet".
        self.cdf = None
        self.bin_edges = None

    def fit(self, values):
        """
        Establish percentiles from the given reference values.

        May only be called once per instance, and values must be
        non-empty.
        """
        assert self.cdf is None
        assert self.bin_edges is None
        assert len(values) > 0
        counts, self.bin_edges = numpy.histogram(values, bins=self.n_bins)
        cumulative_percent = numpy.cumsum(
            counts * 100.0 / numpy.sum(counts))
        # searchsorted() over the n_bins + 1 edges yields indices in
        # [0, n_bins + 1]. The two leading zeros cover values at or below
        # the second bin edge; the trailing 100 pads the table.
        self.cdf = numpy.concatenate(
            [[0.0, 0.0], cumulative_percent, [100.0]])
        assert not numpy.isnan(self.cdf).any()

    def transform(self, values):
        """
        Return the percent rank (in the range [0, 100]) of each value.

        Requires fit() to have been called.
        """
        assert self.cdf is not None
        assert self.bin_edges is not None
        percent_ranks = self.cdf[
            numpy.searchsorted(self.bin_edges, values)]
        assert len(percent_ranks) == len(values)
        return percent_ranks
18 changes: 18 additions & 0 deletions
18
mhcflurry/antigen_presentation/presentation_component_models/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from .presentation_component_model import PresentationComponentModel | ||
from .expression import Expression | ||
from .mhcflurry_released import MHCflurryReleased | ||
from .mhcflurry_trained_on_hits import MHCflurryTrainedOnHits | ||
from .fixed_affinity_predictions import FixedAffinityPredictions | ||
from .fixed_per_peptide_quantity import FixedPerPeptideQuantity | ||
from .fixed_per_peptide_and_transcript_quantity import ( | ||
FixedPerPeptideAndTranscriptQuantity) | ||
|
||
# Public names of this package, i.e. what a star-import of it exposes.
__all__ = [ | ||
"PresentationComponentModel", | ||
"Expression", | ||
"MHCflurryReleased", | ||
"MHCflurryTrainedOnHits", | ||
"FixedAffinityPredictions", | ||
"FixedPerPeptideQuantity", | ||
"FixedPerPeptideAndTranscriptQuantity", | ||
] |
45 changes: 45 additions & 0 deletions
45
mhcflurry/antigen_presentation/presentation_component_models/expression.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from .presentation_component_model import PresentationComponentModel | ||
|
||
from ...common import assert_no_null | ||
|
||
|
||
class Expression(PresentationComponentModel):
    """
    Model input for transcript expression.

    Parameters
    ------------
    experiment_to_expression_group : dict of string -> string
        Maps experiment names to expression groups.

    expression_values : pandas.DataFrame
        Columns should be expression groups. Indices should be peptide.
    """

    def __init__(
            self, experiment_to_expression_group, expression_values, **kwargs):
        PresentationComponentModel.__init__(self, **kwargs)
        # Every expression group referenced by an experiment must have a
        # matching column of expression values.
        assert all(
            group in expression_values.columns
            for group in experiment_to_expression_group.values())

        assert_no_null(experiment_to_expression_group)

        self.experiment_to_expression_group = experiment_to_expression_group
        self.expression_values = expression_values

    def column_names(self):
        """Names of the columns this component produces."""
        return ["expression"]

    def requires_fitting(self):
        """This component is a fixed lookup, so it needs no fitting."""
        return False

    def predict_for_experiment(self, experiment_name, peptides):
        """
        Look up the expression value of each peptide in the expression
        group associated with the given experiment.

        Returns a dict mapping "expression" to an array with one entry
        per peptide.
        """
        expression_group = self.experiment_to_expression_group[experiment_name]
        # Label-based lookup. `.loc` replaces the `.ix` indexer, which was
        # deprecated in pandas 0.20 and removed in pandas 1.0.
        return {
            "expression": (
                self.expression_values.loc[peptides, expression_group]
                .values)
        }
59 changes: 59 additions & 0 deletions
59
mhcflurry/antigen_presentation/presentation_component_models/fixed_affinity_predictions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from .presentation_component_model import PresentationComponentModel | ||
|
||
from ...common import assert_no_null | ||
|
||
|
||
class FixedAffinityPredictions(PresentationComponentModel):
    """
    Presentation component backed by precomputed affinity predictions.

    Parameters
    ------------
    experiment_to_alleles : dict: string -> string list
        Normalized allele names for each experiment.

    panel : pandas.Panel
        Dimensions should be:
            - "value", "percentile_rank" (IC50 and percent rank)
            - peptide (string)
            - allele (string)

    name : string
        Prefix used for the names of the output columns.
    """
    # NOTE(review): pandas.Panel was removed from pandas in 0.25, so this
    # class still needs an older pandas (or a Panel-like object) as input.

    def __init__(
            self,
            experiment_to_alleles,
            panel,
            name='precomputed',
            **kwargs):
        PresentationComponentModel.__init__(self, **kwargs)
        self.experiment_to_alleles = experiment_to_alleles
        # No item of the panel may contain null entries.
        for key in panel.items:
            assert_no_null(panel[key])
        self.panel = panel
        self.name = name

    def column_names(self):
        """Names of the columns this component produces."""
        return [
            "%s_affinity" % self.name,
            "%s_percentile_rank" % self.name
        ]

    def requires_fitting(self):
        """Predictions are precomputed, so no fitting is needed."""
        return False

    def _min_across_alleles(self, item, alleles, peptides):
        """Minimum of one panel item over the alleles, per peptide."""
        # `.loc` replaces the `.ix` indexer, which was deprecated in
        # pandas 0.20 and removed in pandas 1.0.
        return (
            self.panel[item][alleles]
            .min(axis=1)
            .loc[peptides].values)

    def predict_min_across_alleles(self, alleles, peptides):
        """
        For each peptide, return the minimum affinity and the minimum
        percentile rank across the given alleles.

        Returns a dict keyed by the names given by column_names().
        """
        return {
            ("%s_affinity" % self.name): (
                self._min_across_alleles("value", alleles, peptides)),
            ("%s_percentile_rank" % self.name): (
                self._min_across_alleles(
                    "percentile_rank", alleles, peptides)),
        }

    def predict_for_experiment(self, experiment_name, peptides):
        """Predict using the alleles registered for the experiment."""
        alleles = self.experiment_to_alleles[experiment_name]
        return self.predict_min_across_alleles(alleles, peptides)
Oops, something went wrong.