Skip to content

Commit

Permalink
Merge 7114c6e into ea31881
Browse files Browse the repository at this point in the history
  • Loading branch information
jburos committed Aug 18, 2017
2 parents ea31881 + 7114c6e commit a7b575a
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 47 deletions.
58 changes: 33 additions & 25 deletions cohorts/cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from .variant_filters import no_filter
from .styling import set_styling
from . import variant_filters
from .errors import BamFileNotFound, TumorBamFileNotFound, RNABamFileNotFound

logger = get_logger(__name__, level=logging.INFO)

Expand Down Expand Up @@ -609,7 +610,8 @@ def _load_single_patient_variants(self, patient, filter_fn, use_cache=True, **kw

# Note None here is different from 0. We want to preserve None
if merged_variants is None:
logger.info("Variants did not exist for patient %s" % patient.id)
# note as debug, since also reported by _load_single_patient_merged_variants
logger.debug("_load_single_patient_variants: Variants did not exist for patient %s" % patient.id)
return None

logger.debug("... applying filters to variants for: {}".format(patient.id))
Expand Down Expand Up @@ -681,7 +683,7 @@ def _load_single_patient_merged_variants(self, patient, use_cache=True):
# variants. 0 variants will lead to 0 neoantigens, for example, but 0 variant
# collections will lead to NaN variants and neoantigens.
if no_variants:
print("Variants did not exist for patient %s" % patient.id)
logger.info("Variants did not exist for patient %s" % patient.id)
merged_variants = None

# save merged variants to file
Expand Down Expand Up @@ -837,32 +839,38 @@ def _load_single_patient_effects(self, patient, only_nonsynonymous, all_effects,
else:
cached = self.load_from_cache(self.cache_names["effect"], patient.id, cached_file_name)
if cached is not None:
return filter_effects(effect_collection=cached,
variant_collection=variants,
patient=patient,
filter_fn=filter_fn,
**kwargs)
filtered_effects = filter_effects(effect_collection=cached,
variant_collection=variants,
patient=patient,
filter_fn=filter_fn,
**kwargs)
else:

effects = variants.effects()

effects = variants.effects()
self.save_to_cache(effects, self.cache_names["all_effect"], patient.id, cached_file_name)

self.save_to_cache(effects, self.cache_names["all_effect"], patient.id, cached_file_name)
effects = EffectCollection(list(effects.top_priority_effect_per_variant().values()))
self.save_to_cache(effects, self.cache_names["effect"], patient.id, cached_file_name)

effects = EffectCollection(list(effects.top_priority_effect_per_variant().values()))
self.save_to_cache(effects, self.cache_names["effect"], patient.id, cached_file_name)
# Always take the top priority effect per variant so we end up with a single
# effect per variant.
nonsynonymous_effects = EffectCollection(
list(effects.drop_silent_and_noncoding().top_priority_effect_per_variant().values()))
self.save_to_cache(nonsynonymous_effects, self.cache_names["nonsynonymous_effect"], patient.id, cached_file_name)

filtered_effects = filter_effects(
effect_collection=(
nonsynonymous_effects if only_nonsynonymous else effects),
variant_collection=variants,
patient=patient,
filter_fn=filter_fn,
**kwargs)

# Always take the top priority effect per variant so we end up with a single
# effect per variant.
nonsynonymous_effects = EffectCollection(
list(effects.drop_silent_and_noncoding().top_priority_effect_per_variant().values()))
self.save_to_cache(nonsynonymous_effects, self.cache_names["nonsynonymous_effect"], patient.id, cached_file_name)
if filtered_effects is None:
logger.info("Effects did not exist for patient {}".format(patient.id))

return filter_effects(
effect_collection=(
nonsynonymous_effects if only_nonsynonymous else effects),
variant_collection=variants,
patient=patient,
filter_fn=filter_fn,
**kwargs)
return filtered_effects

def load_kallisto(self):
"""
Expand Down Expand Up @@ -1098,9 +1106,9 @@ def load_single_patient_isovar(self, patient, variants, epitope_lengths):
import logging
logging.disable(logging.INFO)
if patient.tumor_sample is None:
raise ValueError("Patient %s has no tumor sample" % patient.id)
raise TumorBamFileNotFound(patient_id=patient.id)
if patient.tumor_sample.bam_path_rna is None:
raise ValueError("Patient %s has no tumor RNA BAM path" % patient.id)
raise RNABamFileNotFound(patient_id=patient.id)
rna_bam_file = AlignmentFile(patient.tumor_sample.bam_path_rna)

# To ensure that e.g. 8-11mers overlap substitutions, we need at least this
Expand Down
32 changes: 32 additions & 0 deletions cohorts/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

class BamFileNotFound(Exception):
    """Signal that a BAM file needed for a patient is missing.

    The exception keeps the pieces of the message as attributes and renders
    the human-readable text lazily in ``__str__``.

    Parameters
    ----------
    filepath : optional
        Path of the file that could not be found. When omitted, the message
        states that the patient has no file of this type at all.
    patient_id : optional
        Identifier of the affected patient. When omitted, the message refers
        to a generic "Patient".
    filetype : str, optional
        Description of the kind of file; falls back to ``'Bamfile'``.
    """

    def __init__(self, filepath=None, patient_id=None, filetype=None):
        self.filepath = filepath
        self.patient_id = patient_id
        self.filetype = 'Bamfile' if filetype is None else filetype

    def __str__(self):
        """Return the human-readable description of the missing file."""
        if self.patient_id is None:
            patient_str = 'Patient'
        else:
            patient_str = 'Patient {}'.format(self.patient_id)

        if self.filepath is None:
            return '{} has no {}.'.format(patient_str, self.filetype)
        return 'The {} ({}) for {} was not found.'.format(
            self.filetype, repr(self.filepath), patient_str)

    def __repr__(self):
        """Return a debugging representation that includes the stored fields.

        The default ``Exception`` repr is built from the positional
        constructor arguments only, so an instance created with keyword
        arguments would otherwise render as ``ClassName()`` and lose the
        patient and file details when ``repr(e)`` is logged.
        """
        return '{}(filepath={!r}, patient_id={!r}, filetype={!r})'.format(
            type(self).__name__, self.filepath, self.patient_id, self.filetype)

class RNABamFileNotFound(BamFileNotFound):
    """``BamFileNotFound`` whose ``filetype`` defaults to 'tumor RNA Bamfile'.

    Note that ``filetype`` is the first positional parameter here; any further
    positional or keyword arguments are forwarded to the base initializer.
    """

    def __init__(self, filetype='tumor RNA Bamfile', *args, **kwargs):
        # ``filetype`` is a named parameter, so it can never already be
        # present in ``kwargs``; fold it in and forward everything together.
        kwargs['filetype'] = filetype
        super().__init__(*args, **kwargs)

class TumorBamFileNotFound(BamFileNotFound):
    """``BamFileNotFound`` whose ``filetype`` defaults to 'tumor sample bamfile'.

    Note that ``filetype`` is the first positional parameter here; any further
    positional or keyword arguments are forwarded to the base initializer.
    """

    def __init__(self, filetype='tumor sample bamfile', *args, **kwargs):
        # ``filetype`` is a named parameter, so it can never already be
        # present in ``kwargs``; fold it in and forward everything together.
        kwargs['filetype'] = filetype
        super().__init__(*args, **kwargs)

class NormalBamFileNotFound(BamFileNotFound):
    """``BamFileNotFound`` whose ``filetype`` defaults to 'normal sample bamfile'.

    Note that ``filetype`` is the first positional parameter here; any further
    positional or keyword arguments are forwarded to the base initializer.
    """

    def __init__(self, filetype='normal sample bamfile', *args, **kwargs):
        # ``filetype`` is a named parameter, so it can never already be
        # present in ``kwargs``; fold it in and forward everything together.
        kwargs['filetype'] = filetype
        super().__init__(*args, **kwargs)

59 changes: 38 additions & 21 deletions cohorts/varcode_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
# limitations under the License.

from varcode import EffectCollection, Variant
from .errors import BamFileNotFound
import logging

logger = logging.getLogger(__name__)


def genome(variant_collection):
return variant_collection[0].ensembl
Expand Down Expand Up @@ -92,15 +97,19 @@ def filter_variants(variant_collection, patient, filter_fn, **kwargs):
Filtered variant collection, with only the variants passing the filter
"""
if filter_fn:
return variant_collection.clone_with_new_elements([
variant
for variant in variant_collection
if filter_fn(FilterableVariant(
try:
return variant_collection.clone_with_new_elements([
variant
for variant in variant_collection
if filter_fn(FilterableVariant(
variant=variant,
variant_collection=variant_collection,
patient=patient,
), **kwargs)
])
])
except BamFileNotFound as e:
logger.warning(repr(e))
return None
else:
return variant_collection

Expand All @@ -121,27 +130,35 @@ def filter_effects(effect_collection, variant_collection, patient, filter_fn, **
Filtered effect collection, with only the variants passing the filter
"""
if filter_fn:
return EffectCollection([
effect
for effect in effect_collection
if filter_fn(FilterableEffect(
effect=effect,
variant_collection=variant_collection,
patient=patient), **kwargs)])
try:
return EffectCollection([
effect
for effect in effect_collection
if filter_fn(FilterableEffect(
effect=effect,
variant_collection=variant_collection,
patient=patient), **kwargs)])
except BamFileNotFound as e:
logger.warning(repr(e))
return None
else:
return effect_collection

def filter_neoantigens(neoantigens_df, variant_collection, patient, filter_fn):
if filter_fn:
filter_mask = neoantigens_df.apply(
lambda row: filter_fn(
FilterableNeoantigen(neoantigen_row=row,
variant_collection=variant_collection,
patient=patient)),
axis=1,
# reduce ensures that an empty result is a Series vs. a DataFrame
reduce=True)
return neoantigens_df[filter_mask]
try:
filter_mask = neoantigens_df.apply(
lambda row: filter_fn(
FilterableNeoantigen(neoantigen_row=row,
variant_collection=variant_collection,
patient=patient)),
axis=1,
# reduce ensures that an empty result is a Series vs. a DataFrame
reduce=True)
return neoantigens_df[filter_mask]
except BamFileNotFound as e:
logger.warning(repr(e))
return None
else:
return neoantigens_df

Expand Down
6 changes: 5 additions & 1 deletion cohorts/variant_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from .variant_stats import variant_stats_from_variant
from .utils import get_logger
from .errors import RNABamFileNotFound

from varcode import Variant
from varcode.common import memoize
Expand Down Expand Up @@ -79,7 +80,10 @@ def variant_expressed_filter(filterable_variant, **kwargs):
cohort=filterable_variant.patient.cohort,
patient=filterable_variant.patient,
variant_collection=filterable_variant.variant_collection)
return filterable_variant.variant in expressed_variants
if expressed_variants is None:
return None
else:
return filterable_variant.variant in expressed_variants

def effect_expressed_filter(filterable_effect, **kwargs):
return variant_expressed_filter(filterable_effect, **kwargs)
Expand Down

0 comments on commit a7b575a

Please sign in to comment.