Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions oncotator/datasources/EnsemblTranscriptDatasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def _get_best_scores(txs, scoring_function, comparator):
return [k for (k, v) in scores.iteritems() if v == best]

@staticmethod
def _sort_by_multiple_criteria(txs, scoring_functions):
def _select_best_with_multiple_criteria(txs, scoring_functions):
"""Sort using multiple scoring functions
:param txs: transcripts to sort
:param scoring_functions: a list of tuples of the form (tx -> B, [B] -> B), i.e. (scoring function, comparator such as min or max)
Expand Down Expand Up @@ -324,6 +324,12 @@ def _choose_best_effect_transcript(self, txs, variant_type, ref_allele, alt_alle
Transcripts are ranked by the following criteria, applied in order;
each later criterion is only used to break ties left by the earlier ones.

1. most detrimental effect
2. curation level
3. appris rank
4. longest protein change
5. lexicographically smallest transcript ID

:param list txs: list of Transcript
:param str variant_type:
:param str ref_allele:
Expand All @@ -334,10 +340,13 @@ def _choose_best_effect_transcript(self, txs, variant_type, ref_allele, alt_alle
"""
if len(txs) == 0:
return None
best_effect_txs = EnsemblTranscriptDatasource._sort_by_multiple_criteria(txs,
best_effect_txs = EnsemblTranscriptDatasource._select_best_with_multiple_criteria(txs,
[(lambda x: self._calculate_effect_score(x, start, end, alt_allele, ref_allele, variant_type), min),
(self._calculate_canonical_score, max),
(self._get_appris_rank, min),
(lambda x: len(x.get_seq()), max),
(self._get_appris_rank, min)])
(lambda x: x.get_transcript_id(), min)]
)
return best_effect_txs[0]

def _choose_canonical_transcript(self, txs, variant_type, ref_allele, alt_allele, start, end):
Expand All @@ -348,6 +357,8 @@ def _choose_canonical_transcript(self, txs, variant_type, ref_allele, alt_allele
2. appris rank
3. most detrimental effect
4. longest protein change
5. lexicographically smallest transcript ID

:param list txs: list of Transcript
:param str variant_type:
:param str ref_allele:
Expand All @@ -358,11 +369,12 @@ def _choose_canonical_transcript(self, txs, variant_type, ref_allele, alt_allele
"""
if len(txs) == 0:
return None
highest_scoring_tx = EnsemblTranscriptDatasource._sort_by_multiple_criteria(txs, [
highest_scoring_tx = EnsemblTranscriptDatasource._select_best_with_multiple_criteria(txs, [
(self._calculate_canonical_score, max),
(self._get_appris_rank, min),
(lambda x: self._calculate_effect_score(x, start, end, alt_allele, ref_allele, variant_type), min),
(lambda x: len(x.get_seq()), max)])
(lambda x: len(x.get_seq()), max),
(lambda x: x.get_transcript_id(), min)])
return highest_scoring_tx[0]

def get_overlapping_transcripts(self, chr, start, end, padding=0):
Expand Down