# Notebook for demonstrating evidence matching between assayed fusions and categorical fusions

In [1]:
import warnings
from os import environ

# Silence all warnings so the demo output stays readable
warnings.filterwarnings(action="ignore")

# Local PostgreSQL connection URLs. Presumably read by the gene normalizer and
# UTA-backed libraries imported in later cells, so they are set first -- confirm.
environ.update(
    {
        "GENE_NORM_DB_URL": "postgresql://postgres@localhost:5432/gene_normalizer",
        "UTA_DB_URL": "postgresql://uta_admin:uta@localhost:5432/uta/uta_20240523b",
    }
)

In [2]:
from civicpy import civic

from fusor.fusor import FUSOR
from fusor.translator import Translator

# Shared FUSOR instance, handed to the Translator below
fusor = FUSOR()
# Translator used by later cells to build fusion objects from StarFusion and
# CIViC records (see `from_star_fusion` / `from_civic` calls)
translator = Translator(fusor=fusor)

In [3]:
# Generate KIF5B::RET AssayedFusion from StarFusion file
from pathlib import Path

from cool_seq_tool.schemas import Assembly, CoordinateType

from fusor.harvester import StarFusionHarvester

path = Path("../../tests/fixtures/star-fusion.fusion_predictions.abridged.tsv")
harvester = StarFusionHarvester()
fusions_list = harvester.load_records(path)
assayed_fusion_star_fusion = await translator.from_star_fusion(
        fusions_list[0],
        CoordinateType.RESIDUE.value,
        Assembly.GRCH38.value
    )

In [4]:
# Load the fusion variants from CIViC (via civicpy), restricted to those with
# status "accepted"
# NOTE(review): presumably served from civicpy's local cache or the CIViC API -- confirm
variants = civic.get_all_fusion_variants(include_status="accepted")

In [5]:
# Print the VICC-compliant name of every accepted fusion variant whose name
# mentions either partner of the assayed KIF5B::RET fusion
for fusion in variants:
    if any(symbol in fusion.vicc_compliant_name for symbol in ("KIF5B", "RET")):
        print(fusion.vicc_compliant_name)

KIF5B(entrez:3799)::RET(entrez:5979)
CCDC6(entrez:8030)::RET(entrez:5979)
v::RET(entrez:5979)
KIF5B(entrez:3799)::EGFR(entrez:1956)


The output above lists the accepted CIViC categorical fusions whose name contains KIF5B or RET.
We expect the assayed KIF5B::RET fusion to match the first fusion, as its partners are KIF5B and RET,
and the third fusion, as its 5' partner is a multiple possible genes element (`v`) and its 3' partner
is RET.

In [6]:
# Generate CategoricalFusion objects from CIViC
from fusor.harvester import CIVICHarvester

harvester = CIVICHarvester(fusions_list=variants)
fusions_list = harvester.load_records()

categorical_fusions = []
for fusion in fusions_list:
    if "?" in fusion.vicc_compliant_name:
        continue
    cex = await translator.from_civic(civic=fusion)
    categorical_fusions.append(cex)



In [7]:
# Define evidence matching method
from fusor.models import (
    AssayedFusion,
    CategoricalFusion,
    GeneElement,
    MultiplePossibleGenesElement,
    TranscriptSegmentElement,
    UnknownGeneElement,
)


def compare_structure(
    assayed_element: TranscriptSegmentElement | UnknownGeneElement | GeneElement,
    categorical_element: TranscriptSegmentElement | MultiplePossibleGenesElement | GeneElement,
    is_five_prime_partner: bool,
) -> tuple[bool | str, int]:
    """Score how well an assayed structural element matches a categorical one

    :param assayed_element: The assayed fusion transcript segment, unknown gene
        element, or gene element
    :param categorical_element: The categorical fusion transcript segment,
        multiple possible genes element, or gene element
    :param is_five_prime_partner: If the 5' fusion partner is being compared.
        The end-side fields are compared when ``True``, the start-side fields
        otherwise
    :return: A tuple of match status and match score. The status is ``"NA"``
        (score 0) when either element carries no precise gene information,
        ``False`` (score 0) on any mismatch, and ``True`` otherwise. The score
        counts one point for the gene, one for the transcript, and one for each
        compared exon/offset/genomic-location field
    """
    # Nothing precise can be compared against an unknown gene or a
    # multiple-possible-genes placeholder
    if isinstance(categorical_element, MultiplePossibleGenesElement) or isinstance(
        assayed_element, UnknownGeneElement
    ):
        return "NA", 0

    # The gene partners must agree before anything else is considered
    if not (assayed_element.gene == categorical_element.gene):
        return False, 0
    score = 1

    # Transcript-level detail is only available when both sides are transcript segments
    both_are_segments = isinstance(
        assayed_element, TranscriptSegmentElement
    ) and isinstance(categorical_element, TranscriptSegmentElement)
    if not both_are_segments:
        return True, score

    same_transcript = (
        assayed_element.transcript
        and categorical_element.transcript
        and assayed_element.transcript == categorical_element.transcript
    )
    if not same_transcript:
        return False, 0
    score += 1

    # The 5' partner is compared on its end-side fields, the 3' partner on its
    # start-side fields
    junction_fields = (
        ("exonEnd", "exonEndOffset", "elementGenomicEnd")
        if is_five_prime_partner
        else ("exonStart", "exonStartOffset", "elementGenomicStart")
    )
    if not all(
        getattr(assayed_element, name) == getattr(categorical_element, name)
        for name in junction_fields
    ):
        return False, 0

    return True, score + len(junction_fields)

def compare_fusion(assayed_fusion: AssayedFusion, categorical_fusion: CategoricalFusion) -> bool | tuple[bool, int]:
    """Compare assayed and categorical fusions

    Neither fusion object is modified by the comparison.

    :param assayed_fusion: AssayedFusion object
    :param categorical_fusion: CategoricalFusion object
    :return: ``False`` when the fusions do not match, otherwise a tuple
        ``(True, match_score)`` where the score sums the linker match (if any)
        and the 5' and 3' partner scores from ``compare_structure``
    """
    # Work on shallow copies: the linker is removed from these lists below, and
    # popping from `fusion.structure` directly would permanently strip the
    # linker from the caller's objects, corrupting every later comparison that
    # reuses them.
    assayed_elements = list(assayed_fusion.structure)
    categorical_elements = list(categorical_fusion.structure)
    match_score = 0

    # Check for linker elements first: when both structures have exactly three
    # elements, the middle one is compared as the linker
    if len(assayed_elements) == len(categorical_elements) == 3:
        if assayed_elements[1] == categorical_elements[1]:
            match_score += 1
            # Drop the linker from the local copies so that index 1 is the 3' partner
            assayed_elements.pop(1)
            categorical_elements.pop(1)
        else:
            return False

    # Compare the 5' partners, then the 3' partners. A status of "NA" is truthy
    # and therefore does not rule out a match.
    match_data_5prime = compare_structure(assayed_elements[0], categorical_elements[0], True)
    if not match_data_5prime[0]:
        return False
    match_data_3prime = compare_structure(assayed_elements[1], categorical_elements[1], False)
    if not match_data_3prime[0]:
        return False
    return True, match_score + match_data_5prime[1] + match_data_3prime[1]

def match_fusion(
    assayed_fusion: AssayedFusion,
    categorical_fusions: list[CategoricalFusion],
) -> list[tuple[CategoricalFusion, int]]:
    """Rank the categorical fusions that match an assayed fusion

    :param assayed_fusion: The assayed fusion object
    :param categorical_fusions: A list of categorical fusion objects
    :return: A list of tuples, each holding a matching categorical fusion and
        its match score, ordered from highest to lowest score
    """
    scored_matches = []
    for candidate in categorical_fusions:
        outcome = compare_fusion(assayed_fusion, candidate)
        # `compare_fusion` yields False for a non-match and (True, score) for a match
        if not outcome:
            continue
        scored_matches.append((candidate, outcome[1]))
    scored_matches.sort(key=lambda pair: pair[1], reverse=True)
    return scored_matches

In [8]:
# Match the StarFusion-derived assayed fusion against every CIViC categorical
# fusion. The result is a list of (CategoricalFusion, match score) tuples,
# highest score first.
matches = match_fusion(assayed_fusion_star_fusion, categorical_fusions)
matches

[(CategoricalFusion(type=<FUSORTypes.CATEGORICAL_FUSION: 'CategoricalFusion'>, regulatoryElement=None, structure=[TranscriptSegmentElement(type=<FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>, transcript='refseq:NM_004521.3', exonStart=None, exonStartOffset=None, exonEnd=24, exonEndOffset=0, gene=MappableConcept(id=None, extensions=None, conceptType='Gene', name='KIF5B', primaryCoding=Coding(id='hgnc:6324', extensions=None, name=None, system='https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/', systemVersion=None, code=code(root='HGNC:6324'), iris=None), mappings=None), elementGenomicStart=None, elementGenomicEnd=SequenceLocation(id='ga4gh:SL.nk8wv9yKzCFQ0n7Ph2JnJhOkf2Fzfh_U', type='SequenceLocation', name=None, description=None, aliases=None, extensions=None, digest='nk8wv9yKzCFQ0n7Ph2JnJhOkf2Fzfh_U', sequenceReference=SequenceReference(id='refseq:NC_000010.11', type='SequenceReference', name=None, description=None, aliases=None, extensions=None, refgetA

Both expected categorical fusions are matched, and the matches are sorted in descending order
of match score.

In [9]:
# View the first evidence item of the first molecular profile linked to the
# top-ranked categorical fusion match
matches[0][0].civicMolecularProfiles[0].evidence_items[0].__dict__

{'_assertions': [],
 '_therapies': [<CIViC therapy 117>],
 '_phenotypes': [],
 '_incomplete': {'therapies'},
 '_partial': False,
 'type': 'evidence',
 'id': 698,
 'variant_origin': 'SOMATIC',
 'therapy_interaction_type': None,
 'therapy_ids': [117],
 'status': 'accepted',
 'source_id': 378,
 'significance': 'SENSITIVITYRESPONSE',
 'rating': 2,
 'phenotype_ids': [],
 'name': 'EID698',
 'molecular_profile_id': 269,
 'evidence_type': 'PREDICTIVE',
 'evidence_level': 'C',
 'evidence_direction': 'SUPPORTS',
 'disease_id': 30,
 'description': 'A case study of a patient with EGFR, KRAS, BRAF, HER2, ALK, ROS1 and MET negative adenocarcinoma of the lung. FISH analysis revealed a KIF5B-RET fusion. The RET inhibitor Vandetanib led to remission in the patient.',
 'assertion_ids': [],
 '_include_status': ['accepted', 'submitted', 'rejected']}

In [10]:
# View the first evidence item of the first molecular profile linked to the
# second-ranked categorical fusion match
matches[1][0].civicMolecularProfiles[0].evidence_items[0].__dict__

{'_assertions': [<CIViC assertion 78>],
 '_therapies': [<CIViC therapy 601>],
 '_phenotypes': [],
 '_incomplete': {'therapies'},
 '_partial': False,
 'type': 'evidence',
 'id': 8852,
 'variant_origin': 'SOMATIC',
 'therapy_interaction_type': None,
 'therapy_ids': [601],
 'status': 'accepted',
 'source_id': 3693,
 'significance': 'SENSITIVITYRESPONSE',
 'rating': 4,
 'phenotype_ids': [],
 'name': 'EID8852',
 'molecular_profile_id': 1595,
 'evidence_type': 'PREDICTIVE',
 'evidence_level': 'A',
 'evidence_direction': 'SUPPORTS',
 'disease_id': 16,
 'description': 'In this phase 1/2 trial (NCT03157128), patients with RET altered thyroid cancers were enrolled to receive the highly selective RET inhibitor selpercatinib. Among 19 patients with previously treated RET fusion positive thyroid cancer, the percentage who had a response was 79% (95% CI, 54 to 94), and 1-year progression-free survival was 64% (95% CI, 37 to 82). Responses included 8 of 11 (73%) papillary thyroid cancers, 2 of 3 poor