# Notebook for demonstrating evidence matching between assayed fusions and categorical fusions

In [1]:
import warnings
from os import environ

# Suppress all warnings for the remainder of this notebook session.
warnings.filterwarnings("ignore")

# Connection URLs for the gene normalizer and UTA databases. Edit both values so
# they point at wherever these databases are hosted in your environment.
environ.update(
    {
        "GENE_NORM_DB_URL": "postgresql://postgres@localhost:5432/gene_normalizer",
        "UTA_DB_URL": "postgresql://uta_admin:uta@localhost:5432/uta/uta_20240523b",
    }
)

In [2]:
from civicpy import civic

from fusor.fusor import FUSOR
from fusor.translator import Translator

# Create the FUSOR instance and a Translator that wraps it; both are reused by
# the cells below.
# NOTE(review): presumably FUSOR() reads the GENE_NORM_DB_URL / UTA_DB_URL
# environment variables set in the first cell — confirm against fusor docs.
fusor = FUSOR()
translator = Translator(fusor=fusor)

In [3]:
# Generate KIF5B::RET AssayedFusion from StarFusion file
from pathlib import Path

from cool_seq_tool.schemas import Assembly, CoordinateType

from fusor.harvester import StarFusionHarvester

path = Path("../../tests/fixtures/star-fusion.fusion_predictions.abridged.tsv")
harvester = StarFusionHarvester()
fusions_list = harvester.load_records(path)
assayed_fusion_star_fusion = await translator.from_star_fusion(
        fusions_list[0],
        CoordinateType.RESIDUE.value,
        Assembly.GRCH38.value
    )

In [4]:
# Fetch fusion variants from CIViC, requesting only those with status "accepted"
variants = civic.get_all_fusion_variants(include_status="accepted")

In [5]:
# Print the VICC-compliant name of every variant whose name mentions KIF5B or RET
genes_of_interest = ("KIF5B", "RET")
for fusion in variants:
    name = fusion.vicc_compliant_name
    if any(gene in name for gene in genes_of_interest):
        print(name)

KIF5B(entrez:3799)::RET(entrez:5979)
CCDC6(entrez:8030)::RET(entrez:5979)
v::RET(entrez:5979)
KIF5B(entrez:3799)::EGFR(entrez:1956)


The output above lists every categorical fusion that has KIF5B or RET as a partner.
We expect two matches: the first fusion, because its partners are KIF5B and RET (with
equivalent breakpoint locations), and the third fusion, because its 5' partner is a
multiple-possible-genes element (`v`) and its 3' partner is RET.

In [6]:
# Generate list of matches, report match score
from fusor.fusion_matching import FusionMatcher
from fusor.harvester import CIVICHarvester
from fusor.models import FusionSet

# Save categorical fusions cache and create FusionSet
harvester = CIVICHarvester()
harvester.fusions_list = variants
fusions_list = harvester.load_records()

civic_fusions = []
for fusion in fusions_list:
    if "?" in fusion.vicc_compliant_name:
        continue
    cex = await translator.from_civic(civic=fusion)
    civic_fusions.append(cex)
cache_dir = Path("../../src/fusor/data")
fs = FusionSet(assayedFusions=[assayed_fusion_star_fusion], categoricalFusions=civic_fusions)

# Initialize FusionMatcher and define sources to match against
fm = FusionMatcher(cache_dir=cache_dir,fusion_set=fs)

# Generate list of matching fusions
matches = await fm.match_fusion()
for match in matches:
    print(match[0].model_dump(exclude_none=True))
    print(f"Match Score: {match[1]}")



{'type': <FUSORTypes.CATEGORICAL_FUSION: 'CategoricalFusion'>, 'structure': [{'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>, 'transcript': 'refseq:NM_004521.3', 'exonEnd': 24, 'exonEndOffset': 0, 'gene': {'conceptType': 'Gene', 'name': 'KIF5B', 'primaryCoding': {'id': 'hgnc:6324', 'system': 'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/', 'code': 'HGNC:6324'}}, 'elementGenomicEnd': {'id': 'ga4gh:SL.nk8wv9yKzCFQ0n7Ph2JnJhOkf2Fzfh_U', 'type': 'SequenceLocation', 'digest': 'nk8wv9yKzCFQ0n7Ph2JnJhOkf2Fzfh_U', 'sequenceReference': {'id': 'refseq:NC_000010.11', 'type': 'SequenceReference', 'refgetAccession': 'SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB'}, 'start': 32017142}}, {'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>, 'transcript': 'refseq:NM_020975.6', 'exonStart': 11, 'exonStartOffset': 0, 'gene': {'conceptType': 'Gene', 'name': 'RET', 'primaryCoding': {'id': 'hgnc:9967', 'system': 'https://www.genenames.org/data/gene

In [7]:
# Display the fusion object of the first entry in `matches` as a dict
# (element [0] of each entry is the fusion, element [1] its match score).
matches[0][0].model_dump(exclude_none=True)

{'type': <FUSORTypes.CATEGORICAL_FUSION: 'CategoricalFusion'>,
 'structure': [{'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>,
   'transcript': 'refseq:NM_004521.3',
   'exonEnd': 24,
   'exonEndOffset': 0,
   'gene': {'conceptType': 'Gene',
    'name': 'KIF5B',
    'primaryCoding': {'id': 'hgnc:6324',
     'system': 'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/',
     'code': 'HGNC:6324'}},
   'elementGenomicEnd': {'id': 'ga4gh:SL.nk8wv9yKzCFQ0n7Ph2JnJhOkf2Fzfh_U',
    'type': 'SequenceLocation',
    'digest': 'nk8wv9yKzCFQ0n7Ph2JnJhOkf2Fzfh_U',
    'sequenceReference': {'id': 'refseq:NC_000010.11',
     'type': 'SequenceReference',
     'refgetAccession': 'SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB'},
    'start': 32017142}},
  {'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>,
   'transcript': 'refseq:NM_020975.6',
   'exonStart': 11,
   'exonStartOffset': 0,
   'gene': {'conceptType': 'Gene',
    'name': 'RET',
    '

In [8]:
# Display the fusion object of the second entry in `matches` as a dict
# (element [0] of each entry is the fusion, element [1] its match score).
matches[1][0].model_dump(exclude_none=True)

{'type': <FUSORTypes.CATEGORICAL_FUSION: 'CategoricalFusion'>,
 'structure': [{'type': <FUSORTypes.MULTIPLE_POSSIBLE_GENES_ELEMENT: 'MultiplePossibleGenesElement'>},
  {'type': <FUSORTypes.GENE_ELEMENT: 'GeneElement'>,
   'gene': {'conceptType': 'Gene',
    'name': 'RET',
    'primaryCoding': {'id': 'hgnc:9967',
     'system': 'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/',
     'code': 'HGNC:9967'}}}],
 'viccNomenclature': 'v::RET(hgnc:9967)',
 'civicMolecularProfiles': [<CIViC molecular_profile 1595>,
  <CIViC molecular_profile 4424>]}

Both expected categorical fusions are returned, sorted in descending order
by degree of match.

In [9]:
# Inspect the attributes of the first evidence item on the first CIViC molecular
# profile attached to the first matched categorical fusion
first_match_profile = matches[0][0].civicMolecularProfiles[0]
vars(first_match_profile.evidence_items[0])

{'_assertions': [],
 '_therapies': [<CIViC therapy 117>],
 '_phenotypes': [],
 '_incomplete': {'therapies'},
 '_partial': False,
 'type': 'evidence',
 'id': 698,
 'variant_origin': 'SOMATIC',
 'therapy_interaction_type': None,
 'therapy_ids': [117],
 'status': 'accepted',
 'source_id': 378,
 'significance': 'SENSITIVITYRESPONSE',
 'rating': 2,
 'phenotype_ids': [],
 'name': 'EID698',
 'molecular_profile_id': 269,
 'evidence_type': 'PREDICTIVE',
 'evidence_level': 'C',
 'evidence_direction': 'SUPPORTS',
 'disease_id': 30,
 'description': 'A case study of a patient with EGFR, KRAS, BRAF, HER2, ALK, ROS1 and MET negative adenocarcinoma of the lung. FISH analysis revealed a KIF5B-RET fusion. The RET inhibitor Vandetanib led to remission in the patient.',
 'assertion_ids': [],
 '_include_status': ['accepted', 'submitted', 'rejected']}

In [10]:
# Inspect the attributes of the first evidence item on the first CIViC molecular
# profile attached to the second matched categorical fusion
second_match_profile = matches[1][0].civicMolecularProfiles[0]
vars(second_match_profile.evidence_items[0])

{'_assertions': [<CIViC assertion 78>],
 '_therapies': [<CIViC therapy 601>],
 '_phenotypes': [],
 '_incomplete': {'therapies'},
 '_partial': False,
 'type': 'evidence',
 'id': 8852,
 'variant_origin': 'SOMATIC',
 'therapy_interaction_type': None,
 'therapy_ids': [601],
 'status': 'accepted',
 'source_id': 3693,
 'significance': 'SENSITIVITYRESPONSE',
 'rating': 4,
 'phenotype_ids': [],
 'name': 'EID8852',
 'molecular_profile_id': 1595,
 'evidence_type': 'PREDICTIVE',
 'evidence_level': 'A',
 'evidence_direction': 'SUPPORTS',
 'disease_id': 16,
 'description': 'In this phase 1/2 trial (NCT03157128), patients with RET altered thyroid cancers were enrolled to receive the highly selective RET inhibitor selpercatinib. Among 19 patients with previously treated RET fusion positive thyroid cancer, the percentage who had a response was 79% (95% CI, 54 to 94), and 1-year progression-free survival was 64% (95% CI, 37 to 82). Responses included 8 of 11 (73%) papillary thyroid cancers, 2 of 3 poor