In [1]:
from swissisoform.genome import GenomeHandler
from swissisoform.visualize import GenomeVisualizer
from swissisoform.isoform import AlternativeIsoform
from swissisoform.mutations import MutationHandler
from swissisoform.utils import analyze_mutations
from typing import Optional, List
import pandas as pd
import os

In [2]:
# Input files, relative to the notebook's working directory.
GENOME_FASTA = "../data/genome_data/hg38.fa"
GENOME_ANNOTATION = "../data/genome_data/hg38.ncbiRefSeq.gtf"
TRUNCATION_BED = (
    "../data/ribosome_profiling/RiboTISHV6_MD2025_AnnoToTruncation_exonintersect.bed"
)

# Genome handler built from the hg38 sequence file and its RefSeq GTF annotation.
genome = GenomeHandler(GENOME_FASTA, GENOME_ANNOTATION)

# Alternative isoform features are read from the BED file above.
alt_isoforms = AlternativeIsoform()
alt_isoforms.load_bed(TRUNCATION_BED)

# Handler passed to analyze_mutations() in a later cell.
mutation_handler = MutationHandler()

In [None]:
# Gene analysed by the rest of the notebook.
gene_name = "NTHL1"
print(f"Processing gene: {gene_name}")

# Per-gene folder under the current directory; exist_ok makes re-running this cell safe.
os.makedirs(f"./{gene_name}", exist_ok=True)

# Look up the alternative isoform features recorded for this gene.
print("Getting alternative features...")
alt_features = alt_isoforms.get_visualization_features(gene_name)

if not alt_features.empty:
    print(f"Found {len(alt_features)} alternative features")
    print("\nAlternative Features:")
    print(alt_features)
else:
    print("No alternative features found")

Processing gene: NTHL1
Getting alternative features...
Found 2 alternative features

Alternative Features:
    chromosome      source       feature_type    start      end  score strand  \
707      chr16  truncation  alternative_start  2047709  2047845      0      -   
708      chr16  truncation  alternative_start  2046202  2046366      0      -   

    frame            gene_id          transcript_id gene_name start_codon  
707     .  ENSG00000065057.7  ENSG00000065057.7_alt     NTHL1         AUG  
708     .  ENSG00000065057.7  ENSG00000065057.7_alt     NTHL1         AUG  


In [4]:
# Fetch the transcripts annotated for the selected gene and report what was found.
print("Getting transcript information...")
transcript_info = genome.get_transcript_ids(gene_name)

if not transcript_info.empty:
    print(f"Found {len(transcript_info)} transcripts")
    print("\nTranscript Information:")
    print(transcript_info)
else:
    print("No transcript info found")

Getting transcript information...
Found 4 transcripts

Transcript Information:
          transcript_id chromosome    start      end strand
1360102  XM_047434171.1      chr16  2039820  2047834      -
1360119     NM_002528.7      chr16  2039820  2047834      -
1360136  NM_001318193.2      chr16  2039820  2047834      -
1360151  NM_001318194.2      chr16  2039820  2047834      -


In [5]:
mutations_unfiltered = await analyze_mutations(
    gene_name=gene_name,
    mutation_handler=mutation_handler,
    alt_features=alt_features,
    sources=["clinvar"],
)

impact_types = {
    "clinvar": ["missense variant", "nonsense variant", "frameshift variant"],
}

mutations_filtered = await analyze_mutations(
    gene_name=gene_name,
    mutation_handler=mutation_handler,
    alt_features=alt_features,
    sources=["clinvar"],
    impact_types=impact_types,
)

Fetching mutations from sources: clinvar...
Fetching mutations from sources: clinvar...


  df[col] = pd.to_numeric(df[col], errors='ignore')



Analyzing mutations in alternative features:
  ├─ Feature 708 (2047709-2047845): 59 mutations
  ├─ Feature 709 (2046202-2046366): 64 mutations

Total mutations in all features: 123
Found 123 mutations in truncation regions

Mutation Analysis:
Impact types: {'missense variant': 73, 'synonymous variant': 25, 'frameshift variant': 14, '5 prime UTR variant': 7, 'nonsense variant': 3, 'inframe variant': 1}
Clinical significance: {'Uncertain significance': 76, 'Likely benign': 27, 'Pathogenic': 15, 'Likely pathogenic': 3, 'Conflicting classifications of pathogenicity': 1, 'Pathogenic/Likely pathogenic': 1}
Truncation regions: ['2047709-2047845', '2046202-2046366']
Fetching mutations from sources: clinvar...
Fetching mutations from sources: clinvar...

Analyzing mutations in alternative features:
  ├─ Feature 708 (2047709-2047845): 59 mutations
  ├─ Feature 709 (2046202-2046366): 64 mutations

Total mutations in all features: 123
Filtering for impact types by source:
  - clinvar: missense va

In [6]:
visualizer = GenomeVisualizer(genome)

# Every plot for this gene is written into one folder. The path does not change
# between transcripts, so it is computed once here. The folder is created
# idempotently so this cell no longer depends on an earlier cell having made it.
output_dir = f"./{gene_name}/"
os.makedirs(output_dir, exist_ok=True)

# For each transcript, write up to three PNGs: unfiltered, filtered, filtered zoom.
for _, transcript in transcript_info.iterrows():
    transcript_id = transcript["transcript_id"]
    print(f"\nVisualizing transcript: {transcript_id}")

    # Full-transcript view with the unfiltered mutation set (skipped if the query returned None).
    if mutations_unfiltered is not None:
        visualizer.visualize_transcript(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_unfiltered,
            output_file=f"{output_dir}{transcript_id}_unfiltered.png",
        )

    # Full-transcript and zoomed views with the impact-filtered mutation set.
    if mutations_filtered is not None:
        visualizer.visualize_transcript(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_filtered,
            output_file=f"{output_dir}{transcript_id}_filtered.png",
        )

        # Zoomed view; padding=100 is passed through unchanged to the visualizer.
        visualizer.visualize_transcript_zoomed(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_filtered,
            output_file=f"{output_dir}{transcript_id}_filtered_zoom.png",
            padding=100,
        )


Visualizing transcript: XM_047434171.1

Visualizing transcript: NM_002528.7

Visualizing transcript: NM_001318193.2

Visualizing transcript: NM_001318194.2
