In [7]:
from swissisoform.genome import GenomeHandler
from swissisoform.visualize import GenomeVisualizer
from swissisoform.alternative_isoforms import AlternativeIsoform
from swissisoform.mutations import MutationHandler, analyze_mutations
import os

In [8]:
# Input files, relative to the notebook's working directory.
genome_fasta_path = "../data/genome_data/hg38.fa"
genome_annotation_path = "../data/genome_data/hg38.ncbiRefSeq.gtf"
truncation_bed_path = (
    "../data/ribosome_profiling/RiboTISHV6_MD2025_AnnoToTruncation_exonintersect.bed"
)

# Genome handler built from the hg38 sequence file and the RefSeq GTF.
genome = GenomeHandler(genome_fasta_path, genome_annotation_path)

# Alternative isoform records are read from the BED file.
alt_isoforms = AlternativeIsoform()
alt_isoforms.load_bed(truncation_bed_path)

# Handler passed to analyze_mutations() in a later cell.
mutation_handler = MutationHandler()

In [9]:
# Gene analysed by every cell below.
gene_name = "GARS1"
print(f"Processing gene: {gene_name}")

# Per-gene output folder next to the notebook; no error if it already exists.
os.makedirs(f"./{gene_name}", exist_ok=True)

# Look up the alternative isoform features recorded for this gene.
print("Getting alternative features...")
alt_features = alt_isoforms.get_visualization_features(gene_name)

if not alt_features.empty:
    print(f"Found {len(alt_features)} alternative features")
    print("\nAlternative Features:")
    print(alt_features)
else:
    print("No alternative features found")

Processing gene: GARS1
Getting alternative features...
Found 1 alternative features

Alternative Features:
    chromosome      source       feature_type     start       end  score  \
202       chr7  truncation  alternative_start  30594922  30595011      0   

    strand frame             gene_id           transcript_id gene_name  \
202      +     .  ENSG00000106105.13  ENSG00000106105.13_alt     GARS1   

    start_codon  
202         CUG  


In [10]:
# Fetch the transcripts annotated for the gene; the table is reused by the
# visualization cell further down.
print("Getting transcript information...")
transcript_info = genome.get_transcript_ids(gene_name)

if not transcript_info.empty:
    print(f"Found {len(transcript_info)} transcripts")
    print("\nTranscript Information:")
    print(transcript_info)
else:
    print("No transcript info found")

Getting transcript information...
Found 2 transcripts

Transcript Information:
          transcript_id chromosome     start       end strand
3059024     NM_002047.4       chr7  30594878  30634033      +
3059063  NM_001316772.1       chr7  30594735  30634033      +


In [11]:
mutations_unfiltered = await analyze_mutations(
    gene_name=gene_name,
    mutation_handler=mutation_handler,
    alt_features=alt_features,
    sources=["clinvar"],
)

impact_types = {
    "clinvar": ["missense variant", "nonsense variant", "frameshift variant"],
}

mutations_filtered = await analyze_mutations(
    gene_name=gene_name,
    mutation_handler=mutation_handler,
    alt_features=alt_features,
    sources=["clinvar"],
    impact_types=impact_types,
)

Fetching mutations from sources: clinvar...
Fetching mutations from sources: clinvar...


  df[col] = pd.to_numeric(df[col], errors='ignore')



Analyzing mutations in alternative features:
  ├─ Feature 203 (30594922-30595011): 25 mutations

Total mutations in all features: 25
Found 25 mutations in truncation regions

Mutation Analysis:
Impact types: {'missense variant': 17, 'synonymous variant': 7, 'frameshift variant': 1}
Clinical significance: {'Uncertain significance': 16, 'Likely benign': 8, 'Conflicting classifications of pathogenicity': 1}
Truncation regions: ['30594922-30595011']
Fetching mutations from sources: clinvar...
Fetching mutations from sources: clinvar...

Analyzing mutations in alternative features:
  ├─ Feature 203 (30594922-30595011): 25 mutations

Total mutations in all features: 25
Filtering for impact types by source:
  - clinvar: missense variant, nonsense variant, frameshift variant
Found 18 mutations in truncation regions

Mutation Analysis:
Impact types: {'missense variant': 17, 'frameshift variant': 1}
Clinical significance: {'Uncertain significance': 16, 'Likely benign': 1, 'Conflicting classific

In [12]:
visualizer = GenomeVisualizer(genome)

# All plots for this gene are written to one folder. It is computed once here
# (it does not depend on the transcript) and created in this cell as well, so
# the cell still works when re-run on its own after `gene_name` has changed.
output_dir = f"./{gene_name}/"
os.makedirs(output_dir, exist_ok=True)

# For each transcript, write up to three PNGs: unfiltered, filtered, and a
# zoomed view of the filtered mutations.
for _, transcript in transcript_info.iterrows():
    transcript_id = transcript["transcript_id"]
    print(f"\nVisualizing transcript: {transcript_id}")

    # Full-transcript view with every mutation returned by the unfiltered query.
    if mutations_unfiltered is not None:
        visualizer.visualize_transcript(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_unfiltered,
            output_file=f"{output_dir}{transcript_id}_unfiltered.png",
        )

    # Views restricted to the impact-type-filtered mutations.
    if mutations_filtered is not None:
        visualizer.visualize_transcript(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_filtered,
            output_file=f"{output_dir}{transcript_id}_filtered.png",
        )

        # Zoomed view of the filtered mutations.
        # NOTE(review): padding=100 is presumably in base pairs around the
        # alternative feature — confirm against visualize_transcript_zoomed.
        visualizer.visualize_transcript_zoomed(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_filtered,
            output_file=f"{output_dir}{transcript_id}_filtered_zoom.png",
            padding=100,
        )


Visualizing transcript: NM_002047.4

Visualizing transcript: NM_001316772.1
