In [1]:
from swissisoform.genome import GenomeHandler
from swissisoform.visualize import GenomeVisualizer
from swissisoform.alternative_isoforms import AlternativeIsoform
from swissisoform.mutations import MutationHandler, analyze_mutations
import os

In [2]:
# Input files handed to GenomeHandler (first the .fa, then the .gtf).
genome_fasta_path = "../data/genome_data/hg38.fa"
genome_gtf_path = "../data/genome_data/hg38.ncbiRefSeq.gtf"
genome = GenomeHandler(genome_fasta_path, genome_gtf_path)

# BED file loaded into the AlternativeIsoform container.
truncation_bed_path = (
    "../data/ribosome_profiling/RiboTISHV6_MD2025_AnnoToTruncation_exonintersect.bed"
)
alt_isoforms = AlternativeIsoform()
alt_isoforms.load_bed(truncation_bed_path)

# Handler object passed to analyze_mutations() later in the notebook.
mutation_handler = MutationHandler()

In [3]:
gene_name = "GSR"
print(f"Processing gene: {gene_name}")

# Per-gene output folder; exist_ok=True keeps this cell safe to re-run.
os.makedirs(f"./{gene_name}", exist_ok=True)

# Look up the alternative isoform features recorded for this gene.
print("Getting alternative features...")
alt_features = alt_isoforms.get_visualization_features(gene_name)

if not alt_features.empty:
    # One print call, newline-separated: count line, header, then the table.
    print(
        f"Found {len(alt_features)} alternative features",
        "\nAlternative Features:",
        alt_features,
        sep="\n",
    )
else:
    print("No alternative features found")

Processing gene: GSR
Getting alternative features...
Found 1 alternative features

Alternative Features:
    chromosome      source       feature_type     start       end  score  \
284       chr8  truncation  alternative_start  30727706  30727833      0   

    strand frame             gene_id           transcript_id gene_name  \
284      -     .  ENSG00000104687.12  ENSG00000104687.12_alt       GSR   

    start_codon  
284         AUG  


In [4]:
# Ask the genome handler for the transcripts of the selected gene.
print("Getting transcript information...")
transcript_info = genome.get_transcript_ids(gene_name)

if not transcript_info.empty:
    # One print call, newline-separated: count line, header, then the table.
    print(
        f"Found {len(transcript_info)} transcripts",
        "\nTranscript Information:",
        transcript_info,
        sep="\n",
    )
else:
    print("No transcript info found")

Getting transcript information...
Found 6 transcripts

Transcript Information:
          transcript_id chromosome     start       end strand
2855439  XM_047421728.1       chr8  30678066  30708970      -
2855466  XM_047421727.1       chr8  30678066  30727021      -
2855497     NM_000637.5       chr8  30678066  30727846      -
2855528  NM_001195103.3       chr8  30678066  30727846      -
2855557  NM_001195102.3       chr8  30678066  30727846      -
2855586  NM_001195104.3       chr8  30678066  30727846      -


In [5]:
mutations_unfiltered = await analyze_mutations(
    gene_name=gene_name,
    mutation_handler=mutation_handler,
    alt_features=alt_features,
    sources=["clinvar"],
)

impact_types = {
    "clinvar": ["missense variant", "nonsense variant", "frameshift variant"],
}

mutations_filtered = await analyze_mutations(
    gene_name=gene_name,
    mutation_handler=mutation_handler,
    alt_features=alt_features,
    sources=["clinvar"],
    impact_types=impact_types,
)

Fetching mutations from sources: clinvar...
Fetching mutations from sources: clinvar...


  df[col] = pd.to_numeric(df[col], errors='ignore')



Analyzing mutations in alternative features:
  ├─ Feature 285 (30727706-30727833): 14 mutations

Total mutations in all features: 14
Found 14 mutations in truncation regions

Mutation Analysis:
Impact types: {'missense variant': 12, 'synonymous variant': 1, 'nonsense variant': 1}
Clinical significance: {'Uncertain significance': 8, 'Benign': 3, 'Likely benign': 2, 'Conflicting classifications of pathogenicity': 1}
Truncation regions: ['30727706-30727833']
Fetching mutations from sources: clinvar...
Fetching mutations from sources: clinvar...

Analyzing mutations in alternative features:
  ├─ Feature 285 (30727706-30727833): 14 mutations

Total mutations in all features: 14
Filtering for impact types by source:
  - clinvar: missense variant, nonsense variant, frameshift variant
Found 13 mutations in truncation regions

Mutation Analysis:
Impact types: {'missense variant': 12, 'nonsense variant': 1}
Clinical significance: {'Uncertain significance': 8, 'Benign': 3, 'Conflicting classific

In [6]:
visualizer = GenomeVisualizer(genome)

# Every plot for this gene is written to the same folder. The path does not
# depend on the transcript, so it is computed once, outside the loop. The folder
# is also created here (a no-op when it already exists) so this cell does not
# rely on an earlier cell having made it.
output_dir = f"./{gene_name}/"
os.makedirs(output_dir, exist_ok=True)

# Only the transcript ID is used from each row, so iterate that column directly
# instead of materialising a full row object per transcript with iterrows().
for transcript_id in transcript_info["transcript_id"]:
    print(f"\nVisualizing transcript: {transcript_id}")

    # Full-transcript view with the unfiltered mutation set.
    if mutations_unfiltered is not None:
        visualizer.visualize_transcript(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_unfiltered,
            output_file=f"{output_dir}{transcript_id}_unfiltered.png",
        )

    # Full-transcript and zoomed views with the impact-filtered mutation set.
    if mutations_filtered is not None:
        visualizer.visualize_transcript(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_filtered,
            output_file=f"{output_dir}{transcript_id}_filtered.png",
        )

        # Zoomed view of the same filtered data; padding=100 is forwarded
        # unchanged to visualize_transcript_zoomed.
        visualizer.visualize_transcript_zoomed(
            gene_name=gene_name,
            transcript_id=transcript_id,
            alt_features=alt_features,
            mutations_df=mutations_filtered,
            output_file=f"{output_dir}{transcript_id}_filtered_zoom.png",
            padding=100,
        )


Visualizing transcript: XM_047421728.1

Visualizing transcript: XM_047421727.1

Visualizing transcript: NM_000637.5

Visualizing transcript: NM_001195103.3

Visualizing transcript: NM_001195102.3

Visualizing transcript: NM_001195104.3
