In [1]:
from swissisoform.genome import GenomeHandler
from swissisoform.visualize import GenomeVisualizer
from swissisoform.isoform import AlternativeIsoform
from swissisoform.mutations import MutationHandler
from swissisoform.utils import analyze_mutations
from typing import Optional, List
import pandas as pd
import os

In [2]:
# Input files, relative to the notebook's working directory.
GENOME_FASTA_PATH = '../data/genome_data/hg38.fa'
GENOME_ANNOTATION_PATH = '../data/genome_data/hg38.ncbiRefSeq.gtf'
ALT_ISOFORM_BED_PATH = '../data/ribosome_profiling/RiboTISHV6_Ly2024_AnnoToTruncation_exonintersect.bed'

# Genome handler built from the sequence (.fa) and annotation (.gtf) files.
genome = GenomeHandler(GENOME_FASTA_PATH, GENOME_ANNOTATION_PATH)

# Alternative-isoform regions are read from the ribosome-profiling BED file.
alt_isoforms = AlternativeIsoform()
alt_isoforms.load_bed(ALT_ISOFORM_BED_PATH)

# Shared handler passed to the mutation analysis further down.
mutation_handler = MutationHandler()

In [3]:
# Gene analysed by every cell below.
gene_name = "GARS1"
print(f"Processing gene: {gene_name}")

# Per-gene output folder; exist_ok makes re-running this cell harmless.
os.makedirs(f"./{gene_name}", exist_ok=True)

# Look up the alternative-isoform features recorded for this gene.
print("Getting alternative features...")
alt_features = alt_isoforms.get_visualization_features(gene_name)

# Report what came back; the table is only printed when it has rows.
if not alt_features.empty:
    print(f"Found {len(alt_features)} alternative features")
    print("\nAlternative Features:")
    print(alt_features)
else:
    print("No alternative features found")

Processing gene: GARS1
Getting alternative features...
No alternative features found


In [4]:
# Fetch the transcripts known for the selected gene.
print("Getting transcript information...")
transcript_info = genome.get_transcript_ids(gene_name)

# Report what came back; the table is only printed when it has rows.
if not transcript_info.empty:
    print(f"Found {len(transcript_info)} transcripts")
    print("\nTranscript Information:")
    print(transcript_info)
else:
    print("No transcript info found")

Getting transcript information...
Found 2 transcripts

Transcript Information:
          transcript_id chromosome     start       end strand
3059024     NM_002047.4       chr7  30594878  30634033      +
3059063  NM_001316772.1       chr7  30594735  30634033      +


In [5]:
mutations_unfiltered = await analyze_mutations(
    gene_name=gene_name, 
    mutation_handler=mutation_handler,
    alt_features=alt_features, 
    sources=["clinvar"],
)

impact_types = {
    "clinvar": ["missense variant", "nonsense variant", "frameshift variant"],
}

mutations_filtered = await analyze_mutations(
    gene_name=gene_name, 
    mutation_handler=mutation_handler,
    alt_features=alt_features, 
    sources=["clinvar"], 
    impact_types=impact_types,
)


Fetching mutations from sources: clinvar...


  df[col] = pd.to_numeric(df[col], errors='ignore')



Source distribution before filtering:
source
ClinVar    500
Name: count, dtype: int64

Source distribution after duplicate removal:
source
ClinVar    474
Name: count, dtype: int64
Found 474 mutations in truncation regions


AttributeError: 'DataFrame' object has no attribute 'tolist'

In [6]:
visualizer = GenomeVisualizer(genome)

# NOTE(review): the output recorded under this cell names a transcript
# (NM_144772.3_2) that is not in the transcript table printed earlier for this
# gene; the saved output looks stale. Re-run from a fresh kernel to confirm.

# Write up to three PNGs per transcript: unfiltered, filtered, and a zoomed
# view of the filtered mutations.
for _, transcript in transcript_info.iterrows():
    transcript_id = transcript['transcript_id']
    print(f"\nVisualizing transcript: {transcript_id}")

    # Plots go into the per-gene folder, which is expected to exist already.
    output_dir = f"./{gene_name}/"

    # Keyword arguments common to every plot of this transcript.
    plot_args = dict(
        gene_name=gene_name,
        transcript_id=transcript_id,
        alt_features=alt_features,
    )

    # Full-transcript view with the unfiltered mutation set, when available.
    if mutations_unfiltered is not None:
        visualizer.visualize_transcript(
            **plot_args,
            mutations_df=mutations_unfiltered,
            output_file=f'{output_dir}{transcript_id}_unfiltered.png',
        )

    # Both remaining plots need the filtered set; move on if it is missing.
    if mutations_filtered is None:
        continue

    # Full-transcript view with the filtered mutation set.
    visualizer.visualize_transcript(
        **plot_args,
        mutations_df=mutations_filtered,
        output_file=f'{output_dir}{transcript_id}_filtered.png',
    )

    # Zoomed view of the filtered mutation set.
    visualizer.visualize_transcript_zoomed(
        **plot_args,
        mutations_df=mutations_filtered,
        output_file=f'{output_dir}{transcript_id}_filtered_zoom.png',
        padding=100,
    )


Visualizing transcript: NM_144772.3_2
