In [1]:
from swissisoform.genome import GenomeHandler
from swissisoform.visualize import GenomeVisualizer
from swissisoform.isoform import AlternativeIsoform
from swissisoform.mutations import MutationHandler
import matplotlib.pyplot as plt

# Build the genome handler from two relative paths: the hg38 FASTA file
# and the hg38 NCBI RefSeq GTF file.
genome = GenomeHandler(
    '../data/genome_data/hg38.fa',
    '../data/genome_data/hg38.ncbiRefSeq.gtf',
)

# Look up the NAXE features and print them under a heading line.
naxe_features = genome.find_gene_features('NAXE')
print("NAXE features:", naxe_features, sep="\n")

# Fetch the NAXE gene statistics; the heading begins with a newline so it
# is visually separated from the feature listing printed just before it.
stats = genome.get_gene_stats('NAXE')
print("\nGene statistics:", stats, sep="\n")

# List the transcript IDs that are available for NAXE.
transcript_info = genome.get_transcript_ids('NAXE')
print("Available transcripts:", transcript_info, sep="\n")

# Pick the transcript ID from the final row; later cells reuse
# `transcript_id` when drawing the transcript.
last_transcript_row = transcript_info.iloc[-1]
transcript_id = last_transcript_row['transcript_id']

NAXE features:
                  chromosome                 source feature_type      start  \
241841   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28   transcript     150333   
241842   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28         exon     150333   
241843   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28         5UTR     150333   
241844   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28          CDS     150362   
241845   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28         exon     150658   
241846   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28          CDS     150658   
241847   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28         exon     150922   
241848   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28          CDS     150922   
241849   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28         exon     151114   
241850   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28          CDS     151114   
241851   chr1_MU273335v1_fix  ncbiRefSeq.2022-10-28         exon     151965   
241852   chr1_MU273335v1_fix  ncbiRef

In [2]:
# Create the alternative-isoform handler.
alt_isoforms = AlternativeIsoform()

# Load the BED file into the handler (path is relative).
ribo_bed_path = '../data/ribosome_profiling/RiboTISHV6_Ly2024_AnnoToTruncation_exonintersect.bed'
alt_isoforms.load_bed(ribo_bed_path)

# Ask the handler for the NAXE features in the form used for visualization;
# a later cell passes `alt_features` to the visualizer.
alt_features = alt_isoforms.get_visualization_features('NAXE')

In [3]:
# The visualizer wraps the genome handler created in the first cell.
visualizer = GenomeVisualizer(genome)

# Output filenames for the two renderings; both embed the chosen transcript ID.
plain_png = f'naxe_transcript_{transcript_id}.png'
alt_png = f'naxe_transcript_alt_{transcript_id}.png'

# First rendering: the transcript alone (no alternative isoform features).
visualizer.visualize_transcript('NAXE', transcript_id, alt_features=None, output_file=plain_png)

# Second rendering: the same transcript with the alternative isoform features.
# Kept as the final expression so the cell's displayed result is unchanged.
visualizer.visualize_transcript('NAXE', transcript_id, alt_features=alt_features, output_file=alt_png)

In [4]:
# Initialize the handler
mutation_handler = MutationHandler()

# Define the gene you want to analyze
gene_name = "NAXE"

# Get the mutation data
gnomad_data = await mutation_handler.fetch_gnomad_data(gene_name)

# Get all variants (no filter)
gnomad_variants = mutation_handler.process_gnomad_variants(gnomad_data)
print("Total gnomad variants:", len(gnomad_variants))

# Get gnomAD summary
gnomad_summary = await mutation_handler.get_gnomad_summary(gene_name)
print("gnomAD summary:")
print(gnomad_summary)

Total gnomad variants: 2202
gnomAD summary:
{'total_variants': 2202, 'consequence_types': {'missense_variant': 646, 'intron_variant': 569, '5_prime_UTR_variant': 285, '3_prime_UTR_variant': 272, 'synonymous_variant': 187, 'frameshift_variant': 65, 'splice_region_variant': 57, 'stop_gained': 44, 'non_coding_transcript_exon_variant': 21, 'inframe_deletion': 13, 'splice_acceptor_variant': 12, 'splice_donor_variant': 11, 'stop_lost': 10, 'start_lost': 5, 'inframe_insertion': 3, 'stop_retained_variant': 2}, 'mean_allele_frequency': np.float64(0.004228829187649131), 'variants_by_impact': {'with_hgvsc': np.int64(2196), 'with_hgvsp': np.int64(984)}}


In [5]:
# Show the processed gnomAD variants (built in the previous cell) as this
# cell's output.
gnomad_variants

Unnamed: 0,position,variant_id,reference,alternate,consequence,transcript_id,transcript_version,hgvs,hgvsc,hgvsp,...,eas_ac_hom,eas_ac_hemi,afr_ac,afr_an,afr_ac_hom,afr_ac_hemi,ami_ac,ami_an,ami_ac_hom,ami_ac_hemi
0,156591753,1-156591753-CGCACATGCGCCG-C,CGCACATGCGCCG,C,5_prime_UTR_variant,ENST00000368234,7,,,,...,0,0,0,16462,0,0,,,,
1,156591754,1-156591754-GCACATGC-G,GCACATGC,G,non_coding_transcript_exon_variant,ENST00000679913,1,,,,...,0,0,0,24502,0,0,0.0,538.0,0.0,0.0
2,156591756,1-156591756-A-C,A,C,non_coding_transcript_exon_variant,ENST00000679913,1,n.1A>C,n.1A>C,,...,0,0,0,2374,0,0,,,,
3,156591756,1-156591756-A-G,A,G,non_coding_transcript_exon_variant,ENST00000679913,1,n.1A>G,n.1A>G,,...,0,0,0,2374,0,0,,,,
4,156591757,1-156591757-C-G,C,G,non_coding_transcript_exon_variant,ENST00000679913,1,n.2C>G,n.2C>G,,...,0,0,0,2028,0,0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2197,156601761,1-156601761-A-G,A,G,3_prime_UTR_variant,ENST00000680269,1,c.*269A>G,c.*269A>G,,...,0,0,0,41444,0,0,0.0,912.0,0.0,0.0
2198,156601761,1-156601761-A-C,A,C,3_prime_UTR_variant,ENST00000680269,1,c.*269A>C,c.*269A>C,,...,0,0,1,41444,0,0,0.0,912.0,0.0,0.0
2199,156601779,1-156601779-G-A,G,A,3_prime_UTR_variant,ENST00000680269,1,c.*287G>A,c.*287G>A,,...,0,0,114,41538,0,0,0.0,912.0,0.0,0.0
2200,156601797,1-156601797-T-A,T,A,3_prime_UTR_variant,ENST00000680269,1,c.*305T>A,c.*305T>A,,...,0,0,1,41146,0,0,0.0,912.0,0.0,0.0


In [6]:
# Initialize the handler
mutation_handler = MutationHandler()

# Define the gene you want to analyze
gene_name = "NAXE"

# Get ClinVar data
clinvar_variants = await mutation_handler.get_clinvar_variants(gene_name)
print("Total ClinVar variants:", len(clinvar_variants))

# Get ClinVar summary
clinvar_summary = await mutation_handler.get_clinvar_summary(gene_name)
print("\nClinVar summary:")
print(clinvar_summary)

  df[col] = pd.to_numeric(df[col], errors='ignore')


Total ClinVar variants: 142

ClinVar summary:
{'total_variants': 142, 'clinical_significance': {'Uncertain significance': 61, 'Likely benign': 35, 'Benign': 25, 'Pathogenic': 15, 'Likely pathogenic': 3, 'Conflicting classifications of pathogenicity': 2, 'Benign/Likely benign': 1}, 'variant_types': {'single nucleotide variant': 126, 'Deletion': 5, 'Indel': 4, 'Microsatellite': 3, 'Duplication': 2, 'Inversion': 1, 'Insertion': 1}, 'molecular_consequences': {'missense variant': 60, 'synonymous variant': 32, 'intron variant': 25, 'nonsense': 8, 'frameshift variant': 7, '': 5, 'inframe_deletion': 2, '3 prime UTR variant': 2, 'initiator_codon_variant': 1, 'splice donor variant': 1}, 'chromosome_distribution': {'1': 141, '': 1}, 'review_status': {'criteria provided, single submitter': 105, 'criteria provided, multiple submitters, no conflicts': 26, 'no assertion criteria provided': 9, 'criteria provided, conflicting classifications': 2}, 'metadata': {'submission_count': {'total': np.int64(194

In [7]:
# Show the ClinVar variants (fetched in the previous cell) as this cell's
# output.
clinvar_variants

Unnamed: 0,variant_id,obj_type,accession,title,gene_name,review_status,clinical_significance,last_evaluated,molecular_consequences,protein_change,...,stop,ref_allele,alt_allele,submission_count,gene_id,gene_strand,variant_name,cdna_change,canonical_spdi,cytogenic_location
0,3381841,Microsatellite,VCV003381841,"NAXE, (GGGCC)n REPEAT EXPANSION, PROMOTER",NAXE,no assertion criteria provided,Pathogenic,2024/11/26 00:00,,,...,,,,1,128240,+,"NAXE, (GGGCC)n REPEAT EXPANSION, PROMOTER","NAXE, (GGGCC)n REPEAT EXPANSION, PROMOTER",,
1,3177706,single nucleotide variant,VCV003177706,NM_144772.3(NAXE):c.589C>T (p.Arg197Trp),NAXE,"criteria provided, single submitter",Uncertain significance,2021/01/13 00:00,missense variant,R197W,...,156593480.0,,,1,128240,+,NM_144772.3(NAXE):c.589C>T (p.Arg197Trp),c.589C>T,NC_000001.11:156593479:C:T,1q22
2,3030803,single nucleotide variant,VCV003030803,NM_144772.3(NAXE):c.183-6C>T,NAXE,no assertion criteria provided,Likely benign,2022/03/16 00:00,intron variant,,...,156592095.0,,,1,128240,+,NM_144772.3(NAXE):c.183-6C>T,NM_144772.3(NAXE):c.183-6C>T,NC_000001.11:156592094:C:T,1q22
3,3003683,single nucleotide variant,VCV003003683,NM_144772.3(NAXE):c.862C>T (p.Gln288Ter),NAXE,"criteria provided, single submitter",Uncertain significance,2023/09/05 00:00,nonsense,Q288*,...,156594079.0,,,1,128240,+,NM_144772.3(NAXE):c.862C>T (p.Gln288Ter),c.862C>T,NC_000001.11:156594078:C:T,1q22
4,3001761,single nucleotide variant,VCV003001761,NM_144772.3(NAXE):c.807G>A (p.Lys269=),NAXE,"criteria provided, single submitter",Likely benign,2023/08/17 00:00,synonymous variant,,...,156594024.0,,,1,128240,+,NM_144772.3(NAXE):c.807G>A (p.Lys269=),c.807G>A,NC_000001.11:156594023:G:A,1q22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,268119,Indel,VCV000268119,NM_144772.3(NAXE):c.804_807delinsA (p.Lys270del),NAXE,"criteria provided, conflicting classifications",Conflicting classifications of pathogenicity,2024/05/16 00:00,inframe_deletion,K270del,...,156594024.0,,,4,128240,+,NM_144772.3(NAXE):c.804_807delinsA (p.Lys270del),c.804_807delinsA,NC_000001.11:156594020:GAAG:A,1q22
138,268118,single nucleotide variant,VCV000268118,NM_144772.3(NAXE):c.516+1G>A,NAXE,no assertion criteria provided,Pathogenic,2016/11/14 00:00,splice donor variant,,...,156592671.0,,,1,128240,+,NM_144772.3(NAXE):c.516+1G>A,NM_144772.3(NAXE):c.516+1G>A,NC_000001.11:156592670:G:A,1q22
139,268117,single nucleotide variant,VCV000268117,NM_144772.3(NAXE):c.196C>T (p.Gln66Ter),NAXE,no assertion criteria provided,Pathogenic,2016/11/14 00:00,nonsense,Q66*,...,156592114.0,,,1,128240,+,NM_144772.3(NAXE):c.196C>T (p.Gln66Ter),c.196C>T,NC_000001.11:156592113:C:T,1q22
140,268116,single nucleotide variant,VCV000268116,NM_144772.3(NAXE):c.177C>A (p.Tyr59Ter),NAXE,no assertion criteria provided,Pathogenic,2016/11/14 00:00,nonsense,Y59*,...,156591981.0,,,1,128240,+,NM_144772.3(NAXE):c.177C>A (p.Tyr59Ter),c.177C>A,NC_000001.11:156591980:C:A,1q22


In [8]:
# Location of the aggregator CSV for this gene (relative path).
aggregator_csv_path = "../data/mutation_data/NAXE.csv"

# Fresh handler for reading the aggregator data.
mutation_handler = MutationHandler()

# Read the aggregator variants from the CSV and report how many there are.
aggregator_variants = mutation_handler.get_aggregator_variants(aggregator_csv_path)
print("Total aggregator variants:", len(aggregator_variants))

# Build the summary from the same CSV and print it on the heading's line.
aggregator_summary = mutation_handler.get_aggregator_summary(aggregator_csv_path)
print("Aggregator summary:", aggregator_summary)

Total aggregator variants: 383
Aggregator summary: {'csv_path': '../data/mutation_data/NAXE.csv', 'total_variants': 383, 'impact_counts': {'MODIFIER': 321, 'MODERATE': 36, 'LOW': 24, 'HIGH': 2}, 'consequence_counts': {'downstream_gene_variant': 224, 'intron_variant': 56, 'missense_variant': 35, 'upstream_gene_variant': 26, 'synonymous_variant': 14, '3_prime_UTR_variant': 14, 'splice_polypyrimidine_tract_variant,intron_variant': 5, 'splice_region_variant,intron_variant': 2, '5_prime_UTR_variant': 1, 'missense_variant,splice_region_variant': 1, 'splice_donor_region_variant,intron_variant': 1, 'stop_gained': 1, 'splice_region_variant,synonymous_variant': 1, 'splice_donor_variant': 1, 'splice_region_variant,splice_polypyrimidine_tract_variant,intron_variant': 1}, 'position_stats': {'chromosomes': ['1'], 'min_position': 156591106, 'max_position': 156599294}, 'frequency_stats': {'mean_frequency': np.float64(0.028479332375979115), 'median_frequency': np.float64(9.28e-05), 'max_frequency': np.

In [9]:
# Show the aggregator variants (read from the CSV in the previous cell) as
# this cell's output.
aggregator_variants

Unnamed: 0,varId,impact,consequence,hgvsc,hgvsp,allelecount,allelnumber,allelefrequency,homozygouscount,gnomADg_AC,gnomADg_AN,gnomADg_AF,chromosome,position,reference,alternate
0,1:156591106:G:A,MODIFIER,upstream_gene_variant,-,-,5,1732,0.002890,0,,,,1,156591106,G,A
1,1:156591114:G:C,MODIFIER,upstream_gene_variant,-,-,9,1748,0.005150,2,,,,1,156591114,G,C
2,1:156591131:T:TAG,MODIFIER,upstream_gene_variant,-,-,2,1748,0.001140,0,,,,1,156591131,T,TAG
3,1:156591244:G:C,MODIFIER,upstream_gene_variant,-,-,2,1900,0.001050,0,,,,1,156591244,G,C
4,1:156591269:T:C,MODIFIER,upstream_gene_variant,-,-,1,1848,0.000541,0,,,,1,156591269,T,C
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378,1:156599232:G:A,MODIFIER,downstream_gene_variant,-,-,2,21038,0.000095,0,,,,1,156599232,G,A
379,1:156599271:C:T,MODIFIER,downstream_gene_variant,-,-,2,15526,0.000129,0,,,,1,156599271,C,T
380,1:156599285:G:T,MODIFIER,downstream_gene_variant,-,-,20,15072,0.001330,2,,,,1,156599285,G,T
381,1:156599291:A:C,MODIFIER,downstream_gene_variant,-,-,31,14890,0.002080,2,,,,1,156599291,A,C


In [10]:
# Initialize handler
mutation_handler = MutationHandler()

# Get data from all sources
standardized_mutations = await mutation_handler.get_visualization_ready_mutations(
    gene_name='NAXE',
    aggregator_csv_path="../data/mutation_data/NAXE.csv"
)

  df[col] = pd.to_numeric(df[col], errors='ignore')


In [11]:
# Show the visualization-ready mutations (built in the previous cell) as
# this cell's output.
standardized_mutations

Unnamed: 0,position,variant_id,reference,alternate,source,impact,hgvsc,hgvsp,allele_frequency,clinical_significance,relative_position
0,156591753.0,1-156591753-CGCACATGCGCCG-C,CGCACATGCGCCG,C,gnomAD,5_prime_UTR_variant,.,.,0.000001,Unknown,647.0
1,156591754.0,1-156591754-GCACATGC-G,GCACATGC,G,gnomAD,non_coding_transcript_exon_variant,.,.,0.000011,Unknown,648.0
2,156591756.0,1-156591756-A-G,A,G,gnomAD,non_coding_transcript_exon_variant,n.1A>G,.,0.000000,Unknown,650.0
3,156591756.0,1-156591756-A-C,A,C,gnomAD,non_coding_transcript_exon_variant,n.1A>C,.,0.000028,Unknown,650.0
4,156591757.0,1-156591757-C-A,C,A,gnomAD,non_coding_transcript_exon_variant,n.2C>A,.,0.000000,Unknown,651.0
...,...,...,...,...,...,...,...,...,...,...,...
2722,156599232.0,1:156599232:G:A,G,A,Aggregator,downstream_gene_variant,-,-,0.000095,Unknown,8126.0
2723,156599271.0,1:156599271:C:T,C,T,Aggregator,downstream_gene_variant,-,-,0.000129,Unknown,8165.0
2724,156599285.0,1:156599285:G:T,G,T,Aggregator,downstream_gene_variant,-,-,0.001330,Unknown,8179.0
2725,156599291.0,1:156599291:A:C,A,C,Aggregator,downstream_gene_variant,-,-,0.002080,Unknown,8185.0
