In [None]:
import pandas as pd
import altair as alt

In [None]:
# Lift Altair's default row cap so the full variant table can be embedded in a chart.
alt.data_transformers.disable_max_rows()

# Excel workbook consumed by read_output() via main().
# NOTE(review): this is an absolute, machine-specific path; anyone else running
# the notebook has to edit it first.
rna_output = '/Users/ivan/Desktop/20250708_BARD1_202505_data_RNAclassified_beta.xlsx'

In [None]:
def read_output(file):
    """Load the RNA-classified variant table and split it by RNA classification.

    The workbook is read with ``pd.read_excel``, the ``L2RNA/DNA`` column is
    dropped, and an integer ``AApos`` column is derived from ``AAsub`` by
    stripping its first and last characters. Rows whose ``AAsub`` is missing,
    or whose stripped value is ``'-'``, are removed.

    Parameters
    ----------
    file : str or path-like
        Location of the Excel workbook. It must contain the columns
        ``L2RNA/DNA``, ``AAsub`` and ``RNA_classification``.

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame, pd.DataFrame)
        ``(abnormal_df, normal_df, df)``: the rows classified ``'high'`` or
        ``'low'``, the rows classified ``'normal'``, and every retained row.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    ValueError
        If a retained position string cannot be converted to ``int``.
    """
    df = pd.read_excel(file)
    df = df.drop(columns=['L2RNA/DNA'])

    # Strip the first and last character of AAsub to get the position text.
    # presumably AAsub looks like 'A123T' and '---' marks "no substitution" —
    # TODO confirm against the upstream pipeline.
    # The .str accessor propagates missing values instead of raising TypeError
    # the way slicing a float NaN inside a lambda does.
    aa_pos = df['AAsub'].str[1:-1]
    has_position = aa_pos.notna() & ~aa_pos.isin(['-'])

    # Copy before adding a column so the assignment never lands on a view of
    # the unfiltered frame (avoids SettingWithCopyWarning).
    df = df.loc[has_position].copy()
    df['AApos'] = df['AAsub'].str[1:-1].astype(int)

    abnormal_df = df[df['RNA_classification'].isin(['high', 'low'])]
    normal_df = df[df['RNA_classification'].isin(['normal'])]

    return abnormal_df, normal_df, df

In [None]:
def all_rna_performance(df):
    """Display an interactive scatter of RNA/DNA against amino-acid position.

    Every row of ``df`` is drawn as a point. Colour encodes
    ``functional_consequence`` and marker shape encodes
    ``RNA_classification``. The chart is rendered with ``display()``;
    nothing is returned.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``AApos``, ``RNA/DNA``,
        ``functional_consequence``, ``RNA_classification``, ``target`` and
        ``AAsub``.
    """
    plot = alt.Chart(df).mark_point().encode(
        x = 'AApos',
        y = 'RNA/DNA',
        color = alt.Color('functional_consequence',
                          # Explicit category order rather than Altair's default ordering.
                          sort = ['functionally_normal', 'indeterminate', 'functionally_abnormal']
                         ),
        shape = 'RNA_classification',
        tooltip = [alt.Tooltip('target', title = 'SGE Region: '),
                   # Title typo fixed: was "Substituion".
                   alt.Tooltip('AAsub', title = 'Amino Acid Substitution: ')
                  ]
    ).properties(
        width = 1200,
        height = 400,
        title = 'Distribution of RNA/DNA Across BARD1'
    ).interactive()

    plot.display()

In [None]:
def nmd_across_gene(df):
    """Display RNA/DNA against amino-acid position for stop-gained variants.

    Only rows whose ``Consequence`` is ``'stop_gained'`` are plotted. Colour
    encodes ``functional_consequence`` with a fixed two-colour scale and
    marker shape encodes ``RNA_classification``. The chart is rendered with
    ``display()``; nothing is returned.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``Consequence``, ``AApos``, ``RNA/DNA``,
        ``functional_consequence``, ``RNA_classification`` and ``target``.
    """
    is_stop_gained = df['Consequence'].isin(['stop_gained'])
    stop_gained_rows = df.loc[is_stop_gained]

    # Fixed palette: blue for functionally normal, red for functionally abnormal.
    # NOTE(review): 'indeterminate' is listed by other charts in this notebook
    # but is not in this domain; confirm such rows cannot occur here, since
    # they may be left uncoloured by the scale.
    palette = alt.Scale(
        domain=['functionally_normal', 'functionally_abnormal'],
        range=['#1f77b4', '#d62728'],
    )

    hover_fields = [
        alt.Tooltip('target', title='SGE Region: '),
        alt.Tooltip('AApos', title='Amino Acid Position: '),
    ]

    scatter = alt.Chart(stop_gained_rows).mark_point().encode(
        x='AApos',
        y='RNA/DNA',
        color=alt.Color('functional_consequence', scale=palette),
        shape='RNA_classification',
        tooltip=hover_fields,
    )
    scatter = scatter.properties(width=800, height=400, title='NMD Across BARD1')

    scatter.interactive().display()
    

In [None]:
def missense_rna_performance(df):
    """Display RNA/DNA against amino-acid position for missense variants.

    Only rows whose ``Consequence`` is ``'missense_variant'`` are plotted.
    Colour encodes ``functional_consequence`` and marker shape encodes
    ``RNA_classification``. The chart is rendered with ``display()``;
    nothing is returned.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``Consequence``, ``AApos``, ``RNA/DNA``,
        ``functional_consequence``, ``RNA_classification``, ``target`` and
        ``AAsub``.
    """
    df = df.loc[df['Consequence'].isin(['missense_variant'])]

    plot = alt.Chart(df).mark_point().encode(
        x = 'AApos',
        y = 'RNA/DNA',
        color = alt.Color('functional_consequence',
                          # Explicit category order rather than Altair's default ordering.
                          sort = ['functionally_normal', 'indeterminate', 'functionally_abnormal']
                         ),
        shape = 'RNA_classification',
        tooltip = [alt.Tooltip('target', title = 'SGE Region: '),
                   # Title typo fixed: was "Substituion".
                   alt.Tooltip('AAsub', title = 'Amino Acid Substitution: ')
                  ]
    ).properties(
        width = 800,
        height = 400,
        title = 'RNA Performance of Missense Variants'
    ).interactive()

    plot.display()

In [None]:
def synonymous_rna_performance(df):
    """Display RNA/DNA against amino-acid position for synonymous variants.

    Only rows whose ``Consequence`` is ``'synonymous_variant'`` are plotted.
    Colour encodes ``functional_consequence`` with a fixed two-colour scale
    and marker shape encodes ``RNA_classification``. The tooltip also shows
    ``snv_score``. The chart is rendered with ``display()``; nothing is
    returned.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``Consequence``, ``AApos``, ``RNA/DNA``,
        ``functional_consequence``, ``RNA_classification``, ``target`` and
        ``snv_score``.
    """
    is_synonymous = df['Consequence'].isin(['synonymous_variant'])
    synonymous_rows = df.loc[is_synonymous]

    # Fixed palette: blue for functionally normal, red for functionally abnormal.
    # NOTE(review): 'indeterminate' is listed by other charts in this notebook
    # but is not in this domain; confirm such rows cannot occur here, since
    # they may be left uncoloured by the scale.
    palette = alt.Scale(
        domain=['functionally_normal', 'functionally_abnormal'],
        range=['#1f77b4', '#d62728'],
    )

    hover_fields = [
        alt.Tooltip('target', title='SGE Region: '),
        alt.Tooltip('AApos', title='Amino Acid Position: '),
        alt.Tooltip('snv_score', title='SGE Score: '),
    ]

    scatter = alt.Chart(synonymous_rows).mark_point().encode(
        x='AApos',
        y='RNA/DNA',
        color=alt.Color('functional_consequence', scale=palette),
        shape='RNA_classification',
        tooltip=hover_fields,
    )
    scatter = scatter.properties(
        width=800,
        height=400,
        title='RNA Performance of Synonymous Variants',
    )

    scatter.interactive().display()

In [None]:
def main():
    """Load the RNA output workbook and render each chart in turn."""
    # The 'normal'-only frame is returned by read_output but not used here.
    abnormal, _normal, everything = read_output(rna_output)

    # Charts drawn from every retained variant.
    for draw in (all_rna_performance, nmd_across_gene):
        draw(everything)

    # Charts drawn only from variants whose RNA classification is 'high' or 'low'.
    for draw in (missense_rna_performance, synonymous_rna_performance):
        draw(abnormal)

In [None]:
# Entry point: executing this cell loads the workbook and displays all charts.
main()