In [None]:
import pandas as pd
import altair as alt

In [None]:
# Relative path to the Excel workbook that main() passes to read_output().
rna_output = '../Data/RNAscoring_dev_data/20250425_BARD1_RNAabnormal.xlsx'

In [None]:
def read_output(file):
    """Load the scoring workbook and add an integer ``AApos`` column.

    Parameters
    ----------
    file
        Anything ``pandas.read_excel`` accepts. The sheet must contain an
        ``L2RNA/DNA`` column (which is dropped) and an ``AAsub`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded sheet without ``L2RNA/DNA`` and with ``AApos`` appended.
    """
    table = pd.read_excel(file).drop(columns=['L2RNA/DNA'])

    # Drop the first and last character of every AAsub entry and cast the
    # remainder to int (assumes entries look like "A123T" -- TODO confirm).
    table['AApos'] = table['AAsub'].str[1:-1].astype(int)

    return table

In [None]:
def nmd_across_gene(df):
    """Display a scatter of ``RNA/DNA`` against ``AApos`` for stop-gained rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``Consequence``, ``AApos``, ``RNA/DNA``,
        ``RNA_classification``, ``functional_consequence`` and ``target``
        columns. It is filtered, not modified.

    Returns
    -------
    None
        The interactive chart (titled 'NMD Across BARD1') is displayed,
        not returned.
    """
    # Only rows whose Consequence is exactly 'stop_gained' are plotted.
    is_stop_gained = df['Consequence'].isin(['stop_gained'])
    stop_gained = df[is_stop_gained]

    hover_fields = [
        alt.Tooltip('target', title = 'SGE Region: '),
        alt.Tooltip('AApos', title = 'Amino Acid Position: '),
    ]

    scatter = alt.Chart(stop_gained).mark_point()
    scatter = scatter.encode(
        x = 'AApos',
        y = 'RNA/DNA',
        color = 'RNA_classification',
        shape = 'functional_consequence',
        tooltip = hover_fields,
    )
    scatter = scatter.properties(width = 800, height = 400, title = 'NMD Across BARD1')

    # interactive() returns the chart with interaction enabled; show that one.
    scatter.interactive().display()
    

In [None]:
def missense_rna_performance(df):
    """Display a scatter of ``RNA/DNA`` against ``AApos`` for missense rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``Consequence``, ``AApos``, ``RNA/DNA``,
        ``RNA_classification``, ``functional_consequence``, ``target`` and
        ``AAsub`` columns. It is filtered, not modified.

    Returns
    -------
    None
        The interactive chart is displayed, not returned.
    """
    # Only rows whose Consequence is exactly 'missense_variant' are plotted.
    missense = df.loc[df['Consequence'].isin(['missense_variant'])]

    # Tooltip titles are shown to the reader on hover; the second one was
    # previously misspelled ("Substituion").
    hover_fields = [
        alt.Tooltip('target', title = 'SGE Region: '),
        alt.Tooltip('AAsub', title = 'Amino Acid Substitution: '),
    ]

    plot = alt.Chart(missense).mark_point().encode(
        x = 'AApos',
        y = 'RNA/DNA',
        color = 'RNA_classification',
        shape = 'functional_consequence',
        tooltip = hover_fields,
    ).properties(
        width = 800,
        height = 400,
        title = 'RNA Performance of Missense Variants'
    ).interactive()

    plot.display()

In [None]:
def synonymous_rna_performance(df):
    """Display a scatter of ``RNA/DNA`` against ``AApos`` for synonymous rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``Consequence``, ``AApos``, ``RNA/DNA``,
        ``RNA_classification``, ``functional_consequence``, ``target`` and
        ``snv_score`` columns. It is filtered, not modified.

    Returns
    -------
    None
        The interactive chart is displayed, not returned.
    """
    # Only rows whose Consequence is exactly 'synonymous_variant' are plotted.
    is_synonymous = df['Consequence'].isin(['synonymous_variant'])
    synonymous = df[is_synonymous]

    hover_fields = [
        alt.Tooltip('target', title = 'SGE Region: '),
        alt.Tooltip('AApos', title = 'Amino Acid Position: '),
        alt.Tooltip('snv_score', title = 'SGE Score: '),
    ]

    scatter = alt.Chart(synonymous).mark_point()
    scatter = scatter.encode(
        x = 'AApos',
        y = 'RNA/DNA',
        color = 'RNA_classification',
        shape = 'functional_consequence',
        tooltip = hover_fields,
    )
    scatter = scatter.properties(
        width = 800,
        height = 400,
        title = 'RNA Performance of Synonymous Variants',
    )

    # interactive() returns the chart with interaction enabled; show that one.
    scatter.interactive().display()

In [None]:
def main():
    """Load the workbook at ``rna_output`` and display the three charts.

    The charts are produced in a fixed order: stop-gained, missense,
    synonymous. Nothing is returned.
    """
    scored = read_output(rna_output)

    for render in (nmd_across_gene, missense_rna_performance, synonymous_rna_performance):
        render(scored)

In [None]:
# Run the pipeline: load the workbook and display the three charts.
main()