In [None]:
import pandas as pd
import altair as alt

In [None]:
# Lift Altair's default cap on the number of rows a chart's dataset may contain,
# so the full score table can be plotted.
alt.data_transformers.disable_max_rows()
# Excel workbook that main() hands to read_data(); the path is relative to the
# notebook's working directory.
file = '../Data/20250508_BARD1scores_update_FILTERED.xlsx'

In [None]:
def read_data(file):
    """Load the score workbook and replace raw consequence terms with display labels.

    The ``simplified_consequence`` column is renamed to ``Consequence`` and its
    values are relabelled by an ordered list of rules (substring or exact
    match). Values that match no rule, and missing / non-string cells, are left
    unchanged.

    Parameters
    ----------
    file : str or path-like
        Excel workbook, passed straight to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The workbook contents with the relabelled ``Consequence`` column.

    Raises
    ------
    KeyError
        If the workbook has neither a ``simplified_consequence`` nor a
        ``Consequence`` column.
    """
    df = pd.read_excel(file).rename(columns={'simplified_consequence': 'Consequence'})
    if 'Consequence' not in df.columns:
        raise KeyError(
            "expected a 'simplified_consequence' (or 'Consequence') column in " + repr(file)
        )

    # (match kind, raw term or fragment, display label).
    # Applied top to bottom, exactly as the rules were originally written.
    relabel_rules = [
        ('contains', 'missense', 'Missense'),
        ('equals', 'synonymous_variant', 'Synonymous'),
        ('equals', 'intron_variant', 'Intron'),
        ('equals', 'stop_gained', 'Stop Gained'),
        ('equals', 'stop_lost', 'Stop Lost'),
        ('contains', 'site', 'Canonical Splice'),
        ('contains', 'ing_var', 'Splice Region'),
        ('contains', 'UTR', 'UTR Variant'),
        ('equals', 'start_lost', 'Start Lost'),
    ]

    for match_kind, term, label in relabel_rules:
        current = df['Consequence']
        if match_kind == 'contains':
            # na=False: blank or non-string cells count as "no match". Without it
            # the mask contains NaN and the .loc assignment raises ValueError.
            # The fragments hold no regex metacharacters, so a literal search
            # matches the same rows as the default regex search.
            hits = current.str.contains(term, regex=False, na=False)
        else:
            hits = current == term
        df.loc[hits, 'Consequence'] = label

    return df

In [None]:
def strip_plot(df, show_thresholds=False, save_path=None, ppi=500):
    """Display a strip (tick) plot of scores with one row per consequence label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a quantitative ``score`` column and a nominal
        ``Consequence`` column (the labels produced by ``read_data``).
    show_thresholds : bool, default False
        When True, overlay vertical rules at score -0.089 (red) and -0.077
        (blue). NOTE(review): presumably the non-functional / functional
        cut-offs, judging by the original variable names -- confirm.
    save_path : str or path-like, optional
        When given, the finished chart is also written to this path with
        ``Chart.save``. Nothing is written by default.
    ppi : int, default 500
        Pixels per inch for PNG export; only forwarded when ``save_path`` ends
        in ``.png``.

    Returns
    -------
    None
        The chart is rendered through ``Chart.display()``.
    """
    # Row and colour order. These must be the exact labels read_data() emits;
    # any label missing from this list is placed after the listed ones.
    consequence_order = [
        "Intron", "Missense", "Synonymous", "Stop Gained",
        "Canonical Splice", "Splice Region",
        "Start Lost", "Stop Lost", "UTR Variant",
    ]

    plot = alt.Chart(df).mark_tick(opacity = 1).encode(
        x = alt.X('score:Q',
                  axis = alt.Axis(title = '',
                                  titleFontSize = 20,
                                  labelFontSize = 24)
                 ),
        y = alt.Y('Consequence:N',
                  sort = consequence_order,
                  axis = alt.Axis(title = '',
                                  labelFontSize = 24)
                 ),
        color = alt.Color('Consequence:N',
                          legend = None,
                          sort = consequence_order,
                          scale = alt.Scale(scheme = 'category10')
                         )
    )

    if show_thresholds:
        nf_line = alt.Chart(pd.DataFrame({'x': [-0.089]})).mark_rule(color = 'red').encode(
            x = 'x')
        func_line = alt.Chart(pd.DataFrame({'x': [-0.077]})).mark_rule(color = 'blue').encode(
            x = 'x')
        plot = plot + nf_line + func_line

    # Size and axis config are applied last so they land on the top-level chart
    # whether or not the threshold layers were added.
    plot = plot.properties(
        width = 800,
        height = 400
    ).configure_axis(
        grid = False
    )

    if save_path is not None:
        # ppi only applies to raster output, so it is not passed for other formats.
        save_kwargs = {'ppi': ppi} if str(save_path).lower().endswith('.png') else {}
        plot.save(save_path, **save_kwargs)

    plot.display()

In [None]:
def main():
    """Read the workbook at the module-level ``file`` path and show its strip plot."""
    strip_plot(read_data(file))

In [None]:
# Run the notebook's pipeline: read the score workbook and display the strip plot.
main()