In [None]:
import pandas as pd
import altair as alt

In [None]:
# Relative path to the tab-separated score table; main() passes it to read_scores().
file = '../Data/20250423_BARD1_snvscores_IGVFupload.tsv' #SGE Data File

In [None]:
def read_scores(file): #loads the score columns from a tab-separated file
    """Load the exon, consequence and score columns from a tab-delimited table.

    Parameters
    ----------
    file : path or file-like
        Anything ``pd.read_csv`` accepts; parsed with a tab separator.

    Returns
    -------
    pandas.DataFrame
        Columns ``exon``, ``Consequence`` and ``snv_score`` (in that order).

    Raises
    ------
    KeyError
        If any of ``exon``, ``consequence`` or ``score`` is absent from the file.
    """
    wanted = ['exon', 'consequence', 'score']
    new_names = {'score': 'snv_score', 'consequence': 'Consequence'}

    table = pd.read_csv(file, sep = '\t')

    # Select first, then rename, so the output column order follows `wanted`.
    return table[wanted].rename(columns = new_names)

In [None]:
def prep_data(df): #Renames categories to be nicer for legend
    """Relabel raw consequence terms with display names for the legend.

    Rules are applied in order, each one against the labels as they stand
    after the previous rule, so earlier rules win (e.g. a term containing
    both 'missense' and 'splic' ends up as 'Missense'). Values matching no
    rule, including missing values, are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column named ``Consequence``.

    Returns
    -------
    pandas.DataFrame
        A relabelled copy; the frame passed in is not modified, so the
        function can be re-run on the same input with the same result.
    """
    # (pattern, display label, exact match?) -- order matters, see docstring.
    rules = [
        ('missense', 'Missense', False),
        ('synonymous_variant', 'Synonymous', True),
        ('intron_variant', 'Intron', True),
        ('stop_gained', 'Stop Gained', True),
        ('stop_lost', 'Stop Lost', True),
        ('splic', 'Splice', False),
        ('UTR', 'UTR Variant', False),
        ('start_lost', 'Start Lost', True),
    ]

    relabelled = df.copy()

    for pattern, label, exact in rules:
        current = relabelled['Consequence']
        if exact:
            mask = current == pattern
        else:
            # na=False keeps the mask strictly boolean; without it a missing
            # consequence yields NaN in the mask and .loc raises ValueError.
            mask = current.str.contains(pattern, regex = False, na = False)
        relabelled.loc[mask, 'Consequence'] = label

    return relabelled


In [None]:
def make_histogram(df, bins = 50): #makes histogram
    """Display a histogram of SGE scores coloured by consequence.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``snv_score`` (binned on the x axis) and ``Consequence``
        (used for the bar colour and legend).
    bins : int, optional
        Upper bound on the number of histogram bins, passed to
        ``alt.Bin(maxbins=...)``. Defaults to 50.

    Returns
    -------
    None
        The chart is rendered with ``display()``; nothing is returned.
    """
    # Lift Altair's row limit so larger frames can be embedded in the chart.
    # NOTE(review): this changes Altair's data-transformer settings, not just
    # this chart -- confirm that is acceptable for later cells.
    alt.data_transformers.disable_max_rows() #gets rid of max data length problem

    final_title = f'Distribution of BARD1 SGE Scores (n = {len(df)})' #used to build title

    # Order for the legend. Labels must match the values in the 'Consequence'
    # column exactly, hence 'UTR Variant' rather than 'UTR'.
    legend_order = ["Intron", "Missense", "Synonymous", "Stop Gained", "Splice", "Start Lost", 'Stop Lost', 'UTR Variant']

    x_axis = alt.Axis(title = 'SGE Score', labelFontSize = 16, titleFontSize = 20)
    y_axis = alt.Axis(title = 'Number of Variants', labelFontSize = 16, titleFontSize = 20)

    #Builds histogram
    histogram = alt.Chart(df).mark_bar().encode(
            alt.X('snv_score', axis = x_axis, bin = alt.Bin(maxbins = bins)),
            alt.Y('count()', axis = y_axis),
            color = alt.Color('Consequence:N', scale = alt.Scale(scheme = 'category10'), sort = legend_order, legend = alt.Legend(titleFontSize = 16, labelFontSize = 14))
    ).properties(
        width = 800,
        height = 400,
        title = alt.TitleParams(text = final_title, fontSize = 22)
    ).configure_axis(
        grid = False
    ).interactive()

    # To export a static image instead: histogram.save('<output path>.png', ppi = 500)
    histogram.display()


In [None]:
def main():
    """Run the pipeline: read the score file, relabel consequences, show the histogram."""
    # `file` is the module-level path to the score table.
    labelled_scores = prep_data(read_scores(file))
    make_histogram(labelled_scores)

In [None]:
# Entry point: loads the scores, relabels consequences and displays the histogram.
main()