In [None]:
import pandas as pd
import altair as alt
from scipy import stats
import numpy as np

In [None]:
# Input Excel workbooks; the paths are relative, so they resolve against the
# kernel's current working directory.
bard1_data = '../Data/20250825_BARD1snvscores_filtered.xlsx'
brca1_data = '../Data/20240830_BRCA1_SGE_AllScores.xlsx'
# BRCA1 thresholds that read_data applies to the `snv_score_minmax` column:
#   score >= brca1_int_cutoff  -> 'functionally_normal'
#   score <= brca1_path_cutoff -> 'functionally_abnormal'
# Scores strictly between the two cutoffs keep a functional_consequence of None.
brca1_int_cutoff = -0.748
brca1_path_cutoff = -1.328

# Protein domain to compare; read_data only defines regions for 'RING' and 'BRCT'.
domain = 'BRCT'

In [None]:
def read_data(bard1, brca1, brca1_int, brca1_path_max, domain):
    """Load the BARD1 and BRCA1 score tables and keep missense variants in one domain.

    Parameters
    ----------
    bard1 : str or path-like
        Excel workbook with BARD1 variants. The columns ``consequence`` and
        ``amino_acid_change`` are read here.
    brca1 : str or path-like
        Excel workbook with BRCA1 variants. The columns ``pos``,
        ``snv_score_minmax`` and ``Consequence`` are read here.
    brca1_int : float
        BRCA1 variants with ``snv_score_minmax >= brca1_int`` are labelled
        ``'functionally_normal'``.
    brca1_path_max : float
        BRCA1 variants with ``snv_score_minmax <= brca1_path_max`` are labelled
        ``'functionally_abnormal'``.
    domain : {'RING', 'BRCT'}
        Which domain's coordinate windows to apply.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(bard1_df, brca1_df)``. ``bard1_df`` gains an integer ``AApos`` column;
        ``brca1_df`` gains a ``functional_consequence`` column that stays ``None``
        for scores strictly between the two cutoffs.

    Raises
    ------
    ValueError
        If ``domain`` is not one of the supported names.
    """
    # Half-open (start, stop) bounds, i.e. stop is exclusive as in range().
    # The BRCA1 window is matched against `pos`; the BARD1 window against the
    # number parsed out of `amino_acid_change`.
    # NOTE(review): BRCA1 `pos` looks like a coding-sequence coordinate while
    # BARD1 uses residue numbers -- confirm against the source workbooks.
    domain_regions = {
        'RING': {'brca1': (1, 302), 'bard1': (26, 123)},
        'BRCT': {'brca1': (4936, 5566), 'bard1': (568, 778)},
    }

    # Validate before the (slow) Excel reads so a typo fails fast and clearly
    # instead of surfacing later as an unbound local variable.
    if domain not in domain_regions:
        raise ValueError(
            f"Unsupported domain {domain!r}; expected one of {sorted(domain_regions)}"
        )
    brca1_region = list(range(*domain_regions[domain]['brca1']))
    bard1_region = list(range(*domain_regions[domain]['bard1']))

    bard1_df = pd.read_excel(bard1)
    brca1_df = pd.read_excel(brca1)

    # .copy() so the column assignments below write to an independent frame
    # rather than to a slice of the frame returned by read_excel.
    brca1_keep = (
        brca1_df['pos'].isin(brca1_region)
        & brca1_df['Consequence'].isin(['missense_variant'])
    )
    brca1_df = brca1_df.loc[brca1_keep].copy()
    brca1_df['functional_consequence'] = None
    # Abnormal is assigned first and normal second, so if the cutoffs ever
    # overlap the 'functionally_normal' label wins.
    brca1_df.loc[brca1_df['snv_score_minmax'] <= brca1_path_max, 'functional_consequence'] = 'functionally_abnormal'
    brca1_df.loc[brca1_df['snv_score_minmax'] >= brca1_int, 'functional_consequence'] = 'functionally_normal'

    bard1_df = bard1_df.loc[bard1_df['consequence'].isin(['missense_variant'])].copy()
    # Drop the first and last character of the change string and read the rest
    # as the residue number (e.g. 'A30G' -> 30).
    # NOTE(review): assumes single-letter amino-acid codes -- confirm.
    bard1_df['AApos'] = bard1_df['amino_acid_change'].str[1:-1].astype(int)
    bard1_df = bard1_df.loc[bard1_df['AApos'].isin(bard1_region)]

    return bard1_df, brca1_df

In [None]:
def _count_functional_classes(variants, gene):
    """Return (abnormal, normal, total) row counts for one gene's variant table."""
    total = len(variants)
    if total == 0:
        # Without this guard the fractions below would fail with a bare 0 / 0.
        raise ValueError(
            f"No {gene} missense variants to summarise; check the domain filter and input data."
        )
    # Count once and look both labels up; a label that never occurs counts as 0.
    counts = variants['functional_consequence'].value_counts()
    abnormal = int(counts.get('functionally_abnormal', 0))
    normal = int(counts.get('functionally_normal', 0))
    return abnormal, normal, total


def missense_sensitivity(bard1, brca1):
    """Compare how often missense variants are abnormal vs. normal in BARD1 and BRCA1.

    Prints the p-value and odds ratio of Fisher's exact test on the 2x2 table
    ``[[BRCA1 abnormal, BARD1 abnormal], [BRCA1 normal, BARD1 normal]]``,
    displays the summary table, and returns it.

    Parameters
    ----------
    bard1, brca1 : pandas.DataFrame
        One row per variant, each with a ``functional_consequence`` column using
        the labels ``'functionally_abnormal'`` and ``'functionally_normal'``.

    Returns
    -------
    pandas.DataFrame
        Four rows (gene x class) with columns ``Gene``, ``Percent of Variants``
        and ``Type``. Despite its name, ``Percent of Variants`` holds fractions
        in [0, 1] here. The denominator is every row of the gene's frame, so
        rows carrying neither label lower both fractions.

    Raises
    ------
    ValueError
        If either frame has no rows.
    """
    bard1_sensitive, bard1_normal, bard1_total = _count_functional_classes(bard1, 'BARD1')
    brca1_sensitive, brca1_normal, brca1_total = _count_functional_classes(brca1, 'BRCA1')

    bard1_sensitivity = bard1_sensitive / bard1_total
    bard1_normal_percent = bard1_normal / bard1_total
    brca1_sensitivity = brca1_sensitive / brca1_total
    brca1_normal_percent = brca1_normal / brca1_total

    # Only the two labelled classes enter the test; unlabelled rows are excluded.
    contingency_table = np.array([[brca1_sensitive, bard1_sensitive],
                                  [brca1_normal, bard1_normal]])

    odds_ratio, p_value = stats.fisher_exact(contingency_table)

    print(f"Fisher's exact test p-value: {p_value}")
    print(f"Odds ratio: {odds_ratio}")

    to_plot = pd.DataFrame({
        'Gene': ['BRCA1', 'BARD1', 'BRCA1', 'BARD1'],
        'Percent of Variants': [brca1_sensitivity, bard1_sensitivity,
                                brca1_normal_percent, bard1_normal_percent],
        'Type': ['Abnormal Missense', 'Abnormal Missense', 'Normal Missense', 'Normal Missense']
    })

    display(to_plot)
    return to_plot
    

In [None]:
def bar_plot(df, save_path=None):
    """Draw a faceted bar chart of variant percentages per gene and display it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Gene``, ``Type`` and ``Percent of Variants``. The values
        in ``Percent of Variants`` are multiplied by 100 for plotting, so they
        are expected to be fractions. ``df`` itself is left unmodified.
    save_path : str or path-like, optional
        When given, the chart is also written to this path via ``Chart.save``
        with ``ppi=500``. Nothing is written by default.

    Returns
    -------
    None
        The chart is rendered with ``plot.display()``.
    """
    # Scale a copy: multiplying the caller's frame in place would compound the
    # x100 every time this function (or its cell) is re-run on the same frame.
    percent_df = df.assign(**{'Percent of Variants': df['Percent of Variants'] * 100})

    gene_axis = alt.X('Gene',
                      axis = alt.Axis(title = '',
                                      labelFontSize = 16,
                                      titleFontSize = 18,
                                      labelAngle = 0
                                     )
                     )
    # NOTE(review): this y-axis title is shared by every `Type` facet, including
    # any facet that does not show loss-of-function variants -- confirm wording.
    percent_axis = alt.Y('Percent of Variants',
                         axis = alt.Axis(title = '% LoF Missense Vars.',
                                         labelFontSize = 16,
                                         titleFontSize = 18
                                        )
                        )
    # One panel per value of `Type`.
    type_facet = alt.Column('Type',
                            title = '',
                            header = alt.Header(
                                titleFontSize = 18
                            )
                           )

    plot = alt.Chart(percent_df).mark_bar(color = 'white',
                                          stroke = 'black').encode(
        x = gene_axis,
        y = percent_axis,
        column = type_facet
    ).properties(
        width = 125,
        height = 330
    ).configure_axis(
        grid = False
    ).configure_view(
        stroke = None
    )

    # Export is opt-in so no machine-specific path is baked into the notebook.
    if save_path is not None:
        plot.save(save_path, ppi = 500)
    plot.display()

In [None]:
def main():
    """Run the analysis end to end: load both tables, summarise them, plot the result."""
    # Inputs all come from the module-level configuration names.
    bard1_variants, brca1_variants = read_data(
        bard1_data,
        brca1_data,
        brca1_int_cutoff,
        brca1_path_cutoff,
        domain,
    )
    summary = missense_sensitivity(bard1_variants, brca1_variants)
    bar_plot(summary)

In [None]:
# Execute the whole pipeline (load -> summarise -> plot) using the module-level settings.
main()