This notebook builds Figure 3D (odds ratio plot) and the odds ratio supplementary table. The notebook also requires external case-control cohort data, which must be provided in addition to the final data table Excel file.

In [None]:
import pandas as pd
import altair as alt
import numpy as np
import statsmodels.api as sm
from scipy import stats

In [None]:
#Final SGE data table (Excel workbook holding the SGE scores)
sge = '../Data/final_tables/BARD1_SGE_final_table.xlsx'

#External case-control workbooks
carriers = '../Data/case_control_data/CARRIERS_data/20250303_CARRIERS_data.xlsx' #CARRIERS data
bridges_all = '../Data/case_control_data/BRIDGES_data/20250815_BRIDGES_missense_all.xlsx' #BRIDGES, all missense variants
bridges_population = '../Data/case_control_data/BRIDGES_data/20250815_BRIDGES_missense_population.xlsx' #BRIDGES, population-based missense variants
bridges_all_ptv = '../Data/case_control_data/BRIDGES_data/20250815_BRIDGES_PTVs_all.xlsx' #BRIDGES, all PTVs
bridges_pop_ptv = '../Data/case_control_data/BRIDGES_data/20250815_BRIDGES_PTVs_pop.xlsx' #BRIDGES, population-based PTVs


#Total individuals sequenced in the CARRIERS and BRIDGES studies.
#'all' denotes every individual sequenced by the study; 'pop' denotes the
#population-based subset (ascertained without regard to family history of breast cancer).
carriers_totals = dict(
    cases_all = 39553, #Total number of cases sequenced
    controls_all = 35867, #Total number of controls sequenced
    cases_pop = 32247, #Number of cases used for population-based estimates
    controls_pop = 32544, #Number of controls used for population-based estimates
    er_cases = 3805, #Number of estrogen receptor (ER) negative cases used in population-based estimates
)

#Same annotations as above, but the numbers are taken from the BRIDGES study
bridges_totals = dict(
    cases_all = 60466,
    controls_all = 53461,
    cases_pop = 48826,
    controls_pop = 50703,
)

In [None]:
def read_sge(sge): #Reads SGE data
    """Load the SGE scores and split them into the variant subsets used for testing.

    Parameters
    ----------
    sge : str
        Path to the SGE final-table Excel workbook; its 'scores' sheet is read.

    Returns
    -------
    sge_dict : dict
        Maps a subset name to a DataFrame of SNVs with a determinate
        classification ('F' = functionally normal, 'NF' = functionally abnormal):
        'miss' (all missense), 'ring' (residues 26-122), 'ard' (425-545),
        'brct' (568-777), 'structured' (missense outside residues 1-25,
        123-424 and 546-567), 'idr' (123-424) and 'ptv' (stop_gained).
    sge_keys : list
        The keys of ``sge_dict`` in insertion order.
    """
    scores = pd.read_excel(sge, sheet_name = 'scores') #Reads SGE data

    #Keep SNVs that are not flagged 'WARN' and have a determinate functional call
    usable = (
        ~scores['variant_qc_flag'].isin(['WARN'])
        & scores['var_type'].isin(['snv'])
        & ~scores['functional_consequence'].isin(['indeterminate'])
    )
    scores = scores.loc[usable].rename(columns = {'functional_consequence': 'Classification',
                                                  'consequence': 'Consequence'}) #Renames for downstream code

    #Collapses the SGE function class into the short labels used downstream
    scores.loc[scores['Classification'] == 'functionally_normal', 'Classification'] = 'F'
    scores.loc[scores['Classification'] == 'functionally_abnormal', 'Classification'] = 'NF'

    sge_miss = scores[scores['Consequence'].isin(['missense_variant'])].copy() #missense variants only
    #Amino acid position (first run of digits in the protein change) for subsetting on BARD1 region
    sge_miss['AApos'] = sge_miss['amino_acid_change'].str.extract(r'([0-9]+)', expand=False).astype(int)

    #Residue ranges (end-exclusive) of each region
    ring_pos = list(range(26, 123)) #RING
    ard_pos = list(range(425, 546)) #ARD
    brct_pos = list(range(568, 778)) #BRCT
    idr_pos = list(range(123, 425)) #IDR between the RING and ARD
    not_domain_coords = list(range(1, 26)) + list(range(546, 568)) + idr_pos #Positions not in a domain

    def _missense_at(positions): #Missense variants whose residue falls in the given positions
        return sge_miss.loc[sge_miss['AApos'].isin(positions)]

    sge_dict = {'miss': sge_miss,
                'ring': _missense_at(ring_pos),
                'ard': _missense_at(ard_pos),
                'brct': _missense_at(brct_pos),
                'structured': sge_miss.loc[~(sge_miss['AApos'].isin(not_domain_coords))], #All structured missense variants
                'idr': _missense_at(idr_pos),
                'ptv': scores.loc[scores['Consequence'].isin(['stop_gained'])] #Nonsense variants
               } #Final datadict

    sge_keys = list(sge_dict.keys()) #Gets keys

    return sge_dict, sge_keys

In [None]:
def _prepare_carriers_snvs(frame): #Keeps the needed columns, restricts to SNVs and builds pos_id
    frame = frame[['Classification', '#CHROM', 'REF', 'ALT', 'CAVA_GENE', 'CAVA_CSN', 'CAVA_SO', 'Sample_AAF', 'Sample_ID', 'CaseControl','ER_status1', 'hg38_start']]

    #Require BOTH alleles to be a single character (same rule as the BRIDGES reader).
    #Checking ALT alone lets a multi-base REF with a one-base ALT receive an
    #SNV-style pos_id and falsely match an SGE SNV at that coordinate.
    is_snv = (frame['REF'].str.len() == 1) & (frame['ALT'].str.len() == 1)
    frame = frame.loc[is_snv].copy()

    frame['hg38_start'] = frame['hg38_start'].astype(str) #Sets hg38 coordinates as str data type
    frame['pos_id'] = frame['hg38_start'] + ':' + frame['ALT'] #Creates position ID ('<hg38 position>:<alt allele>')
    return frame


def read_carriers_data(cc): #Reads CARRIERS data
    """Load the CARRIERS workbook and return the BARD1 SNV observations by dataset.

    Parameters
    ----------
    cc : str
        Path to the CARRIERS case-control Excel workbook.

    Returns
    -------
    carriers_data : dict
        'all' - every BARD1 SNV row; 'pop' - rows whose CARRIERS_PROJECT is
        'population-based'; 'cc_pop_er_cases' - population-based rows from
        cases whose ER_status1 is 0 or 777.
        Each frame carries a 'pos_id' column for merging with the SGE data.
    carriers_keys : list
        The keys of ``carriers_data`` in insertion order.
    """
    raw = pd.read_excel(cc) #Reads case-control data
    bard1_raw = raw[raw['CAVA_GENE'].isin(['BARD1'])] #Filters only for BARD1
    pop_raw = bard1_raw[bard1_raw['CARRIERS_PROJECT'].isin(['population-based'])] #Population-based variants only

    cc_all = _prepare_carriers_snvs(bard1_raw)
    cc_pop = _prepare_carriers_snvs(pop_raw)

    #ER-negative cases. NOTE(review): codes 0 and 777 are both treated as ER-negative here;
    #confirm the meaning of 777 against the CARRIERS data dictionary.
    cc_pop_er_cases = cc_pop.loc[(cc_pop['ER_status1'].isin([0,777])) & (cc_pop['CaseControl'].isin(['Case']))]

    carriers_data = {'all': cc_all,
                     'pop': cc_pop,
                     'cc_pop_er_cases': cc_pop_er_cases
                    } #Data dictionary to return

    carriers_keys = list(carriers_data.keys())

    return carriers_data, carriers_keys

In [None]:
def _clean_bridges_sheet(df): #Restricts one BRIDGES sheet to SNVs and builds pos_id
    df = df[['Cases', 'Controls', 'chr', 'ref', 'alt', 'hg38_pos']]
    is_snv = (df['ref'].str.len() == 1) & (df['alt'].str.len() == 1)

    #Explicit copy so the pos_id assignment below writes to an independent frame
    #instead of a slice of the sheet (avoids SettingWithCopyWarning)
    df = df.loc[is_snv].copy()
    df = df.rename(columns = {'hg38_pos': 'pos'})

    df['pos_id'] = df['pos'].astype(str) + ':' + df['alt'] #'<hg38 position>:<alt allele>'

    return df[['Cases', 'Controls', 'pos_id']]


def read_bridges(all, population, all_ptv, pop_ptv): #Read BRIDGES Data
    """Load the four BRIDGES workbooks and return per-variant carrier counts.

    Parameters
    ----------
    all : str
        Path to the workbook of all missense variants. (The name shadows the
        ``all`` builtin; it is kept so existing callers keep working.)
    population : str
        Path to the workbook of population-based missense variants.
    all_ptv : str
        Path to the workbook of all PTVs.
    pop_ptv : str
        Path to the workbook of population-based PTVs.

    Returns
    -------
    bridges_data : dict
        'all' and 'pop', each a DataFrame with columns 'Cases', 'Controls' and
        'pos_id' holding the missense and PTV SNVs of that dataset concatenated.
    bridges_keys : list
        The keys of ``bridges_data`` in insertion order.
    """
    #Pulls BARD1 sheets
    sheet_paths = [all, population, all_ptv, pop_ptv]
    cleaned_dfs = [_clean_bridges_sheet(pd.read_excel(path, sheet_name = 'BARD1')) for path in sheet_paths]

    #Concatenates missense and PTV frames for all variants and for population-based variants
    bridges_data = {'all': pd.concat([cleaned_dfs[0], cleaned_dfs[2]]),
                    'pop': pd.concat([cleaned_dfs[1], cleaned_dfs[3]])
                   } #Data dictionary to return

    bridges_keys = list(bridges_data.keys())

    return bridges_data, bridges_keys

In [None]:
def _carriers_contingency(carriers_df, sge_df): #Case/control x SGE class table for one CARRIERS subset
    merged = pd.merge(carriers_df, sge_df, on = 'pos_id', how = 'inner') #Merges case-control and SGE data
    merged = merged.dropna(subset = ['Classification_y']) #Drops any rows without an SGE classification
    table = pd.crosstab(merged['CaseControl'], merged['Classification_y'])

    #Guarantee every cell exists: an absent class or an absent case/control row counts as 0
    return table.reindex(index = ['Case', 'Control'], columns = ['NF', 'F'], fill_value = 0)


def count_carriers(carriers_data, carriers_keys, sge_data, sge_keys): #Gets counts of functionally abnormal and functionally normal variants in cases and controls
    """Count NF / F variant observations in CARRIERS cases and controls.

    One output row is produced per (SGE subset, CARRIERS dataset) pair.

    For the ER-negative case subset ('cc_pop_er_cases') the control counts are
    taken from the population-based dataset ('pop') restricted to the SAME SGE
    subset. Previously they were fixed at 19 NF / 864 F (the all-missense
    population-based control counts) for every region, which compared
    region-specific case counts against all-missense control counts.

    Parameters
    ----------
    carriers_data : dict
        Dataset name -> CARRIERS DataFrame with 'pos_id', 'CaseControl' and
        'Classification' columns.
    carriers_keys : list
        Dataset names to process; each must be 'all', 'pop' or 'cc_pop_er_cases'.
    sge_data : dict
        SGE subset name -> DataFrame with 'pos_id' and 'Classification' columns.
    sge_keys : list
        SGE subset names to process.

    Returns
    -------
    pandas.DataFrame
        Columns: region, dataset, case_nf, control_nf, case_f, control_f,
        case_total, control_total, cohort ('carriers'), full_data_id.

    Raises
    ------
    ValueError
        If ``carriers_keys`` contains a dataset name with no known denominators.
    """
    import warnings

    #Keys into the module-level carriers_totals for (cases, controls) sequenced per dataset
    denominators = {'all': ('cases_all', 'controls_all'),
                    'pop': ('cases_pop', 'controls_pop'),
                    'cc_pop_er_cases': ('er_cases', 'controls_pop')}

    #All-missense population-based control counts (NF, F); only used when no 'pop' dataset is supplied
    legacy_er_controls = (19, 864)

    rows = []
    for key in sge_keys: #Iterates through each SGE dataset
        sge_df = sge_data[key] #Gets SGE df

        for carrier_key in carriers_keys: #Iterates through each CARRIERS dataset
            if carrier_key not in denominators:
                raise ValueError('Unknown CARRIERS dataset key: ' + repr(carrier_key))

            table = _carriers_contingency(carriers_data[carrier_key], sge_df)
            cases_nf = int(table.loc['Case', 'NF']) #Number of LoF variants in cases
            cases_f = int(table.loc['Case', 'F']) #Number of functionally normal variants in cases

            if carrier_key == 'cc_pop_er_cases':
                #The ER- subset holds cases only, so controls come from the population-based set for this region
                if 'pop' in carriers_data:
                    control_table = _carriers_contingency(carriers_data['pop'], sge_df)
                    controls_nf = int(control_table.loc['Control', 'NF'])
                    controls_f = int(control_table.loc['Control', 'F'])
                else:
                    warnings.warn("No 'pop' dataset supplied; using fixed all-missense control counts "
                                  "(19 NF / 864 F) for ER- region " + repr(key), stacklevel = 2)
                    controls_nf, controls_f = legacy_er_controls
            else:
                controls_nf = int(table.loc['Control', 'NF'])
                controls_f = int(table.loc['Control', 'F'])

            case_total_key, control_total_key = denominators[carrier_key]
            rows.append({'region': key,
                         'dataset': carrier_key,
                         'case_nf': cases_nf,
                         'control_nf': controls_nf,
                         'case_f': cases_f,
                         'control_f': controls_f,
                         'case_total': carriers_totals[case_total_key],
                         'control_total': carriers_totals[control_total_key]})

    #Builds final dataframe
    df = pd.DataFrame(rows, columns = ['region', 'dataset', 'case_nf', 'control_nf', 'case_f',
                                       'control_f', 'case_total', 'control_total'])

    df['cohort'] = 'carriers' #Sets cohort identifier
    df['full_data_id'] = df['cohort'] + '_' + df['dataset'] #Builds full data identifier linking cohort and specific dataset used

    return df

In [None]:
def count_bridges(bridges_data, bridges_keys, sge_data, sge_keys): #Analogous to count_carriers but for BRIDGES datasets
    """Sum BRIDGES case / control carrier counts over NF and F variants.

    One output row is produced per (SGE subset, BRIDGES dataset) pair. A class
    that has no matching variants (including an empty merge) contributes 0
    instead of raising ``KeyError``.

    Parameters
    ----------
    bridges_data : dict
        Dataset name -> DataFrame with 'Cases', 'Controls' and 'pos_id' columns.
    bridges_keys : list
        Dataset names to process; each must be 'all' or 'pop'.
    sge_data : dict
        SGE subset name -> DataFrame with 'pos_id' and 'Classification' columns.
    sge_keys : list
        SGE subset names to process.

    Returns
    -------
    pandas.DataFrame
        Columns: region, dataset, case_nf, control_nf, case_f, control_f,
        case_total, control_total, cohort ('bridges'), full_data_id.

    Raises
    ------
    ValueError
        If ``bridges_keys`` contains a dataset name with no known denominators.
    """
    #Keys into the module-level bridges_totals for (cases, controls) sequenced per dataset
    denominators = {'all': ('cases_all', 'controls_all'),
                    'pop': ('cases_pop', 'controls_pop')}

    rows = []
    for key in sge_keys:
        sge_df = sge_data[key]

        for bridge_key in bridges_keys:
            if bridge_key not in denominators:
                raise ValueError('Unknown BRIDGES dataset key: ' + repr(bridge_key))

            merged = pd.merge(bridges_data[bridge_key], sge_df, on = 'pos_id', how = 'inner')

            #Carrier counts summed per SGE class; reindex so both classes are always present
            counts = (merged.groupby('Classification')[['Cases', 'Controls']]
                      .sum()
                      .reindex(['NF', 'F'], fill_value = 0))

            case_total_key, control_total_key = denominators[bridge_key]
            rows.append({'region': key,
                         'dataset': bridge_key,
                         'case_nf': counts.loc['NF', 'Cases'],
                         'control_nf': counts.loc['NF', 'Controls'],
                         'case_f': counts.loc['F', 'Cases'],
                         'control_f': counts.loc['F', 'Controls'],
                         'case_total': bridges_totals[case_total_key],
                         'control_total': bridges_totals[control_total_key]})

    df = pd.DataFrame(rows, columns = ['region', 'dataset', 'case_nf', 'control_nf', 'case_f',
                                       'control_f', 'case_total', 'control_total'])

    df['cohort'] = 'bridges'
    df['full_data_id'] = df['cohort'] + '_' + df['dataset']

    return df

In [None]:
def odds_testing(row): #Custom function to do odds ratio testing
    """Compute odds ratios, confidence intervals and Fisher p-values for one row.

    Two 2x2 tables are tested, one for LoF (NF) variants and one for
    functionally normal (F) variants. Each table is laid out as

        [[case carriers,    case non-carriers],
         [control carriers, control non-carriers]]

    where non-carriers = individuals tested - carriers. Using the total tested
    as the second column (as before) yields a ratio of carrier proportions
    rather than an odds ratio and counts carriers in both columns.

    Parameters
    ----------
    row : pandas.Series
        Must provide 'case_nf', 'control_nf', 'case_f', 'control_f',
        'case_total' and 'control_total'.

    Returns
    -------
    tuple
        (nf_or, nf_lwr_ci, nf_upper_ci, nf_p_val, f_or, f_lwr_ci, f_upper_ci, f_p_val):
        odds ratio and its confidence bounds from statsmodels ``Table2x2``
        (default confidence level) and the p-value from scipy ``fisher_exact``
        (default two-sided alternative).
    """
    #Extracts total individuals sequenced
    case_total = row['case_total']
    control_total = row['control_total']

    def _test(case_count, control_count): #OR, CI bounds and p-value for one variant class
        table = np.array([[case_count, case_total - case_count],
                          [control_count, control_total - control_count]])

        _, p_value = stats.fisher_exact(table) #p-value from Fisher's exact test
        table_2x2 = sm.stats.Table2x2(table)
        lower_ci, upper_ci = table_2x2.oddsratio_confint() #Confidence interval computed once

        return table_2x2.oddsratio, lower_ci, upper_ci, p_value

    nf_or, nf_lwr_ci, nf_upper_ci, nf_p_val = _test(row['case_nf'], row['control_nf']) #LoF variants
    f_or, f_lwr_ci, f_upper_ci, f_p_val = _test(row['case_f'], row['control_f']) #Functionally normal variants

    return nf_or, nf_lwr_ci, nf_upper_ci, nf_p_val, f_or, f_lwr_ci, f_upper_ci, f_p_val

In [None]:
def make_odds_plot(df): #Builds visualization
    """Build and display the LoF odds-ratio plot.

    Keeps the ER-negative rows and the population-based rows (CARRIERS, BRIDGES
    and their combination), drops the 'miss' and 'idr' regions and the
    'CARRIERS (ER-) RING Mis.' row, then draws one point per row at ``nf_or``
    with an error bar from ``nf_lwr_ci`` to ``nf_upper_ci`` and a red rule at 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'dataset', 'full_data_id', 'region', 'nf_or',
        'nf_lwr_ci' and 'nf_upper_ci'. The frame is not modified.

    Returns
    -------
    The layered Altair chart, which is also displayed via ``plot.display()``.
    """
    keep_rows = (df['dataset'].isin(['cc_pop_er_cases'])) | (df['full_data_id'].isin(['carriers_pop+bridges_pop', 'carriers_pop', 'bridges_pop']))
    df = df.loc[keep_rows]
    #Explicit copy: columns are added below and must not be written to a slice of the caller's frame
    df = df.loc[~(df['region'].isin(['miss','idr']))].copy()

    df['region_data_id'] = df['region'] + '_' + df['full_data_id']

    #Axis label for each region/dataset combination
    label_dict = {'ring_carriers_pop': 'CARRIERS RING Mis.',
                  'ring_carriers_cc_pop_er_cases': 'CARRIERS (ER-) RING Mis.',
                  'ard_carriers_pop': 'CARRIERS ARD Mis.',
                  'ard_carriers_cc_pop_er_cases': 'CARRIERS (ER-) ARD Mis.',
                  'brct_carriers_pop': 'CARRIERS BRCT Mis.',
                  'brct_carriers_cc_pop_er_cases': 'CARRIERS (ER-) BRCT Mis.',
                  'structured_carriers_pop': 'CARRIERS Structured Mis.',
                  'structured_carriers_cc_pop_er_cases': 'CARRIERS (ER-) Structured Mis.',
                  'ring_bridges_pop': 'BRIDGES RING Mis.',
                  'ard_bridges_pop': 'BRIDGES ARD Mis.',
                  'brct_bridges_pop': 'BRIDGES BRCT Mis.',
                  'structured_bridges_pop': 'BRIDGES Structured Mis.',
                  'ard_carriers_pop+bridges_pop': 'BRIDGES + CARRIERS ARD Mis.',
                  'brct_carriers_pop+bridges_pop': 'BRIDGES + CARRIERS BRCT Mis.',
                  'ring_carriers_pop+bridges_pop': 'BRIDGES + CARRIERS RING Mis.',
                  'structured_carriers_pop+bridges_pop': 'BRIDGES + CARRIERS Structured Mis.',
                  'ptv_carriers_pop': 'CARRIERS PTV',
                  'ptv_carriers_cc_pop_er_cases': 'CARRIERS (ER-) PTV',
                  'ptv_bridges_pop': 'BRIDGES PTV',
                  'ptv_carriers_pop+bridges_pop': 'BRIDGES + CARRIERS PTV'
                 }

    df['labels'] = df['region_data_id'].map(label_dict)
    df = df.loc[~df['labels'].isin(['CARRIERS (ER-) RING Mis.'])]

    #Top-to-bottom order of the rows on the y axis
    sort_order = ['BRIDGES + CARRIERS PTV', 'BRIDGES PTV', 'CARRIERS PTV', 'CARRIERS (ER-) PTV',
                  'BRIDGES + CARRIERS Structured Mis.', 'BRIDGES Structured Mis.', 'CARRIERS Structured Mis.','CARRIERS (ER-) Structured Mis.',
                  'BRIDGES + CARRIERS RING Mis.', 'BRIDGES RING Mis.','CARRIERS RING Mis.','CARRIERS (ER-) RING Mis.',
                  'BRIDGES + CARRIERS ARD Mis.', 'BRIDGES ARD Mis.', 'CARRIERS ARD Mis.', 'CARRIERS (ER-) ARD Mis.',
                  'BRIDGES + CARRIERS BRCT Mis.', 'BRIDGES BRCT Mis.', 'CARRIERS BRCT Mis.','CARRIERS (ER-) BRCT Mis.'
                 ]

    #Colors, paired by position with the entries of `domains`
    palette = [
    '#61ade6', # RING
    '#8bb38b', # ARD
    '#f2a364', # BRCT
    '#989898', # structured
    'black', # PTV
    ]

    domains = [
        'ring',
        'ard',
        'brct',
        'structured',
        'ptv'
    ]

    base = alt.Chart(df)
    points = base.mark_point(
        filled = True,
        size = 50,
        color = 'black'
        ).encode(
        y = alt.Y('labels:O',
                  scale = alt.Scale(domain = sort_order),
                 axis = alt.Axis(title = '',
                                 labelFontSize = 16,
                                 labelLimit = 1000
                                )
                 ),
        x = alt.X('nf_or',
                 axis = alt.Axis(
                     title = 'Odds Ratio',
                     labelFontSize = 16,
                     titleFontSize = 18,
                     values = list(range(0, 14, 2))
                                 ),
                  scale = alt.Scale(domain = [0, 8]
                                   )
                 ),
        color = alt.Color('region',
                           scale = alt.Scale(
                               range = palette,
                               domain = domains
                           ),
                          legend = None
                          ),

        tooltip = ['nf_or']
        )

    ci_bars = base.mark_errorbar().encode(
        y = 'labels',
        x = alt.X('nf_lwr_ci:Q', axis = alt.Axis(title = '')), #x channel built with alt.X (was alt.Y)
        x2 = 'nf_upper_ci:Q',
        color = alt.Color('region',
                          scale = alt.Scale(
                              range = palette,
                              domain = domains
                          )
        )
    )

    #Vertical reference line at an odds ratio of 1
    line = alt.Chart(pd.DataFrame({'nf_or': [1]})).mark_rule(color = 'red').encode(
        x = 'nf_or')

    plot = (points + ci_bars + line).configure_view(
        stroke = None
        ).configure_axis(
        grid = False
        ).interactive()

    plot.display()

    return plot

In [None]:
def main():
    """Run the whole analysis: load data, count carriers, test, plot and export.

    Reads the SGE, CARRIERS and BRIDGES workbooks named by the module-level
    path variables, builds per-cohort and pooled CARRIERS+BRIDGES count rows,
    computes odds ratios for every row, draws the odds-ratio plot, and writes
    the formatted table to '../Data/final_tables/BARD1_OddsRatios_table.xlsx'
    (the table is also printed).
    """
    #Load SGE subsets and both case-control cohorts
    sge_subsets, sge_names = read_sge(sge)
    carriers_sets, carriers_names = read_carriers_data(carriers)
    bridges_sets, bridges_names = read_bridges(bridges_all, bridges_population, bridges_all_ptv, bridges_pop_ptv)

    #Per-cohort counts stacked into one frame
    per_cohort = pd.concat([
        count_carriers(carriers_sets, carriers_names, sge_subsets, sge_names),
        count_bridges(bridges_sets, bridges_names, sge_subsets, sge_names),
    ]).reset_index(drop = True)

    #Pool CARRIERS and BRIDGES per region/dataset; the ER- rows are left out of the pooling
    poolable = per_cohort.loc[~per_cohort['dataset'].isin(['cc_pop_er_cases'])]

    def _plus_join(values): #Joins the pooled identifiers, e.g. 'carriers_pop+bridges_pop'
        return '+'.join(values)

    pooling_rules = {count_col: 'sum' for count_col in
                     ['case_nf', 'control_nf', 'case_f', 'control_f', 'case_total', 'control_total']}
    pooling_rules['cohort'] = _plus_join
    pooling_rules['full_data_id'] = _plus_join
    pooled = poolable.groupby(['region', 'dataset']).agg(pooling_rules).reset_index()

    final_df = pd.concat([per_cohort, pooled]).reset_index(drop = True) #Pooled rows follow the per-cohort rows

    #Odds ratios, confidence bounds and p-values for every row
    stat_cols = ['nf_or', 'nf_lwr_ci', 'nf_upper_ci', 'nf_p', 'f_or', 'f_lwr_ci', 'f_upper_ci', 'f_p']
    final_df[stat_cols] = final_df.apply(odds_testing, axis = 1, result_type = 'expand')

    #Significant: LoF p < 0.05, LoF lower CI above 1, and functionally normal lower CI below 1
    is_significant = (final_df['nf_p'] < 0.05) & (final_df['nf_lwr_ci'] > 1) & (final_df['f_lwr_ci'] < 1)
    final_df['significant'] = 'FALSE'
    final_df.loc[is_significant, 'significant'] = 'TRUE'

    or_plot = make_odds_plot(final_df)

    #Table for export: odds ratios rendered as 'OR (lower-upper)' strings
    report = final_df.copy()
    for prefix in ['nf', 'f']:
        point = report[prefix + '_or'].round(2).astype(str)
        lower = report[prefix + '_lwr_ci'].round(2).astype(str)
        upper = report[prefix + '_upper_ci'].round(2).astype(str)
        report[prefix + '_or'] = point + ' (' + lower + '-' + upper + ')'

    report = report[['cohort', 'dataset', 'region', 'case_nf', 'control_nf', 'case_f', 'control_f',
                     'case_total', 'control_total', 'nf_or', 'nf_p', 'f_or', 'f_p', 'significant']]

    display_names = {'cohort': 'Cohort', 'dataset': 'Dataset', 'region': 'Region',
                     'case_nf': 'Case # LoF', 'control_nf': 'Control # LoF',
                     'case_f': 'Case # Normal', 'control_f': 'Control # Normal',
                     'case_total': '# Cases Tested', 'control_total': '# Controls Tested',
                     'nf_or': 'LoF OR', 'nf_p': 'LoF P-value',
                     'f_or': 'Normal OR', 'f_p': 'Normal P-value',
                     'significant': 'Significant'}
    report = report.rename(columns = display_names)

    report.to_excel('../Data/final_tables/BARD1_OddsRatios_table.xlsx', index = False)
    #To export the figure, call or_plot.save(<output path>, ppi = 500)
    print(report)

In [None]:
#Runs the full analysis when this cell is executed
main()