In [None]:
import pandas as pd
import altair as alt

from pydeseq2.dds import DeseqDataSet
from pydeseq2.default_inference import DefaultInference
from pydeseq2.ds import DeseqStats

In [None]:
# Input workbooks used by main(): bard1_data is read by process_data() and
# depth_file is read by std_error().
bard1_data = '../Data/20250508_BARD1scores_update_FILTERED.xlsx'
# NOTE(review): absolute, user-specific path — this will not resolve on another
# machine; consider relocating the file next to the other data and using a
# relative path.
depth_file = '/Users/ivan/Desktop/test_excel_outputs/20250717_BARD1_CollapsedMedianDepth.xlsx'

In [None]:
def process_data(file):
    """Load the score workbook and keep the ``BARD1_X4`` missense variants.

    A row is kept when ``exon`` is ``'BARD1_X4'``, ``simplified_consequence`` is
    ``'missense_variant'`` and ``functional_consequence`` is one of
    ``'functionally_normal'``, ``'functionally_abnormal'`` or
    ``'indeterminate'``.

    Parameters
    ----------
    file : str or path-like
        Excel workbook passed straight to ``pandas.read_excel``.

    Returns
    -------
    df : pandas.DataFrame
        The filtered rows plus two added columns: ``overlapping`` (``'Y'``
        when ``snvlib_lib2`` is not null, otherwise ``'N'``) and
        ``consequence_overlap`` (``functional_consequence + '+' + overlapping``).
    not_normal : pandas.DataFrame
        Rows of ``df`` that are ``'functionally_abnormal'`` or
        ``'indeterminate'`` and have a non-null ``snvlib_lib2``.
    """
    raw = pd.read_excel(file)

    keep = (
        raw['exon'].isin(['BARD1_X4'])
        & raw['simplified_consequence'].isin(['missense_variant'])
        & raw['functional_consequence'].isin(
            ['functionally_normal', 'functionally_abnormal', 'indeterminate']
        )
    )
    # .copy() gives an independent frame, so the column assignments below do
    # not trigger SettingWithCopyWarning on the filtered selection.
    df = raw.loc[keep].copy()

    # A populated snvlib_lib2 value is what marks a variant as "overlapping".
    in_lib2 = df['snvlib_lib2'].notna()
    df['overlapping'] = 'N'
    df.loc[in_lib2, 'overlapping'] = 'Y'

    df['consequence_overlap'] = df['functional_consequence'] + '+' + df['overlapping']

    is_not_normal = df['functional_consequence'].isin(['functionally_abnormal', 'indeterminate'])
    # Copied as well so callers can modify the subset without touching df.
    not_normal = df.loc[is_not_normal & in_lib2].copy()

    return df, not_normal

In [None]:
def std_error(df, depth, pos_domain=(214780602, 214781509)):
    """Display four vertically concatenated charts plotted against ``pos``.

    From top to bottom: standard error, median depth, SGE score, and the
    minimum count across the twelve D05/D13 replicate-library columns. Every
    chart uses the same reversed x-domain with axis labels hidden.

    Parameters
    ----------
    df : pandas.DataFrame
        Variant table. Must contain ``pos``, ``standard_error``, ``score``,
        ``functional_consequence``, ``target``, ``amino_acid_change`` and the
        twelve ``D05_*``/``D13_*`` count columns listed below. The frame is
        not modified.
    depth : str or path-like
        Excel workbook read with ``pandas.read_excel``; must contain ``pos``,
        ``median_depth``, ``target`` and ``amino_acid``.
    pos_domain : sequence of two numbers, optional
        Inclusive ``(start, end)`` position window used for every x-axis and
        for filtering the depth table. Defaults to the window that was
        previously hard-coded.

    Returns
    -------
    None
        The combined chart is rendered via ``display()``.
    """
    count_columns = [
        'D05_R1_lib1', 'D05_R1_lib2', 'D05_R2_lib1', 'D05_R2_lib2', 'D05_R3_lib1', 'D05_R3_lib2',
        'D13_R1_lib1', 'D13_R1_lib2', 'D13_R2_lib1', 'D13_R2_lib2', 'D13_R3_lib1', 'D13_R3_lib2',
    ]
    domain_start, domain_end = pos_domain

    # assign() builds a new frame, so the caller's DataFrame does not silently
    # gain a 'min_tc_count' column.
    plot_df = df.assign(min_tc_count=df[count_columns].min(axis=1))

    def position_axis():
        # A fresh encoding per chart: same window, reversed, tick labels hidden.
        return alt.X('pos',
                     axis=alt.Axis(labels=False),
                     scale=alt.Scale(domain=[domain_start, domain_end],
                                     reverse=True))

    def variant_tooltips(*extra):
        # Tooltip fields common to every per-variant scatter, plus any extras.
        return [alt.Tooltip('target', title='SGE Target: '),
                alt.Tooltip('amino_acid_change', title='Amino Acid Sub.: '),
                alt.Tooltip('score', title='SGE Score: '),
                alt.Tooltip('functional_consequence', title='Functional Class: '),
                *extra]

    def variant_scatter(y, title, *extra_tooltips):
        # One point per variant, coloured by functional class.
        return alt.Chart(plot_df).mark_point().encode(
            x=position_axis(),
            y=y,
            color='functional_consequence',
            tooltip=variant_tooltips(*extra_tooltips),
        ).properties(
            width=2000,
            height=400,
            title=title,
        )

    error_chart = variant_scatter('standard_error', 'Std. Error vs. Position')

    count_chart = variant_scatter(
        alt.Y('min_tc_count', scale=alt.Scale(domain=[0, 1000])),
        'Min. Count vs. Position',
        alt.Tooltip('min_tc_count', title='Min. Count in TC Library: '),
    )

    score_chart = variant_scatter('score', 'SGE Score vs. Position')

    depth_df = pd.read_excel(depth)
    # Inclusive on both ends; matches the previous isin(range(start, end + 1))
    # filter for integer positions without materialising every position.
    depth_df = depth_df.loc[depth_df['pos'].between(domain_start, domain_end)]

    depth_chart = alt.Chart(depth_df).mark_line().encode(
        x=position_axis(),
        y='median_depth',
        color='target',
        tooltip=[alt.Tooltip('target', title='SGE Target: '),
                 alt.Tooltip('amino_acid', title='Amino Acid: ')],
    ).properties(
        width=2000,
        height=400,
        title='Median Normalized Depth vs. Position',
    )

    # '&' stacks the charts vertically in this order.
    final_plot = error_chart & depth_chart & score_chart & count_chart
    final_plot = final_plot.interactive()
    final_plot.display()

In [None]:
def counts_for_abnormal(ab_df):
    """Display a grid of per-library count bar charts, one facet per variant.

    Parameters
    ----------
    ab_df : pandas.DataFrame
        Must contain ``snvlib_lib1``, ``snvlib_lib2``, ``amino_acid_change``
        and the twelve ``D05_*``/``D13_*`` count columns listed below.

    Returns
    -------
    None
        The faceted chart (14 facets per row, keyed by ``amino_acid_change``)
        is rendered via ``display()``.
    """
    library_columns = [
        'D05_R1_lib1', 'D05_R1_lib2', 'D05_R2_lib1', 'D05_R2_lib2', 'D05_R3_lib1', 'D05_R3_lib2',
        'D13_R1_lib1', 'D13_R1_lib2', 'D13_R2_lib1', 'D13_R2_lib2', 'D13_R3_lib1', 'D13_R3_lib2',
    ]

    # The two snvlib columns are selected here but are not part of the melt
    # below, so they never reach the chart.
    selected = ab_df[['snvlib_lib1', 'snvlib_lib2', *library_columns, 'amino_acid_change']]

    # Wide -> long: one row per (amino_acid_change, library) with its count.
    long_counts = selected.melt(
        id_vars=['amino_acid_change'],
        value_vars=library_columns,
        var_name='library',
        value_name='count',
    )

    hover = [
        alt.Tooltip('library', title='Library: '),
        alt.Tooltip('count', title='Count: '),
    ]
    bars = alt.Chart(long_counts).mark_bar().encode(
        x=alt.X('library:N', axis=alt.Axis(labelAngle=-45)),
        y='count:Q',
        color='library:N',
        tooltip=hover,
    )
    sized_bars = bars.properties(height=150, width=200)

    # 14 facets per row of the grid.
    grid = sized_bars.facet(facet='amino_acid_change:N', columns=14)
    grid.display()

In [None]:
def overlap_nonoverlap_barchart(df):
    """Print a count summary by class and overlap status, then chart it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``functional_consequence`` and ``overlapping`` columns.

    Returns
    -------
    None
        The summary table is printed and a bar chart of ``count`` per
        ``functional_consequence``, coloured by ``overlapping``, is rendered
        via ``display()``.
    """
    def as_percent(counts):
        # Each value as a percentage of the total of the values passed in.
        return (counts / counts.sum()) * 100

    # One row per (functional class, overlap status) with its variant count.
    per_class = (
        df.groupby(['functional_consequence', 'overlapping'])
        .size()
        .reset_index(name='count')
    )
    by_class = per_class.groupby('functional_consequence')['count']
    per_class['percentage_normal/ab'] = by_class.transform(as_percent)
    by_overlap = per_class.groupby('overlapping')['count']
    per_class['percentage_overlapping'] = by_overlap.transform(as_percent)

    # Extra "All Variants" rows: counts pooled over every functional class.
    overall = per_class.groupby('overlapping')['count'].sum().reset_index()
    overall['functional_consequence'] = 'All Variants'
    overall['percentage_normal/ab'] = 100
    overall['percentage_overlapping'] = as_percent(overall['count'])

    summary_df = pd.concat([per_class, overall], ignore_index=True)
    print(summary_df)

    # NOTE(review): 'indeterminate' is not named in this explicit order —
    # confirm where it should appear on the axis.
    class_axis = alt.X(
        'functional_consequence',
        sort=['All Variants', 'functionally_normal', 'functionally_abnormal'],
        axis=alt.Axis(labelAngle=0),
    )
    overlap_colour = alt.Color(
        'overlapping',
        sort=['Y', 'N'],
        legend=alt.Legend(title='In Overlap?'),
    )

    bars = alt.Chart(summary_df).mark_bar().encode(
        x=class_axis,
        y=alt.Y('count'),
        color=overlap_colour,
    )
    bars = bars.properties(width=300, height=300, title='X4 Missense Variants')

    bars.display()


In [None]:
def overlapping_scatter(df, day='D13'):
    """Display replicate-vs-replicate scatter plots for abnormal overlap variants.

    Only rows with a non-null ``snvlib_lib2`` and a ``functional_consequence``
    of ``'functionally_abnormal'`` are plotted. For each library (1, then 2) a
    heading is printed and one interactive chart, faceted by ``target``, is
    displayed for each replicate pair: R1 vs R2, R1 vs R3, R2 vs R3.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``snvlib_lib2``, ``functional_consequence``, ``target``
        and the ``{day}_R{1,2,3}_lib{1,2}`` count columns.
    day : str, optional
        Column-name prefix selecting which set of replicate columns to
        compare. Defaults to ``'D13'``, the columns that were previously
        hard-coded.

    Returns
    -------
    None
        Charts are rendered via ``display()``.
    """
    in_overlap = df['snvlib_lib2'].notna()
    is_abnormal = df['functional_consequence'].isin(['functionally_abnormal'])
    plot_df = df.loc[in_overlap & is_abnormal]

    # (x, y) replicate combinations, identical for both libraries.
    replicate_pairs = [('R1', 'R2'), ('R1', 'R3'), ('R2', 'R3')]

    for library in (1, 2):
        print(f'Library {library}: ')
        for rep_x, rep_y in replicate_pairs:
            scatter = alt.Chart(plot_df).mark_point().encode(
                x=f'{day}_{rep_x}_lib{library}',
                y=f'{day}_{rep_y}_lib{library}',
            ).interactive()

            # One panel per target.
            scatter.facet('target').display()
        

In [None]:
def main():
    """Load the variant table and render the currently enabled plots."""
    variants, abnormal_variants = process_data(bard1_data)

    std_error(variants, depth_file)
    overlap_nonoverlap_barchart(variants)

    # Optional views, currently switched off:
    #   counts_for_abnormal(abnormal_variants)
    #   overlapping_scatter(variants)


In [None]:
# Run the analysis defined in main() when this cell is executed.
main()