In [None]:
from pathlib import Path

import altair as alt
import numpy as np
import pandas as pd

In [None]:
#Path to the Excel workbook that main() hands to read_scores(); relative to the notebook's working directory
atg_scores = '../Data/ATG_lib_data/20250409_BARD1_X1A_ATG_scored.xlsx'

In [None]:
def read_scores(file): #Reads all scores
    """Load the scored-variant workbook and keep only the columns used downstream.

    Parameters
    ----------
    file : str or path-like
        Excel file readable by ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        An independent frame holding exactly the columns listed in
        ``wanted_columns``, in that order.

    Raises
    ------
    KeyError
        If any of the wanted columns is missing from the workbook.
    """
    wanted_columns = ['pos', 'allele', 'canonical_start', 'start_pos_id',
                      'Consequence', 'AAsub', 'score']

    sheet = pd.read_excel(file)

    #Explicit copy: a bare column subset is flagged by pandas as a possible view,
    #so adding columns to it later would emit SettingWithCopyWarning
    return sheet[wanted_columns].copy()

In [None]:
def process_df(df): #Pre-processes dataframe for heat map
    """Return an annotated copy of ``df`` ready for the heat maps.

    The input frame is left untouched. The returned frame has:

    * ``cds`` - computed from ``pos`` as
      ``214809490 - (214809410 - (214809490 - pos))``, i.e. ``214809570 - pos``.
    * ``sense_allele`` - the complement of ``allele`` (A<->T, C<->G).
    * ``Consequence`` - 'stop_gained', 'missense_variant' and
      'synonymous_variant' relabelled as 'Stop Gained', 'Missense' and
      'Synonymous'; any other value is kept as is.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``pos``, ``allele`` and ``Consequence``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns described above added/updated.

    Raises
    ------
    KeyError
        If ``allele`` holds anything other than 'A', 'C', 'G' or 'T'.
    """
    #Work on a copy so the caller's frame is not mutated (and no SettingWithCopyWarning is raised)
    annotated = df.copy()

    #adds column for CDS position
    #NOTE(review): expression kept exactly as originally written; the constants are
    #presumably genomic coordinates of the targeted region - confirm the intended offset
    annotated['cds'] = 214809490 - (214809410 - (214809490 - annotated['pos']))

    #Dictionary to get base on coding strand
    rev_comp_dict = {'A': 'T',
                     'C': 'G',
                     'T': 'A',
                     'G': 'C'
                    }

    #Fail loudly, naming the offending values, instead of a bare KeyError from a lookup
    unexpected = set(annotated['allele'].unique()) - set(rev_comp_dict)
    if unexpected:
        raise KeyError('allele values with no complement: ' + ', '.join(sorted(map(repr, unexpected))))

    #Creates column with base change on coding strand
    annotated['sense_allele'] = annotated['allele'].map(rev_comp_dict)

    #Human-readable consequence labels used by the heat-map legend
    display_labels = {'stop_gained': 'Stop Gained',
                      'missense_variant': 'Missense',
                      'synonymous_variant': 'Synonymous'}
    for raw_label, pretty_label in display_labels.items():
        annotated.loc[annotated['Consequence'] == raw_label, 'Consequence'] = pretty_label

    return annotated

In [None]:
def _start_codon_chart(data, title):
    """Build one heat map (cds position x sense allele, coloured by max score) for ``data``."""
    #Axis settings shared by the x and y axes
    shared_axis = dict(labelFontSize = 20,
                       domain = False,
                       ticks = False,
                       labelPadding = 15)
    consequence_order = ['Stop Gained', 'Synonymous', 'Missense']

    return alt.Chart(data).mark_rect(strokeWidth = 7.5, width = 50, height = 50).encode(
        x = alt.X('cds:O',
                  axis = alt.Axis(orient = 'top', labelAngle = 0, **shared_axis)
                 ),
        y = alt.Y('sense_allele:O',
                  axis = alt.Axis(title = '', **shared_axis)
                 ),
        color = alt.Color('max(score)',
                          scale = alt.Scale(
                              domain = [-0.3, 0],
                              scheme = 'magma'
                          ),
                          title = 'SGE Score'
                         ),
        stroke = alt.Stroke('Consequence',
                            scale = alt.Scale(
                                domain = consequence_order,
                                range = ['#ffcd3a','#006616','#81B4C7']
                            ),
                            sort = consequence_order,
                            legend = alt.Legend(
                                symbolFillColor = 'white',
                                labelFontSize = 12,
                                titleFontSize = 12
                            )
                           )
    ).properties(
        width = 400,
        height = 200,
        title = title
    ).configure_view(
        strokeWidth = 0
    )


def heatmap(df, out_dir = '/Users/ivan/Desktop/BARD1_draft_figs/'): #Creates all heatmaps
    """Save and display one heat map per start codon, then one for all non-ATG codons.

    For every codon in ``all_starts`` the rows whose ``canonical_start`` equals
    that codon are plotted, written to
    ``<out_dir>/fig_3d_BARD1_X1A_ATG_<codon>_map.png`` and displayed. Afterwards
    the rows for every codon except 'ATG' are plotted together, written to
    ``<out_dir>/fig_3d_BARD1_X1A_ATG_nonATG_map.png`` and displayed.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``canonical_start``, ``cds``, ``sense_allele``,
        ``score`` and ``Consequence``.
    out_dir : str or path-like, optional
        Directory for the PNG files; created if it does not exist. The default
        is the original machine-specific location, so pass a directory of your
        own when running elsewhere.

    Returns
    -------
    None
    """
    all_starts = ['ATG', 'CTG', 'AGG', 'TAG', 'ATA', 'GTG', 'ACG'] #List of all possible codons at Met1
    non_atg = [codon for codon in all_starts if codon != 'ATG'] #List of all codons except Met at Met1

    figure_dir = Path(out_dir)
    figure_dir.mkdir(parents = True, exist_ok = True) #Saving into a missing directory would otherwise fail

    #One heat map per start codon
    for start in all_starts:
        codon_df = df.loc[df['canonical_start'] == start] #Gets subset that contains only a single codon
        codon_chart = _start_codon_chart(codon_df, start + ' map')

        #Path passed as str so the image format is inferred from the '.png' suffix
        codon_chart.save(str(figure_dir / ('fig_3d_BARD1_X1A_ATG_' + start + '_map.png')), ppi = 500) #For figure saving
        codon_chart.display()

    #Heat map with all non-ATG codons aggregated together; built once, after the per-codon maps
    non_atg_df = df.loc[df['canonical_start'].isin(non_atg)]
    non_atg_chart = _start_codon_chart(non_atg_df, 'All Non-ATG')
    non_atg_chart.save(str(figure_dir / 'fig_3d_BARD1_X1A_ATG_nonATG_map.png'), ppi = 500)
    non_atg_chart.display()

In [None]:
def main():
    """Run the notebook end to end: load the scores, pre-process them, draw the heat maps."""
    heatmap(process_df(read_scores(atg_scores)))

In [None]:
#Runs the full pipeline: reads the scores, pre-processes them, then saves and displays every heat map
main()