In [None]:
import pandas as pd
import altair as alt
import numpy as np

In [None]:
# Relative path of the Excel workbook that main() hands to read_sge().
sge_scores = '../Data/filtered_ppj_data/SGE/BARD1.xlsx'

In [None]:
def read_sge(sge):
    """Load a score table from Excel and relabel its ``consequence`` column.

    Raw consequence strings are replaced with the display labels used for
    plotting (``'Missense'``, ``'Synonymous'``, ``'Stop Gained'`` ...).
    Values that match none of the rules, including missing values, are left
    untouched.

    Parameters
    ----------
    sge : str or path-like
        Location of the Excel workbook; passed straight to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The loaded table with ``consequence`` relabelled.
    """
    df = pd.read_excel(sge)

    # (match kind, pattern, display label), applied top to bottom.
    # Each rule sees the column as rewritten by the rules before it, so for a
    # raw value matching several patterns the earliest rule wins (none of the
    # display labels contains a later pattern).
    rules = [
        ('contains', 'missense', 'Missense'),
        ('equals', 'synonymous_variant', 'Synonymous'),
        ('equals', 'intron_variant', 'Intron'),
        ('equals', 'stop_gained', 'Stop Gained'),
        ('equals', 'stop_lost', 'Stop Lost'),
        ('contains', 'site', 'Canonical Splice'),
        ('contains', 'ing_var', 'Splice Region'),
        ('contains', 'UTR', 'UTR Variant'),
        ('equals', 'start_lost', 'Start Lost'),
    ]

    for kind, pattern, label in rules:
        current = df['consequence']
        if kind == 'contains':
            # na=False keeps the mask strictly boolean: without it a missing
            # consequence yields NaN in the mask and .loc refuses to index.
            # regex=False because every pattern is a plain substring.
            mask = current.str.contains(pattern, regex=False, na=False)
        else:
            mask = current == pattern
        df.loc[mask, 'consequence'] = label

    return df

In [None]:
def build_map(exon, sge_df, max_pos=214797141, save_path=None):
    """Display a position-by-allele map of scores for one exon.

    Each row of the selected data becomes a square placed at its ``pos``
    (x, ordinal, drawn in reverse order) and ``alt`` (y). Fill colour encodes
    ``score`` on a reversed 'bluepurple' scheme clamped to [-0.2, 0]; the
    stroke colour encodes ``consequence``. A text layer writes the ``ref``
    value of every position into the row whose ``alt`` equals that ``ref``.

    Parameters
    ----------
    exon : str
        Value of the ``exon`` column to plot (e.g. ``'BARD1_X2'``).
    sge_df : pandas.DataFrame
        Table with at least the columns ``exon``, ``pos``, ``ref``, ``alt``,
        ``score`` and ``consequence``.
    max_pos : int or None, optional
        Rows with ``pos`` greater than this are dropped. The default keeps
        the cut-off that used to be hard-coded; pass ``None`` to plot every
        position of the exon.
    save_path : str or path-like or None, optional
        When given, the chart is also written to this path with
        ``Chart.save`` after being displayed.

    Raises
    ------
    ValueError
        If no rows remain after filtering, since there is nothing to plot.
    """
    # Stroke colours, paired index-for-index with `variant_types` below.
    palette = [
        '#006616',  # dark green
        '#81B4C7',  # dusty blue
        '#ffcd3a',  # yellow
        '#6AA84F',  # med green
        '#888888',  # med gray
        '#1170AA',  # darker blue
        '#CFCFCF',  # light gray
    ]

    # NOTE(review): read_sge() can also emit 'UTR Variant' and 'Start Lost',
    # which are not in this domain -- confirm how such rows should be outlined.
    variant_types = [
        'Synonymous',
        'Missense',
        'Stop Gained',
        'Intron',
        'Stop Lost',
        'Canonical Splice',
        'Splice Region',
    ]

    exon_df = sge_df.loc[sge_df['exon'].isin([exon])]
    if max_pos is not None:
        exon_df = exon_df.loc[exon_df['pos'] <= max_pos]

    if exon_df.empty:
        raise ValueError(
            f'no rows for exon {exon!r} (max_pos={max_pos!r}); nothing to plot'
        )

    # One row per position holding its reference base; `alt` is set to the
    # same value so the label lands on the matching row of the y axis.
    ref_df = exon_df.groupby('pos').agg({'ref': 'first'}).reset_index()
    ref_df['alt'] = ref_df['ref']

    # Plain ints so range() and the chart spec never see NumPy scalars.
    start = int(exon_df['pos'].min())
    end = int(exon_df['pos'].max())
    # Every position in the span gets a column, including ones with no data.
    positions = list(range(start, end + 1))

    rect_size = 15
    spacing = 7.5

    total_width = (rect_size + spacing) * len(positions) - spacing
    # 4 rows -- presumably one per alternate base; TODO confirm.
    total_height = (rect_size + spacing) * 4 - spacing

    squares = alt.Chart(exon_df).mark_square().encode(
        x=alt.X(
            'pos:O',
            title='',
            axis=alt.Axis(values=positions, labels=False, ticks=False),
            scale=alt.Scale(domain=positions, reverse=True),
        ),
        y=alt.Y(
            'alt:N',
            title='',
            axis=alt.Axis(labelFontSize=18),
        ),
        # Mark size is an area, hence the squared edge length.
        size=alt.value(rect_size * rect_size),
        color=alt.Color(
            'score:Q',
            scale=alt.Scale(
                scheme='bluepurple',
                domain=[-0.2, 0],
                clamp=True,
                reverse=True,
            ),
        ),
        stroke=alt.Stroke(
            'consequence',
            scale=alt.Scale(range=palette, domain=variant_types),
            legend=alt.Legend(
                symbolFillColor='white',
                title='Consequence',
                titleFontSize=20,
                labelFontSize=18,
            ),
        ),
    ).properties(
        width=total_width,
        height=total_height,
    )

    ref_labels = alt.Chart(ref_df).mark_text(fontSize=20).encode(
        x='pos:O',
        y='alt',
        text='ref',
    )

    chart = alt.layer(squares, ref_labels).configure_view(
        stroke='black'
    ).configure_axis(
        domainColor='black',
        tickColor='black',
    )

    chart.display()

    if save_path is not None:
        chart.save(save_path)

In [None]:
def main():
    """Read the workbook at ``sge_scores`` and show the map for 'BARD1_X2'."""
    build_map('BARD1_X2', read_sge(sge_scores))

In [None]:
# Run the pipeline: load the score table and display the map.
main()