In [None]:
import pandas as pd
import altair as alt
import numpy as np

In [None]:
# Relative path to the BARD1 SGE score workbook (.xlsx) that main() passes to read_sge().
sge_scores = '../Data/filtered_ppj_data/SGE/BARD1.xlsx'

In [None]:
def read_sge(sge):
    """Load an SGE score workbook and derive a per-position reference table.

    Parameters
    ----------
    sge : path-like
        Location of the Excel file, handed straight to ``pandas.read_excel``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The full sheet as read, and a table with one row per distinct ``pos``
        carrying the first ``exon``, ``target`` and ``ref`` value seen for
        that position (columns: ``pos``, ``exon``, ``target``, ``ref``).
    """
    scores = pd.read_excel(sge)

    # Collapse the per-variant rows down to a single row per position.
    first_per_column = {column: 'first' for column in ('exon', 'target', 'ref')}
    reference = (
        scores[['exon', 'target', 'pos', 'ref']]
        .groupby('pos')
        .agg(first_per_column)
        .reset_index()
    )

    return scores, reference

In [None]:
def build_map(exon, sge_df, ref_df):
    """Display a position-by-alt heatmap of SGE scores for a single exon.

    Rows of ``sge_df`` whose ``exon`` column matches ``exon`` are drawn as an
    Altair rect chart: binned ``pos`` on a reversed x axis, ``alt`` on the
    y axis, ``score`` as the fill colour and ``consequence`` as the stroke
    colour. The chart is displayed as a side effect; nothing is returned.

    Parameters
    ----------
    exon : str
        Value to look up in the ``exon`` column (e.g. ``'BARD1_X2'``).
    sge_df : pandas.DataFrame
        Score table; the ``exon``, ``pos``, ``alt``, ``score`` and
        ``consequence`` columns are read.
    ref_df : pandas.DataFrame
        Currently unused; kept in the signature so existing callers still work.

    Raises
    ------
    ValueError
        If no row with a non-missing ``pos`` matches ``exon``.
    """
    exon_df = sge_df.loc[sge_df['exon'].isin([exon])]

    # Fail early with a clear message: with no usable positions min()/max()
    # would be NaN and the range() below would raise an opaque TypeError.
    positions = exon_df['pos'].dropna()
    if positions.empty:
        raise ValueError(
            "no positions found for exon {!r}; cannot build map".format(exon)
        )

    # Plain Python ints so range() and the chart spec never see numpy scalars
    # or floats (the column becomes float as soon as it holds a missing value).
    start = int(positions.min())
    end = int(positions.max())
    n_positions = (end - start) + 1

    rect_size = 15
    spacing = 7.5
    # NOTE(review): presumably one row per possible alt base - confirm.
    n_rows = 4

    # n cells of rect_size separated by (n - 1) gaps of `spacing`.
    total_width = (rect_size + spacing) * n_positions - spacing
    total_height = (rect_size + spacing) * n_rows - spacing

    # Named `chart` rather than `map` so the builtin is not shadowed.
    chart = alt.Chart(exon_df).mark_rect().encode(
        x=alt.X(
            'pos:Q',
            axis=alt.Axis(values=list(range(start, end + 1))),
            scale=alt.Scale(domain=[start, end], reverse=True),
            bin=alt.Bin(maxbins=n_positions),
        ),
        y=alt.Y('alt:N'),
        color=alt.Color('score:Q'),
        stroke='consequence',
    ).properties(
        width=total_width,
        height=total_height,
    )

    chart.display()
                  


In [None]:
def main():
    """Read the configured SGE workbook and display the map for exon BARD1_X2."""
    scores, reference = read_sge(sge_scores)
    build_map('BARD1_X2', scores, reference)

In [None]:
# Entry point: load the SGE scores and render the exon map.
main()