In [None]:
import pandas as pd
import altair as alt

In [None]:
# Lift Altair's default row limit before any chart is built.
alt.data_transformers.disable_max_rows()

# Input CSV, given relative to the notebook's working directory; read by main().
file = '../Data/filtered_ppj_data/VAMPseq/G6PD_scores_consequence.csv'

In [None]:
def read_data(file):
    """Load the score table and normalise its 'Mut' column.

    Parameters
    ----------
    file : str or path-like
        CSV file to read; it must contain a 'Mut' column.

    Returns
    -------
    pandas.DataFrame
        The table with every 'Stop' entry in 'Mut' rewritten as '*' and
        rows lacking a 'Mut' value removed. The original row index is kept.
    """
    return (
        pd.read_csv(file)
        # 'Stop' is relabelled to the single-character symbol '*'.
        .assign(Mut=lambda table: table['Mut'].replace('Stop', '*'))
        .dropna(subset=['Mut'])
    )

In [None]:
# Layout constants shared by the helpers below.
_PROTEIN_LENGTH = 515  # upper end of the x-axis domain and of the last annotated segment
_CHART_WIDTH = 1750    # pixel width used for both stacked panels
_AA_ORDER = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
             'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*']


def _domain_track():
    """Build the annotation strip (coloured segments plus labels) drawn above the heatmap."""
    annotation_data = pd.DataFrame([
        {'start': 35, 'end': 210, 'label': 'NAD Binding Domain', 'color': '#B9DBF4'},
        {'start': 212, 'end': 503, 'label': 'C-Terminal Domain', 'color': '#C8DBC8'},
        # Unlabelled grey segments fill the stretches outside the named domains.
        {'start': 1, 'end': 35, 'label': '', 'color': 'grey'},
        {'start': 210, 'end': 212, 'label': '', 'color': 'grey'},
        {'start': 503, 'end': _PROTEIN_LENGTH, 'label': '', 'color': 'grey'}
        # Add more annotations as needed
    ])

    # Midpoint of each segment; the text labels are anchored here.
    annotation_data['center'] = (annotation_data['start'] + annotation_data['end']) / 2

    # The colour scale is read straight from the table so that labels and
    # colours cannot drift apart from the segments they describe.
    segment_colors = alt.Scale(
        domain=annotation_data['label'].tolist(),
        range=annotation_data['color'].tolist()
    )

    # Segment rectangles
    segments = alt.Chart(annotation_data).mark_rect(
        height=25,
        stroke='black',
        strokeWidth=2
    ).encode(
        x=alt.X('start:Q',
                axis=None,
                scale=alt.Scale(domain=[0, _PROTEIN_LENGTH])),
        x2='end:Q',
        color=alt.Color('label:N', scale=segment_colors, legend=None),
        tooltip=['label', 'start', 'end']
    ).properties(
        width=_CHART_WIDTH,
        height=20
    )

    # Segment labels, placed at each segment's midpoint
    labels = alt.Chart(annotation_data).mark_text(
        color='black',
        fontSize=22,
        fontWeight='bold',
        baseline='middle',
        dy=-10  # This helps with vertical centering
    ).encode(
        x=alt.X('center:Q',
                scale=alt.Scale(domain=[0, _PROTEIN_LENGTH]),
                axis=None),
        text='label:N'
    )

    return alt.layer(segments, labels).properties(
        width=_CHART_WIDTH,
        height=20
    )


def _score_heatmap(df):
    """Build the position-by-substitution heatmap coloured by 'average_score'."""
    return alt.Chart(df).mark_rect().encode(
        x=alt.X('position:Q',
                title='Amino Acid Position',
                # presumably gives one bin per residue so each cell spans a
                # single position -- TODO confirm against the rendered chart
                bin=alt.Bin(maxbins=_PROTEIN_LENGTH + 1),
                # Tick marks every 50 positions.
                axis=alt.Axis(values=list(range(0, _PROTEIN_LENGTH + 1, 50)),
                              labelFontSize=18,
                              titleFontSize=20)
                ),
        y=alt.Y('Mut',
                title='',
                sort=_AA_ORDER,
                axis=alt.Axis(labelFontSize=18)
                ),
        color=alt.Color('average_score',
                        title='Score',
                        scale=alt.Scale(
                            scheme='bluepurple',
                            domain=[0, 1],
                            clamp=True,   # scores outside [0, 1] take the end colours
                            reverse=True
                        ),
                        legend=alt.Legend(
                            titleFontSize=20,
                            labelFontSize=18
                        )
                        )
    ).properties(
        width=_CHART_WIDTH,
        height=600
    )


def map(df, save_path=None):
    """Display the score heatmap with the domain annotation strip stacked above it.

    NOTE: the name shadows the builtin ``map`` for the rest of the notebook;
    it is kept because ``main`` calls this function by that name.

    Parameters
    ----------
    df : pandas.DataFrame
        Table providing the 'position', 'Mut' and 'average_score' columns
        that the heatmap encodes.
    save_path : str or path-like, optional
        When given, the figure is also written to this path with
        ``Chart.save``. Image formats may need an extra rendering package,
        depending on the installed Altair version.

    Returns
    -------
    None
        The figure is shown through ``display()``.
    """
    figure = alt.vconcat(
        _domain_track(),
        _score_heatmap(df),
        spacing=-5  # negative spacing pulls the strip down onto the heatmap
    ).configure_view(
        stroke=None
    )

    figure.display()

    if save_path is not None:
        figure.save(save_path)

In [None]:
def main():
    """Read the CSV named by the module-level ``file`` and draw its heatmap."""
    map(read_data(file))

In [None]:
# Run the notebook's pipeline: read the CSV, then display the heatmap.
main()