In [1]:
import altair as alt
import pandas as pd
import httpimport

# Turn off Altair's row-count limit so charts can be built from larger DataFrames.
# Assigning the result to `_` keeps the call's return value from being echoed as cell output.
_ = alt.data_transformers.disable_max_rows()


In [2]:
# Import custom altair theme from remote github using httpimport module
def import_theme_new():
    """Import the remote ``heatmap_theme`` module and register it as an Altair theme.

    The module is imported inside an ``httpimport.github_repo`` context created
    with the arguments ``"bblarsen-sci"``, ``"altair_themes"`` and ``"main"``,
    so running this presumably requires network access. The theme is registered
    under the name ``"custom_theme"`` with ``enable=True``.

    NOTE(review): ``"main"`` looks like a branch name, so the imported code is
    not pinned to a fixed revision and may change between runs -- confirm, and
    consider pinning to a tag or commit for reproducibility.
    """
    # The import statement sits inside the context manager on purpose: the remote
    # module is presumably only importable while this context is active.
    with httpimport.github_repo("bblarsen-sci", "altair_themes", "main"):
        import heatmap_theme

        @alt.theme.register("custom_theme", enable=True)
        def custom_theme():
            """Return whatever ``heatmap_theme.heatmap_theme()`` produces."""
            return heatmap_theme.heatmap_theme()


# Register and enable the theme before any charts are built in later cells.
import_theme_new()


In [3]:
def make_empty_df(df, amino_acids=None):
    """
    Expand DMS data onto a complete site x amino-acid grid.

    Measurements are sometimes missing, so this builds a scaffold DataFrame
    ('empty_df') holding every combination of site and amino acid, where the
    sites are every integer from the smallest to the largest value in
    ``df["site"]`` (inclusive, so gaps in the numbering are filled in). The DMS
    data are then left-merged onto that scaffold on ``["site", "mutant"]``.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the DMS data. Must have an integer 'site' column
        and a 'mutant' column, and at least one row.
    amino_acids : iterable of str, optional
        Amino acids to include, in the order they should appear for each site.
        Defaults to the 20 standard amino acids in the order
        R, K, H, D, E, Q, N, S, T, Y, W, F, A, I, L, M, V, G, P, C.

    Returns
    -------
    merged_df : pandas.DataFrame
        One row per (site, amino acid) combination, plus any extra rows
        produced by duplicate (site, mutant) pairs in ``df``. Every missing
        value in every column is replaced by an empty string, so numeric
        columns that contained gaps become object dtype.

    Raises
    ------
    ValueError
        If ``df`` has no rows or ``amino_acids`` is empty.
    """
    if amino_acids is None:
        # Built fresh on every call so no mutable default is shared between calls
        amino_acids = list("RKHDEQNSTYWFAILMVGPC")
    else:
        # Materialise once: a one-shot iterator would otherwise be exhausted
        # after the first site in the nested comprehension below
        amino_acids = list(amino_acids)

    if not amino_acids:
        raise ValueError("amino_acids must contain at least one amino acid")
    if len(df) == 0:
        raise ValueError("df contains no rows, so the site range cannot be determined")

    # Get the minimum and maximum site numbers (vectorised, not element-wise)
    min_site = df["site"].min()
    max_site = df["site"].max()

    # Every site in the inclusive range spanned by the DMS data
    sites = range(min_site, max_site + 1)

    # Create the combination of each site with each amino acid
    empty_df = pd.DataFrame(
        [(site, aa) for site in sites for aa in amino_acids],
        columns=["site", "mutant"],
    )

    # Left merge keeps every scaffold row even when no measurement exists
    merged_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
    # Need to modify the NaN values to be empty strings for later plotting
    merged_df = merged_df.fillna("")
    return merged_df


In [6]:
# Load the filtered 1A9 antibody-escape table and preview its first two rows
df = pd.read_csv(
    '../../results/filtered_data/antibody_escape/1A9_escape_filtered_no_effect_cutoff.csv'
)
display(df.head(n=2))

# Expand onto the full site x amino-acid grid and show the result
tmp_df = make_empty_df(df=df)
display(tmp_df)

Unnamed: 0,epitope,site,wildtype,mutant,mutation,escape_mean,escape_median,escape_std,n_models,times_seen_ab,frac_models,LibA-250326-1A9,LibB-250312-1A9,effect,effect_std,times_seen_entry,n_selections
0,1,29,H,A,H29A,-0.464,-0.464,0.015,2,2.5,1.0,-0.45,-0.479,-3.298,0.095,4.75,8.0
1,1,29,H,C,H29C,0.288,0.288,0.0,1,2.0,0.5,,0.288,-3.289,0.0,2.625,4.0


Unnamed: 0,site,mutant,epitope,wildtype,mutation,escape_mean,escape_median,escape_std,n_models,times_seen_ab,frac_models,LibA-250326-1A9,LibB-250312-1A9,effect,effect_std,times_seen_entry,n_selections
0,29,R,1.0,H,H29R,-0.081,-0.081,0.193,2.0,5.0,1.0,-0.274,0.112,-0.631,0.364,5.375,8.0
1,29,K,,,,,,,,,,,,,,,
2,29,H,,,,,,,,,,,,,,,
3,29,D,1.0,H,H29D,-0.212,-0.212,0.139,2.0,6.5,1.0,-0.073,-0.351,-0.718,0.778,7.0,8.0
4,29,E,1.0,H,H29E,-0.4,-0.4,0.22,2.0,2.0,1.0,-0.18,-0.62,-1.158,0.492,2.875,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9055,481,M,1.0,L,L481M,0.003,0.003,0.0,1.0,8.0,0.5,,0.003,-0.38,0.17,5.0,4.0
9056,481,V,1.0,L,L481V,0.203,0.203,0.16,2.0,4.0,1.0,0.043,0.363,-1.519,0.552,4.625,8.0
9057,481,G,,,,,,,,,,,,,,,
9058,481,P,1.0,L,L481P,0.185,0.185,0.581,2.0,2.0,1.0,-0.396,0.766,-3.556,0.0,6.375,8.0
