## Producing heatmaps of escape for known antibody-rabies G contacts

#### Authored by Arjun Aditham on 19-Nov-2024


In [23]:
#pull in dependencies.

import itertools
import math

import altair as alt

import dms_variants.utils

import pandas as pd

import polyclonal
import polyclonal.plot
import seaborn as sb

import numpy as np

# Allow more rows for Altair: switch off the data transformer's row limit so
# that larger data frames can be charted. The return value is bound to `_`
# because it is not used.
_ = alt.data_transformers.disable_max_rows()

# Plotting settings shared by every heatmap cell below.

# Amino-acid order handed to the heatmaps through their `alphabet` argument.
alphabet = list("RKHDEQNSTYWFAILMVGPC")

# Extra per-mutation columns passed as `addtl_tooltip_stats`.
addtl_tooltip_stats = ["n_selections", "times_seen", "effect on cell entry"]

# Extra columns passed as `addtl_slider_stats`, each mapped to a number
# (presumably the initial slider setting -- confirm against polyclonal docs).
addtl_slider_stats = {
    "times_seen": 2,
    "n_selections": 2,
    "effect on cell entry": -5,
}

In [24]:
# Load the averaged cell-entry functional effects and reformat them:
#   * rename `effect` to `effect on cell entry`, the column name the tooltip
#     and slider settings refer to;
#   * add a constant `phenotype` column, which the heatmap calls below use as
#     their `category_col`.
func_effects_csv = "../results/func_effects/averages/HEK293T_entry_func_effects.csv"
df = pd.read_csv(func_effects_csv).rename(columns={"effect": "effect on cell entry"})
df["phenotype"] = "effect"



# RVA122

In [26]:
# Averaged RVA122 escape per mutation; negative escape is floored at zero.
escape_df = pd.read_csv("../results/antibody_escape/averages/RVA122_mut_effect.csv")
escape_df["escape_mean"] = escape_df["escape_mean"].clip(lower=0)

# Right-join onto the cell-entry table so every row of `df` is kept; rows
# without a matching escape measurement get NaN in the escape columns.
merged_df = escape_df.merge(df, how="right", on=["site", "wildtype", "mutant"])

# Sites treated as RVA122 contacts.
site_contacts = [
    1, 2, 3, 29, 31, 33, 35, 36, 38, 198, 213, 214, 215,
    216, 309, 331, 333, 334, 337,
]

# Restrict to the contact sites and expose the left-hand (escape) table's
# `times_seen_x` under the plain `times_seen` name the plot settings expect.
contacts = merged_df.loc[merged_df["site"].isin(site_contacts)].rename(
    columns={"times_seen_x": "times_seen"}
)

# The escape table omits wildtype residues, so after the merge their
# escape_mean is NaN; it should be 0. Only those rows are set to 0 -- a
# blanket fillna would also overwrite mutations whose data is truly missing.
contacts["is_wt_residue"] = contacts["wildtype"] == contacts["mutant"]
contacts.loc[contacts["is_wt_residue"], "escape_mean"] = 0

# Heatmap only (no line plot, no zoom bar). The colour-scale ceiling is fixed
# at 6.127, the same value used for every antibody in this notebook.
polyclonal.plot.lineplot_and_heatmap(
    data_df=contacts,
    stat_col="escape_mean",
    category_col="phenotype",
    alphabet=alphabet,
    plot_title="contacts of RVA122 antibody",
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    addtl_slider_stats_hide_not_filter=["effect on cell entry", "n_selections", "times_seen"],
    init_site_statistic="mean",
    init_floor_at_zero=True,
    show_lineplot=False,
    show_zoombar=False,
    heatmap_max_at_least=2,
    heatmap_max_fixed=6.127,
)

# RVC20

In [27]:
# Averaged RVC20 escape per mutation; negative escape is floored at zero.
escape_df = pd.read_csv("../results/antibody_escape/averages/RVC20_mut_effect.csv")
escape_df["escape_mean"] = escape_df["escape_mean"].clip(lower=0)

# Right-join onto the cell-entry table so every row of `df` is kept; rows
# without a matching escape measurement get NaN in the escape columns.
merged_df = escape_df.merge(df, how="right", on=["site", "wildtype", "mutant"])

# Sites treated as RVC20 contacts.
site_contacts = [
    42, 44, 47, 186, 187, 188, 189, 190, 191, 192, 194, 226, 227,
    228, 229, 230,
]

# Restrict to the contact sites and expose the left-hand (escape) table's
# `times_seen_x` under the plain `times_seen` name the plot settings expect.
contacts = merged_df.loc[merged_df["site"].isin(site_contacts)].rename(
    columns={"times_seen_x": "times_seen"}
)

# The escape table omits wildtype residues, so after the merge their
# escape_mean is NaN; it should be 0. Only those rows are set to 0 -- a
# blanket fillna would also overwrite mutations whose data is truly missing.
contacts["is_wt_residue"] = contacts["wildtype"] == contacts["mutant"]
contacts.loc[contacts["is_wt_residue"], "escape_mean"] = 0

# Heatmap only (no line plot, no zoom bar). The colour-scale ceiling is fixed
# at 6.127, the same value used for every antibody in this notebook.
polyclonal.plot.lineplot_and_heatmap(
    data_df=contacts,
    stat_col="escape_mean",
    category_col="phenotype",
    alphabet=alphabet,
    plot_title="contacts of RVC20 antibody",
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    addtl_slider_stats_hide_not_filter=["effect on cell entry", "n_selections", "times_seen"],
    init_site_statistic="mean",
    init_floor_at_zero=True,
    show_lineplot=False,
    show_zoombar=False,
    heatmap_max_at_least=2,
    heatmap_max_fixed=6.127,
)

# 17C7

In [28]:
# Averaged 17C7 escape per mutation; negative escape is floored at zero.
escape_df = pd.read_csv("../results/antibody_escape/averages/17C7_mut_effect.csv")
escape_df["escape_mean"] = escape_df["escape_mean"].clip(lower=0)

# Right-join onto the cell-entry table so every row of `df` is kept; rows
# without a matching escape measurement get NaN in the escape columns.
merged_df = escape_df.merge(df, how="right", on=["site", "wildtype", "mutant"])

# Sites treated as 17C7 contacts.
site_contacts = [
    212, 333, 334, 336, 337, 340, 341, 342, 344, 346, 348, 349, 351,
    370, 380, 382,
]

# Restrict to the contact sites and expose the left-hand (escape) table's
# `times_seen_x` under the plain `times_seen` name the plot settings expect.
contacts = merged_df.loc[merged_df["site"].isin(site_contacts)].rename(
    columns={"times_seen_x": "times_seen"}
)

# The escape table omits wildtype residues, so after the merge their
# escape_mean is NaN; it should be 0. Only those rows are set to 0 -- a
# blanket fillna would also overwrite mutations whose data is truly missing.
contacts["is_wt_residue"] = contacts["wildtype"] == contacts["mutant"]
contacts.loc[contacts["is_wt_residue"], "escape_mean"] = 0

# Heatmap only (no line plot, no zoom bar). The colour-scale ceiling is fixed
# at 6.127, the same value used for every antibody in this notebook.
polyclonal.plot.lineplot_and_heatmap(
    data_df=contacts,
    stat_col="escape_mean",
    category_col="phenotype",
    alphabet=alphabet,
    plot_title="contacts of 17C7 antibody",
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    addtl_slider_stats_hide_not_filter=["effect on cell entry", "n_selections", "times_seen"],
    init_site_statistic="mean",
    init_floor_at_zero=True,
    show_lineplot=False,
    show_zoombar=False,
    heatmap_max_at_least=2,
    heatmap_max_fixed=6.127,
)


# CR57

In [29]:
# Averaged CR57 escape per mutation.
escape_df = pd.read_csv("../results/antibody_escape/averages/CR57_mut_effect.csv")

# Keep only mutations seen at least twice. The explicit .copy() makes the
# filtered frame independent of the one read from disk, so the column
# assignment below is not a write to a slice (avoids SettingWithCopyWarning).
# NOTE(review): the other antibody cells in this notebook do not apply this
# pre-merge times_seen filter -- confirm the difference is intentional.
escape_df = escape_df.loc[escape_df["times_seen"] >= 2].copy()

# Negative escape is floored at zero.
escape_df["escape_mean"] = escape_df["escape_mean"].clip(lower=0)

# Right-join onto the cell-entry table so every row of `df` is kept; rows
# without a matching escape measurement get NaN in the escape columns.
merged_df = pd.merge(escape_df, df, how="right", on=["site", "wildtype", "mutant"])

# Sites treated as CR57 contacts.
site_contacts = [
    44, 187, 188, 189, 192, 193, 194, 226, 228, 229, 230, 231, 242,
    244, 245, 251,
]

# Restrict to the contact sites and expose the left-hand (escape) table's
# `times_seen_x` under the plain `times_seen` name the plot settings expect.
contacts = merged_df[merged_df["site"].isin(site_contacts)]
contacts = contacts.rename(columns={"times_seen_x": "times_seen"})

# The escape table omits wildtype residues, so after the merge their
# escape_mean is NaN; it should be 0. Only those rows are set to 0 -- a
# blanket fillna would also overwrite mutations whose data is truly missing.
contacts["is_wt_residue"] = contacts["wildtype"] == contacts["mutant"]
contacts.loc[contacts["is_wt_residue"], "escape_mean"] = 0

# Heatmap only (no line plot, no zoom bar). The colour-scale ceiling is fixed
# at 6.127, the same value used for every antibody in this notebook.
polyclonal.plot.lineplot_and_heatmap(
    data_df=contacts,
    stat_col="escape_mean",
    category_col="phenotype",
    alphabet=alphabet,
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    init_floor_at_zero=True,
    show_zoombar=False,
    show_lineplot=False,
    init_site_statistic="mean",
    plot_title="contacts of CR57 antibody",
    heatmap_max_at_least=2,
    addtl_slider_stats_hide_not_filter=["effect on cell entry", "n_selections", "times_seen"],
    heatmap_max_fixed=6.127,
)

In [30]:
# The largest escape_mean in `merged_df` (at this point, the frame left by the
# antibody cell run most recently) was used to set heatmap_max_fixed = 6.127
# by hand in every heatmap call, because that antibody had the widest range of
# escape scores.
#
# Series.max() skips NaN. The right merge leaves NaN escape for unmatched
# rows, and the builtin max() returns NaN whenever the first element is NaN,
# so it is not reliable here. float() keeps the displayed value a plain float.
float(merged_df["escape_mean"].max())

6.127