# Correlate effects of mutations on escape from antibodies

Make correlation plots of the effects of mutations on antibody escape. 

Import python modules: 

In [None]:
import pandas as pd
from scipy import stats
import altair as alt

Read the data: 

In [None]:
# Only correlate non-stop-codon mutations at sites that were targeted for
# mutation. Site labels are compared as strings, so the range of targeted
# site numbers is converted to strings here.
sites_to_show = [str(x) for x in range(30, 702)]


def _read_mut_effects(csv_path):
    """Load one antibody's averaged mutation effects, filtered for correlation.

    Keeps only rows whose ``site`` is in ``sites_to_show`` and whose ``mutant``
    is not the stop codon ``*``, and returns just the ``escape_mean``,
    ``mutation`` and ``times_seen`` columns.

    Parameters
    ----------
    csv_path : str
        Path to a ``*_mut_effect.csv`` file with at least the columns
        ``site``, ``mutant``, ``escape_mean``, ``mutation`` and ``times_seen``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the filtered rows.
    """
    # Parse `site` as text explicitly. If pandas infers the dtype, an
    # all-numeric column (or numeric chunks of a mixed column) becomes int and
    # silently fails to match the string labels in `sites_to_show`.
    effects = pd.read_csv(csv_path, dtype={'site': str})
    keep = effects['site'].isin(sites_to_show) & (effects['mutant'] != '*')
    # Return a real copy so later cells can add columns to the frame without
    # operating on a slice of the raw table.
    return effects.loc[keep, ['escape_mean', 'mutation', 'times_seen']].copy()


df_1_18 = _read_mut_effects('results/antibody_escape/averages/1-18_mut_effect.csv')
df_3BNC117 = _read_mut_effects('results/antibody_escape/averages/3BNC117_mut_effect.csv')
df_04A06 = _read_mut_effects('results/antibody_escape/averages/04-A06_mut_effect.csv')

Correlate 1-18 and 04-A06 escape:

In [None]:
# Give each antibody's escape column a descriptive name so the two can sit
# side by side after the merge (the original `escape_mean` is kept in place).
df_1_18['1-18 escape effect'] = df_1_18['escape_mean']
df_04A06['04-A06 escape effect'] = df_04A06['escape_mean']

# One row per mutation present in both data sets. 04-A06 is the left frame, so
# its `times_seen` becomes `times_seen_x` and the 1-18 one `times_seen_y`.
A06_vs_1_18 = pd.merge(
    df_04A06.drop(columns=['escape_mean']),
    df_1_18.drop(columns=['escape_mean']),
    on=['mutation'],
)

# Restrict to mutations seen more than 3 times in the 04-A06 data; the same
# subset feeds both the regression and the plot.
A06_vs_1_18_filtered = A06_vs_1_18.query('times_seen_x>3')

slope, intercept, r_value, p_value, std_err = stats.linregress(
    A06_vs_1_18_filtered['1-18 escape effect'].astype(float),
    A06_vs_1_18_filtered['04-A06 escape effect'].astype(float),
)

# Show every merged column in the tooltip; float columns get 3 significant
# digits, everything else is shown as-is.
tooltip_fields = []
for c in A06_vs_1_18.columns:
    if A06_vs_1_18[c].dtype == float:
        tooltip_fields.append(alt.Tooltip(c, format=".3g"))
    else:
        tooltip_fields.append(c)

corr_chart = (
    alt.Chart(A06_vs_1_18_filtered)
    .mark_circle(filled=True, size=60, opacity=0.4)
    .encode(
        x=alt.X('04-A06 escape effect', scale=alt.Scale(domain=[-1.1, 2.5])),
        y=alt.Y('1-18 escape effect', scale=alt.Scale(domain=[-1.1, 2.5])),
        tooltip=tooltip_fields,
    )
    .properties(width=200, height=200)
    .configure_axis(grid=False)
)

print(f"R^2: {r_value**2}")
print(f"R: {r_value}")
corr_chart

Correlate 3BNC117 and 04-A06 escape: 

In [None]:
# Give the 3BNC117 escape column a descriptive name for the merged table (the
# original `escape_mean` column is kept in place).
df_3BNC117['3BNC117 escape effect'] = df_3BNC117['escape_mean']

# Build the 04-A06 side directly from its `escape_mean` column instead of
# relying on a '04-A06 escape effect' column added by another cell, so this
# cell also works when run on its own.
A06_effects = df_04A06[['mutation', 'times_seen']].assign(
    **{'04-A06 escape effect': df_04A06['escape_mean']}
)

# One row per mutation present in both data sets. 04-A06 is the left frame, so
# its `times_seen` becomes `times_seen_x` and the 3BNC117 one `times_seen_y`.
A06_vs_3BNC117 = A06_effects.merge(
    df_3BNC117.drop(columns=['escape_mean']), on=['mutation']
)

# Restrict to mutations seen more than 3 times in the 04-A06 data; the same
# subset feeds both the regression and the plot.
# NOTE(review): only the 04-A06 count (`times_seen_x`) is thresholded; confirm
# whether the 3BNC117 count (`times_seen_y`) should be filtered as well.
A06_vs_3BNC117_filtered = A06_vs_3BNC117.query('times_seen_x>3')

# The x/y order here is the reverse of the chart axes; the correlation
# coefficient printed below is symmetric in x and y, so it is unaffected.
slope, intercept, r_value, p_value, std_err = stats.linregress(
    A06_vs_3BNC117_filtered['3BNC117 escape effect'].astype(float),
    A06_vs_3BNC117_filtered['04-A06 escape effect'].astype(float),
)

corr_chart = (
    alt.Chart(A06_vs_3BNC117_filtered)
    .encode(
        x=alt.X(
            '04-A06 escape effect',
            scale=alt.Scale(domain=[-1.1, 2.5]),
        ),
        y=alt.Y(
            '3BNC117 escape effect',
            scale=alt.Scale(domain=[-1.1, 2.5]),
        ),
        # Show every merged column; float columns get 3 significant digits.
        tooltip=[
            alt.Tooltip(c, format=".3g") if A06_vs_3BNC117[c].dtype == float
            else c
            for c in A06_vs_3BNC117.columns
        ],
    )
    .mark_circle(filled=True, size=60, opacity=0.4)
    .configure_axis(grid=False)
    .properties(width=200, height=200)
)
print(f"R^2: {r_value**2}")
print(f"R: {r_value}")
corr_chart