# Compare escape in spike and RBD deep mutational scans
Compare antibody escape measured in the full-spike and RBD deep mutational scanning libraries.

In [1]:
# this cell is tagged `parameters` for `papermill` parameterization
# Every name below is a `None` placeholder; the notebook cannot run with these
# defaults, so real values must be supplied before the data-loading cell.
site_numbering_map_csv = None  # CSV read for its `reference_site` and `sequential_site` columns
func_effects_csv = None  # CSV read for its `site`, `mutant` and `effect` columns
spike_escape_csvs = None  # iterable of escape CSV paths for the full-spike library
rbd_escape_csvs = None  # iterable of escape CSV paths/URLs for the RBD-only library
init_min_func_effect = None  # NOTE(review): presumably the chart's initial functional-effect cutoff; confirm
init_min_times_seen = None  # NOTE(review): presumably the chart's initial `times_seen` cutoff; confirm
init_floor_at_zero = None  # NOTE(review): presumably whether escape is initially floored at zero; confirm
init_site_escape_stat = None  # NOTE(review): presumably the initial per-site summary statistic; confirm
chart_html = None  # NOTE(review): presumably the output path for the saved chart; confirm

In [2]:
# Parameters
# Concrete values for this run, replacing the `None` placeholders assigned in
# the cell tagged `parameters`.
# NOTE(review): the `init_*` values are not used in the cells visible here;
# presumably they set the initial state of the chart's interactive options.
init_min_func_effect = -2
init_min_times_seen = 3
init_floor_at_zero = False
init_site_escape_stat = "sum"
# Full-spike escape CSVs. Both high- and medium-ACE2 files are listed, but the
# data-loading cell only considers files whose basename matches
# `sera_<serum>_mediumACE2`.
spike_escape_csvs = [
    "results/antibody_escape/averages/sera_493C_highACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_498C_highACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_500C_highACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_503C_highACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_493C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_498C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_500C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_501C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_503C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_287C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_288C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_343C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_497C_mediumACE2_mut_effect.csv",
    "results/antibody_escape/averages/sera_505C_mediumACE2_mut_effect.csv",
]
# RBD-only escape CSVs, given as remote URLs that are passed straight to
# `pd.read_csv`, so running the notebook requires network access.
rbd_escape_csvs = [
    "https://raw.githubusercontent.com/dms-vep/SARS-CoV-2_XBB.1.5_RBD_DMS/main/results/antibody_escape/averages/sera_493C_mediumACE2_mut_effect.csv",
    "https://raw.githubusercontent.com/dms-vep/SARS-CoV-2_XBB.1.5_RBD_DMS/main/results/antibody_escape/averages/sera_498C_mediumACE2_mut_effect.csv",
    "https://raw.githubusercontent.com/dms-vep/SARS-CoV-2_XBB.1.5_RBD_DMS/main/results/antibody_escape/averages/sera_500C_mediumACE2_mut_effect.csv",
    "https://raw.githubusercontent.com/dms-vep/SARS-CoV-2_XBB.1.5_RBD_DMS/main/results/antibody_escape/averages/sera_503C_mediumACE2_mut_effect.csv",
    "https://raw.githubusercontent.com/dms-vep/SARS-CoV-2_XBB.1.5_RBD_DMS/main/results/antibody_escape/averages/sera_343C_mediumACE2_mut_effect.csv",
]
site_numbering_map_csv = "data/site_numbering_map.csv"
func_effects_csv = "results/func_effects/averages/293T_high_ACE2_entry_func_effects.csv"

import os
# Move the working directory up one level; presumably so the relative
# `results/...` and `data/...` paths above resolve (confirm against the
# pipeline layout). The path is relative to the *current* directory, so
# re-running this cell without restarting the kernel moves up one more level.
os.chdir("../")

Import Python modules:

In [3]:
import collections
import os
import re

import altair as alt

import pandas as pd

# Turn off Altair's maximum-rows check on chart data; the return value is
# bound to `_` only so the cell does not display it.
_ = alt.data_transformers.disable_max_rows()

## Read in the data


In [8]:
# Find every serum assayed under the medium-ACE2 condition in BOTH the
# full-spike and the RBD-only library, and read the escape CSVs for each pair.
#
# The pattern is a raw string so `\d` reaches the regex engine rather than being
# an invalid string escape, and the class is `A-Za-z` rather than `A-z`: the
# latter also matches `[ \ ] ^ _` and the backtick, which would let the serum
# name swallow the `_` field separators in the file name.
serum_regex = re.compile(r"sera_(?P<serum>[\dA-Za-z]+)_mediumACE2")

escape_dfs = []  # one data frame per (serum, library type)
for spike_escape_csv in spike_escape_csvs:
    m = serum_regex.match(os.path.basename(spike_escape_csv))
    if m is None:
        # not a medium-ACE2 file (e.g. highACE2), so it is not compared
        continue
    serum = m.group("serum")
    for rbd_escape_csv in rbd_escape_csvs:
        if os.path.basename(rbd_escape_csv).startswith(f"sera_{serum}_mediumACE2"):
            print(f"Serum {serum} has matches in both library types")
            escape_dfs += [
                pd.read_csv(f).assign(serum="serum " + serum, library_type=libtype)
                for (f, libtype) in [
                    (spike_escape_csv, "full_spike"), (rbd_escape_csv, "RBD only")
                ]
            ]

# `pd.concat` on an empty list raises an opaque "No objects to concatenate"
# ValueError; raise the same exception type with an actionable message instead.
if not escape_dfs:
    raise ValueError(
        "no serum has a mediumACE2 escape CSV in both "
        "`spike_escape_csvs` and `rbd_escape_csvs`"
    )

# Stack all sera / library types, then annotate each mutation with its
# sequential site number and its functional effect. Both merges use the pandas
# default inner join, so rows without a match in the site-numbering map or in
# the functional-effects table are dropped. `validate="many_to_one"` makes
# pandas raise if the right-hand table has duplicate merge keys.
escape = (
    pd.concat(escape_dfs, ignore_index=True)
    .rename(columns={"escape_median": "escape"})
    [["epitope", "serum", "library_type", "site", "wildtype", "mutant", "escape", "times_seen"]]
    .merge(
        pd.read_csv(site_numbering_map_csv)
        .rename(columns={"reference_site": "site"})
        [["site", "sequential_site"]],
        validate="many_to_one",
    )
    .merge(
        pd.read_csv(func_effects_csv)[["site", "mutant", "effect"]],
        validate="many_to_one",
    )
)

# The epitope column carries no information once it is confirmed single-valued.
assert escape["epitope"].nunique() == 1, "code only works for one epitope"
escape = escape.drop(columns="epitope")

assert escape["escape"].notnull().all(), "found null escape values"

escape

Serum 493C has matches in both library types
Serum 498C has matches in both library types
Serum 500C has matches in both library types
Serum 503C has matches in both library types
Serum 343C has matches in both library types


Unnamed: 0,serum,library_type,site,wildtype,mutant,escape,times_seen,sequential_site,effect
0,serum 493C,full_spike,1,M,I,-1.599000,1.3330,1,-6.3230
1,serum 498C,full_spike,1,M,I,-1.803000,1.0000,1,-6.3230
2,serum 500C,full_spike,1,M,I,-0.797300,1.0000,1,-6.3230
3,serum 503C,full_spike,1,M,I,-0.976000,1.0000,1,-6.3230
4,serum 343C,full_spike,1,M,I,-2.059000,1.0000,1,-6.3230
...,...,...,...,...,...,...,...,...,...
48518,serum 503C,full_spike,928,N,D,0.001228,0.3333,924,-1.6600
48519,serum 343C,full_spike,928,N,K,-0.005277,2.1670,924,-6.4660
48520,serum 343C,full_spike,928,N,-,0.000471,0.3333,924,0.1183
48521,serum 343C,full_spike,665,P,L,-0.029230,1.0000,661,-3.1490
