# Neutralization of WT variants in mAb selections

Estimate rough IC99 values for the mAbs 1C04 and 5G04 from the recent selection sequencing data.

In [1]:
import os

import dms_variants
import dms_variants.codonvarianttable

import pandas as pd

import yaml

import warnings
warnings.filterwarnings('ignore')

### Load data

In [2]:
# `config.yaml` is opened relative to the working directory, one level above
# where this notebook starts. Only move up when the file is not already
# visible, so that re-running this cell does not climb a further directory
# each time (which would break the relative path below).
if not os.path.isfile("config.yaml"):
    os.chdir('../')

with open("config.yaml") as f:
    # safe_load parses plain YAML without constructing arbitrary objects
    config = yaml.safe_load(f)

In [3]:
# Table of antibody selections; the CSV location is taken from the config
antibody_selections_csv = config["antibody_selections"]
antibody_selections = pd.read_csv(antibody_selections_csv)

In [4]:
selection_groups = antibody_selections["selection_group"].unique()

# Read one "<selection_group>_prob_escape.csv" file per selection group and
# stack them into a single frame with a fresh 0..n-1 index.
prob_escape_frames = []
for selection_group in selection_groups:
    prob_escape_csv = os.path.join(
        config["prob_escape_dir"], f"{selection_group}_prob_escape.csv"
    )
    prob_escape_frames.append(
        # only the literal string "nan" is parsed as missing; with the default
        # NA markers disabled, empty fields are kept as empty strings
        pd.read_csv(prob_escape_csv, keep_default_na=False, na_values="nan")
    )

prob_escape = pd.concat(prob_escape_frames, ignore_index=True)

In [5]:
# Columns of `prob_escape` kept for the per-barcode count table
ab_count_columns = [
    "library",
    "antibody_sample",
    "no-antibody_sample",
    "aa_substitutions_sequential",
    "antibody_count",
    "no-antibody_count",
    "antibody_neut_standard_count",
    "no-antibody_neut_standard_count",
    "barcode",
    "no_antibody_count_threshold",
]

ab_counts = (
    prob_escape[ab_count_columns]
    .drop_duplicates()
    .assign(
        # 1 when the no-antibody count reaches its threshold, otherwise 0
        variant_above_threshold=lambda df: df["no-antibody_count"]
        .ge(df["no_antibody_count_threshold"])
        .astype(int),
        # no-antibody count, zeroed for rows below the threshold
        count_above_threshold=lambda df: df["no-antibody_count"].mul(
            df["variant_above_threshold"]
        ),
        # first underscore-delimited field of the antibody sample name
        date=lambda df: df["antibody_sample"].str.split("_", expand=True)[0],
    )
)

### Subset to data for the mAb selections (i.e., the 7-20-22 sequencing run, sample-name date prefix `220720`)

In [6]:
# Date prefix (first field of `antibody_sample`) identifying the mAb selection run
MAB_RUN_DATE = '220720'

mAb_run = (
    ab_counts
    .loc[ab_counts['date'] == MAB_RUN_DATE]
    # missing values become '' so the substitution string can always be split
    .fillna('')
    # number of whitespace-separated substitutions per variant (0 for an empty
    # string); vectorized string ops avoid a slow row-wise `apply` and still
    # work when the filter above matches no rows
    .assign(
        number_substitutions=lambda x: (
            x['aa_substitutions_sequential'].str.split().str.len()
        )
    )
)

mAb_run.head()

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,barcode,no_antibody_count_threshold,variant_above_threshold,count_above_threshold,date,number_substitutions
2579155,libA,220720_1_antibody_1C04_0.05075_1,220720_1_no-antibody_control_1,K297I,396858,1822390,22062,99159,ATAACACAAAAAAGTA,15,1,1822390,220720,1
2579156,libA,220720_1_antibody_1C04_0.05075_1,220720_1_no-antibody_control_1,R111S V366M R402S,172599,750934,22062,99159,TATCTACCTAACGAAA,15,1,750934,220720,3
2579157,libA,220720_1_antibody_1C04_0.05075_1,220720_1_no-antibody_control_1,K46A I301V E409M,116711,518056,22062,99159,AAAAATCTGAGACAAA,15,1,518056,220720,3
2579158,libA,220720_1_antibody_1C04_0.05075_1,220720_1_no-antibody_control_1,,116465,528874,22062,99159,AGCCTATTAGGATTCG,15,1,528874,220720,0
2579159,libA,220720_1_antibody_1C04_0.05075_1,220720_1_no-antibody_control_1,Y113N T150K L263H V342Y,106533,506611,22062,99159,AGACACTAAATACAGC,15,1,506611,220720,4


### Calculate the number of WT variants that are neutralized in each mAb selection condition

In [7]:
# reduce to just WT variants (no substitutions) that were represented above
# the no-Ab counts threshold
is_wt = mAb_run['number_substitutions'] == 0
is_above_threshold = mAb_run['variant_above_threshold'] == 1

# `.assign` returns a new frame, so the flag is never written onto a slice of
# `mAb_run` (the original column assignment triggered SettingWithCopyWarning).
# 'is_neutralized' is True if the WT variant has 0 counts in the selection sample.
mAb_wt_variants = (
    mAb_run
    .loc[is_wt & is_above_threshold]
    .assign(is_neutralized=lambda x: x['antibody_count'] == 0)
)

# get number of neutralized / non-neutralized variants per antibody sample
mAb_wt_neut = (mAb_wt_variants
               .groupby(['antibody_sample', 'is_neutralized'])
               .size()
               .reset_index(name='num_variants')
              )

mAb_wt_neut

Unnamed: 0,antibody_sample,is_neutralized,num_variants
0,220720_1_antibody_1C04_0.05075_1,False,2086
1,220720_1_antibody_1C04_0.05075_1,True,15
2,220720_1_antibody_1C04_0.203_1,False,2085
3,220720_1_antibody_1C04_0.203_1,True,16
4,220720_1_antibody_1C04_2.03_1,False,1472
5,220720_1_antibody_1C04_2.03_1,True,629
6,220720_1_antibody_1C04_6.09_1,False,1491
7,220720_1_antibody_1C04_6.09_1,True,610
8,220720_1_antibody_5G04_11.2_1,False,1572
9,220720_1_antibody_5G04_11.2_1,True,529


In [8]:
# calculate fraction of neutralized WT variants in each antibody selection
mAb_wt_neut_tidy = (
    mAb_wt_neut
    # one row per antibody sample, one column per value of `is_neutralized`
    .pivot(
        index='antibody_sample',
        columns='is_neutralized',
        values='num_variants',
    )
    # make sure both categories exist even if no sample has zero-count (or
    # nonzero-count) WT variants, and count a missing category as 0 rather
    # than NaN so the fraction below stays defined
    .reindex(columns=[False, True])
    .fillna(0)
    .astype(int)
    .reset_index()
    .rename_axis(None, axis=1)
    .rename(columns={False: 'wt_nonzero_counts', True: 'wt_zero_counts'})
    # fraction of WT variants with zero counts in the selection sample
    .assign(
        fraction_neutralized=lambda x: (
            x['wt_zero_counts']
            / (x['wt_nonzero_counts'] + x['wt_zero_counts'])
        )
    )
)

mAb_wt_neut_tidy

Unnamed: 0,antibody_sample,wt_nonzero_counts,wt_zero_counts,fraction_neutralized
0,220720_1_antibody_1C04_0.05075_1,2086,15,0.007139
1,220720_1_antibody_1C04_0.203_1,2085,16,0.007615
2,220720_1_antibody_1C04_2.03_1,1472,629,0.299381
3,220720_1_antibody_1C04_6.09_1,1491,610,0.290338
4,220720_1_antibody_5G04_11.2_1,1572,529,0.251785
5,220720_1_antibody_5G04_112.0_1,1638,463,0.220371
6,220720_1_antibody_5G04_2.8_1,1679,422,0.200857
7,220720_1_antibody_5G04_336.0_1,1595,506,0.240838
