In [1]:
import pandas as pd
import altair as alt
import polyclonal

import warnings
warnings.filterwarnings('ignore')

from IPython.utils import io

In [2]:
import os
os.chdir('../../')

In [3]:
# set up function for mean prob escape chart to avoid clutter from large block of code

def plot_avg_escape(prob_escape):
    max_aa_subs = 4  # group if >= this many substitutions
    
    mean_prob_escape = (
        prob_escape.assign(
            n_subs=lambda x: (
                x["aa_substitutions_reference"]
                .str.split()
                .map(len)
                .clip(upper=max_aa_subs)
                .map(lambda n: str(n) if n < max_aa_subs else f">{max_aa_subs - 1}")
            )
        )
        .groupby(["antibody_concentration", "n_subs"], as_index=False)
        .aggregate({"prob_escape": "mean", "prob_escape_uncensored": "mean"})
        .rename(
            columns={
                "prob_escape": "censored to [0, 1]",
                "prob_escape_uncensored": "not censored",
            }
        )
        .melt(
            id_vars=["antibody_concentration", "n_subs"],
            var_name="censored",
            value_name="probability escape",
        )
    )

    mean_prob_escape_chart = (
        alt.Chart(mean_prob_escape)
        .encode(
            x=alt.X("antibody_concentration"),
            y=alt.Y(
                "probability escape",
                scale=alt.Scale(type="symlog", constant=0.05),
            ),
            column=alt.Column("censored", title=None),
            color=alt.Color("n_subs", title="n substitutions"),
            tooltip=[
                alt.Tooltip(c, format=".3g") if mean_prob_escape[c].dtype == float else c
                for c in mean_prob_escape.columns
            ],
        )
        .mark_line(point=True, size=0.5)
        .properties(width=200, height=125)
        .configure_axis(grid=False)
    )

    return mean_prob_escape_chart

In [5]:
spatial_distances = polyclonal.pdb_utils.inter_residue_distances(
    "scratch_notebooks/221227_model_fitting/4o5n_renumbered_1chain.pdb",
    target_chains=["A"],
)

spatial_distances

Unnamed: 0,site_1,site_2,distance,chain_1,chain_2
0,9,10,1.328212,A,A
1,9,11,3.850353,A,A
2,9,12,6.449567,A,A
3,9,13,9.701373,A,A
4,9,14,12.647217,A,A
...,...,...,...,...,...
254536,721,499,15.731319,A,A
254537,721,500,19.078522,A,A
254538,722,499,67.375801,A,A
254539,722,500,55.555973,A,A


In [4]:
reference_sites = pd.read_csv("data/site_map.csv")["reference_site"].tolist()

def generate_model(
    prob_escape_df,
    n_epitopes=1,
#     reg_uniqueness_weight=0,
#     reg_uniqueness2_weight=1,
#     reg_spatial_weight=0.0,
#     reg_spatial2_weight=0.0005,
):
    
    model = polyclonal.Polyclonal(
        n_epitopes=n_epitopes,
        data_to_fit=prob_escape_df.rename(
            columns={
                "antibody_concentration": "concentration",
                "aa_substitutions_reference": "aa_substitutions",
            }
        ),
        alphabet=polyclonal.AAS_WITHSTOP_WITHGAP,
        sites=reference_sites,
#         spatial_distances=spatial_distances,
    )

    # fit model, suppressing output text to avoid clutter in notebook
    with io.capture_output() as captured:
        opt_res = model.fit(
            logfreq=200,
            reg_escape_weight=0.1,
#             reg_uniqueness_weight=reg_uniqueness_weight,
#             reg_uniqueness2_weight=reg_uniqueness2_weight,
#             reg_spatial_weight=reg_spatial_weight,
#             reg_spatial2_weight=reg_spatial2_weight,
        )


    mut_escape_plot = model.mut_escape_plot(addtl_slider_stats={"times_seen": 3}, init_floor_at_zero=False)
    
    return model

### cocktail comparison

In [6]:
prob_escape_libA = pd.read_csv(
    "results/prob_escape/libA_221108_1_1C04-5G04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libA.notnull().all().all()

In [7]:
display(
    prob_escape_libA.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
1.37,26585
2.05,26585
3.08,26585
4.62,26585
6.93,26585
10.4,26585


In [6]:
plot_avg_escape(prob_escape_libA)

In [8]:
prob_escape_libB_old = pd.read_csv(
    "results/prob_escape/libB_221108_1_1C04-5G04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libB_old.notnull().all().all()

display(
    prob_escape_libB_old.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
1.37,28219
2.05,28219
3.08,28219
4.62,28219
6.93,28219
10.4,28219


In [12]:
plot_avg_escape(prob_escape_libB_old)

In [37]:
prob_escape_libB_old_filtered = prob_escape_libB_old.loc[prob_escape_libB_old['antibody_concentration'] != 1.37]

In [44]:
plot_avg_escape(prob_escape_libB_old_filtered)

In [9]:
prob_escape_libB = pd.read_csv(
    "results/prob_escape/libB_230118_1_1C04-5G04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libB.notnull().all().all()

display(
    prob_escape_libB.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
1.3169,27933
1.9753,27933
2.963,27933
4.4444,27933
6.6667,27933
10.0,27933


In [10]:
prob_escape_libB.sort_values('prob_escape_uncensored', ascending=False).head(20)

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
425688,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,K208Y R241F K297V,3,TAGAACAAATGCAAAA,1.0,52.8375,11365,20,329706,30657,7355580,15,K189Y R222F K278V,1C04-5G04,1.3169
425816,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,,0,TGAACAGAAAATTACA,1.0,16.9601,3648,20,329706,30657,7355580,15,,1C04-5G04,1.3169
425715,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,P122Q T206A I211V A495Y,4,TGGAGTCAAATGCCAT,1.0,15.8222,7317,43,329706,30657,7355580,15,P103Q T187A I192V A476Y,1C04-5G04,1.3169
425925,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,T407G,1,GAACGTGACACAGTCA,1.0,14.1936,2595,17,329706,30657,7355580,15,T388G,1C04-5G04,1.3169
425833,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,K190Y S218T V366T I393V,4,AATTAACAGAAACGCA,1.0,10.8108,3488,30,329706,30657,7355580,15,K171Y S199T V347T I374V,1C04-5G04,1.3169
340576,libB,230118_1_antibody_1C04-5G04_1.975308642_1,230118_1_no-antibody_control_1,N235H K387V,2,GCGTCCGTGGCCGGAT,1.0,10.1358,13522,25,1635959,30657,7355580,15,N216H K368V,1C04-5G04,1.9753
340589,libB,230118_1_antibody_1C04-5G04_1.975308642_1,230118_1_no-antibody_control_1,K297F K469D,2,AGTTCGGTTTTAAAAT,1.0,10.0462,10722,20,1635959,30657,7355580,15,K278F K450D,1C04-5G04,1.9753
426032,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,R52E G97C S217K,3,AACCTCATTGTTCCAT,1.0,10.0324,2050,19,329706,30657,7355580,15,R33E G78C S198K,1C04-5G04,1.3169
426106,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,V316L N331R G398D,3,ACATTAAACATAATCG,1.0,10.0015,1721,16,329706,30657,7355580,15,V297L N312R G379D,1C04-5G04,1.3169
426078,libB,230118_1_antibody_1C04-5G04_1.316872428_1,230118_1_no-antibody_control_1,K101V N516F,2,ATCCTAGAAACAACGC,1.0,9.5049,1840,18,329706,30657,7355580,15,K82V N497F,1C04-5G04,1.3169


In [11]:
prob_escape_libB_old.sort_values('prob_escape_uncensored', ascending=False).head(20)

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
426946,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,I301V,1,TATCAAACGTAAAATA,1.0,2.9681,371,27,16152,3489,9397483,19,I282V,1C04-5G04,1.37
426999,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,D123H Q216H S217P G405D K491A,5,AATAATCCATCTATCT,1.0,2.7618,358,28,16152,3489,9397483,19,D104H Q197H S198P G386D K472A,1C04-5G04,1.37
340969,libB,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,G205P G405W,2,TGGTCGAGTACTATGT,1.0,2.4119,1422,23,89435,3489,9397483,19,G186P G386W,1C04-5G04,2.05
427438,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,L515I,1,GTTGAACCAAGACAAA,1.0,2.2598,272,26,16152,3489,9397483,19,L496I,1C04-5G04,1.37
427884,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,N64L I404V K469N,3,AACATGGACCGAATAA,1.0,2.2527,219,21,16152,3489,9397483,19,N45L I385V K450N,1C04-5G04,1.37
341047,libB,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,S164K R220G Q520D,3,CTTTCCATAGGCGTCT,1.0,2.2396,1263,22,89435,3489,9397483,19,S145K R201G Q501D,1C04-5G04,2.05
427186,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,L176G R227V I233Y R402G,4,ACCTGCCAGAACCTAA,1.0,2.2298,320,31,16152,3489,9397483,19,L157G R208V I214Y R383G,1C04-5G04,1.37
426732,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,R326T,1,TAGCGAAGAAACGTAG,1.0,2.2115,430,42,16152,3489,9397483,19,R307T,1C04-5G04,1.37
427447,libB,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,K295R,1,TGGAACCACTAGCTAA,1.0,2.083,270,28,16152,3489,9397483,19,K276R,1C04-5G04,1.37
340770,libB,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,Q330C,1,AGCATATAACTGGAGT,1.0,1.9542,2154,43,89435,3489,9397483,19,Q311C,1C04-5G04,2.05


In [13]:
plot_avg_escape(prob_escape_libB)

In [88]:
prob_escape_libA_filtered = prob_escape_libA.loc[prob_escape_libA['antibody_concentration'] != 1.37]

prob_escape_libB_filtered = prob_escape_libB.loc[prob_escape_libB['antibody_concentration'] != 1.3169]

In [89]:
plot_avg_escape(prob_escape_libA_filtered)

In [12]:
plot_avg_escape(prob_escape_libB_filtered)

In [14]:
cocktail_replicate_df = pd.concat([prob_escape_libA_filtered, prob_escape_libB_filtered])

corr = polyclonal.utils.tidy_to_corr(
    cocktail_replicate_df,
    sample_col='library',
    label_col='barcode',
    value_col='prob_escape',
    group_cols = ['antibody_concentration']
)
    

In [16]:
corr

Unnamed: 0,antibody_concentration,library_1,library_2,correlation
0,2.05,libA,libA,1.0
1,3.08,libA,libA,1.0
2,4.62,libA,libA,1.0
3,6.93,libA,libA,1.0
4,10.4,libA,libA,1.0
5,1.9753,libB,libB,1.0
6,2.963,libB,libB,1.0
7,4.4444,libB,libB,1.0
8,6.6667,libB,libB,1.0
9,10.0,libB,libB,1.0


In [30]:
model_libA_filtered = generate_model(prob_escape_libA_filtered)
model_libB_filtered = generate_model(prob_escape_libB_filtered)
model_libA = generate_model(prob_escape_libA)
model_libB = generate_model(prob_escape_libB)

In [32]:
libs = ['libA', 'libA', 'libB', 'libB']
replicates = ['full', 'filtered', 'full', 'filtered']
models = [model_libA, model_libA_filtered, model_libB, model_libB_filtered]
models_df = pd.DataFrame({
    'library': libs,
    'replicate': replicates,
    'model': models
})

models_df

Unnamed: 0,library,replicate,model
0,libA,full,<polyclonal.polyclonal.Polyclonal object at 0x...
1,libA,filtered,<polyclonal.polyclonal.Polyclonal object at 0x...
2,libB,full,<polyclonal.polyclonal.Polyclonal object at 0x...
3,libB,filtered,<polyclonal.polyclonal.Polyclonal object at 0x...


In [33]:
avg_model = polyclonal.PolyclonalAverage(
    models_df
)

In [34]:
avg_model.mut_escape_corr_heatmap()

In [38]:

model_libB_old_filtered = generate_model(prob_escape_libB_old_filtered)

model_libB_old = generate_model(prob_escape_libB_old)

In [39]:
libs = ['libA', 'libA', 'libB', 'libB']
replicates = ['full', 'filtered', 'full', 'filtered']
models = [model_libA, model_libA_filtered, model_libB_old, model_libB_old_filtered]
models_df_old = pd.DataFrame({
    'library': libs,
    'replicate': replicates,
    'model': models
})

models_df_old

Unnamed: 0,library,replicate,model
0,libA,full,<polyclonal.polyclonal.Polyclonal object at 0x...
1,libA,filtered,<polyclonal.polyclonal.Polyclonal object at 0x...
2,libB,full,<polyclonal.polyclonal.Polyclonal object at 0x...
3,libB,filtered,<polyclonal.polyclonal.Polyclonal object at 0x...


In [40]:
avg_model_old = polyclonal.PolyclonalAverage(models_df_old)
avg_model_old.mut_escape_corr_heatmap()

In [42]:
avg_model.mut_escape_corr_heatmap()

In [90]:
model_libA = generate_model(prob_escape_libA_filtered)
model_libA.mut_escape_plot()

In [45]:
prob_escape_libA = pd.read_csv(
    "results/prob_escape/libA_221021_1_1C04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libA.notnull().all().all()

display(
    prob_escape_libA.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
0.05,26662
0.1,26662
0.2,26662
0.4,26662
0.8,26662


In [46]:
plot_avg_escape(prob_escape_libA)

In [47]:
prob_escape_libB = pd.read_csv(
    "results/prob_escape/libB_230118_1_1C04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libB.notnull().all().all()

display(
    prob_escape_libB.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
0.1333,27933
0.2,27933
0.3,27933
0.45,27933
0.675,27933


In [48]:
plot_avg_escape(prob_escape_libB)

In [50]:
prob_escape_libA_filtered = prob_escape_libA.loc[(prob_escape_libA['antibody_concentration'] != 0.05) &
                                                 (prob_escape_libA['antibody_concentration'] != 0.10)
                                                ]
plot_avg_escape(prob_escape_libA_filtered)

In [51]:
prob_escape_libB_filtered = prob_escape_libB.loc[(prob_escape_libB['antibody_concentration'] != 0.1333) 
#                                                  (prob_escape_libA['antibody_concentration'] != 0.10)
                                                ]

plot_avg_escape(prob_escape_libB_filtered)

In [53]:
model_libA = generate_model(prob_escape_libA_filtered)
model_libB = generate_model(prob_escape_libB_filtered)

libs = ['libA', 'libB']
models = [model_libA, model_libB]
models_df_old = pd.DataFrame({
    'library': libs,
    'model': models
})

models_df_old

Unnamed: 0,library,model
0,libA,<polyclonal.polyclonal.Polyclonal object at 0x...
1,libB,<polyclonal.polyclonal.Polyclonal object at 0x...


In [54]:
avg_model = polyclonal.PolyclonalAverage(models_df_old)
avg_model.mut_escape_corr_heatmap()

In [55]:
model_libB.mut_escape_plot(addtl_slider_stats={"times_seen": 3}, init_floor_at_zero=False)

In [56]:
model_libA.mut_escape_plot(addtl_slider_stats={"times_seen": 3}, init_floor_at_zero=False)

In [57]:
prob_escape_libA = pd.read_csv(
    "results/prob_escape/libA_221027_1_AUSAB-13_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libA.notnull().all().all()

display(
    prob_escape_libA.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
0.0007,26636
0.001,26636
0.0015,26636
0.0022,26636
0.0033,26636
0.005,26636
0.0075,26636


In [58]:
plot_avg_escape(prob_escape_libA)

In [59]:
prob_escape_libB = pd.read_csv(
    "results/prob_escape/libB_230118_1_AUSAB-13_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libB.notnull().all().all()

display(
    prob_escape_libB.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
0.0006,27933
0.0009,27933
0.0013,27933
0.0019,27933
0.0029,27933
0.0043,27933
0.0065,27933


In [60]:
plot_avg_escape(prob_escape_libB)

In [61]:
prob_escape_libB_filtered = prob_escape_libB.loc[(prob_escape_libB['antibody_concentration'] != 0.0006) 
#                                                  (prob_escape_libA['antibody_concentration'] != 0.10)
                                                ]

plot_avg_escape(prob_escape_libB_filtered)

In [62]:
model_libA = generate_model(prob_escape_libA)
model_libB = generate_model(prob_escape_libB_filtered)

libs = ['libA', 'libB']
models = [model_libA, model_libB]
models_df_old = pd.DataFrame({
    'library': libs,
    'model': models
})

models_df_old

Unnamed: 0,library,model
0,libA,<polyclonal.polyclonal.Polyclonal object at 0x...
1,libB,<polyclonal.polyclonal.Polyclonal object at 0x...


In [63]:
avg_model = polyclonal.PolyclonalAverage(models_df_old)
avg_model.mut_escape_corr_heatmap()

In [64]:
prob_escape_libA = pd.read_csv(
    "results/prob_escape/libA_221021_1_5G04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libA.notnull().all().all()

display(
    prob_escape_libA.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
1.5,26662
3.0,26662
6.0,26662
12.0,26662
24.0,26662


In [65]:
plot_avg_escape(prob_escape_libA)

In [68]:
prob_escape_libA_filtered = prob_escape_libA.loc[(prob_escape_libA['antibody_concentration'] != 1.5) &
                                                 (prob_escape_libA['antibody_concentration'] != 3.0)
                                                ]
plot_avg_escape(prob_escape_libA_filtered)

In [70]:
model = generate_model(prob_escape_libA_filtered)
model.mut_escape_plot()

In [71]:
prob_escape_libB = pd.read_csv(
    "results/prob_escape/libB_230118_1_5G04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_libB.notnull().all().all()

display(
    prob_escape_libB.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
3.3333,27933
5.0,27933
7.5,27933
11.25,27933
16.875,27933


In [73]:
plot_avg_escape(prob_escape_libB)

In [85]:
prob_escape_libB_filtered = prob_escape_libB.loc[(prob_escape_libB['antibody_concentration'] != 16.8750) &
                                                 (prob_escape_libB['antibody_concentration'] != 11.2500)
                                                ]
plot_avg_escape(prob_escape_libB_filtered)

In [86]:
model = generate_model(prob_escape_libB_filtered)
model.mut_escape_plot()

In [15]:
prob_escape_ser11 = pd.read_csv(
    "results/prob_escape/libA_221223_1_AUSAB-11_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_ser11.notnull().all().all()

display(
    prob_escape_ser11.groupby("antibody_concentration").aggregate(
        n_variants=pd.NamedAgg("barcode", "nunique")
    )
)

Unnamed: 0_level_0,n_variants
antibody_concentration,Unnamed: 1_level_1
0.003,26308
0.0045,26308
0.0067,26308
0.01,26308
0.015,26308
0.0225,26308
0.0338,26308


In [16]:
plot_avg_escape(prob_escape_ser11)

In [19]:
ser11_filtered = prob_escape_ser11.loc[
#     (prob_escape_ser11['antibody_concentration'] == 0.0067) |
                                       (prob_escape_ser11['antibody_concentration'] == 0.0100) |
                                       (prob_escape_ser11['antibody_concentration'] == 0.0150)
                                      ]

In [20]:
plot_avg_escape(ser11_filtered)

In [23]:
prob_escape_ser11.sort_values(['prob_escape_uncensored'], ascending=False)

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
199771,libA,221223_1_antibody_AUSAB-11_0.010014815_1,221223_1_no-antibody_control_1,G24W L176H K345N R380Q,4,AATATAGTAAAAACTT,1.0,151.4498,10954,21,97842,28408,10675748,21,G5W L157H K326N R361Q,AUSAB-11,0.0100
199799,libA,221223_1_antibody_AUSAB-11_0.010014815_1,221223_1_no-antibody_control_1,F98L K208S A231T M481I,4,AAGAAAGTATGACTTT,1.0,70.7959,7315,30,97842,28408,10675748,21,F79L K189S A212T M462I,AUSAB-11,0.0100
401244,libA,221223_1_antibody_AUSAB-11_0.002967353_1,221223_1_no-antibody_control_1,S212E K297T,2,ATACATACTGCTAATC,1.0,45.0416,268,21,8049,28408,10675748,21,S193E K278T,AUSAB-11,0.0030
199868,libA,221223_1_antibody_AUSAB-11_0.010014815_1,221223_1_no-antibody_control_1,L400V,1,CTAACTAAGCGGTAAG,1.0,34.4487,4034,34,97842,28408,10675748,21,L381V,AUSAB-11,0.0100
200016,libA,221223_1_antibody_AUSAB-11_0.010014815_1,221223_1_no-antibody_control_1,K154N I211S S212E D244N I261S R280K T347D,7,CATTCAGAGATTCATT,1.0,33.7631,2442,21,97842,28408,10675748,21,K135N I192S S193E D225N I242S R261K T328D,AUSAB-11,0.0100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290912,libA,221223_1_antibody_AUSAB-11_0.006676543_1,221223_1_no-antibody_control_1,T150N S281N,2,AATAATCCGATTGCGT,0.0,0.0000,0,71,16663,28408,10675748,21,T131N S262N,AUSAB-11,0.0067
290913,libA,221223_1_antibody_AUSAB-11_0.006676543_1,221223_1_no-antibody_control_1,S218I E299A P308H T320S,4,AATATTGAAGTCACAA,0.0,0.0000,0,34,16663,28408,10675748,21,S199I E280A P289H T301S,AUSAB-11,0.0067
290916,libA,221223_1_antibody_AUSAB-11_0.006676543_1,221223_1_no-antibody_control_1,G24T T179S S217M,3,ACACCTACGAAAAACG,0.0,0.0000,0,59,16663,28408,10675748,21,G5T T160S S198M,AUSAB-11,0.0067
290918,libA,221223_1_antibody_AUSAB-11_0.006676543_1,221223_1_no-antibody_control_1,Q99E E429M,2,ACTAAAACCATTATTG,0.0,0.0000,0,37,16663,28408,10675748,21,Q80E E410M,AUSAB-11,0.0067


In [24]:
prob_escape_cocktail = pd.read_csv(
    "results/prob_escape/libA_221108_1_1C04-5G04_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_cocktail.notnull().all().all()

plot_avg_escape(prob_escape_cocktail)

In [25]:
prob_escape_cocktail.sort_values(['prob_escape_uncensored'], ascending=False)

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
266350,libA,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,E363M D394G K522R,3,ACAGTTGGCACTTGCA,1.0,12.4331,15629,26,297196,6147,10886757,22,E344M D375G K503R,1C04-5G04,2.05
333224,libA,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,A88I K102N,2,TGTTATTTGTAGCCCA,1.0,8.0918,2070,23,68369,6147,10886757,22,A69I K83N,1C04-5G04,1.37
333069,libA,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,G161K S163R D209N K472A G498H,5,CACCGTGTACTTGATA,1.0,4.6726,3482,67,68369,6147,10886757,22,G142K S144R D190N K453A G479H,1C04-5G04,1.37
333680,libA,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,R280M A391S,2,CCCTACCCAGAGAACA,1.0,4.1319,1057,23,68369,6147,10886757,22,R261M A372S,1C04-5G04,1.37
333659,libA,221108_1_antibody_1C04-5G04_1.37_1,221108_1_no-antibody_control_1,K190T,1,GACAAAATTACTAACG,1.0,4.1021,1095,24,68369,6147,10886757,22,K171T,1C04-5G04,1.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326272,libA,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,S217C L400F E468V G523E,4,TATAGTGCGATGAACT,0.0,0.0000,0,29,297196,6147,10886757,22,S198C L381F E449V G504E,1C04-5G04,2.05
326266,libA,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,N141L G161N S163R R220I T435V L447V H454E,7,TATAGTCAATCTAAAG,0.0,0.0000,0,45,297196,6147,10886757,22,N122L G142N S144R R201I T416V L428V H435E,1C04-5G04,2.05
326255,libA,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,I22Y H202M S298L K311L,4,TATAGGATGGTATAGA,0.0,0.0000,0,28,297196,6147,10886757,22,I3Y H183M S279L K292L,1C04-5G04,2.05
326245,libA,221108_1_antibody_1C04-5G04_2.05_1,221108_1_no-antibody_control_1,K208S Q216E L270F S419C S461E,5,TATAGCGAAACTGAAC,0.0,0.0000,0,25,297196,6147,10886757,22,K189S Q197E L251F S400C S442E,1C04-5G04,2.05


In [26]:
prob_escape_ser13 = pd.read_csv(
    "results/prob_escape/libA_221027_1_AUSAB-13_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_ser13.notnull().all().all()

plot_avg_escape(prob_escape_ser13)

In [27]:
prob_escape_ser13.sort_values(['prob_escape_uncensored'], ascending=False)

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
66609,libA,221027_1_antibody_AUSAB-13_0.00501333_1,221027_1_no-antibody_control_1,T29M E69S L89I F156A N331A E363D S377T,7,ACAAAATCGCTATTGT,1.0,8.7738,13825,44,224073,6257,16477214,33,T10M E50S L70I F137A N312A E344D S358T,AUSAB-13,0.0050
266354,libA,221027_1_antibody_AUSAB-13_0.00148543_1,221027_1_no-antibody_control_1,T56I L105I,2,TTGATAGGTGCTTGTT,1.0,5.3131,15386,155,116899,6257,16477214,33,T37I L86I,AUSAB-13,0.0015
133199,libA,221027_1_antibody_AUSAB-13_0.00334222_1,221027_1_no-antibody_control_1,P122S Q453E,2,ATACGGGGTTCAGTAG,1.0,4.6195,16621,69,326272,6257,16477214,33,P103S Q434E,AUSAB-13,0.0033
333280,libA,221027_1_antibody_AUSAB-13_0.00099029_1,221027_1_no-antibody_control_1,K140N K154T F156S S163K Y178S T179K K190N,7,TCCCTTAGCTCCTTCG,1.0,4.3036,1243,38,47558,6257,16477214,33,K121N K135T F137S S144K Y159S T160K K171N,AUSAB-13,0.0010
266524,libA,221027_1_antibody_AUSAB-13_0.00148543_1,221027_1_no-antibody_control_1,H203E,1,TGGATACCTTAAGGCT,1.0,3.7031,2629,38,116899,6257,16477214,33,H184E,AUSAB-13,0.0015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361486,libA,221027_1_antibody_AUSAB-13_0.00099029_1,221027_1_no-antibody_control_1,L176K Q192H R227I R280S,4,TTCGCTGGACGTCCAT,0.0,0.0000,0,95,47558,6257,16477214,33,L157K Q173H R208I R261S,AUSAB-13,0.0010
361485,libA,221027_1_antibody_AUSAB-13_0.00099029_1,221027_1_no-antibody_control_1,R227T D310A R318M,3,TTCCTACGATTAATCA,0.0,0.0000,0,210,47558,6257,16477214,33,R208T D291A R299M,AUSAB-13,0.0010
361483,libA,221027_1_antibody_AUSAB-13_0.00099029_1,221027_1_no-antibody_control_1,N141P T179E R239E,3,TTCATTGCAAAGTATC,0.0,0.0000,0,208,47558,6257,16477214,33,N122P T160E R220E,AUSAB-13,0.0010
361478,libA,221027_1_antibody_AUSAB-13_0.00099029_1,221027_1_no-antibody_control_1,N41Y K295Q K410E,3,TTATTTATCTGAAAAC,0.0,0.0000,0,38,47558,6257,16477214,33,N22Y K276Q K391E,AUSAB-13,0.0010


In [28]:
prob_escape_ser05 = pd.read_csv(
    "results/prob_escape/libA_221223_1_AUSAB-05_1_prob_escape.csv", keep_default_na=False, na_values="nan"
).query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)  # filter for those with sufficient no-antibody counts
assert prob_escape_ser05.notnull().all().all()

plot_avg_escape(prob_escape_ser05)

In [30]:
prob_escape_ser05.sort_values(['prob_escape_uncensored'], ascending=False)

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
199745,libA,221223_1_antibody_AUSAB-05_0.016592593_1,221223_1_no-antibody_control_1,K102S A391S,2,ATATATTGTAAACCGC,1.0,70.6604,62871,34,743423,28408,10675748,21,K83S A372S,AUSAB-05,0.0166
266330,libA,221223_1_antibody_AUSAB-05_0.011061729_1,221223_1_no-antibody_control_1,K491S,1,CTTAATCAGTAAGAGC,1.0,55.2549,52074,44,608469,28408,10675748,21,K472S,AUSAB-05,0.0111
333016,libA,221223_1_antibody_AUSAB-05_0.007374486_1,221223_1_no-antibody_control_1,Q151E A391N,2,CTGCATAGATGCAGCG,1.0,30.6018,5094,36,131356,28408,10675748,21,Q132E A372N,AUSAB-05,0.0074
333170,libA,221223_1_antibody_AUSAB-05_0.007374486_1,221223_1_no-antibody_control_1,S28T R243F M481I,3,TGGTTCCGCTCCTATA,1.0,29.2256,2973,22,131356,28408,10675748,21,S9T R224F M462I,AUSAB-05,0.0074
399692,libA,221223_1_antibody_AUSAB-05_0.004916324_1,221223_1_no-antibody_control_1,I159N I211T M339I,3,AGAACGTTGCCTAATA,1.0,26.0482,2328,65,39060,28408,10675748,21,I140N I192T M320I,AUSAB-05,0.0049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100297,libA,221223_1_antibody_AUSAB-05_0.037333333_1,221223_1_no-antibody_control_1,S377Q N464C,2,AAAGCGCCGGAGACCC,0.0,0.0000,0,53,175377,28408,10675748,21,S358Q N445C,AUSAB-05,0.0373
100289,libA,221223_1_antibody_AUSAB-05_0.037333333_1,221223_1_no-antibody_control_1,A38T I211N,2,AAAGCCGACTTATAAG,0.0,0.0000,0,44,175377,28408,10675748,21,A19T I192N,AUSAB-05,0.0373
100282,libA,221223_1_antibody_AUSAB-05_0.037333333_1,221223_1_no-antibody_control_1,N27I S73M I198V I211L S281Q K295R Q330T I393Q,8,AAAGCATTACTCCCTA,0.0,0.0000,0,52,175377,28408,10675748,21,N8I S54M I179V I192L S262Q K276R Q311T I374Q,AUSAB-05,0.0373
100271,libA,221223_1_antibody_AUSAB-05_0.037333333_1,221223_1_no-antibody_control_1,A147T I233A L403M,3,AAAGCAACATAGTCAT,0.0,0.0000,0,28,175377,28408,10675748,21,A128T I214A L384M,AUSAB-05,0.0373


In [32]:
prob_escape_05_above1 = prob_escape_ser05.loc[prob_escape_ser05['prob_escape_uncensored'] > 1]
prob_escape_05_above1

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
2445,libA,221223_1_antibody_AUSAB-05_0.056_1,221223_1_no-antibody_control_1,S238F,1,TTACGAGGCACCAAGG,1.0,1.0404,492,180,74631,28408,10675748,21,S219F,AUSAB-05,0.0560
2482,libA,221223_1_antibody_AUSAB-05_0.056_1,221223_1_no-antibody_control_1,K329L,1,CGAGGAAGGCAACCAA,1.0,1.0414,487,178,74631,28408,10675748,21,K310L,AUSAB-05,0.0560
2980,libA,221223_1_antibody_AUSAB-05_0.056_1,221223_1_no-antibody_control_1,I242R D512N,2,TCCGTATTCTATAAGA,1.0,6.0464,413,26,74631,28408,10675748,21,I223R D493N,AUSAB-05,0.0560
3278,libA,221223_1_antibody_AUSAB-05_0.056_1,221223_1_no-antibody_control_1,Q76S K140I Q216L S217T F277Y,5,TCCATATATGCTAGGG,1.0,1.0159,379,142,74631,28408,10675748,21,Q57S K121I Q197L S198T F258Y,AUSAB-05,0.0560
3546,libA,221223_1_antibody_AUSAB-05_0.056_1,221223_1_no-antibody_control_1,S289T P343R G405N,3,TTGTAAACCGTAATGT,1.0,1.1102,350,120,74631,28408,10675748,21,S270T P324R G386N,AUSAB-05,0.0560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407145,libA,221223_1_antibody_AUSAB-05_0.004916324_1,221223_1_no-antibody_control_1,R241A R280S R326T E462K,4,ACCAAAATAGTACCGA,1.0,1.2468,36,21,39060,28408,10675748,21,R222A R261S R307T E443K,AUSAB-05,0.0049
407199,libA,221223_1_antibody_AUSAB-05_0.004916324_1,221223_1_no-antibody_control_1,Y178N T179C K430N A449L,4,AGATACCGAATAGAAG,1.0,1.1067,35,23,39060,28408,10675748,21,Y159N T160C K411N A430L,AUSAB-05,0.0049
407249,libA,221223_1_antibody_AUSAB-05_0.004916324_1,221223_1_no-antibody_control_1,F98L K140L R326S I393S K469V,5,AGATAGGAACTGATAG,1.0,1.0751,34,23,39060,28408,10675748,21,F79L K121L R307S I374S K450V,AUSAB-05,0.0049
407254,libA,221223_1_antibody_AUSAB-05_0.004916324_1,221223_1_no-antibody_control_1,P343K,1,ATAGGACCAAAACCAA,1.0,1.1775,34,21,39060,28408,10675748,21,P324K,AUSAB-05,0.0049


In [33]:
prob_escape_13_above1 = prob_escape_ser13.loc[prob_escape_ser13['prob_escape_uncensored'] > 1]
prob_escape_13_above1

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,total_no_antibody_count,no_antibody_count_threshold,aa_substitutions_reference,antibody,antibody_concentration
9,libA,221027_1_antibody_AUSAB-13_0.00752_1,221027_1_no-antibody_control_1,N141S,1,GATAGCATAGTACTAG,1.0,1.5596,29243,279,420515,6257,16477214,33,N122S,AUSAB-13,0.0075
23,libA,221027_1_antibody_AUSAB-13_0.00752_1,221027_1_no-antibody_control_1,T49K K102N F156A V342L,4,TATAAAATACTTAATT,1.0,1.6866,13262,117,420515,6257,16477214,33,T30K K83N F137A V323L,AUSAB-13,0.0075
35,libA,221027_1_antibody_AUSAB-13_0.00752_1,221027_1_no-antibody_control_1,G205A G237F W253H,3,TTTACACGAACAAATA,1.0,1.5987,9885,92,420515,6257,16477214,33,G186A G218F W234H,AUSAB-13,0.0075
59,libA,221027_1_antibody_AUSAB-13_0.00752_1,221027_1_no-antibody_control_1,K154T F156S S163K Y178S T179K K190N,6,TCAAGAGTAACGAGAT,1.0,1.3301,5900,66,420515,6257,16477214,33,K135T F137S S144K Y159S T160K K171N,AUSAB-13,0.0075
117,libA,221027_1_antibody_AUSAB-13_0.00752_1,221027_1_no-antibody_control_1,S110N R111K K140M K154T F156S S163K D310N N494D,8,CCGTATAAAGTGCGTT,1.0,1.2999,3582,41,420515,6257,16477214,33,S91N R92K K121M K135T F137S S144K D291N N475D,AUSAB-13,0.0075
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
402036,libA,221027_1_antibody_AUSAB-13_0.00066019_1,221027_1_no-antibody_control_1,S162M R220I,2,CTTTTGGATAAGGACA,1.0,1.3016,215,35,29529,6257,16477214,33,S143M R201I,AUSAB-13,0.0007
402181,libA,221027_1_antibody_AUSAB-13_0.00066019_1,221027_1_no-antibody_control_1,H175G,1,TACAGCCAATTGAGTG,1.0,1.0242,203,42,29529,6257,16477214,33,H156G,AUSAB-13,0.0007
402480,libA,221027_1_antibody_AUSAB-13_0.00066019_1,221027_1_no-antibody_control_1,N317H Q330C,2,ATAAAACTAATCTGAG,1.0,1.0712,182,36,29529,6257,16477214,33,N298H Q311C,AUSAB-13,0.0007
402603,libA,221027_1_antibody_AUSAB-13_0.00066019_1,221027_1_no-antibody_control_1,R52W K154N R318H,3,CCTCCGACCTTACGAC,1.0,1.0022,175,37,29529,6257,16477214,33,R33W K135N R299H,AUSAB-13,0.0007
