In [115]:
import pandas as pd
import numpy as np
import torch

from scipy.stats import gaussian_kde

In [160]:
# Load the saved model results, then report how large the dataset is and how
# many levels each experimental covariate (serum, assay, month, exposure) has.

escape_date = '2025-06-26'
sb = .6

# NOTE: torch.load unpickles the file; only point this at trusted result files.
escape_results = torch.load(f'model_covdb/results.{escape_date}.sb{sb}.pt')
escape_traces, escape_data = escape_results['traces'], escape_results['data']

# Rows of X are strains; de-duplicating rows gives the distinct strains.
# nmuts is simply the number of columns of X.
unique_strains = escape_data['X'].unique(dim=0)
nstrains, nmuts = unique_strains.shape

summary_lines = [
    ('Number of total strains: ', escape_data['X'].shape[0]),
    ('Number of unique strains: ', nstrains),
    ('Number of unique mutations: ', nmuts),
    ('Number of sera: ', len(escape_data['serum_type_map'])),
    ('Number of assays: ', len(escape_data['assay_type_map'])),
    ('Number of months: ', len(escape_data['months_map'])),
    ('Number of exposures: ', len(escape_data['exposures_map'])),
]
for label, value in summary_lines:
    print(label, value)

Number of total strains:  5223
Number of unique strains:  383
Number of unique mutations:  793
Number of sera:  3
Number of assays:  10
Number of months:  3
Number of exposures:  2


In [161]:
# Mean β per mutation (mean over dim 0 of the β trace): print the 15 largest
# values first, then the 15 smallest.
escape_df = pd.DataFrame({
    'Mutation': escape_data['features'],
    'β': escape_traces['β'].mean(dim=0),
})
for ascending in (False, True):
    ranked = escape_df.sort_values(by='β', ascending=ascending).reset_index(drop=True)
    print(ranked.head(15))

   Mutation         β
0     F486S  0.303801
1     E484K  0.245032
2     F486P  0.241950
3     T716F  0.227867
4     D574V  0.203008
5   Y144del  0.200333
6     K444T  0.178386
7     L452R  0.177670
8     Q493S  0.170417
9     F490S  0.166615
10    S494R  0.163291
11    G485R  0.148573
12    Q493R  0.144083
13     Δ144  0.140940
14    K417N  0.134874
   Mutation         β
0       Δ19 -0.291042
1     T478Q -0.256561
2    D1139Y -0.213858
3     S155R -0.148056
4     E156G -0.142559
5    P1112Q -0.141738
6     W152C -0.111033
7      Δ145 -0.103741
8      Δ142 -0.099835
9     N658S -0.098271
10    A222V -0.092502
11     Δ156 -0.087561
12    T859N -0.086326
13  R214ins -0.083787
14    Y489H -0.079262


In [162]:
# Print the mean (over dim 0 of each trace) bias term for every level of each
# covariate: assay type, number of exposures, months, and serum type.
bias_terms = [
    ('assay_type_map', 'A'),
    ('exposures_map', 'E'),
    ('months_map', 'M'),
    ('serum_type_map', 'S'),
]
for map_key, trace_key in bias_terms:
    level_means = escape_traces[trace_key].mean(dim=0)
    for level, mean_value in zip(escape_data[map_key], level_means):
        print(f'{level}: {mean_value:.2f}')
    # Two blank lines separate consecutive covariates.
    print()
    print()

Pseudovirus: 0.20
Pseudovirus (HIV): -0.06
Pseudovirus (MLV): -0.05
Pseudovirus (VSV): 0.01
Pseudovirus (lentivirus): -0.00
SARS-CoV-2 recombinant: -0.12
VLP: -0.01
VSV chimeric virus: -0.16
Virus isolate: 0.04
hiVNT: 0.16


<=2: 0.20
>2: -0.26


1m: -0.01
2-6m: 0.05
≥6m: -0.14


convalescent: -0.13
mrna: 0.07
other: 0.05




In [163]:
# Pool-specific mutation effects, restricted to (pool, mutation) pairs that
# were observed at least `min_n` times, in long and wide form.
#
# Produces:
#   subset                - long table (Pool, Mutation, Effect, N) of retained pairs
#   pool_specific_effects - the same rows without the N column
#   pool_escape_df        - wide table: one row per mutation, one column per pool;
#                           pairs that were filtered out appear as NaN
min_n = 1  # mutation must have been seen at least once

pool_names = np.asarray(escape_data['pools_map'])
feature_names = np.asarray(escape_data['features'])


def _melt_pool_by_mutation(grid, value_name):
    """Turn a pools x mutations grid into a long (Pool, Mutation, <value_name>) table."""
    return (
        pd.DataFrame(data=grid, index=pool_names, columns=feature_names)
        .reset_index()
        .melt(id_vars='index', var_name='Mutation', value_name=value_name)
        .rename(columns={'index': 'Pool'})
    )


# Mean effect of every mutation in every pool (mean over dim 0 of the trace).
pool_effects_all = _melt_pool_by_mutation(escape_traces['β_ω'].mean(dim=0), 'Effect')

# Number of observations of each mutation within each pool: sum the columns of
# X over the rows assigned to that pool. This is done once per pool instead of
# once per (pool, mutation) pair. Pools and mutations are matched by position
# in pools_map / features.
strain_features = escape_data['X']
strain_pools = escape_data['pools']
obs_counts = [
    strain_features[strain_pools == pool_idx].sum(0).tolist()
    for pool_idx in range(len(pool_names))
]
num_obs = _melt_pool_by_mutation(obs_counts, 'N')['N'].to_numpy()

# Both long tables are melted from grids with identical labels, so their rows
# line up one-to-one and the counts can be attached positionally.
# Subset ranges to where we have ample data on each mutation.
subset = (
    pool_effects_all
    .assign(N=num_obs.astype(int))
    .loc[num_obs >= min_n]
    .reset_index(drop=True)
)

pool_specific_effects = subset.drop(columns='N')

# values='Effect' yields flat pool-name columns directly; drop the axis name so
# the table displays with plain column labels.
pool_escape_df = (
    pool_specific_effects
    .pivot(index='Mutation', columns='Pool', values='Effect')
    .rename_axis(columns=None)
)

In [175]:
# Alpha pool: the ten mutations with the highest effect
pool_escape_df.sort_values('Alpha', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
T716F,0.593,0.354224,,,0.473494,0.213212,
E484K,0.500568,0.344016,0.267386,0.232809,0.317118,0.488768,0.248529
Q493R,0.236146,0.28048,0.171182,0.179869,0.19214,0.150784,0.162944
L452R,0.227733,0.174241,0.366311,0.261384,0.201084,0.241936,0.249669
F490S,0.18937,0.175535,0.242108,0.205616,0.146026,0.193431,0.389871
Δ157,0.169612,-0.011203,0.13347,,0.135413,0.201003,0.154662
G339D,0.1611,0.108732,0.199902,0.16789,0.229041,0.221774,0.133722
Y505H,0.159411,-0.001094,0.038252,0.046037,0.075043,0.139641,0.027805
Δ24,0.158161,0.171942,-0.002463,,-0.081912,0.09658,0.053205
T19R,0.139747,0.173412,0.117816,0.112108,0.107568,0.042267,0.096151


In [176]:
# Alpha pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('Alpha').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Δ145,-0.218293,-0.296634,0.067882,,-0.119654,-0.10347,-0.093531
E156G,-0.18299,-0.26246,-0.129572,-0.095193,-0.151519,-0.336731,-0.110472
Q52R,-0.140676,-0.105797,,,,-0.151974,
D215G,-0.095715,-0.061118,-0.042103,-0.043418,0.037151,-0.141622,-0.011143
F888L,-0.082156,-0.056645,,,,-0.148639,
Δ27,-0.081723,-0.026458,-0.018997,,0.005087,-0.034054,-0.073296
D80A,-0.074653,0.018311,-0.08597,-0.019632,0.098559,-0.035784,-0.037823
L5F,-0.070158,-0.033827,,,-0.006362,-0.199914,
Δ69,-0.069863,-0.175977,-0.107641,,-0.079539,0.04146,-0.032856
T95I,-0.069307,-0.103949,-0.103438,-0.050689,-0.109833,0.008059,0.008503


In [177]:
# B.1 pool: the ten mutations with the highest effect
pool_escape_df.sort_values('B.1', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Y144del,,0.556906,0.144323,0.246895,,0.102365,0.340485
F486S,,0.487044,0.451552,0.428954,,0.511028,0.336414
G485R,,0.43428,,,,,
Q493S,,0.425891,0.140581,0.284574,,0.181304,0.303699
F486P,,0.41567,0.486613,0.288896,,0.402083,0.256748
A243del,,0.401434,0.151436,0.137801,,0.113088,0.121787
F486V,0.025524,0.387028,0.190175,0.078976,0.107543,-0.065056,0.134801
D574V,,0.373663,,,,,0.485826
Q493K,,0.364473,0.10237,0.162533,,0.044729,0.039973
T716F,0.593,0.354224,,,0.473494,0.213212,


In [178]:
# B.1 pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('B.1').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Δ19,,-0.934869,,,,,
T478Q,,-0.930317,,,,,
D1139Y,,-0.604254,,,,,
P1112Q,,-0.399273,,,,,
V1176F,-0.043233,-0.341414,,,-0.12842,0.061784,-0.014404
V83A,,-0.335072,-0.052551,-0.063012,,-0.010626,-0.113404
A222V,,-0.297508,,,,,
Δ145,-0.218293,-0.296634,0.067882,,-0.119654,-0.10347,-0.093531
Y489H,,-0.29466,,,,,
Q613H,,-0.281617,,,,0.078617,


In [179]:
# BA.1 pool: the ten mutations with the highest effect
pool_escape_df.sort_values('BA.1', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F486P,,0.41567,0.486613,0.288896,,0.402083,0.256748
F486S,,0.487044,0.451552,0.428954,,0.511028,0.336414
L452R,0.227733,0.174241,0.366311,0.261384,0.201084,0.241936,0.249669
K417N,0.104846,0.026078,0.352478,0.257543,0.009026,0.36304,0.275036
K444T,,0.130785,0.350566,0.296715,,0.397273,0.279077
S494R,,0.220872,0.33222,0.180162,,0.260685,0.184442
G142D,0.007663,0.032184,0.287303,0.044306,0.142595,-0.099858,-0.035148
E484K,0.500568,0.344016,0.267386,0.232809,0.317118,0.488768,0.248529
F490S,0.18937,0.175535,0.242108,0.205616,0.146026,0.193431,0.389871
Δ144,0.010021,0.266933,0.241712,,0.088277,0.111845,0.377535


In [180]:
# BA.1 pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('BA.1').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
N658S,,-0.077654,-0.477669,,,,-0.122546
Δ142,,-0.142942,-0.361166,,,,
L212I,,0.008577,-0.341787,-0.033337,,-0.026559,0.01711
Δ211,,0.027415,-0.30754,,,,
S155R,,-0.240184,-0.275939,-0.151544,,-0.219262,-0.200336
R214ins,-0.066902,-0.178541,-0.250692,,-0.090941,-0.075136,-0.133816
Δ156,-0.001455,-0.24879,-0.181579,,-0.081693,-0.117702,-0.045061
G339H,,0.104152,-0.160889,-0.131825,,-0.086483,-0.065609
E156G,-0.18299,-0.26246,-0.129572,-0.095193,-0.151519,-0.336731,-0.110472
E484A,0.03489,-0.035825,-0.111486,0.02112,0.064189,0.023755,0.044261


In [187]:
# BA.2.12.1 pool: the ten mutations with the highest effect
pool_escape_df.sort_values('BA.2.12.1', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F486S,,0.487044,0.451552,0.428954,,0.511028,0.336414
K444T,,0.130785,0.350566,0.296715,,0.397273,0.279077
F486P,,0.41567,0.486613,0.288896,,0.402083,0.256748
Q493S,,0.425891,0.140581,0.284574,,0.181304,0.303699
L452R,0.227733,0.174241,0.366311,0.261384,0.201084,0.241936,0.249669
K417N,0.104846,0.026078,0.352478,0.257543,0.009026,0.36304,0.275036
Y144del,,0.556906,0.144323,0.246895,,0.102365,0.340485
E484K,0.500568,0.344016,0.267386,0.232809,0.317118,0.488768,0.248529
R346T,,0.162932,0.18043,0.221799,,0.105446,0.147021
F490S,0.18937,0.175535,0.242108,0.205616,0.146026,0.193431,0.389871


In [188]:
# BA.2.12.1 pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('BA.2.12.1').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
S155R,,-0.240184,-0.275939,-0.151544,,-0.219262,-0.200336
G339H,,0.104152,-0.160889,-0.131825,,-0.086483,-0.065609
V213G,-0.041083,0.006915,-0.072854,-0.098204,-0.042001,-0.029021,-0.077549
E156G,-0.18299,-0.26246,-0.129572,-0.095193,-0.151519,-0.336731,-0.110472
S371L,-0.027371,-0.182226,-0.074494,-0.093005,-0.040694,-0.043364,-0.138758
S704L,0.006055,0.075453,0.102487,-0.082006,,-0.077321,-0.102757
G496S,-0.066326,-0.163641,-0.102209,-0.076376,-0.065196,-0.051268,-0.070438
V83A,,-0.335072,-0.052551,-0.063012,,-0.010626,-0.113404
N440K,0.054282,-0.097045,0.019025,-0.061639,0.000625,0.086875,-0.001761
P26S,-0.064273,-0.172418,-0.048469,-0.060861,-0.013104,-0.066985,-0.027942


In [181]:
# Beta pool: the ten mutations with the highest effect
pool_escape_df.sort_values('Beta', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
T716F,0.593,0.354224,,,0.473494,0.213212,
E484K,0.500568,0.344016,0.267386,0.232809,0.317118,0.488768,0.248529
Δ241,,-0.018354,,,0.256304,-0.303937,0.23246
G339D,0.1611,0.108732,0.199902,0.16789,0.229041,0.221774,0.133722
L452R,0.227733,0.174241,0.366311,0.261384,0.201084,0.241936,0.249669
Q493R,0.236146,0.28048,0.171182,0.179869,0.19214,0.150784,0.162944
L18F,0.118255,0.097708,0.05199,0.093788,0.165612,0.12915,0.132216
Q1071H,0.062413,0.28471,,,0.15585,,
F490S,0.18937,0.175535,0.242108,0.205616,0.146026,0.193431,0.389871
G142D,0.007663,0.032184,0.287303,0.044306,0.142595,-0.099858,-0.035148


In [182]:
# Beta pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('Beta').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
P681H,0.063931,-0.024855,-0.057236,0.018504,-0.19346,0.062585,0.039662
Y144S,0.006042,0.132269,,,-0.174143,0.177689,0.034638
R346K,-0.019536,0.02166,0.147028,0.000441,-0.167932,-0.060611,0.085414
C136F,,-0.030395,,,-0.166504,-0.181643,
Y145N,0.079197,0.100537,,,-0.153035,0.159623,
E156G,-0.18299,-0.26246,-0.129572,-0.095193,-0.151519,-0.336731,-0.110472
T859N,,-0.174844,,,-0.136441,-0.193251,
V1176F,-0.043233,-0.341414,,,-0.12842,0.061784,-0.014404
Δ145,-0.218293,-0.296634,0.067882,,-0.119654,-0.10347,-0.093531
Δ242,0.032911,-0.068937,-0.042852,,-0.1151,0.025625,-0.047762


In [183]:
# Delta pool: the ten mutations with the highest effect
pool_escape_df.sort_values('Delta', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F486S,,0.487044,0.451552,0.428954,,0.511028,0.336414
E484K,0.500568,0.344016,0.267386,0.232809,0.317118,0.488768,0.248529
F486P,,0.41567,0.486613,0.288896,,0.402083,0.256748
K444T,,0.130785,0.350566,0.296715,,0.397273,0.279077
K417N,0.104846,0.026078,0.352478,0.257543,0.009026,0.36304,0.275036
R158del,,0.144682,-0.046042,0.114778,,0.268796,0.003413
T1027I,0.116557,0.280724,0.104306,0.121235,0.121922,0.266757,0.13518
F157del,,0.165606,-0.052126,0.094345,,0.265205,0.026327
S494R,,0.220872,0.33222,0.180162,,0.260685,0.184442
L452R,0.227733,0.174241,0.366311,0.261384,0.201084,0.241936,0.249669


In [184]:
# Delta pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('Delta').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
E156G,-0.18299,-0.26246,-0.129572,-0.095193,-0.151519,-0.336731,-0.110472
Δ241,,-0.018354,,,0.256304,-0.303937,0.23246
W152C,-0.054932,-0.242148,,,-0.113103,-0.300425,
P681R,-0.011749,-0.099364,0.036376,0.005606,0.076568,-0.289019,0.019168
S155R,,-0.240184,-0.275939,-0.151544,,-0.219262,-0.200336
L5F,-0.070158,-0.033827,,,-0.006362,-0.199914,
P9L,,0.017009,,,-0.084725,-0.196405,
T859N,,-0.174844,,,-0.136441,-0.193251,
A701V,-0.069175,0.013318,-0.084016,0.040141,0.113161,-0.187621,-0.023449
C136F,,-0.030395,,,-0.166504,-0.181643,


In [185]:
# Omicron BA.4/5 pool: the ten mutations with the highest effect
pool_escape_df.sort_values('Omicron BA.4/5', ascending=False).iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
D574V,,0.373663,,,,,0.485826
F490S,0.18937,0.175535,0.242108,0.205616,0.146026,0.193431,0.389871
Δ144,0.010021,0.266933,0.241712,,0.088277,0.111845,0.377535
Y144del,,0.556906,0.144323,0.246895,,0.102365,0.340485
F486S,,0.487044,0.451552,0.428954,,0.511028,0.336414
Q493S,,0.425891,0.140581,0.284574,,0.181304,0.303699
K356T,,0.119226,0.071662,,,,0.300082
K444T,,0.130785,0.350566,0.296715,,0.397273,0.279077
K417N,0.104846,0.026078,0.352478,0.257543,0.009026,0.36304,0.275036
F486P,,0.41567,0.486613,0.288896,,0.402083,0.256748


In [186]:
# Omicron BA.4/5 pool: the ten mutations with the lowest effect
pool_escape_df.sort_values('Omicron BA.4/5').iloc[:10]

Unnamed: 0_level_0,Alpha,B.1,BA.1,BA.2.12.1,Beta,Delta,Omicron BA.4/5
Mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F486I,,0.089265,,,,,-0.571416
T478R,,-0.101397,,,,,-0.258508
E180V,,0.012569,,,,,-0.248384
F456L,,-0.04724,0.143176,,,,-0.23735
S155R,,-0.240184,-0.275939,-0.151544,,-0.219262,-0.200336
H69del,,-0.131874,-0.005478,-0.046746,,-0.035753,-0.15308
S371L,-0.027371,-0.182226,-0.074494,-0.093005,-0.040694,-0.043364,-0.138758
R214ins,-0.066902,-0.178541,-0.250692,,-0.090941,-0.075136,-0.133816
N658S,,-0.077654,-0.477669,,,,-0.122546
Δ26,-0.06276,-0.108129,-0.062402,,-0.035376,-0.064956,-0.120009


In [171]:
# Spread of each mutation's effect across pools: largest pool-specific effect
# minus smallest, over the pools retained for that mutation.
effect_by_mutation = pool_specific_effects.groupby('Mutation')['Effect']
mutation_effect_differences = (
    (effect_by_mutation.max() - effect_by_mutation.min())
    .rename('Effect difference')
    .reset_index()
)

# NOTE: despite the name, only the 8 mutations with the widest spread are kept.
ranked_differences = mutation_effect_differences.sort_values(by='Effect difference', ascending=False)
top_10_diffs = ranked_differences['Mutation'].iloc[:8].tolist()
top_10_diffs

['F486I', 'Δ241', 'Y144del', 'F486V', 'V1176F', 'N658S', 'G142D', 'F456L']

In [172]:
# Ten mutations whose effect varies most between pools
mutation_effect_differences.sort_values('Effect difference', ascending=False).iloc[:10]

Unnamed: 0,Mutation,Effect difference
137,F486I,0.66068
759,Δ241,0.560241
713,Y144del,0.454542
141,F486V,0.452084
647,V1176F,0.403198
389,N658S,0.400015
155,G142D,0.387161
134,F456L,0.380526
635,T716F,0.379788
753,Δ158,0.372676
