# Fit model to simulated data with epitope similarity regularization

In [1]:
import pandas as pd

import polyclonal

# Read every simulated variant from the noisy escape CSV.
# NOTE(review): `na_filter=None` is falsy, presumably so that empty
# `aa_substitutions` fields stay as empty strings instead of NaN -- confirm.
escape_all = pd.read_csv("RBD_variants_escape_noisy.csv", na_filter=None)

# Restrict to the single library used in this example ...
escape_avg2muts = escape_all.query('library == "avg2muts"')

# ... and to the three concentrations that are fit, then renumber the rows
# so the index is contiguous after filtering.
noisy_data = escape_avg2muts.query("concentration in [0.25, 1, 4]").reset_index(
    drop=True
)

noisy_data

Unnamed: 0,library,barcode,concentration,prob_escape,aa_substitutions,IC90
0,avg2muts,AAAAAGCACAGTACAC,0.25,0.05044,,0.1128
1,avg2muts,AAAAAGCGAACAGATC,0.25,0.14310,,0.1128
2,avg2muts,AAAACAGCGCGCACCG,0.25,0.05452,,0.1128
3,avg2muts,AAAACGACCCATCCGC,0.25,0.08473,,0.1128
4,avg2muts,AAAACGCGATCACCTT,0.25,0.04174,,0.1128
...,...,...,...,...,...,...
89995,avg2muts,CCGGGTACTAAAAAGG,0.25,0.06873,Y508W,0.2285
89996,avg2muts,AACGGTTGCTTATAGA,1.00,0.01435,Y508W,0.2285
89997,avg2muts,CCGGGTACTAAAAAGG,1.00,0.01787,Y508W,0.2285
89998,avg2muts,AACGGTTGCTTATAGA,4.00,0.03754,Y508W,0.2285


In [2]:
# Wild-type activity supplied for each of the three epitopes ("1", "2", "3").
initial_activities = pd.DataFrame(
    {
        "epitope": ["1", "2", "3"],
        "activity": [1.0, 3.0, 2.0],
    }
)

# One site per epitope given a site-level escape value of 10.0.
# NOTE(review): presumably these seed each epitope at a known key site
# before fitting -- confirm against the polyclonal documentation.
initial_site_escape = pd.DataFrame(
    {
        "epitope": ["1", "2", "3"],
        "site": [417, 484, 444],
        "escape": [10.0, 10.0, 10.0],
    }
)

# Build the model object that is fit to `noisy_data` in the next cell.
poly_abs = polyclonal.Polyclonal(
    data_to_fit=noisy_data,
    activity_wt_df=initial_activities,
    site_escape_df=initial_site_escape,
    data_mut_escape_overlap="fill_to_data",
)

In [3]:
# NBVAL_IGNORE_OUTPUT
# Fit the model with a non-zero weight on the epitope-similarity
# regularization term; `logfreq=100` matches the 100-step spacing of the
# progress rows in the log printed below.
opt_res = poly_abs.fit(
    logfreq=100,
    reg_similarity_weight=1e-2,
)

# First fitting site-level model.
# Starting optimization of 522 parameters at Wed Oct  5 21:22:18 2022.
         step     time_sec         loss     fit_loss   reg_escape   reg_spread reg_similarity reg_activity
            0     0.053612       9392.9       9386.6      0.59403            0            0       5.7092
          100       5.4977       1392.1       1375.6       6.6052            0       4.4342       5.5279
          200       10.543       1369.9       1350.4       7.7752            0       6.1406       5.5826
          300       15.551       1360.3       1339.3       8.6231            0        6.767         5.59
          400       20.643       1357.5       1336.1       8.7913            0       7.0793       5.5963
          500         25.6       1356.9       1335.4       8.8256            0       7.0713       5.5964
          600       30.478       1356.7       1335.1       8.8471            0       7.1448       5.5968
          609       30.876       1356.7       1335.1  

In [4]:
# NBVAL_IGNORE_OUTPUT
# Display the chart returned by `activity_wt_barplot()` for the fitted model
# (presumably the fitted wild-type activity of each epitope -- confirm against
# the polyclonal documentation). Left as the bare last expression so the
# notebook renders it.
poly_abs.activity_wt_barplot()

  for col_name, dtype in df.dtypes.iteritems():


In [5]:
# NBVAL_IGNORE_OUTPUT
# Display the chart returned by `mut_escape_plot()` for the fitted model
# (presumably the per-mutation escape values for each epitope -- confirm
# against the polyclonal documentation). Left as the bare last expression so
# the notebook renders it.
poly_abs.mut_escape_plot()

  for col_name, dtype in df.dtypes.iteritems():
