# Test `PolyclonalAverage`

First we create some models to average.
They should all be similar, but for each one we add random noise and drop one randomly chosen mutation, and for some of them we also flip the epitope labels:

In [1]:
# NBVAL_IGNORE_OUTPUT

import numpy

import pandas as pd

import polyclonal


# Activity for each epitope (passed to `polyclonal.Polyclonal` as `activity_wt_df`).
activity_wt_df = pd.DataFrame(
    [(1, 2.0), (2, 1.0)],
    columns=["epitope", "activity"],
)

# Escape value of each mutation, listed as (epitope 1, epitope 2).
escape_by_mutation = {
    "M1C": (2.0, 0.0),
    "G2A": (3.0, 0.0),
    "A4K": (0.0, 2.5),
    "A4L": (0.0, 1.5),
    "A4Q": (0.0, 3.5),
}

# Long-form table: one row per (mutation, epitope) pair, epitopes numbered from 1.
mut_escape_df = pd.DataFrame(
    [
        (mutation, epitope, escape)
        for mutation, escapes in escape_by_mutation.items()
        for epitope, escape in enumerate(escapes, start=1)
    ],
    columns=["mutation", "epitope", "escape"],
)


models = []
flip_epitopes = {1: 2, 2: 1}
keep_epitopes = {1: 1, 2: 2}
n_muts = mut_escape_df["mutation"].nunique()
for i in range(5):
    # Re-seed per model so every model's noise is reproducible on its own.
    numpy.random.seed(i)

    # Odd-numbered models get their epitope labels swapped; even ones keep them.
    epitope_relabel = flip_epitopes if i % 2 else keep_epitopes

    # The three random draws below must stay in this order (activity noise,
    # mutation subset, escape noise) so each seed yields the same numbers.
    activity_noise = numpy.random.random(len(activity_wt_df))
    kept_mutations = numpy.random.choice(
        mut_escape_df["mutation"].unique(),
        size=n_muts - 1,  # drop exactly one mutation per model
        replace=False,
    ).tolist()
    escape_noise = numpy.random.random(len(mut_escape_df))

    noisy_activity_df = activity_wt_df.assign(
        activity=activity_wt_df["activity"] + activity_noise,
        epitope=activity_wt_df["epitope"].map(epitope_relabel),
    )
    noisy_escape_df = mut_escape_df.assign(
        escape=mut_escape_df["escape"] + escape_noise,
        epitope=mut_escape_df["epitope"].map(epitope_relabel),
    )
    # Noise is drawn for every row first; the dropped mutation is removed afterwards.
    kept_escape_df = noisy_escape_df[noisy_escape_df["mutation"].isin(kept_mutations)]

    models.append(
        polyclonal.Polyclonal(
            mut_escape_df=kept_escape_df,
            activity_wt_df=noisy_activity_df,
        )
    )

# One row per model: the first three models are labelled library "A" and the
# rest "B", with replicate numbers cycling 0, 1, 2 by position.
model_positions = range(len(models))
models_df = pd.DataFrame(
    {
        "library": ["A" if pos < 3 else "B" for pos in model_positions],
        "replicate": [pos % 3 for pos in model_positions],
        "model": models,
    }
)

models_df

Unnamed: 0,library,replicate,model
0,A,0,<polyclonal.polyclonal.Polyclonal object at 0x...
1,A,1,<polyclonal.polyclonal.Polyclonal object at 0x...
2,A,2,<polyclonal.polyclonal.Polyclonal object at 0x...
3,B,0,<polyclonal.polyclonal.Polyclonal object at 0x...
4,B,1,<polyclonal.polyclonal.Polyclonal object at 0x...


Now make the average model:

In [2]:
# Build the average model from the library / replicate / model table in `models_df`.
avg_model = polyclonal.PolyclonalAverage(models_df)

Get the correlations in mutation escape between the models for each pair of library / replicate:

In [3]:
# Correlations between models, with the squared correlation added as `r2`.
pairwise_corr = avg_model.mut_escape_corr()
corr = pairwise_corr.assign(r2=pairwise_corr["correlation"].pow(2))

corr.round(3)

Unnamed: 0,epitope,correlation,library_1,replicate_1,library_2,replicate_2,r2
0,1,1.0,A,0,A,0,1.0
1,2,1.0,A,0,A,0,1.0
2,1,0.877,A,1,A,0,0.77
3,2,1.0,A,1,A,0,0.999
4,1,0.997,A,2,A,0,0.994
5,2,0.992,A,2,A,0,0.984
6,1,0.999,B,0,A,0,0.998
7,2,0.992,B,0,A,0,0.984
8,1,0.893,B,1,A,0,0.798
9,2,0.988,B,1,A,0,0.976


In [4]:
# NBVAL_IGNORE_OUTPUT

# Show the between-model correlations as a heatmap.
# NOTE(review): `plot_corr2=True` presumably plots squared correlations and
# `diverging_colors=False` presumably selects a non-diverging color scale;
# confirm against the polyclonal documentation.
avg_model.mut_escape_corr_heatmap(plot_corr2=True, diverging_colors=False)