# Test epitope harmonizing

In [1]:
import numpy

import random

import pandas as pd

from polyclonal import Polyclonal

import polyclonal.utils as utils

activity_wt_df = pd.DataFrame({"epitope": ["e1", "e2"], "activity": [2.0, 1.0]})

mut_escape_df = pd.DataFrame(
    {
        "mutation": [
            "M1C",
            "M1C",
            "G2A",
            "G2A",
            "A4K",
            "A4K",
            "A4L",
            "A4L",
            "A4Q",
            "A4Q",
        ],
        "epitope": ["e1", "e2", "e1", "e2", "e1", "e2", "e1", "e2", "e1", "e2"],
        "escape": [2.0, 0.0, 3.0, 0.0, 0.0, 2.5, 0.0, 1.5, 0.0, 3.5],
    }
)

polyclonal_sim = Polyclonal(activity_wt_df=activity_wt_df, mut_escape_df=mut_escape_df)

variants_df = pd.DataFrame.from_records(
    [
        ("AA", ""),
        ("AC", "M1C"),
        ("AG", "G2A"),
        ("AT", "A4K"),
        ("TA", "A4L"),
        ("GA", "A4Q"),
        ("CA", "M1C G2A"),
        ("CG", "M1C A4K"),
        ("TT", "M1C A4L"),
        ("GT", "M1C A4Q"),
        ("CC", "G2A A4K"),
        ("TC", "G2A A4L"),
        ("GG", "G2A A4Q"),
        ("CT", "M1C G2A A4K"),
        ("TG", "M1C G2A A4L"),
        ("GA", "M1C G2A A4Q"),
    ],
    columns=["barcode", "aa_substitutions"],
)

escape_probs = polyclonal_sim.prob_escape(
    variants_df=variants_df, concentrations=[1.0, 2.0, 4.0]
)

data_to_fit = escape_probs.rename(columns={"predicted_prob_escape": "prob_escape"})

In [2]:
n_eps = 2
poly_one = Polyclonal(data_to_fit=data_to_fit, n_epitopes=n_eps, 
                      activity_wt_df=None, site_escape_df=None)
poly_two = Polyclonal(data_to_fit=data_to_fit, n_epitopes=n_eps, 
                      activity_wt_df=None, site_escape_df=None)

In [3]:
random.seed(1)
poly_one.fit(fit_site_level_first=False)
poly_two.fit(fit_site_level_first=False)

      fun: 0.2419293641053542
 hess_inv: <12x12 LbfgsInvHessProduct with dtype=float64>
      jac: array([-4.06262290e-05,  3.01164680e-05,  2.53243944e-06,  9.46372400e-06,
        6.41968602e-05,  1.07553924e-04,  4.11875285e-05,  7.55580460e-05,
        2.39331231e-05,  8.96885272e-05,  1.89380327e-05,  5.22453826e-05])
  message: 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 81
      nit: 68
     njev: 81
   status: 0
  success: True
        x: array([ 1.15389091,  2.01172371,  0.15216369,  2.06724838,  0.04200508,
        3.22775792,  2.3581837 ,  0.01236656,  1.51382544, -0.06036563,
        3.03648058,  0.07705838])

### Tests

The following tests assess if the helper methods for epitope harmonizing all work when we have two identical models (1s in correlation matrix should be on diagonal):

In [4]:
corr_df = poly_two._make_correlation_matrix(poly_one)
assert len(corr_df) == n_eps**2
assert corr_df.r2.between(-1,1).all()
corr_df

Unnamed: 0,ref_epitope,self_epitope,r2
0,1,1,1.0
1,1,2,-0.872404
2,2,1,-0.872404
3,2,2,1.0


In [5]:
# This dataframe should return the following mapping matrix:
true_map = numpy.array([[1, 0], 
                        [0, 1]])
test_map = poly_two._create_mapping_matrix(corr_df)
assert numpy.array_equal(true_map, test_map)

Now, we will create an example where we train two models that learn the same parameters but flip the epitopes (i.e., I'm just going to flip the values in `mut_escape_df` from one model, and re-create polyclonal objects:

In [6]:
# Create a test example where two models "flipped" the epitopes
one_df = poly_one.mut_escape_df
two_df = poly_two.mut_escape_df

two_df.escape.iloc[5:10] = one_df.escape.iloc[0:5]
two_df.escape.iloc[0:5] = one_df.escape.iloc[5:10]

# Create polyclonal objects (can't seem to edit `mut_escape_df` bc it's a property)
original_poly = Polyclonal(mut_escape_df=one_df, 
                           activity_wt_df=poly_one.activity_wt_df, 
                           data_to_fit=None)
flipped_poly = Polyclonal(mut_escape_df=two_df, 
                          activity_wt_df=poly_two.activity_wt_df,
                          data_to_fit=None)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Another set of tests on the helper methods, this time, 1s should be on the "off-diagonal"

In [7]:
corr_df2 = flipped_poly._make_correlation_matrix(original_poly)
assert len(corr_df2) == n_eps**2
assert corr_df2.r2.between(-1,1).all()
corr_df2

Unnamed: 0,ref_epitope,self_epitope,r2
0,1,1,-0.872404
1,1,2,1.0
2,2,1,1.0
3,2,2,-0.872404


In [8]:
true_map = numpy.array([[0, 1], 
                        [1, 0]])
test_map = flipped_poly._create_mapping_matrix(corr_df2)
assert numpy.array_equal(true_map, test_map)
assert numpy.all(test_map.sum(axis=1) == 1)

This test is to ensure the mapping dictionary creates the epitope pairs we intend it to

In [9]:
# Test make_mapping_dict
true_dict = {'1':'2', '2':'1'}
flipped_poly._make_mapping_dict(test_map, original_poly)
assert true_dict == flipped_poly._mapping_dict

Now we harmonize the flipped object with the original one -- since these are the exact same dataframes but with flipped epitopes, the `aligned_mut_escape_df` should match the original's `mut_escape_df`.

We can also do the reverse:

In [10]:
flipped_poly.harmonize_epitopes_with(original_poly)
assert flipped_poly.aligned_mut_escape_df.equals(original_poly.mut_escape_df)

In [11]:
original_poly.harmonize_epitopes_with(flipped_poly)
assert original_poly.aligned_mut_escape_df.equals(flipped_poly.mut_escape_df)