# Test epitope harmonizing

## Setup

In [1]:
import numpy

import random

import pandas as pd

from polyclonal import Polyclonal

import polyclonal.utils as utils

import unittest

# Wild-type activity of the simulated antibody mix against each of its two epitopes.
activity_wt_df = pd.DataFrame({"epitope": ["e1", "e2"], "activity": [2.0, 1.0]})

# Escape of every mutation at every epitope (one row per mutation/epitope pair).
# In this toy model the site-1 and site-2 mutations only escape "e1", and the
# site-4 mutations only escape "e2".
mut_escape_df = pd.DataFrame(
    {
        "mutation": [
            "M1C",
            "M1C",
            "G2A",
            "G2A",
            "A4K",
            "A4K",
            "A4L",
            "A4L",
            "A4Q",
            "A4Q",
        ],
        "epitope": ["e1", "e2", "e1", "e2", "e1", "e2", "e1", "e2", "e1", "e2"],
        "escape": [2.0, 0.0, 3.0, 0.0, 0.0, 2.5, 0.0, 1.5, 0.0, 3.5],
    }
)

# Ground-truth model used only to simulate escape probabilities.
polyclonal_sim = Polyclonal(activity_wt_df=activity_wt_df, mut_escape_df=mut_escape_df)

# Wild type, every single mutant, and every combination of mutations at
# different sites. Each variant gets its own barcode: the last entry used to
# repeat "GA" (already assigned to the "A4Q" single mutant) and now uses the
# one remaining two-letter barcode, "GC".
variants_df = pd.DataFrame.from_records(
    [
        ("AA", ""),
        ("AC", "M1C"),
        ("AG", "G2A"),
        ("AT", "A4K"),
        ("TA", "A4L"),
        ("GA", "A4Q"),
        ("CA", "M1C G2A"),
        ("CG", "M1C A4K"),
        ("TT", "M1C A4L"),
        ("GT", "M1C A4Q"),
        ("CC", "G2A A4K"),
        ("TC", "G2A A4L"),
        ("GG", "G2A A4Q"),
        ("CT", "M1C G2A A4K"),
        ("TG", "M1C G2A A4L"),
        ("GC", "M1C G2A A4Q"),
    ],
    columns=["barcode", "aa_substitutions"],
)

# Simulated escape probability of every variant at three concentrations.
escape_probs = polyclonal_sim.prob_escape(
    variants_df=variants_df, concentrations=[1.0, 2.0, 4.0]
)

# The simulated predictions become the "observed" data the test models are fit to.
data_to_fit = escape_probs.rename(columns={"predicted_prob_escape": "prob_escape"})

In [2]:
n_eps = 2
# Two models built from the same data with the same settings, so that they can
# serve as an "identical models" pair in the helper tests below.
poly_one, poly_two = (
    Polyclonal(
        data_to_fit=data_to_fit,
        n_epitopes=n_eps,
        activity_wt_df=None,
        site_escape_df=None,
    )
    for _ in range(2)
)

In [3]:
# Seed Python's `random` module before fitting.
# NOTE(review): whether `Polyclonal.fit` draws from `random` is not visible
# here -- confirm this seed is what makes the fits reproducible.
random.seed(1)
# Fit both models with identical settings; binding the return value to `_`
# keeps it out of the cell output.
_ = poly_one.fit(fit_site_level_first=False)
_ = poly_two.fit(fit_site_level_first=False)

## Tests

### Helper methods
The following tests assess if the helper methods for epitope harmonizing all work when we have two identical models (1s in correlation matrix should be on diagonal):

In [4]:
# Correlate the epitopes of `poly_two` with those of `poly_one`.
corr_df = poly_two._make_correlation_matrix(poly_one)
# Expect one row per pair of epitopes, each holding a valid correlation.
assert corr_df.shape[0] == n_eps * n_eps
assert corr_df["correlation"].between(-1, 1).all()
corr_df

Unnamed: 0,ref_epitope,self_epitope,correlation
0,1,1,1.0
1,1,2,-0.872404
2,2,1,-0.872404
3,2,2,1.0


In [5]:
# Identical models: each epitope should be matched with itself, so the mapping
# matrix built from `corr_df` must be the 2x2 identity.
test_map = poly_two._create_max_correlation_mapping_matrix(corr_df)
true_map = numpy.eye(2, dtype=int)
assert numpy.array_equal(test_map, true_map)

#### Scenario: Flipped `mut_escape_df`

Now, we will create an example where we train two models that learn the same parameters but flip the epitopes (i.e., I'm just going to flip the values in `mut_escape_df` and `activity_wt_df` from one model, and re-create the polyclonal objects):

In [6]:
# Blanket warning suppression, kept so the output of later cells is unchanged.
# The assignments below no longer need it: they used to go through chained
# indexing (`two_df.escape.iloc[...] = ...`), which is what made pandas warn.
import warnings

warnings.filterwarnings("ignore")

# Create a test example where two models "flipped" the epitopes.
one_df = poly_one.mut_escape_df
# Explicit copy: this frame is edited below and must not share data with the model.
two_df = poly_two.mut_escape_df.copy()
one_wt_df = poly_one.activity_wt_df

# Flipped version of `activity_wt_df`: same epitope labels, activities in
# reverse order. Plain arrays keep the pairing purely positional and give the
# new frame a default 0..n-1 index.
two_wt_df = pd.DataFrame(
    {
        "epitope": one_wt_df["epitope"].to_numpy(),
        "activity": one_wt_df["activity"].to_numpy()[::-1],
    }
)

# Flipped version of `mut_escape_df`: swap the escape values of the two epitope
# blocks (rows 0-4 belong to epitope 1, rows 5-9 to epitope 2). A single
# `.iloc[rows, column]` assignment writes into `two_df` itself, so it does not
# depend on view-vs-copy semantics and also works under pandas Copy-on-Write.
escape_col = two_df.columns.get_loc("escape")
one_escape = one_df["escape"].to_numpy()
two_df.iloc[5:10, escape_col] = one_escape[0:5]
two_df.iloc[0:5, escape_col] = one_escape[5:10]

# Create polyclonal objects (can't seem to edit `mut_escape_df` bc it's a property)
original_poly = Polyclonal(
    mut_escape_df=one_df, activity_wt_df=one_wt_df, data_to_fit=None
)
flipped_poly = Polyclonal(
    mut_escape_df=two_df, activity_wt_df=two_wt_df, data_to_fit=None
)

##### Sanity checks
Here, we just want to make sure the `mut_escape_df` properties for each `Polyclonal` object are flipped. 
This should result in the escape values for a given mutation (e.g., M1C) in epitopes 1 and 2 of the `original_poly` object being swapped in the `flipped_poly` object, and so on.

This should manifest in the `_params` data field as binary swaps across the arrays (i.e. [1, 2, 3, 4] --> [2,1,4,3]) after the first two epitope params (because params are on a mutation-epitope ordering)

In [7]:
# Reference model: escape values before any flipping, shown for visual comparison.
original_poly.mut_escape_df

Unnamed: 0,epitope,site,wildtype,mutant,mutation,escape
0,1,1,M,C,M1C,0.152164
1,1,2,G,A,G2A,0.042005
2,1,4,A,K,A4K,2.358184
3,1,4,A,L,A4L,1.513825
4,1,4,A,Q,A4Q,3.036481
5,2,1,M,C,M1C,2.067248
6,2,2,G,A,G2A,3.227758
7,2,4,A,K,A4K,0.012367
8,2,4,A,L,A4L,-0.060366
9,2,4,A,Q,A4Q,0.077058


In [8]:
# Flipped model: the escape values of epitopes 1 and 2 should be swapped
# relative to `original_poly.mut_escape_df`.
flipped_poly.mut_escape_df

Unnamed: 0,epitope,site,wildtype,mutant,mutation,escape
0,1,1,M,C,M1C,2.067248
1,1,2,G,A,G2A,3.227758
2,1,4,A,K,A4K,0.012367
3,1,4,A,L,A4L,-0.060366
4,1,4,A,Q,A4Q,0.077058
5,2,1,M,C,M1C,0.152164
6,2,2,G,A,G2A,0.042005
7,2,4,A,K,A4K,2.358184
8,2,4,A,L,A4L,1.513825
9,2,4,A,Q,A4Q,3.036481


In [9]:
# Raw parameter vector of the reference model, shown for comparison with the
# flipped model's vector in the next cell.
original_poly._params

array([ 1.15389091,  2.01172371,  0.15216369,  2.06724838,  0.04200508,
        3.22775792,  2.3581837 ,  0.01236656,  1.51382544, -0.06036563,
        3.03648058,  0.07705838])

In [10]:
# Raw parameter vector of the flipped model: compared with
# `original_poly._params`, each consecutive pair of entries should be swapped.
flipped_poly._params

array([ 2.01172371,  1.15389091,  2.06724838,  0.15216369,  3.22775792,
        0.04200508,  0.01236656,  2.3581837 , -0.06036563,  1.51382544,
        0.07705838,  3.03648058])

#### Scenario: Helper method input violations
Another set of tests on the helper methods; this time, the 1s in the correlation matrix should be on the "off-diagonal".

In [11]:
# Correlate the epitopes of the flipped model with those of the original model.
corr_df2 = flipped_poly._make_correlation_matrix(original_poly)
# Expect one row per pair of epitopes, each holding a valid correlation.
assert corr_df2.shape[0] == n_eps * n_eps
assert corr_df2["correlation"].between(-1, 1).all()
corr_df2

Unnamed: 0,ref_epitope,self_epitope,correlation
0,1,1,-0.872404
1,1,2,1.0
2,2,1,1.0
3,2,2,-0.872404


In [12]:
# Flipped models: each epitope should be matched with the *other* one, so the
# mapping matrix must be the 2x2 anti-diagonal.
test_map = flipped_poly._create_max_correlation_mapping_matrix(corr_df2)
true_map = numpy.eye(2, dtype=int)[::-1]
assert numpy.array_equal(test_map, true_map)
# Every row selects exactly one partner epitope.
assert (test_map.sum(axis=1) == 1).all()

This test is to ensure the mapping dictionary creates the epitope pairs we intend it to

In [13]:
# `_make_mapping_dict` should pair each epitope label with the other one.
test_dict = flipped_poly._make_mapping_dict(test_map, original_poly)
true_dict = dict([("1", "2"), ("2", "1")])
assert test_dict == true_dict

### Epitope harmonization

Now we harmonize the flipped object with the original one -- since these are the exact same dataframes but with flipped epitopes, after harmonization, `flipped_poly.mut_escape_df` should be equal to `original_poly.mut_escape_df`.

The `activity_wt_df` and `_params` properties should also be equal after harmonization.

In [14]:
# Before harmonization the two models must differ in every representation.
# NOTE: this cell is not re-runnable on its own -- the harmonization call below
# changes `flipped_poly`, so these "not equal" checks fail on a second run.
assert not flipped_poly.mut_escape_df.equals(
    original_poly.mut_escape_df
), "mut_escape_df already matches before harmonization"
assert not flipped_poly.activity_wt_df.equals(
    original_poly.activity_wt_df
), "activity_wt_df already matches before harmonization"
assert not numpy.array_equal(
    original_poly._params, flipped_poly._params
), "_params already match before harmonization"

flipped_poly.harmonize_epitopes_with(original_poly)

# After harmonization every representation must match the reference model.
assert flipped_poly.mut_escape_df.equals(
    original_poly.mut_escape_df
), "mut_escape_df differs after harmonization"
assert flipped_poly.activity_wt_df.equals(
    original_poly.activity_wt_df
), "activity_wt_df differs after harmonization"
assert numpy.array_equal(
    original_poly._params, flipped_poly._params
), "_params differ after harmonization"

Now we want to test if the params are the same (i.e., if we update these DFs and use them to create a new polyclonal object, we should get the same parameters out)

In [15]:
# Rebuild a model from the harmonized data frames; it should reproduce the
# harmonized model exactly, including the raw parameter vector.
flipped_poly_clone = Polyclonal(
    data_to_fit=None,
    activity_wt_df=flipped_poly.activity_wt_df,
    mut_escape_df=flipped_poly.mut_escape_df,
)
# Sanity checks
assert flipped_poly.mut_escape_df.equals(flipped_poly_clone.mut_escape_df)
assert flipped_poly.activity_wt_df.equals(flipped_poly_clone.activity_wt_df)
assert numpy.array_equal(flipped_poly._params, flipped_poly_clone._params)

### Throwing exceptions when expected
Here, we test the behavior of the epitope harmonization when its assumptions are violated.
Examples of these violations include:

* Epitopes aren't "1-to-1" (i.e., one epitope maps to multiple epitopes)
* Models have differing numbers of epitopes (non-square mapping matrix)
* Mapping matrix isn't binary (strictly 0/1 entries)

In [16]:
class ExceptionTests(unittest.TestCase):
    """Check that the epitope-mapping helpers reject invalid input.

    Every test calls a private helper of the notebook-level ``flipped_poly``
    model and expects it to raise ``ValueError``.
    """

    def test_non_diagonal_mapping_matrix(self):
        """All correlations tied at 0.5 must raise ``ValueError``.

        Presumably the helper raises because no unambiguous 1-to-1 epitope
        mapping can be chosen from tied correlations.
        """
        tied_corr_df = pd.DataFrame(
            {
                "ref_epitope": [1, 1, 2, 2],
                "self_epitope": [1, 2, 1, 2],
                "correlation": [0.5, 0.5, 0.5, 0.5],
            }
        )
        with self.assertRaises(ValueError):
            flipped_poly._create_max_correlation_mapping_matrix(tied_corr_df)

    def test_non_square_mapping_matrix(self):
        """A non-square (3x2) mapping matrix must raise ``ValueError``."""
        test_mat = numpy.array([[1, 0], [0, 1], [0, 0]])
        with self.assertRaises(ValueError):
            flipped_poly._check_epitope_mapping_matrix(test_mat)

    def test_mapping_matrix_values(self):
        """A mapping matrix with an entry other than 0 or 1 must raise ``ValueError``."""
        test_mat = numpy.array([[1, 0], [0, 2]])
        with self.assertRaises(ValueError):
            flipped_poly._check_epitope_mapping_matrix(test_mat)


# Call each check directly on a bare instance so that a failing check raises
# inside this cell.
except_tests = ExceptionTests()
for test_name in (
    "test_non_diagonal_mapping_matrix",
    "test_non_square_mapping_matrix",
    "test_mapping_matrix_values",
):
    getattr(except_tests, test_name)()