# Test analytical gradients

Set up a toy example with two different `Polyclonal` objects:

 - one with the same variants for each concentration
 - one with different variants for each concentration

We test both because these two cases are handled slightly differently internally in `Polyclonal`.

In [1]:
import numpy

import pandas as pd

from polyclonal import Polyclonal

# --- "True" model used to simulate data -------------------------------------
# Wildtype activity assigned to each epitope.
activity_wt_df = pd.DataFrame(
    {
        "epitope": ["e1", "e2"],
        "activity": [2.0, 1.0],
    }
)

# Escape value of every mutation at every epitope (one row per pair).
mut_escape_df = pd.DataFrame(
    {
        "mutation": ["M1C", "M1C", "G2A", "G2A", "A4K", "A4K", "A4L", "A4L"],
        "epitope": ["e1", "e2", "e1", "e2", "e1", "e2", "e1", "e2"],
        "escape": [2.0, 0.0, 3.0, 0.0, 0.0, 2.5, 0.0, 1.5],
    }
)

polyclonal_sim = Polyclonal(
    activity_wt_df=activity_wt_df,
    mut_escape_df=mut_escape_df,
)

# --- Simulated variants and their escape probabilities ----------------------
# Each record is (barcode, space-delimited amino-acid substitutions).
variants_df = pd.DataFrame.from_records(
    [
        ("AA", ""),
        ("AC", "M1C"),
        ("AG", "G2A"),
        ("AT", "A4K"),
        ("TA", "A4L"),
        ("CA", "M1C G2A"),
        ("CG", "M1C A4K"),
        ("CC", "G2A A4K"),
        ("TC", "G2A A4L"),
        ("CT", "M1C G2A A4K"),
        ("TG", "M1C G2A A4L"),
        ("GA", "M1C"),
    ],
    columns=["barcode", "aa_substitutions"],
)

escape_probs = polyclonal_sim.prob_escape(
    variants_df=variants_df,
    concentrations=[1.0, 2.0, 4.0],
)

# Treat the simulated predictions as if they were measured data.
data_to_fit = escape_probs.rename(
    columns={"predicted_prob_escape": "prob_escape"}
)

# --- Models to be checked ----------------------------------------------------
# Initial site-level escape values shared by both models; a fresh data frame
# is built for each model so the two objects never share the same input frame.
site_escape_records = [
    ("e1", 1, 1.0),
    ("e1", 4, 0.0),
    ("e2", 1, 0.0),
    ("e2", 4, 2.0),
]
site_escape_columns = ["epitope", "site", "escape"]

# First model: fit to the complete simulated data set.
polyclonal_data = Polyclonal(
    data_to_fit=data_to_fit,
    activity_wt_df=activity_wt_df,
    site_escape_df=pd.DataFrame.from_records(
        site_escape_records,
        columns=site_escape_columns,
    ),
    data_mut_escape_overlap="fill_to_data",
)

# Second model: only the first 30 rows of the data are kept, so that the
# concentrations no longer all share the same set of variants.
polyclonal_data2 = Polyclonal(
    data_to_fit=data_to_fit.head(30),
    activity_wt_df=activity_wt_df,
    site_escape_df=pd.DataFrame.from_records(
        site_escape_records,
        columns=site_escape_columns,
    ),
    data_mut_escape_overlap="fill_to_data",
)

## Gradient of $p_v\left(c\right)$

In [2]:
# Step size for the forward finite-difference approximation of the gradient.
eps = 1e-8

for i, model in enumerate([polyclonal_data, polyclonal_data2]):

    print(f"Model {i}")

    # Analytical: with `calc_grad=True` the call returns both the predicted
    # values and their derivatives with respect to each parameter.
    p_vc, dpvc_dparams = model._compute_1d_pvs(
                                params=model._params,
                                one_binarymap=model._one_binarymap,
                                binarymaps=model._binarymaps,
                                cs=model._cs,
                                calc_grad=True,
                                )

    # Numerical: perturb one parameter at a time and compare the resulting
    # finite-difference slope to the matching row of the analytical gradient.
    n_params = len(model._params)
    for iparam in range(n_params):
        eps_vec = numpy.zeros(n_params)
        eps_vec[iparam] = eps
        p_vc_eps = model._compute_1d_pvs(
                        params=model._params + eps_vec,
                        one_binarymap=model._one_binarymap,
                        binarymaps=model._binarymaps,
                        cs=model._cs,
                        calc_grad=False,
                        )
        numerical_grad = (p_vc_eps - p_vc) / eps
        # `.toarray()` densifies the row (presumably a scipy sparse matrix).
        analytical_grad = dpvc_dparams[iparam].toarray()
        # Euclidean norms of the discrepancy and of the analytical gradient.
        diff = numpy.sqrt(((numerical_grad - analytical_grad)**2).sum())
        mag = numpy.sqrt((analytical_grad**2).sum())
        # Only apply the relative tolerance when the analytical gradient is
        # non-zero; otherwise `diff / mag` would divide by zero. In that case
        # the absolute tolerance alone decides.
        if (diff > 1e-7) or (mag > 0 and diff / mag > 1e-7):
            raise ValueError(f"{iparam=}, {diff=}, {mag=}\n"
                             f"{analytical_grad=}\n{numerical_grad=}")
    print('All gradients check')

Model 0
All gradients check
Model 1
All gradients check


## Gradient of loss on $p_v\left(c\right)$

In [3]:
# Step size for the forward finite-difference approximation of the gradient.
eps = 1e-7

# `delta` argument passed to every `_loss_dloss` call in this cell.
delta = 2

for i, model in enumerate([polyclonal_data, polyclonal_data2]):

    print(f"Model {i}")

    # Analytical: the call returns the loss and its gradient with respect to
    # the parameters.
    loss, dloss = model._loss_dloss(
                                params=model._params,
                                delta=delta,
                                )

    # Numerical: perturb one parameter at a time and compare the resulting
    # finite-difference slope to the matching analytical gradient entry.
    n_params = len(model._params)
    for iparam in range(n_params):
        eps_vec = numpy.zeros(n_params)
        eps_vec[iparam] = eps
        loss_eps, _ = model._loss_dloss(
                        params=model._params + eps_vec,
                        delta=delta,
                        )
        numerical_grad = (loss_eps - loss) / eps
        analytical_grad = dloss[iparam]
        # Absolute discrepancy and absolute size of the analytical entry.
        diff = numpy.sqrt((numerical_grad - analytical_grad)**2)
        mag = numpy.sqrt(analytical_grad**2)
        # Only apply the relative tolerance when the analytical gradient is
        # non-zero; otherwise `diff / mag` would divide by zero. In that case
        # the absolute tolerance alone decides.
        if (diff > 1e-7) or (mag > 0 and diff / mag > 1e-6):
            raise ValueError(f"{iparam=}, {diff=}, {mag=}\n"
                             f"{analytical_grad=}\n{numerical_grad=}")

    print('All gradients check')

Model 0
All gradients check
Model 1
All gradients check


## Gradient of site escape regularization

In [4]:
# Step size for the forward finite-difference approximation of the gradient.
eps = 1e-7

# Arguments passed to every `_reg_escape` call in this cell. They get their
# own names so they cannot be confused with the `delta` used for the loss.
reg_weight = 0.2
reg_delta = 0.5

for i, model in enumerate([polyclonal_data, polyclonal_data2]):

    print(f"Model {i}")

    # Analytical: the call returns the regularization term and its gradient
    # with respect to the parameters.
    reg, dreg = model._reg_escape(params=model._params,
                                  weight=reg_weight,
                                  delta=reg_delta)

    # Numerical: perturb one parameter at a time and compare the resulting
    # finite-difference slope to the matching analytical gradient entry.
    n_params = len(model._params)
    for iparam in range(n_params):
        eps_vec = numpy.zeros(n_params)
        eps_vec[iparam] = eps
        reg_eps, _ = model._reg_escape(
                        params=model._params + eps_vec,
                        weight=reg_weight,
                        delta=reg_delta,
                        )
        numerical_grad = (reg_eps - reg) / eps
        analytical_grad = dreg[iparam]
        # Absolute discrepancy and absolute size of the analytical entry.
        diff = numpy.sqrt((numerical_grad - analytical_grad)**2)
        mag = numpy.sqrt(analytical_grad**2)
        # The relative tolerance applies only when the analytical gradient is
        # non-zero, which avoids dividing by zero.
        if (diff > 1e-7) or (mag > 0 and diff / mag > 1e-6):
            raise ValueError(f"{iparam=}, {diff=}, {mag=}\n"
                             f"{analytical_grad=}\n{numerical_grad=}")

    print('All gradients check')

Model 0
All gradients check
Model 1
All gradients check
