# Parameter exploration for TCR/TCR antagonism in the revised AKPR model
Notebook to explore the revised AKPR model TCR/TCR antagonism results as a function of parameters. We define a few plotting functions that make it easy to change values in the vector of best parameter fits and visualize the corresponding antagonism curves. 

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

import sys, os
# Local modules
if not "../" in sys.path:
    sys.path.insert(1, "../")

from mcmc.costs_tcr_tcr_antagonism import cost_antagonism_akpr_i
from mcmc.costs_tcr_tcr_antagonism import antag_ratio_panel_akpr_i, steady_akpr_i_1ligand
from mcmc.mcmc_analysis import find_best_grid_point
from utils.preprocess import string_to_tuple
from mcmc.utilities_tcr_tcr_antagonism import (
    prepare_data, check_fit_model_antagonism, check_model_output, load_tcr_tcr_molec_numbers, plot_fit_antagonism
)
from models.akpr_i_model import activation_function, steady_akpr_i_2ligands

In [26]:
# Import data
# Number of TCR per T cell, L-pulse conversion parameters, peptide taus
molec_counts_fi = os.path.join("..", "data", "surface_counts", "surface_molecule_summary_stats.h5")
# Name of the summary statistic passed to the loader
# (presumably selects which statistic of the surface counts is used -- TODO confirm)
mtc = "Geometric mean"
nums = load_tcr_tcr_molec_numbers(molec_counts_fi, mtc, tcell_type="OT1_Naive")
# The loader returns three items, unpacked here under the names used by later cells
tcr_number, l_conc_mm_params, pep_tau_map_ot1 = nums

## Antagonism ratio fitting
# Prepare data for fitting antagonism ratios
data_file_name = os.path.join("..", "data", "antagonism", "allManualSameCellAntagonismDfs_v3.h5")
df = pd.read_hdf(data_file_name)
# prepare_data returns the antagonism ratio data, its error bars, and the agonist tau,
# which are reused by every fit/plot call below
df_data, df_err, tau_agonist = prepare_data(df, l_conc_mm_params, pep_tau_map_ot1)

In [27]:
# Set other parameters
# Model parameters
# Define the parameters that will remain the same throughout
# Parameters related to the cascade of phosphorylations
# NOTE(review): phi and psi_0 are also listed among the fitted parameters below;
# the fixed values set here are not passed to any call in the cells shown in this
# notebook -- confirm whether they are still needed.
phi = 0.2
kappa = 1e-4
psi_0 = 0.0
I_tot = 1.0
N = 6
R_tot = tcr_number

# Bounds on parameters: phi, cmthresh, sthresh, psi_0
# Use relatively tight bounds based on previous runs.
# Use the log of parameters so the MCMC steps are even in log scale
fit_bounds = [(0.05, 5.0), (1, 10*R_tot), (1e-5, 1000*I_tot), (1e-8, 5.0)]
# zip(*...) transposes the (lower, upper) pairs, so fit_bounds becomes
# [array of log10 lower bounds, array of log10 upper bounds]
fit_bounds = [np.log10(np.asarray(a)) for a in zip(*fit_bounds)]

# Wrapping up parameters
rates_others = [kappa]  # k_I will be gridded over
total_RI = [R_tot, I_tot]

## Start by plotting the best fits

In [28]:
# Import analysis results
# Decode explicitly as UTF-8 (the standard JSON encoding) rather than relying on
# the platform's locale default, which differs between operating systems.
with open("../results/mcmc/mcmc_analysis_akpr_i.json", "r", encoding="utf-8") as h:
    akpr_analysis = json.load(h)

In [None]:
# Load best parameter fit to begin
# find_best_grid_point returns the grid point key (a string), the parameter vector
# and the cost of the best point according to strategy "best"
best_kmf, best_pvec, best_cost = find_best_grid_point(akpr_analysis, strat="best")
# The grid key is a string; convert it to a (k_I, m, f) tuple
kmf_tuple = string_to_tuple(best_kmf)
# [N, m, f] and [kappa, k_I] for the best grid point
nmf_best = [N, *kmf_tuple[1:]]
other_rates_best = rates_others + [kmf_tuple[0]]
print("Best kmf:", best_kmf)
# best_pvec is in log10 scale; print the linear-scale values
print("Best fit:", 10.0**best_pvec)
print("Cost:", best_cost)

In [30]:
def change_log_ticks(axis, base=2, which="y"):
    """Relabel the ticks of an axis as powers of ``base``.

    The current tick positions are truncated to integers, de-duplicated,
    set back as the tick positions, and labelled ``base^{tick}``.

    Args:
        axis: object exposing get_/set_ methods for x and y ticks and
            tick labels (e.g. a matplotlib Axes).
        base: base displayed in the tick labels.
        which (str): "y", "x" or "both"; any other value changes nothing.

    Returns:
        The same axis object, modified in place.
    """
    def _power_ticks(current_ticks):
        # Integer exponents (sorted, unique) and their "base^exponent" labels
        exponents = list(np.unique([int(tick) for tick in current_ticks]))
        labels = [str(base)+'$^{'+str(expo)+'}$' for expo in exponents]
        return exponents, labels

    if which in ("y", "both"):
        y_positions, y_labels = _power_ticks(axis.get_yticks())
        axis.set_yticks(y_positions)
        axis.set_yticklabels(y_labels)
    if which in ("x", "both"):
        x_positions, x_labels = _power_ticks(axis.get_xticks())
        axis.set_xticks(x_positions)
        axis.set_xticklabels(x_labels)
    return axis
    
def plot_fit_cost(pvec, bounds, kmf, other_rates, rstots, n_p, tau_ag, df_ratio, df_ci, mm_params, 
                 panel_fct=antag_ratio_panel_akpr_i, cost_fct=cost_antagonism_akpr_i):
    """Compute the model antagonism panel and its cost, print the cost, and plot the fit.

    Args:
        pvec: parameter vector handed to both panel_fct and cost_fct.
        bounds: parameter bounds handed to cost_fct.
        kmf: grid point (k_I, m, f).
        other_rates, rstots, n_p, tau_ag: remaining model arguments, forwarded
            to the panel function (as ``other_args``) and to the cost function.
        df_ratio: antagonism ratio data.
        df_ci: error bars on the data.
        mm_params: forwarded to plot_fit_antagonism.
        panel_fct (callable): function computing the model antagonism panel.
        cost_fct (callable): function computing the scalar cost.

    Returns:
        (model panel returned by check_fit_model_antagonism, cost value)
    """
    # Model curves evaluated through the shared helper
    model_panel = check_fit_model_antagonism(
        panel_fct, pvec, kmf, df_ratio, df_ci,
        other_args=[other_rates, rstots, n_p, tau_ag], n_taus=101,
    )

    # Cost at this parameter vector, reported for reference
    fit_cost = cost_fct(pvec, bounds, kmf, other_rates, rstots, n_p, tau_ag, df_ratio, df_ci)
    print("Cost function:", fit_cost)

    # Data and model on the same figure
    plot_fit_antagonism(df_ratio, model_panel, mm_params, df_ci, cost=fit_cost, model_ci=None)
    return model_panel, fit_cost

In [31]:
def plot_model_output(expvec, other_rates, rstots, kmf, n_p):
    """Plot the single-ligand model output C_N versus L, one curve per tau.

    Args:
        expvec (sequence of 4 floats): linear-scale (not log10) fitted
            parameters, in the order used for the fit bounds:
            phi, C_m threshold, S threshold, psi_0.
        other_rates (list): fixed rates; only the first entry (kappa) is read.
        rstots (list): total receptor and inhibitor numbers, forwarded
            unchanged to check_model_output.
        kmf (tuple): k_I, m, f.
        n_p (int): N, number of phosphorylation steps.

    Returns:
        l_range, tau_range, outputs: the three items returned by
            check_model_output; outputs[i] is plotted against l_range
            for tau_range[i].
    """
    # Rate vector order: phi, kappa, C_m threshold, S threshold, k_I, psi_0
    model_rates = [expvec[0], other_rates[0]] + list(expvec[1:3]) + [kmf[0], expvec[3]]
    res = check_model_output(steady_akpr_i_1ligand, model_rates, rstots, [n_p, *kmf[1:]])

    l_range, tau_range, outputs = res
    fig, ax = plt.subplots()
    fig.set_size_inches(5.5, 4.0)
    for i, tau in enumerate(tau_range):
        ax.plot(l_range, outputs[i], label=r"$\tau = {:.0f}$ s".format(tau))
    ax.set(xscale="log", yscale="log", xlabel=r"$L$", ylabel=r"$C_N$", title="AKPR SHP-1 model")
    ax.legend(loc="lower right")
    # expvec[0] is phi: the two thresholds are entries 1 and 2 of the
    # parameter vector, so annotate those (not entries 0 and 1).
    ax.annotate(r"Best $k_I, m, f$ : $({}, {}, {})$".format(*kmf) + "\n"
                + r"Best $C_{m, thresh}$ : " + "{:.1f}\n".format(expvec[1])
                + r"Best $S_{thresh}$ : " + "{:.2e}\n".format(expvec[2]),
                xy=(0.05, 0.95), ha="left", va="top",
                xycoords="axes fraction")
    fig.tight_layout()

    plt.show()
    plt.close()
    return l_range, tau_range, outputs

In [None]:
# Check the best fit from the simulations first
# plot_fit_cost takes the log10 parameter vector, as used for the fit bounds
df_model, cost_test = plot_fit_cost(best_pvec, fit_bounds, kmf_tuple, rates_others, total_RI, 
                                     N, tau_agonist, df_data, df_err, l_conc_mm_params)

# plot_model_output takes linear-scale parameters, hence the 10** conversion
_ = plot_model_output(10**best_pvec, rates_others, total_RI, kmf_tuple, N)

## Try varying parameters and see the corresponding model curves
For instance, is there a way to better capture the 10 pM agonist, 1 nM antagonist condition, possibly at the expense of other parts of the fit? We try to manually adjust the parameters to get there. It is possible, but the model then predicts too much enhancement at 1 µM antagonist.

In [None]:
# Best kmf:  1, 4, 1
# Best fit: 
print(10**best_pvec)
# Hard-coded grid point; this overwrites the kmf_tuple obtained from the best grid point
kmf_tuple = (1, 4, 1)
# Manually chosen parameters, given in linear scale and converted to log10
# NOTE(review): the third entry (3e-6) lies below the lower bound (1e-5) set for the
# third parameter in fit_bounds; if the cost function enforces the bounds, the
# printed cost may be -inf for this vector -- confirm.
tweak_pvec = np.log10([0.2, 2e3, 3e-6, 8e-6])

df_model, cost_test = plot_fit_cost(tweak_pvec, fit_bounds, kmf_tuple, rates_others, total_RI, 
                                     N, tau_agonist, df_data, df_err, l_conc_mm_params)

In [None]:
# Try a different k, m, f for fun
tweak_kmf = (1, 3, 2)
# Best pvec for the chosen kmf
# Index directly instead of chaining .get(): if the grid point or the estimate is
# absent from the analysis results, this raises a KeyError naming the missing key
# rather than an AttributeError on None.
best_pvec_tweak_kmf = np.asarray(akpr_analysis[str(tweak_kmf)]["param_estimates"]["MAP best"])
print("Best pvec for chosen kmf:", 10**best_pvec_tweak_kmf)


df_model, cost_test = plot_fit_cost(best_pvec_tweak_kmf, fit_bounds, tweak_kmf, rates_others, total_RI, 
                                     N, tau_agonist, df_data, df_err, l_conc_mm_params)

# Try adding an activation function on $C_N$
For TCR/CAR, the individual receptor outputs are passed through Hill functions before being combined. Here, we ask whether the TCR/TCR antagonism ratio should be computed directly from $C_N$ (as we did above) or from a Hill-transformed output. I previously tried this with MCMC, but it did not seem to matter: the bare receptor outputs capture antagonism well enough. Let's try to see why here.

In [35]:
### AKPR SHP-1 model ###
# Natural log of 10: exp(p * ln10) converts a log10 parameter p to linear scale
ln10 = np.log(10.0)
# Small offset added to the model/data ratio inside log2 in the cost function
eps_for_log = 1e-8
def antag_ratio_panel_akpr_i_activ(pvec, kmf, other_rates, rstot, n_p, ag_tau, cond_index):
    """Model antagonism ratios computed on activation-function-transformed outputs.

    For every condition in cond_index, the summed C_N of the agonist/antagonist
    mixture is passed through activation_function and divided by the
    transformed output of the agonist alone at the same agonist quantity.

    Args:
        pvec (np.ndarray): log10 parameters; five entries are read. Entries
            0-3 go into the rate vector and entry 4 is the threshold handed
            to activation_function.
        kmf (tuple): k_I, m, f.
        other_rates (list): fixed rates; only the first entry is used.
        rstot (list): total receptor and inhibitor numbers, passed to the model.
        n_p (int): N; index of the last complex of the first ligand.
        ag_tau (float): tau of the agonist.
        cond_index (pd.MultiIndex): conditions, iterated as
            (agonist L, antagonist L, antagonist tau) triplets.

    Returns:
        pd.Series: antagonism ratio for each condition, indexed by cond_index.
    """
    # Back to linear scale: 10**p written as exp(p * ln 10)
    linear_params = np.exp(pvec*ln10)
    all_rates = [linear_params[0], *other_rates[:1], *linear_params[1:3],
                 kmf[0], linear_params[3]]
    nmf = (n_p, *kmf[1:])
    cn_thresh = linear_params[4]

    # Reference output: agonist alone, once per distinct agonist quantity
    agonist_level = cond_index.names[0]
    ag_alone = {
        l_ag: activation_function(
            steady_akpr_i_1ligand(all_rates, ag_tau, l_ag, rstot, nmf)[n_p], cn_thresh
        )
        for l_ag in cond_index.get_level_values(agonist_level).unique()
    }

    # Mixture output for each condition, relative to the agonist-alone reference
    ratios = np.zeros(len(cond_index))
    for row, (l_ag, l_antag, antag_tau) in enumerate(cond_index):
        complexes_mix = steady_akpr_i_2ligands(
            all_rates, np.asarray([ag_tau, antag_tau]), np.asarray([l_ag, l_antag]), rstot, nmf
        )
        # Sum of the last complex of each of the two ligands
        cn_total = complexes_mix[n_p] + complexes_mix[2*n_p+1]
        ratios[row] = activation_function(cn_total, cn_thresh) / ag_alone[l_ag]
    return pd.Series(ratios, index=cond_index)


# Main AKPR SHP-1 cost function, with activation function on the output
def cost_antagonism_akpr_i_activ(pvec, pbounds, kmf, other_rates, rstot, n_p,
    ag_tau, df_ratio, df_err, weight_smallagconc=4.0):
    """
    Negative weighted squared log2-distance between model and data antagonism
    ratios, the model being antag_ratio_panel_akpr_i_activ.

    Args:
        pvec (np.ndarray): log10 parameters. The panel function reads five
            entries: four model parameters followed by the threshold of
            the activation function.
        pbounds (list of 2 arrays): array of lower bounds, array of upper
        kmf (list): k_I, m, f
        other_rates (list): fixed rates, passed to the panel function
        rstot (list of 2 floats): R_tot, I_tot
        n_p (int): N
        ag_tau (float): tau of agonist.
        df_ratio: antagonism ratio data for a fixed agonist. Its index gives
            the conditions at which the model is evaluated and must have
            a level named "AgonistConcentration".
        df_err: log-scale error bars on the antagonism ratios.
        weight_smallagconc (float): factor multiplying the squared residuals
            of the conditions at the smallest agonist concentration.

    Returns:
        cost (float): total scalar cost; -inf if pvec is out of bounds or if
            the model raised a ValueError.
    """
    # Reject parameters outside the allowed box
    if np.any(pvec < pbounds[0]):
        return -np.inf
    if np.any(pvec > pbounds[1]):
        return -np.inf

    # Model antagonism ratio at each data condition
    try:
        model_ratios = antag_ratio_panel_akpr_i_activ(pvec, kmf, other_rates,
                            rstot, n_p, ag_tau, df_ratio.index)
    except ValueError as err:
        print(err)
        print("Error with log10 parameter values {} and m,f={}".format(pvec, kmf[1:]))
        return -np.inf

    # Squared log2 residuals in units of the error bars
    level_name = "AgonistConcentration"
    lowest_agconc = df_ratio.index.get_level_values(level_name).min()
    sq_residuals = (np.log2(model_ratios/df_ratio+eps_for_log)/df_err)**2
    # Extra weight on the conditions at the smallest agonist concentration
    at_lowest = sq_residuals.index.isin([lowest_agconc], level=level_name)
    sq_residuals.loc[at_lowest] *= weight_smallagconc
    return -np.sum(sq_residuals)

In [None]:
# Check the tweaked best fit from the simulations
tweak_kmf = tuple(string_to_tuple(best_kmf))
# Append a fifth log10 parameter, the activation function threshold (10**1.0 = 10),
# to the best-fit vector
activ_pvec = np.concatenate([best_pvec, [1.0]])
# Same bounds as fit_bounds, plus (1e-4, 1e6) for the added threshold
activ_bounds = [(0.05, 5.0), (1, 10*R_tot), (1e-5, 1000*I_tot), (1e-8, 5.0), (1e-4, 1e6)]
# Transpose to [log10 lower bounds, log10 upper bounds]
activ_bounds = [np.log10(np.asarray(a)) for a in zip(*activ_bounds)]

df_model, cost_tweak = plot_fit_cost(activ_pvec, activ_bounds, tweak_kmf, rates_others, total_RI, 
                                N, tau_agonist, df_data, df_err, l_conc_mm_params,
                                panel_fct=antag_ratio_panel_akpr_i_activ, 
                                cost_fct=cost_antagonism_akpr_i_activ)


The activation function does not give a better fit: the result is visually similar to the previous one, but has a poorer log-posterior value.