# Universal antagonism classification
Given the EC$_{50}$ of any antigen cognate to some TCR, predict whether it will act as an agonist or as an antagonist at a given dose, against a given dose of CAR antigen. 

In fact, if a peptide is acting as an agonist at 1 $\mu$M but as an antagonist at 1 nM, classify it as a partial agonist. That could give 3 categories

### 6 antigen classes?
Any way to define 6 categories instead? Compute antagonism of the 6 prototypical categories of the Science paper, and classify real peptides based on which they are closest to in their antagonism vs  $L^T$ curve ? That would be nice. 
On the other hand, that may be superfluous, since we could just classify peptides based on their EC50?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import json, h5py
import os

In [None]:
from models.tcr_car_akpr_model import (
    activation_function, steady_akpr_i_receptor_types, 
    steady_akpr_i_1ligand, get_threshold
)
from utils.preprocess import (
    michaelis_menten, loglog_michaelis_menten, inverse_michaelis_menten, geo_mean_apply
)
from models.conversion import convert_ec50_tau_relative
from mcmc.costs_tcr_car_antagonism import repackage_tcr_car_params, antag_ratio_panel_tcr_car
from mcmc.plotting import change_log_ticks
from utils.preprocess import geo_mean_apply, write_conc_uM

## Aesthetic parameters

In [None]:
# Colour palette of the perturbation types, stored as JSON
with open("results/for_plots/perturbations_palette.json", "r") as f:
    perturb_palette = json.load(f)
# Add an entry for the unperturbed condition: opaque black in RGBA
perturb_palette.update({"None": [0., 0., 0., 1.]})
sns.palplot(perturb_palette.values())

# EC50 to $\tau$ conversion

In [None]:
# CD25-based EC50s (in M), used to compute taus across different
# TCR systems (murine, human, etc.)
df_cd25_ec50s = pd.read_hdf(
    "data/dose_response/experimental_peptide_ec50s_blasts.h5", key="df"
).xs("CD25fit", level="Method")
# Apply geo_mean_apply to each (TCR, peptide) group
df_cd25_ec50s = df_cd25_ec50s.groupby(["TCR", "Peptide"]).apply(geo_mean_apply)

# Prefix each peptide name with the name of its TCR, e.g. N4 -> OT1-N4.
# The mapping is keyed on the bare peptide name, so a name shared by two TCRs
# would receive the prefix of the TCR processed last.
rename_dict = {
    pep: "{}-{}".format(tcr_name, pep)
    for tcr_name in ("HHAT", "NYESO", "OT1")
    for pep in df_cd25_ec50s.xs(tcr_name).index.unique()
}
df_cd25_ec50s = df_cd25_ec50s.rename(rename_dict, level="Peptide")

df_cd25_ec50s

In [None]:
# Choose reference point: N4 (noted as 10 s by the original author; the actual
# value is whatever the reference file contains)
ref_file = "data/reference_pep_tau_maps.json"
with open(ref_file, "r") as file:
    tau_refs = json.load(file)

# Index directly so that a missing "N4" entry raises a KeyError right here,
# instead of silently propagating None into every EC50 -> tau conversion
ref_tau_n4 = tau_refs["N4"]

# Also choose reference absolute EC50 for N4: use CD25 EC50s
# This means we will have different taus for OT-1 peptides vs. fig. 2
# But that's OK, we are using a different set of EC50s
# to illustrate the general procedure to predict antagonism
ref_ec50_n4 = df_cd25_ec50s.at[("OT1", "OT1-N4")]

def ec50_to_tau(ec50: np.float64, ec50_ref=ref_ec50_n4, tau_ref=ref_tau_n4):
    """
    Translate an absolute EC50 (in M) into a binding time.

    The EC50 is first expressed relative to ec50_ref; that ratio is then
    passed to convert_ec50_tau_relative together with tau_ref and npow=6.
    By default, the reference is N4 (SIINFEKL): ref_ec50_n4 and ref_tau_n4.
    """
    relative_ec50 = ec50 / ec50_ref
    return convert_ec50_tau_relative(relative_ec50, tau_ref, npow=6)

# Other conversions and parameter loading

## Pulse concentration to antigen number conversion
Assuming the peptides load correctly on tumor cells. 

In other words, loading EC50 of the peptide is the same as for OT1 peptides. Obviously, this is not always true.  

In [None]:
# Load constant parameter values stored in the "data" group of the MCMC results file
samples_fname = "mcmc_results_tcr_car_both_conc.h5"
with h5py.File(os.path.join("results", "mcmc", samples_fname), "r") as rfile:
    data_group = rfile.get("data")
    # Names of the fitted parameters, stored as an attribute of the samples
    fit_param_names = list(rfile.get("samples").attrs.get("param_names"))
    # [()] reads each dataset fully into memory before the file is closed
    l_conc_mm_params = data_group.get("l_conc_mm_params")[()]
    # Cost function arguments, in the order given by the "cost_args_names" attribute
    cost_args_loaded = [data_group.get(a)[()]
                        for a in data_group.attrs.get("cost_args_names")]
    # Drop the handles so no reference to the file lingers in the namespace
    del data_group, rfile

In [None]:
# The returned fraction is meant to be multiplied by the number of MHC per tumor cell
def pulse_to_frac_loaded(conc):
    """ Fraction of MHC loaded at a pulse concentration conc (in uM).

    Evaluates michaelis_menten at conc with the fixed value 1.0 as second
    argument and l_conc_mm_params[1] as third argument.
    """
    loading_param = l_conc_mm_params[1]
    return michaelis_menten(conc, 1.0, loading_param)

## Load surface molecule numbers

In [None]:
# Load all CAR and TCR antigen numbers on the tumor lines used for each TCR line
molec_counts_filename = "data/surface_counts/surface_molecule_summary_stats.h5"
mtc = "Geometric mean"  # Summary statistic (column) used for every molecule count
tumors = ["E2APBX", "Nalm6", "PC9", "BEAS2B"]

# Mapping each TCR line to its tumor type(s)
# HHAT is mapped to both BEAS2B and PC9; predictions are made on each
# As a proxy for MSKCC data, K562 (which we haven't characterized) is most similar to Nalm6
tcr_to_tumor = {"OT1":("E2APBX",), "NYESO":("Nalm6",), "HHAT":("BEAS2B", "PC9"), "MSKCC":("Nalm6",)}  
#tcr_to_tumor = {"OT1":("E2APBX",), "NYESO":("Nalm6",), "HHAT":("PC9",), "MSKCC":("Nalm6",)}  

# Surface area of cells, relative to E2aPBX
# Unsure about surface area of PC9 and BEAS2B, using 1.0
size_factors = {"E2APBX":1.0, "Nalm6":2.0, "PC9":1.0, "BEAS2B":1.0}

# Prepare DataFrame with MHC and tumor antigen levels
df_carag_mhc = pd.DataFrame(np.zeros([len(tumors), 2]), index=pd.Index(tumors, name="Tumor"), 
                           columns=pd.Index(["MHC", "CAR_Ag"], name="Molecule"))

# E2aPBX: CD19 is the CAR antigen. Both counts are divided by the size factor,
# like for every other tumor line (the factor is 1.0 here, so values are unchanged)
molec_stats = pd.read_hdf(molec_counts_filename, key="surface_numbers_stats")
df_carag_mhc.loc["E2APBX", "MHC"] = molec_stats.loc[("E2aPBX_WT", "MHC"), mtc] / size_factors["E2APBX"]
df_carag_mhc.loc["E2APBX", "CAR_Ag"] = molec_stats.loc[("E2aPBX_WT", "CD19"), mtc] / size_factors["E2APBX"]

# NYESO: on Nalm6, CD19 is the CAR antigen
df_carag_mhc.loc["Nalm6", "MHC"] = molec_stats.loc[("Nalm6_19hi", "MHC"), mtc] / size_factors["Nalm6"]
df_carag_mhc.loc["Nalm6", "CAR_Ag"] = molec_stats.at[("Nalm6_19hi", "CD19"), mtc] / size_factors["Nalm6"]

# HHAT: fill in both PC9 and BEAS2B; Her2 is the CAR antigen on these lines
for tum in ["PC9", "BEAS2B"]:
    df_carag_mhc.loc[tum, "MHC"] = molec_stats.loc[(tum, "MHC"), mtc] / size_factors[tum]
    df_carag_mhc.loc[tum, "CAR_Ag"] = molec_stats.loc[(tum, "Her2"), mtc] / size_factors[tum]

df_carag_mhc

## Load model parameters

In [None]:
# Load the summary of the MCMC fits (JSON) to retrieve the best parameters
fit_conc = ["1uM", "1nM"]
analysis_res_fname = "mcmc_analysis_tcr_car_both_conc.json"
with open(os.path.join("results", "mcmc", analysis_res_fname), "r") as jfile:
    all_results_dicts = json.load(jfile)
    del jfile

# Results are keyed by the string form of the (k, m, f) tuple; take the
# "MAP best" estimate for the chosen combination.
# NOTE(review): presumably repackage_tcr_car_params takes care of going back
# to linear-scale parameters -- confirm.
chosen_kmf = (1, 2, 1)
pvec_best = np.asarray(
    all_results_dicts.get(str(chosen_kmf)).get("param_estimates").get("MAP best")
)

# Rearrange loaded parameters in the format expected by the model functions;
# the fourth cost argument (CD19 tau and L) is kept aside for later use
other_rates, ritot, nmf_fixed, cd19_tau_l = cost_args_loaded
res = repackage_tcr_car_params(pvec_best, chosen_kmf, other_rates, ritot, nmf_fixed)
(
    all_rates, tcr_rates, car_rates, ritot_vec, tcr_ri, car_ri,
    nmf_both, tcr_nmf, car_nmf, threshold_taus
) = res

# Activation thresholds: model output for one ligand at the threshold tau,
# with a ligand number of 10x the first receptor-number entry and large_l=True
tcr_thresh = steady_akpr_i_1ligand(
    tcr_rates, threshold_taus[0], 10*tcr_ri[0], tcr_ri, tcr_nmf, large_l=True
)[tcr_nmf[0]]
car_thresh = steady_akpr_i_1ligand(
    car_rates, threshold_taus[1], 10*car_ri[0], car_ri, car_nmf, large_l=True
)[car_nmf[0]]

# Antagonism as a function of EC50 for various TCR kinds
Continuous model curve vs tau (or EC50?) for each TCR line, with appropriate tumor numbers

In [None]:
# Generate model curves as a function of tau for each (TCR line, tumor line) pair,
# at two pulse concentrations: 1 uM and 1 nM. These correspond to different
# TCR antigen numbers L^T on each tumor line, since the MHC levels differ.
tau_range = np.linspace(tau_refs["None"], tau_refs["N4"], 200)
pulse_concs = [1e0, 1e-3]
model_curves = {}
for tcr, tumor in tcr_to_tumor.items():
    # A TCR line can map to several tumor lines (HHAT); loop over all of them
    for tum in tumor:
        # CAR antigen: tau of CD19, number from the tumor line
        car_ag_tau_l = (cd19_tau_l[0], df_carag_mhc.loc[tum, "CAR_Ag"])
        # TCR antigen numbers: MHC level times the fraction loaded at each pulse
        l_range = [df_carag_mhc.loc[tum, "MHC"] * pulse_to_frac_loaded(c)
                   for c in pulse_concs]
        tcr_index = pd.MultiIndex.from_product(
            [l_range, tau_range], names=["TCR_Antigen_Density", "TCR_Antigen"]
        )
        # Model predictions of the antagonism ratio, without adjusting any
        # parameter, since we are making predictions about cytokines
        curves = antag_ratio_panel_tcr_car(
            pvec_best, chosen_kmf, other_rates, ritot, nmf_fixed,
            car_ag_tau_l, tcr_index
        )
        # Relabel ligand numbers by pulse concentration, then format
        # the concentration labels with write_conc_uM
        rename_l = dict(zip(l_range, pulse_concs))
        model_curves[(tcr, tum)] = (
            curves.rename(rename_l, level="TCR_Antigen_Density")
            .rename(write_conc_uM, level="TCR_Antigen_Density")
        )
    print("Generated model curves for {} TCR".format(tcr))


# Stack all curves, adding the TCR and tumor as outer index levels
model_curves = pd.concat(model_curves, names=["TCR", "Tumor"])
model_curves.name = "FC"
model_curves

## Predictions for various TCR antigens
For each antigen, predict at 1 $\mu$M and at 1 nM. Then make a bar graph of $\log_2 FC$, with 2 bars for each antigen (black and red). 

In [None]:
# Main function to predict if an antigen will be an antagonist at a given pulse conc. 
def predict_antagonism(tcr_ec50, tcr_pulse_conc, **kwargs):
    """ 
    Predict the antagonism ratio caused by a TCR antigen of given EC50,
    pulsed at a given concentration, on the response to a CAR antigen.

    Args:
        tcr_ec50 (float): TCR antigen EC50, in M
        tcr_pulse_conc (float): TCR antigen pulse concentration, in uM

    Keyword args:
        car_l (float): number of CAR ligand molecules per tumor cell.
            Default: cd19_tau_l[1].
        car_tau (float): CAR ligand binding time. Default: cd19_tau_l[0].
        max_mhc (float): number of MHC per tumor cell.
            Default: l_conc_mm_params[0].
        tcr_num (float): number of TCRs per T cell. Default: tcr_ri[0].
        car_num (float): number of CARs per T cell. Default: car_ri[0].
        ec50_ref (float): reference ec50, in M. Default: ref_ec50_n4.
        tau_ref (float): reference tau, in s. Default: ref_tau_n4.
        Any other keyword is ignored.

    Returns:
        ratio: sum of the TCR and CAR activation function values with both
            antigens present, divided by the CAR activation function value
            with the CAR antigen alone. Values below 1 indicate antagonism.
    """
    # Start from the defaults and let the caller override any of them
    opts = {
        "car_l": cd19_tau_l[1],
        "car_tau": cd19_tau_l[0],
        "max_mhc": l_conc_mm_params[0],
        "tcr_num": tcr_ri[0],
        "car_num": car_ri[0],
        "ec50_ref": ref_ec50_n4,
        "tau_ref": ref_tau_n4,
    }
    opts.update(kwargs)
    car_l, car_tau = opts["car_l"], opts["car_tau"]

    # Work on copies so that the module-level receptor numbers stay untouched
    receptor_totals = ritot_vec.copy()
    receptor_totals[0] = np.asarray([opts["tcr_num"], opts["car_num"]])
    car_receptors = car_ri.copy()
    car_receptors[0] = opts["car_num"]

    # EC50 -> binding time, pulse concentration -> number of TCR ligands
    tcr_tau = ec50_to_tau(tcr_ec50, ec50_ref=opts["ec50_ref"],
                          tau_ref=opts["tau_ref"])
    tcr_l = opts["max_mhc"] * pulse_to_frac_loaded(tcr_pulse_conc)

    # Denominator: response to the CAR antigen alone
    car_only_out = steady_akpr_i_1ligand(
        car_rates, car_tau, car_l, car_receptors, car_nmf
    )[car_nmf[0]]
    z_car_only = activation_function(car_only_out, car_thresh)

    # Numerator: response to the mixture of TCR and CAR antigens
    mix_out = steady_akpr_i_receptor_types(
        all_rates,
        np.asarray([tcr_tau, car_tau]),
        np.asarray([tcr_l, car_l]),
        receptor_totals,
        nmf_both,
    )
    z_tcr = activation_function(mix_out[0][nmf_both[0][0]], tcr_thresh)
    z_car = activation_function(mix_out[1][nmf_both[0][1]], car_thresh)
    return (z_tcr + z_car) / z_car_only
    

In [None]:
# Predict the antagonism of every measured peptide of each TCR line, using the
# MHC and CAR antigen numbers of the tumor line(s) mapped to that TCR.
# All (tumor, antigen) pairs are computed; the right pair is selected when plotting.
model_preds_tcr_ags = {}
for tcr, tumor in tcr_to_tumor.items():
    # No CD25 EC50s are available for MSKCC: nothing to predict here
    if tcr == "MSKCC":
        continue
    for tum in tumor:
        # Only the tumor-specific numbers are overridden. The other keywords
        # keep their defaults: N4 tau and CD25 EC50 as reference, TCR and CAR
        # receptor numbers, and CAR antigen tau same as CD19.
        predict_kwargs = dict(
            car_l=df_carag_mhc.loc[tum, "CAR_Ag"],
            max_mhc=df_carag_mhc.loc[tum, "MHC"],
        )

        def predict_antagonism_applicable(x):
            # Pulse concentrations are in uM: 1 uM first, then 1 nM
            fc_1um = predict_antagonism(x, 1e0, **predict_kwargs)
            fc_1nm = predict_antagonism(x, 1e-3, **predict_kwargs)
            return pd.Series((fc_1um, fc_1nm))

        df = df_cd25_ec50s.xs(tcr, level="TCR").apply(func=predict_antagonism_applicable)
        df.columns = pd.MultiIndex.from_product(
            [["FC"], ["1uM", "1nM"]], names=["Quantity", "TCR_Antigen_Density"]
        )
        model_preds_tcr_ags[(tcr, tum)] = df.copy()

# Binding times of the same peptides, from their EC50s
tcr_lines_ags_taus = df_cd25_ec50s.apply(ec50_to_tau)
tcr_lines_ags_taus.name = "tau"
# Stack the predictions, with TCR and tumor as outer index levels
model_preds_tcr_ags = pd.concat(model_preds_tcr_ags, names=["TCR", "Tumor"])
# Immediately drop the incorrect pairs of HHAT-tumor
#model_preds_tcr_ags = model_preds_tcr_ags.drop([("HHAT", "BEAS2B", "HHAT-p8F"), ("HHAT", "PC9", "HHAT-WT")])
# In fact, back to both peptides on PC9, so keep both tumors for both antigens
model_preds_tcr_ags = model_preds_tcr_ags.sort_index()
model_preds_tcr_ags

In [None]:
# Plotting the results: horizontal bars of log2 FC per peptide,
# one panel per TCR antigen pulse concentration
# Black for 1 uM, the "AgDens" perturbation colour for 1 nM
agdens_palette = {"1uM": perturb_palette["None"], "1nM": perturb_palette["AgDens"]}
nice_pulse_name = "TCR Ag pulse"

# Take log2 of the predictions, so 0 corresponds to FC = 1, then move the
# pulse concentration from the columns to the index (long format)
plotDf = (np.log2(model_preds_tcr_ags)
            .stack("TCR_Antigen_Density")
         )
# Sort peptides according to 1 uM antagonism
# NOTE(review): a peptide predicted on two tumor lines (HHAT) appears twice
# in this ordering -- confirm that seaborn handles the duplicates as intended
sorted_peps = (plotDf.xs("1uM", level="TCR_Antigen_Density")
               .sort_values(by="FC").index.get_level_values("Peptide"))
# Sort by antigen tau or EC50 instead?
#sorted_peps = sorted(plotDf.index.get_level_values("Peptide").unique(), 
#                    key=lambda x: tcr_lines_ags_taus.loc[(x.split("-")[0], x)])
# Give the pulse level a reader-friendly name, used for panel titles
plotDf.index = plotDf.index.rename(names=nice_pulse_name, level="TCR_Antigen_Density")
# The pulse concentration sets both the panel (col) and the colour (hue)
facet = sns.FacetGrid(
    data=plotDf.reset_index(), 
    hue=nice_pulse_name, 
    palette=agdens_palette, 
    hue_order=["1uM", "1nM"],
    col=nice_pulse_name, 
    col_order=["1uM", "1nM"],
)
facet.map(sns.barplot, "FC", "Peptide", 
         order=sorted_peps)
# Vertical dashed line at log2 FC = 0, i.e. no effect of the TCR antigen
for ax in facet.axes.flat:
    ax.axvline(0.0, ls="--", color="k", lw=1.0)
# Double the figure height, keeping the default width
facet.fig.set_size_inches(facet.fig.get_size_inches()[0], facet.fig.get_size_inches()[1]*2)
# Reformat the x tick labels of both panels with the project helper
change_log_ticks(facet.axes.flat[0], which="x")
change_log_ticks(facet.axes.flat[1], which="x")
#facet.fig.savefig("figures/extra_predictions/tcr_kinds_antagonism_barplots.pdf", 
#                  transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Plot the model curves: one panel per TCR line, log2 FC as a function of tau,
# with the measured peptides marked as dots on the curves
fig, axes = plt.subplots(3, 1, sharex=True)
axes = axes.flatten()
fig.set_size_inches(2.5, 2.25*3)

for i, tcr in enumerate(["OT1", "NYESO", "HHAT"]):
    # Reference line at log2 FC = 0, i.e. FC = 1
    axes[i].axhline(0.0, ls="--", color="grey", lw=1.0)
    # Plot the model curve
    # Pulse concentrations are iterated in reverse order of their appearance in the index
    for ag_dens in model_curves.index.get_level_values("TCR_Antigen_Density").unique()[::-1]:
        # Tumor lines for which a curve exists for this TCR and pulse concentration
        tum_vals = (model_curves.loc[(tcr, slice(None), ag_dens)].index
                    .remove_unused_levels().get_level_values("Tumor").unique().values)
        # First tumor line solid, second one dashed (at most two are supported)
        lstyles = ["-", "--"]
        for j, tum in enumerate(tum_vals):
            curve = model_curves.loc[(tcr, tum, ag_dens, tau_range)].values
            # Black for 1 uM, the "AgDens" perturbation colour otherwise
            clr = perturb_palette["None"] if ag_dens == "1uM" else perturb_palette["AgDens"]
            # A label is set here, but no legend is drawn in this cell
            axes[i].plot(tau_range, np.log2(curve), label=ag_dens, color=clr, lw=2.0, 
                        ls=lstyles[j])
            # Mark the experimental antigens on these curves
            peps = (model_preds_tcr_ags.loc[(tcr, tum)].index
                    .get_level_values("Peptide").unique())
            for pep in peps:
                # x: tau converted from the peptide's EC50; y: log2 of its predicted FC
                tau = tcr_lines_ags_taus.loc[(tcr, pep)]
                y = np.log2(model_preds_tcr_ags.loc[(tcr, tum, pep), ("FC", ag_dens)])
                axes[i].plot(tau, y, ls="none", ms=6, marker="o", mfc=clr, mec=clr)
    # Label this plot
    axes[i].set(ylabel=r"$FC_{\mathrm{TCR \rightarrow CAR}}$")
    change_log_ticks(axes[i], base=2, which="y")
    # Title: TCR line, followed by the tumor line(s) mapped to it
    axes[i].set_title(tcr + " – " + "/".join(tcr_to_tumor.get(tcr)))
    for side in ["top", "right"]:
        axes[i].spines[side].set_visible(False)
    
# The x axis is shared: label only the bottom panel
axes[-1].set_xlabel(r"TCR Antigen $\tau^T$ (s)")

fig.tight_layout(h_pad=2.0)
#fig.savefig("figures/extra_predictions/tcr_kinds_model_antagonism_curves.pdf", 
#                  transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Save the model curves, the extra peptide predictions, and the EC50s and taus of peptides
# The to_hdf calls below are commented out, so nothing is written unless they are re-enabled
plot_data_filename = "results/for_plots/tcr_kinds_universal_antagonism_model.h5"

#model_curves.to_hdf(plot_data_filename, key="model_curves")
#model_preds_tcr_ags.to_hdf(plot_data_filename, key="model_predictions_tcr_ags")
#tcr_lines_ags_taus.to_hdf(plot_data_filename, key="tcr_lines_antigen_taus")

# Predictions for neoantigens

Public data from Luksza, ..., Balachandran, 2022. 

Notice these EC50s have poor resolution (they screened a lot of them). 3 concentration points, linear-scale marker (% cells with some active marker). The WT peptides typically almost saturate the response curve; most peptides produce less response. 

Important bias: they did not measure full EC50 curves for all CMV-derived peptides (and only for CMV) with response below ~50 % at max. dose. This removes most potential antagonists and severely biases the distribution. **It is not fair to set these peptides to infinite EC50 as if they were null, because they do produce a lot of response**. So, we need to correct these missing EC50s, or drop CMV altogether. 

### Illustrating the bias



In [None]:
# Uncorrected MSKCC EC50s: keep only the molar EC50 column, renamed to ec50_M
df_mut2 = pd.read_hdf("data/dose_response/MSKCC_EC50Df.hdf", key="df")
df_mut2 = df_mut2.drop("EC50 (ug/mL)", axis=1).rename({"EC50 (M)":"ec50_M"}, axis=1)
# Raw dose response measurements
df_raw = pd.read_hdf("data/dose_response/MSKCC_rawDf.hdf")
# This bare expression has no effect, since it is not the last line of the cell
df_raw

# Flag entries whose EC50 is infinite
peps_with_inf = (df_mut2 == np.inf)
# Restrict the raw data to CMV and attach the flag to each row.
# NOTE(review): the assignment relies on pandas aligning the indexes of the
# raw data and of the EC50 table -- confirm that they are compatible
df_plot = df_raw.copy().xs("CMV", level="Antigen")
df_plot["not_measured_inf"] = peps_with_inf.xs("CMV", level="Antigen")

ec50_threshold_antagonists = 1e-3   # DETERMINE FROM FC PREDICTIONS?
# Despite its name, this holds the rows of df_mut2 with an EC50 below the
# threshold; no averaging is done in this cell
avg_max_response_antagonists = df_mut2[df_mut2["ec50_M"] < ec50_threshold_antagonists]

In [None]:
# Cumulative distribution of the CD137+ response at a dose of 100 ug/mL
# (presumably the maximal dose), split by whether the EC50 is infinite
rows_at_dose = df_plot["Dose (ug/mL)"] == 100.0
sns.ecdfplot(
    data=df_plot.loc[rows_at_dose].reset_index(),
    x="Response (CD137+ %)",
    hue="not_measured_inf",
)
plt.show()
plt.close()

### Fixing the bias and making model predictions
gp100 and neoantigens had no such bias so we can use them to infer the missing CMV EC50s. 
Sooraj correlated the response at max. dose with the EC50 for all fully measured peptides (across all TCRs and antigens). 

In [None]:
# Preview the table of corrected EC50s as stored on disk
pd.read_hdf("data/dose_response/correctedBalachandranEC50s.hdf")

In [None]:
# Load the corrected EC50 table and keep only the corrected values (identical
# to the original if provided, inferred if missing originally), in molar
# units under the column name ec50_M
df_mut_ec50s = (
    pd.read_hdf("data/dose_response/correctedBalachandranEC50s.hdf")
    .xs("Yes", level="Corrected")
    .drop("EC50 (ug/mL)", axis=1)
    .rename({"EC50 (M)":"ec50_M"}, axis=1)
)
#df_mut_ec50s = df_mut_ec50s.clip(upper=1e16)
df_mut_ec50s

## Reference point
They don't have N4 unfortunately. Let's assume CMV is something like N4. 

Now, a lot will depend on the choice of that reference: everything will be an antagonist if we choose that too low; nothing will be if we choose too high. So better associate CMV to something too high, so we certainly don't hyperinflate the number of antagonists we predict. 

In [None]:
# Reference point for the MSKCC peptides: tau of 10 s, associated with the
# smallest EC50 found among the CMV wild-type entries
ref_tau_cmv = 10.0
cmv_wt_rows = ("CMV", slice(None), "WT")
ref_ec50_cmv = df_mut_ec50s.loc[cmv_wt_rows, "ec50_M"].min()
print("Reference: EC50 = {} M, tau = {} s".format(ref_ec50_cmv, ref_tau_cmv))

In [None]:
def predict_antagonism_applicable_mut(x):
    """ Predict antagonism for an EC50 x (in M) on Nalm6 tumor cells.

    Uses the Nalm6 MHC and CAR antigen numbers and the CMV reference point.
    Returns a pd.Series with the prediction at a 1 uM pulse, then at 1 nM.
    """
    mut_kwargs = dict(
        ec50_ref=ref_ec50_cmv,
        tau_ref=ref_tau_cmv,
        max_mhc=df_carag_mhc.at["Nalm6", "MHC"],
        car_l=df_carag_mhc.at["Nalm6", "CAR_Ag"],
    )
    # Pulse concentrations are given in uM
    fc_1um = predict_antagonism(x, 1e0, **mut_kwargs)
    fc_1nm = predict_antagonism(x, 1e-3, **mut_kwargs)
    return pd.Series((fc_1um, fc_1nm))

In [None]:
# Predict the FC of every MSKCC peptide on Nalm6, at 1 uM and 1 nM pulses
df_antagonism_mut = df_mut_ec50s["ec50_M"].apply(func=predict_antagonism_applicable_mut)
# Label the two output columns by quantity and pulse concentration
fc_columns = pd.MultiIndex.from_product(
    [["FC"], ["1uM", "1nM"]], names=["Quantity", "TCR_Antigen_Density"]
)
df_antagonism_mut.columns = fc_columns
# Discard peptides for which a prediction is NaN
df_antagonism_mut = df_antagonism_mut.dropna()

In [None]:
# Display the FC predictions for the MSKCC peptides on Nalm6
df_antagonism_mut

In [None]:
# Plotting the results: histograms of log2 FC, one panel per pulse concentration
agdens_palette = {"1uM": perturb_palette["None"], "1nM": perturb_palette["AgDens"]}
nice_pulse_name = "TCR Ag pulse"
# log2 scale, so 0 corresponds to FC = 1
plotDf = np.log2(df_antagonism_mut)
# Build a combined "Antigen-Peptide" label and append it as an index level
new_lvl = "Antigen & mutation"
plotDf[("Index", new_lvl)] = (plotDf.index.get_level_values("Antigen") 
                                           + "-" + plotDf.index.get_level_values("Peptide"))
plotDf = plotDf.set_index(("Index", new_lvl), append=True)
plotDf.index = plotDf.index.set_names(names=new_lvl, level=-1)
# Mean of log2 FC per combined label, then long format sorted by FC
plotDf = (plotDf.groupby(new_lvl).mean()  # Average across receptors
            .stack("TCR_Antigen_Density")
            .sort_values("FC")
         )
plotDf.index = plotDf.index.rename(names=nice_pulse_name, level="TCR_Antigen_Density")

# The pulse concentration sets both the panel (col) and the colour (hue)
facet = sns.FacetGrid(
    data=plotDf.reset_index(), 
    hue=nice_pulse_name, 
    palette=agdens_palette, 
    hue_order=["1uM", "1nM"],
    col=nice_pulse_name, 
    col_order=["1uM", "1nM"],
)
facet.map(sns.histplot, "FC", bins=20)
#for ax in facet.axes.flat:
#    ax.axvline(0.0, ls="--", color="k", lw=1.0)
# Double the figure height, keeping the default width
facet.fig.set_size_inches(facet.fig.get_size_inches()[0], facet.fig.get_size_inches()[1]*2)
change_log_ticks(facet.axes.flat[0], which="x")
change_log_ticks(facet.axes.flat[1], which="x")

# Annotate with the fraction of antigens below FC = 1.0
# Column i of df_antagonism_mut is matched to panel i by position: both are
# ordered 1uM, 1nM.
# NOTE(review): the fractions are computed on df_antagonism_mut, before the
# averaging per "Antigen & mutation" that feeds the histogram, so they may
# not match the bars exactly -- confirm that this is intended
for i in range(2):
    # Extend the x axis slightly to the left
    xlims =  facet.axes.flat[i].get_xlim()
    facet.axes.flat[i].set_xlim(xlims[0]-0.1, xlims[1])
    frac_below = np.sum(df_antagonism_mut.iloc[:, i] < 1.0) / df_antagonism_mut.iloc[:, i].count()
    # Dashed line at log2 FC = 0, with the percentages written on either side
    facet.axes.flat[i].axvline(0.0, ls="--", color="grey", lw=1.0)
    ymax = facet.axes.flat[i].get_ylim()[1]
    facet.axes.flat[i].annotate("{:.1f} %".format(100.0*frac_below), xy=(-0.075, ymax*.98), 
                                va="top", ha="right")
    facet.axes.flat[i].annotate("{:.1f} %".format(100.0*(1.0 - frac_below)), xy=(0.075, ymax*.98), 
                                va="top", ha="left")

facet.axes.flat[0].set_ylabel("Number of peptides (#)")
facet.fig.tight_layout()
#facet.fig.savefig("figures/extra_predictions/mskcc_antagonism_fc_distributions.pdf", 
#                  transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Imagine MSKCC antigens on PC9 tumors
The amount of antagonism predicted will be much higher because the CAR antigen level is lower there (Her2 on PC9, instead of CD19 on Nalm6). 

In [None]:
def predict_antagonism_applicable_mut_pc9(x):
    """ Predict antagonism for an EC50 x (in M) on PC9 tumor cells.

    Uses the PC9 MHC and CAR antigen numbers and the CMV reference point.
    Returns a pd.Series with the prediction at a 1 uM pulse, then at 1 nM.
    """
    mut_kwargs = dict(
        ec50_ref=ref_ec50_cmv,
        tau_ref=ref_tau_cmv,
        max_mhc=df_carag_mhc.at["PC9", "MHC"],
        car_l=df_carag_mhc.at["PC9", "CAR_Ag"],
    )
    # Pulse concentrations are given in uM
    fc_1um = predict_antagonism(x, 1e0, **mut_kwargs)
    fc_1nm = predict_antagonism(x, 1e-3, **mut_kwargs)
    return pd.Series((fc_1um, fc_1nm))

In [None]:
# Predict the FC of every MSKCC peptide on PC9, at 1 uM and 1 nM pulses
df_antagonism_mut_pc9 = df_mut_ec50s["ec50_M"].apply(func=predict_antagonism_applicable_mut_pc9)
# Label the two output columns by quantity and pulse concentration
fc_columns_pc9 = pd.MultiIndex.from_product(
    [["FC"], ["1uM", "1nM"]], names=["Quantity", "TCR_Antigen_Density"]
)
df_antagonism_mut_pc9.columns = fc_columns_pc9
# Discard peptides for which a prediction is NaN
df_antagonism_mut_pc9 = df_antagonism_mut_pc9.dropna()



# Plotting the results for PC9: histograms of log2 FC, one panel per pulse concentration
agdens_palette = {"1uM": perturb_palette["None"], "1nM": perturb_palette["AgDens"]}
nice_pulse_name = "TCR Ag pulse"
# log2 scale, so 0 corresponds to FC = 1
plotDf = np.log2(df_antagonism_mut_pc9)
# Build a combined "Antigen-Peptide" label and append it as an index level
new_lvl = "Antigen & mutation"
plotDf[("Index", new_lvl)] = (plotDf.index.get_level_values("Antigen") 
                                           + "-" + plotDf.index.get_level_values("Peptide"))
plotDf = plotDf.set_index(("Index", new_lvl), append=True)
plotDf.index = plotDf.index.rename(names=new_lvl, level=-1)
# Mean of log2 FC per combined label, then long format sorted by FC
plotDf = (plotDf.groupby(new_lvl).mean()  # Average across receptors
            .stack("TCR_Antigen_Density")
            .sort_values("FC")
         )
plotDf.index = plotDf.index.rename(names=nice_pulse_name, level="TCR_Antigen_Density")

# The pulse concentration sets both the panel (col) and the colour (hue)
facet = sns.FacetGrid(
    data=plotDf.reset_index(), 
    hue=nice_pulse_name, 
    palette=agdens_palette, 
    hue_order=["1uM", "1nM"],
    col=nice_pulse_name, 
    col_order=["1uM", "1nM"],
)
facet.map(sns.histplot, "FC", bins=20)
#for ax in facet.axes.flat:
#    ax.axvline(0.0, ls="--", color="k", lw=1.0)
# Double the figure height, keeping the default width
facet.fig.set_size_inches(facet.fig.get_size_inches()[0], facet.fig.get_size_inches()[1]*2)
change_log_ticks(facet.axes.flat[0], which="x")
change_log_ticks(facet.axes.flat[1], which="x")

# Annotate with the fraction of antigens below FC = 1.0
# Column i of df_antagonism_mut_pc9 is matched to panel i by position: both
# are ordered 1uM, 1nM.
# NOTE(review): the fractions are computed on df_antagonism_mut_pc9, before
# the averaging per "Antigen & mutation" that feeds the histogram, so they
# may not match the bars exactly -- confirm that this is intended
for i in range(2):
    # Extend the x axis slightly to the left
    xlims =  facet.axes.flat[i].get_xlim()
    facet.axes.flat[i].set_xlim(xlims[0]-0.1, xlims[1])
    frac_below = np.sum(df_antagonism_mut_pc9.iloc[:, i] < 1.0) / df_antagonism_mut_pc9.iloc[:, i].count()
    # Dashed line at log2 FC = 0, with the percentages written on either side
    facet.axes.flat[i].axvline(0.0, ls="--", color="grey", lw=1.0)
    ymax = facet.axes.flat[i].get_ylim()[1]
    facet.axes.flat[i].annotate("{:.1f} %".format(100.0*frac_below), xy=(-0.075, ymax*.98), 
                                va="top", ha="right")
    facet.axes.flat[i].annotate("{:.1f} %".format(100.0*(1.0 - frac_below)), xy=(0.075, ymax*.98), 
                                va="top", ha="left")
facet.axes.flat[0].set_ylabel("Number of peptides (#)")

facet.fig.tight_layout()
#facet.fig.savefig("figures/extra_predictions/mskcc_antagonism_fc_distributions_pc9.pdf", 
#                  transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Save the predictions
#df_antagonism_mut.to_hdf("results/for_plots/mskcc_antagonism_fc_predictions_corrected.h5", key="Nalm6")
#df_antagonism_mut_pc9.to_hdf("results/for_plots/mskcc_antagonism_fc_predictions_corrected.h5", key="PC9")

## How does antagonist fraction and amplitude change with pulse concentration?
I can easily compute predictions for the MSKCC antigens at any pulse concentration I want, and compute the median antagonism and fraction of antagonists for each concentration. 

But do I care? 

In [None]:
def predict_antagonism_applicable_manyconc(x, lrange=None):
    """ Predict antagonism for an EC50 x (in M) at several pulse concentrations.

    Uses the Nalm6 MHC and CAR antigen numbers and the CMV reference point.

    Args:
        x (float): TCR antigen EC50, in M
        lrange (iterable of floats): pulse concentrations, in uM.
            Default: 21 log-spaced values from 1e-5 uM (10 pM) to 1 uM.

    Returns:
        pd.Series with one prediction per pulse concentration, in order.
    """
    if lrange is None:
        lrange = np.geomspace(1e-5, 1e0, 21)  # From 10 pM to 1 uM
    mut_kwargs = dict(
        ec50_ref=ref_ec50_cmv,
        tau_ref=ref_tau_cmv,
        max_mhc=df_carag_mhc.at["Nalm6", "MHC"],
        car_l=df_carag_mhc.at["Nalm6", "CAR_Ag"],
    )
    return pd.Series([predict_antagonism(x, conc, **mut_kwargs) for conc in lrange])

def median_subgroup(ser, thresh):
    """ Median of the entries of ser that are strictly below thresh.

    The result is NaN when no entry is below the threshold.
    """
    is_below = ser < thresh
    return ser.loc[is_below].median()

In [None]:
# Pulse concentrations in uM: 21 log-spaced values from 10 pM to 1 uM
conc_range = np.geomspace(1e-5, 1e0, 21)
# One row per peptide, one column per pulse concentration
df_antagonism_mut_vs_l = df_mut_ec50s["ec50_M"].apply(
    func=predict_antagonism_applicable_manyconc, lrange=conc_range
)
df_antagonism_mut_vs_l.columns = pd.Index(conc_range, name="Pulse_concentration")
df_antagonism_mut_vs_l = df_antagonism_mut_vs_l.dropna()

# At each concentration: fraction of peptides with FC < 1 (antagonists),
# and median FC among those antagonists
n_antagonists = (df_antagonism_mut_vs_l < 1.0).sum(axis=0)
n_peptides = df_antagonism_mut_vs_l.count(axis=0)
ser_frac_mut = n_antagonists / n_peptides
ser_med_antag_fc = df_antagonism_mut_vs_l.apply(median_subgroup, thresh=1.0, axis=0)

In [None]:
# Top: fraction of antagonists; bottom: median FC of the antagonists,
# both versus pulse concentration on a shared logarithmic x axis
fig, axes = plt.subplots(2, 1, sharex=True)
axes = axes.flatten()

axes[0].plot(conc_range, ser_frac_mut.values)
axes[0].set_xscale("log")
axes[0].set_ylabel("Fraction of antagonists")

axes[1].plot(conc_range, ser_med_antag_fc.values)
axes[1].set_xscale("log")
axes[1].set_xlabel(r"Pulse concentration ($\mu$M)")
axes[1].set_ylabel("Median FC of antagonists")
# Reference line at FC = 1, i.e. no antagonism
axes[1].axhline(1.0, ls="--", color="grey", lw=1.0)
plt.show()
plt.close()

In [None]:
# Stricter definition: a peptide counts as an antagonist only if FC < antag_thresh
antag_thresh = 0.85
# At each concentration: fraction of antagonists, and their median FC
n_below_thresh = (df_antagonism_mut_vs_l < antag_thresh).sum(axis=0)
ser_frac_mut_thresh = n_below_thresh / df_antagonism_mut_vs_l.count(axis=0)
ser_med_antag_fc_thresh = df_antagonism_mut_vs_l.apply(
    median_subgroup, thresh=antag_thresh, axis=0
)

# Plot: fraction on top, median FC below, sharing a logarithmic x axis
fig, axes = plt.subplots(2, 1, sharex=True)
axes = axes.flatten()

axes[0].plot(conc_range, ser_frac_mut_thresh.values)
axes[0].set_xscale("log")
axes[0].set_ylabel("Fraction of antagonists")

axes[1].plot(conc_range, ser_med_antag_fc_thresh.values)
axes[1].set_xscale("log")
axes[1].set_xlabel(r"Pulse concentration ($\mu$M)")
axes[1].set_ylabel("Median FC of antagonists")
# Reference line at FC = 1, i.e. no antagonism
axes[1].axhline(1.0, ls="--", color="grey", lw=1.0)
plt.show()
plt.close()