# Human TCR experiments
They need some special data processing, because the CBA beads used are different and produce extra noise above the lower limit of detection. We need to remove this noise to ensure that non-responding peptides give null trajectories in latent space. 

We need to set all IL-17A to zero first, because it's mostly just noise for all peptides. 

In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import os, sys
# Put the parent directory at the front of the module search path so that
# packages located there can be imported by the cells below
# (presumably this is where the ltspcyt package lives -- TODO confirm).
main_dir_path = os.path.abspath('../')
if main_dir_path not in sys.path:
    sys.path.insert(0, main_dir_path)

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

import json
from time import perf_counter

In [None]:
%matplotlib inline
# Plot parameters for Science: small panels with small fonts
plt.rcParams.update({
    "figure.figsize": (2.5, 2.),
    "axes.labelsize": 8.,
    "legend.fontsize": 8.,
    "axes.labelpad": 0.5,
    "xtick.labelsize": 7.,
    "ytick.labelsize": 7.,
    "legend.title_fontsize": 8.,
    "axes.titlesize": 8.,
    "font.size": 8.,
    # Higher resolution, for larger display of small graphs in the notebook
    "figure.dpi": 120,
})

# Color map for antigens, read from the JSON file shipped with the data
with open(os.path.join(main_dir_path, "data", "misc", "human_tcr_peptides_colors.json"), "r") as h:
    humanPepsPalette = json.load(h)

# Add (or override) colors for the five conditions set explicitly here
humanPepsPalette.update({
    "None": mpl.colors.to_hex((0, 0, 0, 1), keep_alpha=True),  # opaque black
    "ESO": mpl.colors.to_hex((0.8, 0.8, 0.8, 1.0), keep_alpha=True),  # light grey
    "PMA_Ionomycin": mpl.colors.to_hex("xkcd:crimson"),
    "aCD3_28": mpl.colors.to_hex("xkcd:bright blue"),
    "Null": mpl.colors.to_hex((0.7, 0.7, 0.7, 1.0), keep_alpha=True),  # grey, darker than ESO
})

# Display the full palette as a visual check
sns.palplot(humanPepsPalette.values())
plt.show()
plt.close()

# CD69 functional EC50 assay
Dose response curves (supplementary figure S30 in the antigen encoding paper). 

<img src="../figures/supplemental-hTCR_EC50-panelD-new.png" alt="Dose response" width="500"/>

In [None]:
# Functional EC50 of each condition, stored as a {condition: EC50} JSON mapping
ec50_json_path = os.path.join(main_dir_path, "data", "misc", "functional_EC50s_hTCR.json")
with open(ec50_json_path, "r") as handle:
    ser_ec50 = pd.Series(json.load(handle), name="EC50")

# log10 fold-change relative to the smallest EC50 in the file (computed before
# dropping anything); the aCD3_aCD28 entry is then excluded.
ser_ec50_norm = np.log10(ser_ec50.div(ser_ec50.min())).drop("aCD3_aCD28")
print(ser_ec50_norm)

# Visualize cytokine time courses

In [None]:
# Load the final cytokine concentration table of the hTCR experiment
raw_hdf_path = os.path.join(main_dir_path, "data", "final",
                            "cytokineConcentrationPickleFile-20210520-hTCR_2-final.hdf")
df_raw = pd.read_hdf(raw_hdf_path)

# Keep only the rows of the five cytokines of interest
keep_cytos = ["IFNg", "IL-17A", "IL-2", "IL-6", "TNFa"]
rows_to_keep = df_raw.index.isin(keep_cytos, level="Cytokine")
df_raw = df_raw.loc[rows_to_keep]

# Move cytokines to the columns and time points to the rows
df_raw = df_raw.unstack("Cytokine").stack("Time")

# Add an outer column level, named "Feature", with the single label "concentration"
df_raw = pd.concat({"concentration": df_raw}, names=['Feature'], axis=1)

In [None]:
# Only show cytokines of interest here (IFNg, IL-17A, IL-2) where special processing is involved.
df_shown = df_raw.loc[:, df_raw.columns.isin(["IFNg", "IL-17A", "IL-2"], level="Cytokine")]
g = sns.relplot(data=df_shown.stack("Cytokine").reset_index(),
                x="Time", y="concentration",
                hue="Peptide", col="TCellNumber", size="Concentration", row="Cytokine",
                style="Donor", kind="line",
                height=1.5, sizes=[1.5], palette=humanPepsPalette)

# Default facet titles are split on "|" below: the first part carries the
# row variable (cytokine) as "name = value", the second the column variable.
for i, axes_row in enumerate(g.axes):
    # Use the cytokine name of this row as the y label of the leftmost panel
    row_part = axes_row[0].get_title().split("|")[0]
    axes_row[0].set_ylabel("[" + row_part.split("=")[1].strip() + "] (pM)")  # Cytokine name
    for ax in axes_row:
        ax.set_yscale("log")
        title_parts = ax.get_title().split("|")
        # Keep the column part of the title on the top row only
        ax.set_title(title_parts[1] if i == 0 else "")

# x labels on the bottom row
for ax in g.axes[-1]:
    ax.set_xlabel("Time (h)")

g.fig.savefig(os.path.join(main_dir_path, "figures", "supp", "htcr_raw_cytokine_data.pdf"),
              transparent=True, bbox_inches="tight", bbox_extra_artists=(g.legend,))
plt.show()
plt.close()

# Filtering background  noise with a K-S test on IFN-$\gamma$
The unusual background noise in this dataset can be noticed on the previous plots.  

To filter it out when necessary, we group time series for each peptide according to some specified levels (e.g. TCellNumber and Donor, in addition to Peptide of course), and compare each group to the group with the Null peptide condition. More precisely, compare their respective IFN-$\gamma$ distributions with a Kolmogorov-Smirnov test; if the group is found to have smaller or equal IFN-$\gamma$ values than the Null peptide, set a choice of cytokines (e.g. IL-2) to zero. 

Note that we set IL-17A to zero for *all* time series, since this cytokine is basically just noise for all peptides with human cells. This choice is justified by the signal-to-noise ratio of the measurements; see the last section of this notebook for where that calculation is carried out. 

The details of this filtering can be found in the function process_file_filter in the ltspcyt.scripts.process_raw_data module. 

Lastly, note that similar results to the filtering based on IFN-$\gamma$ can be obtained by increasing the lower LOD to 7 % of the detectable fluorescence range, instead of the default value of 3 %. 

In [None]:
from ltspcyt.scripts.process_raw_data import process_file_filter

In [None]:
# Raw (unprocessed) hTCR cytokine concentration file
human_folder = os.path.join(main_dir_path, "data", "initial")
human_filename = "cytokineConcentrationPickleFile-20210520-hTCR_2.hdf"

# Options passed to process_file_filter. Following the description of the
# filter given in this notebook: IFNg is the cytokine compared against the
# "Null" peptide, IL-2 is the cytokine set to zero for filtered groups, groups
# are split by TCellNumber, and IL-17A is removed everywhere. The exact meaning
# of each keyword is defined in ltspcyt.scripts.process_raw_data (not shown here).
filter_options = dict(
    take_log=True, rescale_max=False, smooth_size=3, rtol_spline=0.5,
    do_filter_null=True, null_reference="Null",
    choice_remove_cyto=["IL-2"], choice_filter_cyto="IFNg", filter_pval=0.5,
    split_filter_levels=["TCellNumber"], remove_il17=True, return_list_filt=True,
)

# Five outputs are returned; `filtered` is the list of filtered conditions
# used in the next plot.
data, data_log, data_smooth, df_spl, filtered = process_file_filter(
    human_folder, human_filename, **filter_options)

### Plot time series that were filtered out
As a check that we did not do anything silly here, we plot the filtered time series in color against a light grey background of all other trajectories. 

In [None]:
# Split the data into the time series that were filtered out and all others.
# The first element of each entry of `filtered` is used as a row label once
# Donor and Concentration have been moved to the columns.
filtered_conds = [entry[0] for entry in filtered]
data_filtered = (data.unstack(["Donor", "Concentration"]).loc[filtered_conds]
                 .stack(["Donor", "Concentration"]))
# Mask (set to NaN) every entry that belongs to a filtered time series
data_other = data[~data.isin(data_filtered)]

# One panel per T cell number, arranged on two rows. The number of columns is
# rounded up so an odd number of panels still fits, and squeeze=False keeps
# `axes` two-dimensional even when there is a single column.
tcellnums = data.index.unique("TCellNumber")
n_cols = max(1, -(-len(tcellnums) // 2))
fig, axes = plt.subplots(2, n_cols, sharey=True, sharex=True, squeeze=False)
fig.set_size_inches(5.5, 4.)

all_handles, all_labels = [], []      # "Peptide" title and peptide entries
final_handles, final_labels = [], []  # "Donor" title and line style entries
kept_traj_color = "xkcd:light grey"
for i, tcn in enumerate(tcellnums):
    ax = axes.flat[i]
    other_tcn = data_other.xs(tcn, level="TCellNumber")
    filt_tcn = data_filtered.xs(tcn, level="TCellNumber")

    # Background: trajectories that were kept, all in the same light grey.
    # After stacking Time and resetting the index, the values are in column 0.
    sns.lineplot(data=other_tcn.xs("IL-2", level="Cytokine", axis=1).stack("Time").reset_index(),
                 x="Time", y=0, hue=None, color=kept_traj_color, size="Peptide",
                 sizes=[1.5]*len(other_tcn.index.unique("Peptide")),
                 style="Donor", ax=ax, legend=False)
    # Foreground: filtered trajectories, colored by peptide. The legend is
    # drawn only to harvest its entries; it is removed right after.
    sns.lineplot(data=filt_tcn.xs("IL-2", level="Cytokine", axis=1).stack("Time").reset_index(),
                 x="Time", y=0, hue="Peptide", size="Peptide",
                 sizes=[1.5]*len(filt_tcn.index.unique("Peptide")),
                 style="Donor", ax=ax, legend=True, palette=humanPepsPalette)

    # Get potentially new legend handles and labels
    handles, labels = ax.get_legend_handles_labels()
    if i == 0:
        # Split the entries at the "Donor" title rather than assuming a fixed
        # number of donors; fall back on the last four entries if no such
        # title is found.
        if "Donor" in labels:
            donor_start = labels.index("Donor")
        else:
            donor_start = max(len(labels) - 4, 0)
        all_handles, all_labels = handles[:donor_start], labels[:donor_start]
        final_handles, final_labels = handles[donor_start:], labels[donor_start:]
    else:
        for lbl, hdl in zip(labels, handles):
            if lbl == "Peptide":  # No need to include the title again
                continue
            if lbl == "Donor":  # Reached the end of peptides
                break
            if lbl not in all_labels:  # New peptide found
                all_labels.append(lbl)
                all_handles.append(hdl)

    # Remove the default legend of this panel, if one was drawn
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()
    # Add title
    ax.set_title(tcn + " T cells", y=0.82, va="bottom", fontsize=10)

# Add common legend. Add a color to the list of peptides, for kept trajectories
all_handles.append(mpl.lines.Line2D([0], [0], lw=all_handles[-1].get_linewidth(),
                    color=kept_traj_color, label="Not filtered"))
all_labels.append("Not filtered")
# Put back the final labels ("Donor" and the line styles)
all_handles += final_handles
all_labels += final_labels
leg = fig.legend(all_handles, all_labels, frameon=False, loc="upper left", bbox_to_anchor=(0.8, 1.))

# Change y scale to log
for ax in axes.flat:
    ax.set_yscale("log")
for i in range(axes.shape[0]):
    axes[i, 0].set_ylabel("[IL-2] (pM)")
for j in range(axes.shape[1]):
    axes[-1, j].set_xlabel("Time (h)")
fig.tight_layout()
fig.subplots_adjust(right=0.8)  # Make room for legend
fig.savefig(os.path.join(main_dir_path, "figures", "supp",
                         "htcr_il-2_trajectories_filtered_by_ifng-kolmogorov-smirnov.pdf"),
            transparent=True, bbox_inches="tight", bbox_extra_artists=(leg,))
plt.show()
plt.close()

### Remark
A couple of time series seem to stand out in 100k T cells, but they turn out to be the two conditions that must have the least response: peptide 5Y, which produced fewer CD69+ T cells than any other peptide in our antigenicity assay (supplementary figure), and "None", which is T cells alone, without even tumor cells; it was just another control we had in this experiment. Clearly, these time series are just background and not a sign of any kind of activation. This is *very* reassuring. 

# Latent space and ballistic parameters after filtering
Run the entire processing pipeline, including the noise filtering above, which ensures that non-responding peptides come out with near-zero latent space trajectories and near-zero $v_0$ values, except 9V, 4A, 8S, and maybe 8K (which are supposed to respond). Otherwise, non-negligible artifacts can be seen in latent space for those time series which are really just noise and should be zero. 

### Project to latent space and fit model

In [None]:
from ltspcyt.scripts.adapt_dataframes import set_standard_order, sort_SI_column
from ltspcyt.scripts.latent_space import import_mutant_output
from ltspcyt.scripts.neural_network import import_WT_output

# Curve fitting functions
from ltspcyt.scripts.sigmoid_ballistic import return_param_and_fitted_latentspace_dfs

In [None]:
# Peptides present in the processed data, in order of first appearance
peptides = df_spl.index.unique("Peptide").to_list()
concentrations = ["1uM", "100nM", "10nM", "1nM"]

# Names of the fitted parameters for each available model
fit_vars = {
    "Constant velocity": ["v0", "t0", "theta", "vt"],
    "Constant force": ["F", "t0", "theta", "vt"],
    "Sigmoid": ["a0", "t0", "theta", "v1", "gamma"],
    "Sigmoid_freealpha": ["a0", "t0", "theta", "v1", "alpha", "beta"],
}

In [None]:
# Files saved alongside the trained network: the minimum and maximum tables
# used to rescale the data, and the matrix used to project it onto two nodes.
trained_dir = os.path.join(main_dir_path, "data", "trained-networks")
minmaxfile = os.path.join(trained_dir, "min_max-thomasRecommendedTraining.hdf")
df_min = pd.read_hdf(minmaxfile, key="df_min")
df_max = pd.read_hdf(minmaxfile, key="df_max")
projmat = np.load(os.path.join(trained_dir, "mlp_input_weights-thomasRecommendedTraining.npy"))
print(df_max)

In [None]:
# Cytokines listed in the min/max tables, in the order stored there
cytokines = df_min.index.get_level_values("Cytokine")
# Integer time points 1, 2, ..., 72 selected for the projection
times = np.arange(1, 72 + 1)

In [None]:
# Select the "integral" feature of the chosen cytokines and time points,
# with one row per time point
df_wide = df_spl.unstack("Time")
df_dat = df_wide.loc[:, ("integral", cytokines, times)].stack("Time")

# Min-max rescaling with the tables loaded from the trained-network folder
min_max_range = df_max - df_min
df_dat = (df_dat - df_min) / min_max_range

# Project onto the two latent space nodes
df_proj = pd.DataFrame(np.dot(df_dat, projmat), index=df_dat.index,
                       columns=["Node 1", "Node 2"])

In [None]:
# Fitting
fit = "Constant velocity"
regul_rate = 1.0
name_specs = "{}20_reg{}".format(fit, str(round(regul_rate, 2)).replace(".", ""))

start_time = perf_counter()

# Add an outer "Data" index level labelling this dataset. The guard makes the
# cell safe to re-run: without it, every execution nests one more level.
if "Data" not in df_proj.index.names:
    df_proj = pd.concat({"hTCR": df_proj}, names=["Data"])
ret = return_param_and_fitted_latentspace_dfs(df_proj, fit, reg_rate=regul_rate)
df_params, df_compare, df_hess, df_v2v1 = ret

# Report the elapsed time from the end time stamp taken right after the fit
end_t = perf_counter()
print("Time to fit: ", end_t - start_time)
del start_time

# Number of parameters of the chosen model
nparameters = len(fit_vars[fit])
print(df_hess.median())

# Plot the $v_0$ parameter versus EC50
Hope that non-responding peptides are close to zero now. 

In [None]:
%matplotlib inline

In [None]:
# Peptides for which a normalized EC50 is available
peps_to_plot = ser_ec50_norm.index.to_list()

fig, ax = plt.subplots()
fig.set_size_inches(6, 4.)
for pep in peps_to_plot:
    # All fitted v0 values of this peptide, one point per fitted condition
    yvals = df_params.xs(pep, level="Peptide", axis=0).loc[:, "v0"]
    # Same EC50 (x coordinate) repeated for every v0 value of the peptide
    ec50 = [ser_ec50[pep]] * len(yvals)
    ax.plot(ec50, yvals, ls="none", ms=6, marker="o", label=pep)
ax.legend()
ax.set(xscale="log", xlabel=r"EC$_{50}$ (Mol)", ylabel=r"$v_0$ (a. u.)")
plt.show()
plt.close()

# Latent spaces

In [None]:
# One panel per peptide showing the latent space trajectories (Node 2 versus
# Node 1); sort=False keeps the points in the order of the rows instead of
# sorting them along x.
df_plot = df_proj.reset_index()
latent_plot_options = dict(
    x="Node 1", y="Node 2",
    hue="Peptide", size="TCellNumber", style="Donor",
    col="Peptide", col_wrap=5, height=2.,
    kind="line", sort=False,
)
sns.relplot(data=df_plot, **latent_plot_options)

In [None]:
# Export parameter fits for further plotting v0 vs EC50 (main text figure 4)
params_hdf_path = os.path.join(main_dir_path, "results", "fits",
                               "hTCR_constant_velocity_parameter_fits.hdf")
df_params.to_hdf(params_hdf_path, key="df")

# Signal-to-noise ratio of cytokine measurements
The goal is to show how IL-17A is only noise and show why it was set to zero during the processing. 
We show these calculations in the notebook ``cytokines_distribution_noise.ipynb``, where we already carry out similar calculations for the mouse data. 