# Figure 2: new odor recognition performance tests
This version uses a six-odor turbulent background.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import os, colorsys, json

from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [None]:
# Resources: plot data and shared plotting parameters both sit under ../results
data_folder, params_folder = (
    os.path.join("..", "results", subdir) for subdir in ("for_plots", "common_params")
)
# Folder referenced by the (currently commented-out) savefig calls
panels_folder = "panels/"

# Aesthetic parameters

In [None]:
def _load_params_json(file_name):
    """Parse and return the content of one JSON file located in ``params_folder``."""
    with open(os.path.join(params_folder, file_name), "r") as f:
        return json.load(f)


# rcParams shared by the figures
new_rcParams = _load_params_json("olfaction_rcparams.json")
plt.rcParams.update(new_rcParams)

# Background colors: one file holding three entries
all_back_colors = _load_params_json("back_colors.json")
back_color, back_color_samples, back_palette = (
    all_back_colors[key] for key in ("back_color", "back_color_samples", "back_palette")
)

# ORN colors
orn_colors = _load_params_json("orn_colors.json")

# Inhibitory neuron colors, converted to arrays
neuron_colors = np.asarray(_load_params_json("inhibitory_neuron_two_colors.json"))
neuron_colors_full = np.asarray(_load_params_json("inhibitory_neuron_full_colors.json"))

# Per-model colors and display names (both indexed by model key further down)
model_colors = _load_params_json("model_colors.json")
model_nice_names = _load_params_json("model_nice_names.json")

In [None]:
# Read the network dimensions from the sample simulation. The archive is opened
# once and closed on exit (it was previously opened twice and never closed).
with np.load(os.path.join(data_folder, 
                    "sample_turbulent_ibcm_simulation.npz")) as sample_sim:
    # Size of axis 1 of "cbars_gamma"
    n_neu = sample_sim["cbars_gamma"].shape[1]
    # "back_vecs" is two-dimensional: (n_components, n_orn)
    n_components, n_orn = sample_sim["back_vecs"].shape

In [None]:
# Extra aesthetic parameters for this figure

# Tighter legends: each spacing listed below is applied at 75 % of its value
plt.rcParams["patch.linewidth"] = 0.75
legend_rc = {"labelspacing":0.5, "handlelength":2.0, "handleheight":0.7, 
             "handletextpad":0.8, "borderaxespad":0.5, "columnspacing":2.0}
plt.rcParams.update({"legend." + name: 0.75 * value for name, value in legend_rc.items()})

# Highlight color and dash patterns; neuron_styles has one more pattern than linestyles
new_color = "r"
linestyles = ["-", "--", ":", (0, (5, 1, 2, 1)), "-."]
neuron_styles = [*linestyles, (0, (1, 2, 1, 2))]

# Panel A: habituation narrow long trace
Part of the diagram

In [None]:
# Load the sample turbulent IBCM simulation; the norm of `sser` is what panel A plots
example_file = os.path.join(data_folder, "sample_turbulent_ibcm_simulation.npz")
ex = np.load(example_file)
sser_example = ex["sser"]
# "tser_range" stores the arguments of np.arange that rebuild the time axis (in steps)
tser_example = np.arange(*ex["tser_range"])
dt_u = 10.0  # ms

In [None]:
# Panel A: long, narrow trace of the activity norm with schematic test pulses at the end
fig, ax = plt.subplots()
fig.set_size_inches(100.0 / 25.4, 18.0 / 25.4)  # 100 mm x 18 mm, expressed in inches

# Single conversion factor from simulation steps to minutes (dt_u is in ms)
steps_to_min = dt_u / 1000 / 60

snorm_ser = np.sqrt(np.sum(sser_example**2, axis=1))
ymax = snorm_ser.max()
ax.plot(tser_example * steps_to_min, snorm_ser, color=back_color, lw=0.5)

# Ten schematic test pulses (triangles) spread over the last 2000*10 steps.
# Their heights carry a random jitter; a dedicated seeded generator keeps the
# panel identical from one run of the notebook to the next.
test_rng = np.random.default_rng(20)
last_times = np.linspace(tser_example[-1] - 2000*10, tser_example[-1], 10)
for t in last_times:
    ax.plot(np.asarray([t-20, t, t+20]) * steps_to_min, 
            np.asarray([0.0, ymax/3 + 0.2*test_rng.uniform(), 0.0]), 
            color="r", lw=0.85, alpha=0.8)
tests_start_min = last_times[0] * steps_to_min  # time of the first test pulse
tests_label_min = last_times[4] * steps_to_min  # label is centered on the fifth pulse

# Text labels and the two arrows marking the extent of the habituation period
arrow_style = {"color":"k", "width":0.1, "headwidth":2.0, "headlength":2.0}
ax.annotate("Habituation period", xy=(28.0, ymax), ha="center", va="center", fontsize=6)
ax.annotate("Tests", xy=(tests_label_min, ymax), 
            color="r", ha="center", va="center", fontsize=6)
ax.annotate("", xy=(1.0, ymax), xytext=(17.0, ymax), arrowprops=dict(arrow_style))
ax.annotate("", xy=(tests_start_min - 2, ymax), xytext=(40.0, ymax), 
            arrowprops=dict(arrow_style))

# Shade the habituation period, stopping shortly before the first test pulse
ax.fill_between([0, tests_start_min - 1.5], [0, 0], [ymax-0.4, ymax-0.4], 
               color=back_color_samples, alpha=0.3)
ax.set_ylabel("PN\nactivity\nnorm", fontsize=6)
#lbl = ax.set_xlabel("Time (min)", fontsize=6, labelpad=-6, ha="left", x=1.0)
lbl = ax.set_xlabel("Time (min)", fontsize=6)
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "habituation_time_axis.pdf"), 
#           transparent=True, bbox_inches="tight", bbox_extra_artists=(lbl,))
plt.show()
plt.close()

# Former panel B, now in figure 1: background process
Histograms of concentrations, whiff duration and blank duration, overlaid with analytical distributions. 

In [None]:
# Duration of one simulation step, used as the time unit when plotting
dt_u = 10.0  # ms
# Sample turbulent background archive
ex2 = np.load(os.path.join(data_folder, "sample_turbulent_background.npz"))

In [None]:
def hist_outline(ax, bins, height, **plot_kwargs):
    """Draw a histogram as a step outline with a translucent fill underneath.

    Parameters
    ----------
    ax : object with ``plot`` and ``fill_between`` methods (e.g. a matplotlib Axes)
        Where the histogram is drawn.
    bins : 1-D array-like, length ``len(height) + 1``
        Bin edges, as returned by ``np.histogram``.
    height : 1-D array-like
        Value of the histogram in each bin.
    **plot_kwargs
        Forwarded unchanged to ``ax.plot`` for the outline.

    Returns
    -------
    ax
        The object passed in, to allow chaining.
    """
    # Accept lists as well as arrays
    bins = np.asarray(bins)
    height = np.asarray(height)

    # Duplicate each height and pair it with the left and right edge of its bin,
    # so that a plain line plot traces the top of the bars
    plot_hist = np.stack([height, height], axis=1).flatten()
    plot_edges = np.stack([bins[:-1], bins[1:]], axis=1).flatten()
    outline = ax.plot(plot_edges, plot_hist, **plot_kwargs)

    # Fill with the outline's color. When no explicit "color" keyword is given
    # (default color cycle, or the "c" alias), take the color of the line just drawn
    # so that fill and outline always match.
    fill_color = plot_kwargs.get("color")
    if fill_color is None and outline:
        fill_color = outline[0].get_color()

    # The fill starts at zero, or at the lowest height if some heights are negative
    ax.fill_between(plot_edges, min(0.0, height.min()), plot_hist,
                    color=fill_color, alpha=0.3)
    return ax

# Panel C: habituation, response to background goes down
Just show how different models fare in this respect: it does habituate. 

In [None]:
# Time series plotted in panel C, one entry per model key.
# Every archive is opened in a `with` block so that its file handle is released.
all_ysers = {}
for mod in ["ibcm", "pca", "avgsub"]:
    # Files are named after "pca" but the model key used for colors/names is "biopca"
    k = "biopca" if mod == "pca" else mod
    with np.load(os.path.join(data_folder, 
            "{}_full_habituation_run_example.npz".format(mod))) as run:
        all_ysers[k] = run["s_snaps"]

# The "none" entry is the "back_vec_snaps" series stored in the IBCM run file
with np.load(os.path.join(data_folder, 
        "ibcm_full_habituation_run_example.npz")) as run:
    all_ysers["none"] = run["back_vec_snaps"]

# Should show the optimum too: subtract from the "none" series its product with
# the transpose of the first stored optimal matrix
with np.load(os.path.join(data_folder, "optimal_habituation_example.npz")) as opt_file:
    optimal_w = opt_file["optimal_ws"][0]
all_ysers["optimal"] = all_ysers["none"] - all_ysers["none"].dot(optimal_w.T)

In [None]:
# Habituation time series, deliberately left unsmoothed: the rough traces
# convey that this is a hard problem.
fig, ax = plt.subplots()
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*0.9, fig.get_size_inches()[1]*1.1)

# Known layout of these simulations: 360000 steps of 10 ms, one snapshot every
# 20 steps; only every `extra_skp`-th snapshot is drawn.
extra_skp = 10
tser = np.arange(0.0, 360000.0, 20.0*extra_skp) * 0.01 / 60.0  # 0.01 s/step

# Drawing order matters: no habituation first, smallest response last.
# Each entry is (model key, legend label, line alpha).
curves = [("none", "None", 0.8)]
curves += [(key, model_nice_names.get(key), 0.7) 
           for key in ["avgsub", "biopca", "ibcm", "optimal"]]
for m, lbl, alpha in curves:
    yser_norm = np.sqrt(np.sum(all_ysers[m]**2, axis=1))[::extra_skp]
    ax.plot(tser, yser_norm, label=lbl, color=model_colors[m], lw=0.5, alpha=alpha)

leg_props = {"borderaxespad": 0.3, "handlelength": 1.2, 
             "facecolor": (1,1,1,0.8), "edgecolor": (1,1,1,0.8)}
ax.legend(loc="upper center", title="Model", ncol=2, **leg_props)

ax.set_ylabel(r"PN activity norm $\|\mathbf{y}(t)\|$")
ax.set_xlabel("Time (min)")
ax.set_title("Habituation", fontsize=plt.rcParams["font.size"], weight="bold", pad=15)
# Lower the top of the y axis to 90 % of the automatic upper limit
ylims_orig = ax.get_ylim()
ax.set_ylim([ylims_orig[0], ylims_orig[1]*0.9])
#ax.set_xlim([0.0, 5.0])

fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "habituation_examples_turbulent_back.pdf"), 
#            transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Panel D: odor recognition performance, distance to new odor vector

In [None]:
# Distances between the response to a mixture and the new odor, one array per model
dists_file = os.path.join(data_folder, "new_mix_distances_identity.npz")
all_dists = np.load(dists_file)
# Every key is a model name, except the entry holding the tested concentrations
all_models = list(all_dists.keys())
all_models.remove("new_concs")
print(all_dists["ibcm"].shape)
print(all_models)
new_concs = all_dists["new_concs"]
# These are known to be 0.5x and 1x the average concentration, so express each
# concentration relative to the second one
reference_conc = new_concs[1]
rel_new_concs = [conc / reference_conc for conc in new_concs]

In [None]:
# Compute interesting statistics of the (log10) relative distance to the new odor
show_models = ["none", "avgsub", "orthogonal", "biopca", "ibcm", "optimal"]
n_bins_dists = 300  # number of grid points of the empirical CDF
dist_histograms = {}
dist_cdfs = {}
dist_stats = {}

# Lower plotting cutoff: smallest strictly positive relative distance of the optimal
# model. Concentrations are broadcast along axis 3 for any number of concentrations
# (the shape used to be hard-coded for exactly two). Exact zeros are excluded, as in
# the loop below, so that the cutoff cannot become log10(0) = -inf.
concs_on_axis3 = np.asarray(new_concs).reshape(1, 1, 1, -1, 1)
optimal_rel = all_dists["optimal"] / concs_on_axis3
optimal_rel_pos = optimal_rel[optimal_rel > 0.0]
min_dist_to_show = np.log10(optimal_rel_pos.min()) if optimal_rel_pos.size else -np.inf

for m in show_models:
    # Read the array once: indexing the .npz archive reloads it from disk each time
    model_dists = all_dists[m]
    dists = []
    for i in range(len(new_concs)):
        # Normalize by new odor magnitude = 1.0 * conc
        # Combine all concentrations this way. 
        conc = new_concs[i]
        # Distances in log scale
        dists_i = model_dists[:, :, :, i].flatten() / conc
        dists_i = np.log10(dists_i[dists_i > 0.0])  # Drop exact zeros
        dists.append(dists_i)
    dists = np.concatenate(dists)
    # Mean, median, variance of the log10 distances (computed before clipping)
    dist_stats[m] = [
        np.mean(dists), 
        np.median(dists), 
        np.var(dists),
    ]
    # Remove extremely small distances for plotting purposes
    dists_clipped = dists[dists > min_dist_to_show]
    # Stored as the (density, bin_edges) tuple returned by np.histogram
    dist_histograms[m] = np.histogram(dists_clipped, bins="doane", density=True)
    # Cumulative distribution function of distances, on a regular grid in log10
    # distance; the leading zero aligns the CDF with the grid points
    dists_axis = np.linspace(dists_clipped.min(), dists_clipped.max(), n_bins_dists)
    dists_counts = np.histogram(dists_clipped, bins=dists_axis)[0] / dists_clipped.size
    dists_cdf = np.concatenate([[0.0], np.cumsum(dists_counts)])
    dist_cdfs[m] = dists_cdf, dists_axis

In [None]:
# Plot histograms of the relative distance to the new odor, one outline per model
fig, ax = plt.subplots()
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*0.9, fig.get_size_inches()[1]*1.1)
for m in show_models:
    lbl = model_nice_names.get(m)
    # Histograms were computed on log10 distances: convert the bin edges back
    hist_outline(ax, 10.0**dist_histograms[m][1], dist_histograms[m][0], 
                color=model_colors[m], lw=1.0, label=lbl)
# Collect the model handles before the median lines are added
handles1, labels1 = ax.get_legend_handles_labels()

# One dashed vertical line per model at its median distance
median_lines = {}
for m in show_models:
    median_lines[m] = ax.axvline(10.0**dist_stats[m][1], ls="--", color=model_colors[m], 
               lw=0.75)
# The "Medians" legend entry uses the "none" line when present, otherwise the first
# line drawn, so the cell also runs when "none" is left out of show_models
li = median_lines.get("none", next(iter(median_lines.values()), None))
handles2, labels2 = ([li], ["Medians"]) if li is not None else ([], [])

ax.set(xlabel=r"Distance $\|\mathbf{y}_{\mathrm{n}} - \mathbf{y}_{\mathrm{mix}}\| / \|\mathbf{y}_{\mathrm{n}}\|$", 
       ylabel="Probability density", xscale="log")
ax.set_title("Distance to new odor", fontsize=plt.rcParams["font.size"], weight="bold", pad=15)

# Create a legend for the models.
leg_props = dict(frameon=False, borderaxespad=0.3, handlelength=1.2)
first_legend = ax.legend(handles=handles1, labels=labels1, loc="upper left", title="Model", **leg_props)
if handles2:
    # A second ax.legend call replaces the first one, so re-add the first manually.
    ax.add_artist(first_legend)
    # Create another legend for the median lines.
    ax.legend(handles=handles2, labels=labels2, loc='upper right', handletextpad=0.1, **leg_props)


fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "distance_to_new_odor_histograms_turbulent_back.pdf"), 
#           transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Cumulative distributions of the distance to the new odor.
# The "Orthogonal" model is left out of this figure; the histograms above already
# show that IBCM and PCA peak at the norm of y_n, perp, i.e. they implement very
# well the strategy for which W was optimized.
show_models = ["none", "avgsub", "biopca", "ibcm", "optimal"]
fig, ax = plt.subplots()
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*0.85, fig.get_size_inches()[1]*1.1)
ax.set_title("Distance to new odor", fontsize=plt.rcParams["font.size"], weight="bold", pad=15)
for m in show_models:
    # Each entry is (cdf values, log10 distance grid)
    cdf_values, log_dist_grid = dist_cdfs[m]
    ax.plot(10.0**log_dist_grid, cdf_values, 
            color=model_colors[m], lw=1.0, label=model_nice_names.get(m))
ax.set_xscale("log")
ax.set_xlabel(r"Distance $\|\mathbf{y}_{\mathrm{n}} - \mathbf{y}_{\mathrm{mix}}\| / \|\mathbf{y}_{\mathrm{n}}\|$")
ax.set_ylabel("Cumulative distribution")
ax.legend(title="Model", frameon=False)
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "distance_to_new_odor_cdf_turbulent_back.pdf"), 
#           transparent=True, bbox_inches="tight")
plt.show()
plt.close()

### Versions with two plots, one per concentration

# Panel E: odor recognition performance, Jaccard
Should highlight somewhere that larger is better

In [None]:
# Jaccard similarities to the new odor, one array per model
jacs_file = os.path.join(data_folder, "jaccard_similarities_identity.npz")
all_jacs = np.load(jacs_file)
# Every key is a model name, except the entry holding the tested concentrations
all_models = list(all_jacs.keys())
all_models.remove("new_concs")
print(all_jacs["ibcm"].shape)
print(all_models)
new_concs = all_jacs["new_concs"]
# These are known to be 0.5x and 1x the average concentration, so express each
# concentration relative to the second one
reference_conc = new_concs[1]
rel_new_concs = [conc / reference_conc for conc in new_concs]

In [None]:
# Compute interesting statistics of the Jaccard similarity, per model and concentration
show_models = ["none", "avgsub", "biopca", "ibcm", "optimal"]
jac_histograms = {}
jac_cdfs = {}
jac_stats = {}
for m in show_models:
    # Read the array once: indexing the .npz archive reloads it from disk each time
    model_jacs = all_jacs[m]
    jac_histograms[m] = {}
    jac_cdfs[m] = {}
    jac_stats[m] = {}
    for i in range(len(new_concs)):
        conc = new_concs[i]
        jacs_sim = model_jacs[:, :, :, i].flatten()
        # Stored as the (density, bin_edges) tuple returned by np.histogram
        jac_histograms[m][conc] = np.histogram(jacs_sim, bins="doane", density=True)
        jacs_dists = 1.0 - jacs_sim
        # There is only a discrete number of possible J, increments of card(z_n \cap z_mix)
        # So count each value. np.unique already returns the values in ascending
        # order, so no extra sorting is needed before accumulating.
        dists_axis, dists_counts = np.unique(jacs_dists, return_counts=True)
        # Accumulate the integer counts first, so that the last CDF value is exactly 1
        dists_cdf = np.cumsum(dists_counts) / jacs_dists.size
        jac_cdfs[m][conc] = dists_cdf, dists_axis
        # Mean, median, variance of the similarity
        jac_stats[m][conc] = [
            np.mean(jacs_sim), 
            np.median(jacs_sim), 
            np.var(jacs_sim),
        ]

In [None]:
# Histograms of the Jaccard similarity: one subplot per new odor concentration
fig, axes = plt.subplots(1, 2)
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*1.5, fig.get_size_inches()[1]*1.1)
fig.suptitle("Jaccard similarity to new odor", fontsize=plt.rcParams["font.size"], weight="bold", y=0.92)
for i, (conc, conc_rel) in enumerate(zip(new_concs, rel_new_concs)):
    ax = axes[i]
    for m in show_models:
        density, bin_edges = jac_histograms[m][conc]
        # Model names are only labeled in the first subplot
        lbl = model_nice_names.get(m) if i == 0 else ""
        hist_outline(ax, bin_edges, density, color=model_colors[m], lw=1.0, label=lbl)
        # A single "Median" entry, attached to the "none" line of the second subplot
        lbl = "Median" if (i == 1 and m == "none") else ""
        ax.axvline(jac_stats[m][conc][1], ls="--", color=model_colors[m], 
                   lw=0.75, label=lbl)
    ax.set_xlabel(r"Jaccard similarity $(z_{\mathrm{n}}, z_{\mathrm{mix}})$")
    ax.set_ylabel("Probability density")
    ax.set_title(r"New conc.$= {:.1f} \langle c \rangle$".format(conc_rel))
    leg_title = "Model" if i == 0 else ""
    ax.legend(frameon=False, title=leg_title, borderaxespad=0.0, handlelength=1.2)

fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "jaccard_histograms_turbulent_back.pdf"), 
#           transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Supplementary panel: odor recognition performance cumulative

In [None]:
# Cumulative distributions of the Jaccard distance: one subplot per concentration
fig, axes = plt.subplots(1, 2)
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*1.5, fig.get_size_inches()[1])
for i, (conc, conc_rel) in enumerate(zip(new_concs, rel_new_concs)):
    ax = axes[i]
    for m in show_models:
        # Each entry is (cdf values, sorted distinct Jaccard distances)
        cdf_values, distance_values = jac_cdfs[m][conc]
        ax.plot(distance_values, cdf_values, 
                color=model_colors[m], lw=1.0, label=model_nice_names.get(m))
    ax.set_xlabel(r"Jaccard distance $(z_{\mathrm{n}}, z_{\mathrm{mix}})$")
    ax.set_ylabel("Cumulative distribution")
    ax.set_title(r"New conc. $= {:.1f} \langle c \rangle$".format(conc_rel))
    ax.legend(title="Model", frameon=False)
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "jaccard_distances_cdf_turbulent_back.pdf"), 
#           transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Supplementary panel: score vs background - new odor distance
Not using this version yet. Make this plot with PCA scaled up
Move to supplementary. Or maybe in the last figure, to say all models are limited? Need to re-run with PCA scaled up first. 

In [None]:
# Distances between background and new odor; the archive is closed once the array is read
with np.load(os.path.join(data_folder, "new_back_distances_identity.npz")) as new_back_file:
    new_back_dists = new_back_file["new_back_distances"]

In [None]:
# Jaccard scores are shaped [n_background, n_new_odors, n_times, n_new_concs, n_back_samples]
# while new_back_dists is shaped [background, new_odor].
# Taking the median over axes 2, 3, 4 leaves one score per [back, new] pair.
jaccards_per_pair = {
    m: np.median(all_jacs[m], axis=(2, 3, 4)) for m in show_models
}

In [None]:
# Jaccard similarity against background-new odor distance, one point per [back, new] pair.
# Warning: the "kde" option is slow (kdeplot takes around 1 min to generate the figure)
plot_type = "scatter"  # or "kde"
# Fail early on a mistyped option instead of silently producing an empty figure
if plot_type not in ("kde", "scatter"):
    raise ValueError("plot_type must be 'kde' or 'scatter', got {!r}".format(plot_type))

fig, ax = plt.subplots()
if plot_type == "kde":
    # Long-format table: one row per [back, new] pair, indexed by model
    data = pd.concat({m:pd.DataFrame(np.stack([jaccards_per_pair[m].flatten(), new_back_dists.flatten()], axis=1), 
                                columns=pd.Index(["jaccard", "new-back"]))
                    for m in show_models}, names=["Model"])
    data = data.rename(model_nice_names, level="Model")
    model_nice_colors = {model_nice_names[m]:model_colors[m] for m in show_models}
    g = sns.kdeplot(data=data.reset_index(), x="new-back", y="jaccard", hue="Model", 
            palette=model_nice_colors, ax=ax, fill=True, alpha=0.5)
    sns.move_legend(g, frameon=False, loc="upper left")
elif plot_type == "scatter":
    for m in show_models:
        xy = np.stack([new_back_dists.flatten(), jaccards_per_pair[m].flatten()], axis=0)
        xy = np.unique(xy, axis=1)  # Find unique (x, y) pairs
        ax.scatter(xy[0], xy[1], color=model_colors[m], label=model_nice_names[m], s=0.9, alpha=0.1)

ax.set(xlabel="Background-new odor distance", ylabel=r"Jaccard similarity $(z_{\mathrm{n}}, z_{\mathrm{mix}})$")
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "back-new_distance_jaccard_correlation.pdf"), 
#            transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Improvement of the per-pair Jaccard score afforded by IBCM over no habituation
improvements = jaccards_per_pair["ibcm"] - jaccards_per_pair["none"]
# Fraction of pairs where IBCM does at least as well (ties are counted here)
fraction_positive = np.mean(improvements >= 0)

fig, ax = plt.subplots()
ax.axhline(0.0, ls="--", color="grey", zorder=0)
ax.scatter(new_back_dists.flatten(), improvements.flatten(), s=1.0, alpha=0.7, color="k")
ax.set_xlabel("Background-new odor distance")
ax.set_ylabel("IBCM vs None improvement")
# Percentages written just above and just below the zero line
for fraction, y_text, valign in [(fraction_positive, 0.02, "bottom"), 
                                 (1.0 - fraction_positive, -0.04, "top")]:
    ax.annotate(f"{fraction*100:.1f} %", xy=(0.8, y_text), va=valign)
plt.show()
plt.close()

#### Remark
Extrapolating, we see that the IBCM model drives new odors that are close to the background into the floor too quickly. If this becomes a problem, e.g. if we run simulations with real odor vectors, just dial up the regularization $\beta$ to reduce the amount of inhibition exerted by the IBCM model, bringing it closer to a level that would preserve more of the parallel component. 

So, let reviewers ask, and do this fix if necessary. But don't show this figure except in supplementary maybe. 