# Figure 4: analysis of IBCM in turbulent backgrounds
This version uses a six-odor turbulent background.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import os, colorsys, json

from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [None]:
# Resources: input data, shared plotting parameters, and output panels
results_root = os.path.join("..", "results")
data_folder = os.path.join(results_root, "for_plots")
params_folder = os.path.join(results_root, "common_params")
panels_folder = "panels/"

# Aesthetic parameters

In [None]:
def _load_json_params(filename):
    """Decode and return the content of one JSON file in params_folder."""
    with open(os.path.join(params_folder, filename), "r") as handle:
        return json.load(handle)


# rcParams shared across figures
new_rcParams = _load_json_params("olfaction_rcparams.json")
plt.rcParams.update(new_rcParams)

# Background colors
all_back_colors = _load_json_params("back_colors.json")
back_color = all_back_colors["back_color"]
back_color_samples = all_back_colors["back_color_samples"]
back_palette = all_back_colors["back_palette"]

# ORN colors
orn_colors = _load_json_params("orn_colors.json")

# Inhibitory neuron colors, converted to arrays
neuron_colors = np.asarray(
    _load_json_params("inhibitory_neuron_two_colors.json"))
neuron_colors_full = np.asarray(
    _load_json_params("inhibitory_neuron_full_colors.json"))

# Model colors and display names, plus an entry for the "random" reference
model_colors = _load_json_params("model_colors.json")
model_nice_names = _load_json_params("model_nice_names.json")
model_colors["random"] = "k"
model_nice_names["random"] = "Rand. odors"

In [None]:
# Read array dimensions from the sample simulation archive. The archive is
# opened once and closed on exit of the with block.
# n_neu is the size of axis 1 of "cbars_gamma";
# (n_components, n_orn) is the shape of "back_vecs".
sample_sim_path = os.path.join(data_folder,
                    "sample_turbulent_ibcm_simulation.npz")
with np.load(sample_sim_path) as sample_sim:
    n_neu = sample_sim["cbars_gamma"].shape[1]
    n_components, n_orn = sample_sim["back_vecs"].shape

In [None]:
# Extra aesthetic parameters for this figure

# Thinner patch outlines
plt.rcParams["patch.linewidth"] = 0.75

# Legend rcParams: every reference spacing below is scaled by a factor 0.75
legend_rc = {
    "labelspacing": 0.5,
    "handlelength": 2.0,
    "handleheight": 0.7,
    "handletextpad": 0.8,
    "borderaxespad": 0.5,
    "columnspacing": 2.0,
}
plt.rcParams.update(
    {"legend." + name: 0.75 * value for name, value in legend_rc.items()}
)

new_color = "r"
# Five base line styles, plus a sixth dotted variant for neurons
linestyles = ["-", "--", ":", (0, (5, 1, 2, 1)), "-."]
neuron_styles = [*linestyles, (0, (1, 2, 1, 2))]

In [None]:
def moving_var(points, kernelsize, ddof=1, boundary="free"):
    """ Compute the variance of time series points in a sliding window.

    The window is centered on each point and extends w = kernelsize // 2
    points on each side, along axis 0 of points.

    Args:
        points (np.ndarray): the data points, time running along axis 0.
        kernelsize (int): window size, at least 3. An even value is
            reduced by one so the window is symmetric.
        ddof (int): delta degrees of freedom. The sum of squared deviations
            in a window of n points is divided by n - ddof. Default: 1.
        boundary (str): how to deal with points within w of the edges
            "shrink": the window of a point at distance d < w from an edge
                is shrunk symmetrically to 2*d + 1 points. The first and
                last points (d = 0) are left at zero.
            "free": the window is asymmetric, full on the inside and clipped
                on the side near the edge.
            "noflux": these points are set to the variance of the closest
                point with a full window (i.e. distance w from the edge).

    Returns:
        var_points (np.ndarray): variance at every point, with the same
            shape as points.

    Raises:
        ValueError: if kernelsize < 3 or if boundary is not one of the
            three options above.
    """
    points = np.asarray(points)
    if kernelsize < 3:
        raise ValueError("Need larger kernel for variance")
    if boundary not in ["shrink", "free", "noflux"]:
        raise ValueError("Unknown boundary {}".format(boundary))
    if kernelsize % 2 == 0:  # if an even number was given
        kernelsize -= 1
    w = kernelsize // 2  # half-width of the window
    end = points.shape[0]  # number of points along the time axis

    var_points = np.zeros(points.shape)
    # To compute the variance, we need the window average too
    avg_points = np.zeros(points.shape)

    # Middle points (full window): accumulate first and second moments
    # with shifted slices. var_points temporarily holds the second moment.
    var_points[w:end - w] = points[w:end - w]**2
    avg_points[w:end - w] = points[w:end - w]
    for j in range(w):  # Add the neighbors at distance j + 1 on each side
        avg_points[w:end - w] += points[w - j - 1:end - w - j - 1]
        avg_points[w:end - w] += points[w + j + 1:end - w + j + 1]
        var_points[w:end - w] += points[w - j - 1:end - w - j - 1]**2
        var_points[w:end - w] += points[w + j + 1:end - w + j + 1]**2

        # Use the loop to treat the two points at a distance j from boundaries
        if boundary == "shrink" and j > 0:
            n_win = 2*j + 1  # symmetric shrunken window
            avg_points[j] = np.sum(points[0:n_win], axis=0) / n_win
            var_points[j] = (np.sum(points[0:n_win]**2, axis=0)
                    - avg_points[j]**2 * n_win) / (n_win - ddof)
            avg_points[-j - 1] = np.sum(points[-n_win:], axis=0) / n_win
            var_points[-j - 1] = (np.sum(points[-n_win:]**2, axis=0)
                    - avg_points[-j - 1]**2 * n_win) / (n_win - ddof)
        elif boundary == "free":
            n_win = j + w + 1  # window clipped on the edge side
            avg_points[j] = np.sum(points[0:n_win], axis=0) / n_win
            var_points[j] = (np.sum(points[0:n_win]**2, axis=0)
                    - avg_points[j]**2 * n_win) / (n_win - ddof)
            avg_points[-j - 1] = np.sum(points[-n_win:], axis=0) / n_win
            var_points[-j - 1] = (np.sum(points[-n_win:]**2, axis=0)
                    - avg_points[-j - 1]**2 * n_win) / (n_win - ddof)

    # Normalize the middle points by kernelsize - ddof
    avg_points[w:end - w] /= kernelsize
    var_points[w:end - w] /= (kernelsize - ddof)

    # Then subtract the average squared, taking ddof into account once
    var_points[w:end - w] -= (avg_points[w:end - w]**2
                                * kernelsize / (kernelsize - ddof))

    # Set the edge points to the nearest full point if boundary is no flux.
    # This must come after the subtraction above, so that the copied values
    # are variances and not uncentered second moments. The last point with
    # a full window is at index end - w - 1.
    if boundary == "noflux":
        var_points[:w] = var_points[w]
        var_points[end - w:] = var_points[end - w - 1]

    return var_points

# Panel A: schematic of the numerical experiment

# Panel B: performance versus olfactory space dimension

In [None]:
# Load saved statistics: Jaccard similarities and distances to the new odor
jacs_stats_path = os.path.join(
    data_folder, "jaccard_similarities_stats_dimensionality_identity.hdf")
dists_stats_path = os.path.join(
    data_folder, "new_mix_distances_stats_dimensionality_identity.hdf")
all_jacs_stats = pd.read_hdf(jacs_stats_path, key="df")
all_dists_stats = pd.read_hdf(dists_stats_path, key="df")

# Dimensions marked with vertical lines and used in titles of later plots
animals_ns = dict(Fly=50.0, Human=300.0, Mouse=1000.0)

In [None]:
# Panel B: mean Jaccard similarity versus OSN space dimension N_S.
# Only one new odor concentration is shown here (n_new_concs = 1).
tested_concs = np.sort(all_jacs_stats.index.get_level_values("new_conc").unique())
# The second smallest tested concentration is used as the reference <c>
average_conc = tested_concs[1]
n_new_concs = 1
keep_conc = tested_concs[1:n_new_concs+1]
ns_range = np.sort(all_jacs_stats.index.get_level_values("N_S").unique())
# squeeze=False always returns a 2D array of axes, even for a single panel
fig, axes = plt.subplots(1, n_new_concs, sharex=True, sharey=True, squeeze=False)
axes = axes.flatten()
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*1.2, plt.rcParams["figure.figsize"][1])

# Order models according to the line order (best first)
show_models = ["optimal", "orthogonal", "ibcm", "biopca", "avgsub", "none", "random"]
model_zorder = ["none", "avgsub", "random", "optimal", "orthogonal",  "biopca", "ibcm"]
# Cycle through the available line styles, then give IBCM the solid line
# and hand its previous style to the optimal model
model_linestyles = {m: neuron_styles[i % len(neuron_styles)]
                    for i, m in enumerate(show_models)}
model_linestyles["ibcm"], model_linestyles["optimal"] = "-", model_linestyles["ibcm"]

# Shaded bands of +/- one standard deviation, drawn in reverse model order
for m in show_models[::-1]:
    for ax, new_conc in zip(axes, keep_conc):
        mean = all_jacs_stats.loc[(m, ns_range, new_conc), "mean"]
        std = np.sqrt(all_jacs_stats.loc[(m, ns_range, new_conc), "var"])
        # Keep the band within [0, 1]
        ax.fill_between(ns_range, (mean - std).clip(lower=0.0),
                        (mean + std).clip(upper=1.0),
                        color=model_colors.get(m), alpha=0.25)
# Mean lines, stacked according to model_zorder
for m in show_models:
    for ax, new_conc in zip(axes, keep_conc):
        ax.plot(ns_range, all_jacs_stats.loc[(m, ns_range, new_conc), "mean"],
            label=model_nice_names.get(m, m), color=model_colors.get(m), alpha=1.0,
            ls=model_linestyles[m], zorder=model_zorder.index(m) + 20
        )
# Labeling the graphs, adding similarity between random odors, etc.
for ax, new_conc in zip(axes, keep_conc):
    conc_multiple = new_conc / average_conc
    if np.isclose(conc_multiple, 1.0):
        title = r"New odor concentration $= \langle c \rangle$"
    else:
        # Only reached when more than one concentration is shown
        title = r"New odor concentration $= {:.1f} \langle c \rangle$".format(conc_multiple)
    ax.set_title(title)
    ax.set_xlabel(r"OSN space dimensionality, $N_\mathrm{S}$")
    ax.set_ylabel("Mean Jaccard similarity")
    ylim = ax.get_ylim()
    ax.set_ylim([ylim[0], 1.05])
    ax.set_xscale("log")
leg_title = "Model"
axes[-1].legend(loc="upper left", bbox_to_anchor=(0.98, 1.), frameon=False,
                title=leg_title, borderaxespad=0.0, handlelength=1.5)
# Vertical dotted lines at the dimensions listed in animals_ns
for ani in animals_ns:
    for ax in axes:
        ax.axvline(animals_ns[ani], ls=":", color="k", lw=0.5, zorder=0)
        #ax.annotate(ani, (animals_ns[ani]*0.98, 1.05), ha="right", va="top", fontsize=6)
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "jaccard_vs_dimension_oneconc.pdf"),
#            transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Panel C: performance versus new odor concentration
Alternatively, use two panels for performance versus $N_\mathrm{S}$ and keep performance versus concentration for the supplementary information.

In [None]:
# Panel C: mean Jaccard similarity versus new odor concentration, at one
# fixed OSN space dimension. Uses average_conc from the panel B cell.
chosen_ns = 50  # Fly case
new_concs = np.sort(all_jacs_stats.index.get_level_values("new_conc").unique())
new_concs_multiples = np.round(new_concs / average_conc, 1)
fig, ax = plt.subplots()
base_width, base_height = plt.rcParams["figure.figsize"]
fig.set_size_inches(base_width*0.88, base_height)

# Order models according to the line order (best first)
show_models = ["optimal", "orthogonal", "ibcm", "biopca", "avgsub", "none", "random"]
model_zorder = ["none", "avgsub", "random", "optimal", "orthogonal",  "biopca", "ibcm"]
model_linestyles = {m: neuron_styles[i % 6] for i, m in enumerate(show_models)}
# IBCM gets the solid line; the optimal model takes IBCM's previous style
model_linestyles["optimal"] = model_linestyles["ibcm"]
model_linestyles["ibcm"] = "-"

# Shaded bands of +/- one standard deviation, in reverse model order
for m in reversed(show_models):
    mean = all_jacs_stats.loc[(m, chosen_ns, new_concs), "mean"]
    std = np.sqrt(all_jacs_stats.loc[(m, chosen_ns, new_concs), "var"])
    lower = (mean - std).clip(lower=0.0)
    upper = (mean + std).clip(upper=1.0)
    ax.fill_between(new_concs_multiples, lower, upper,
                    color=model_colors.get(m), alpha=0.25)
# Mean lines, stacked according to model_zorder
for m in show_models:
    ax.plot(
        new_concs_multiples,
        all_jacs_stats.loc[(m, chosen_ns, new_concs), "mean"],
        label=model_nice_names.get(m, m),
        color=model_colors.get(m),
        alpha=1.0,
        ls=model_linestyles[m],
        zorder=model_zorder.index(m) + 20,
    )
# Labeling the graph; the title names the animal associated with chosen_ns
ns_animals = {n_s: animal for animal, n_s in animals_ns.items()}
title_end = " = {0:d}$ ({1})".format(chosen_ns, ns_animals[chosen_ns])
ax.set_title(r"OSN dimension $N_\mathrm{S}" + title_end)
ax.set_xlabel(r"New odor conc. (multiple of $\langle c \rangle$)")
ax.set_ylabel("Mean Jaccard similarity")
ylim = ax.get_ylim()
ax.set_ylim([ylim[0], 1.05])
leg_title = "Model"
#ax.legend(loc="upper left", bbox_to_anchor=(0.98, 1.), frameon=False,
#                title=leg_title, borderaxespad=0.0, handlelength=1.5)
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "jaccard_vs_newconc_onedim.pdf"),
#            transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Supplementary panels for several new concentrations or dimensions

In [None]:
# Supplementary version with multiple panels for several OSN space sizes
new_concs = np.sort(all_jacs_stats.index.get_level_values("new_conc").unique())
ns_range = [25, 50, 75, 100, 300, 600, 1000]
ncols = 4
# Number of rows needed to fit one panel per N_S (ceiling division)
nrows = len(ns_range) // ncols + min(1, len(ns_range) % ncols)

fig, axes = plt.subplots(nrows, ncols, sharex=True, sharey=True)
axes = axes.flatten()
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*ncols * 0.8,
                    plt.rcParams["figure.figsize"][1] * nrows * 0.8)

# Order models according to the line order (best first)
show_models = ["optimal", "orthogonal", "ibcm", "biopca", "avgsub", "none", "random"]
model_zorder = ["none", "avgsub", "random", "optimal", "orthogonal",  "biopca", "ibcm"]
model_linestyles = {show_models[i]:neuron_styles[i % 6] for i in range(len(show_models))}
# IBCM gets the solid line; the optimal model takes IBCM's previous style
model_linestyles["ibcm"], model_linestyles["optimal"] = "-", model_linestyles["ibcm"]

# Shaded bands of +/- one standard deviation, in reverse model order
for m in show_models[::-1]:
    for i, ns in enumerate(ns_range):
        mean = all_jacs_stats.loc[(m, ns, new_concs), "mean"]
        std = np.sqrt(all_jacs_stats.loc[(m, ns, new_concs), "var"])
        # Keep the band within [0, 1]
        axes[i].fill_between(new_concs, (mean - std).clip(lower=0.0),
                             (mean + std).clip(upper=1.0),
                             color=model_colors.get(m), alpha=0.25)
# Mean lines, stacked according to model_zorder
for m in show_models:
    for i, ns in enumerate(ns_range):
        axes[i].plot(new_concs, all_jacs_stats.loc[(m, ns, new_concs), "mean"],
            label=model_nice_names.get(m, m), color=model_colors.get(m), alpha=1.0,
            ls=model_linestyles[m], zorder=model_zorder.index(m) + 20
        )
# Labeling the graphs, adding similarity between random odors, etc.
ns_animals = {v:k for k, v in animals_ns.items()}
for i, ns in enumerate(ns_range):
    # Format only the brace-free part: str.format would otherwise parse
    # the LaTeX braces as replacement fields and raise a ValueError.
    ti = r"$N_{\mathrm{S}}" + " = {:d}$".format(ns)
    if ns in ns_animals:
        ti += " (" + ns_animals[ns] + ")"
    axes[i].set_title(ti, y=0.85)
    # x labels on the last row of the grid only
    if nrows*ncols - i <= ncols:
        axes[i].set_xlabel(r"New concentration $c$")
    axes[i].set_ylabel("Mean Jaccard similarity")
    ylim = axes[i].get_ylim()
    axes[i].set_ylim([ylim[0], 1.05])
# Hide the unused panels of the grid
for i in range(len(ns_range), ncols*nrows):
    axes[i].set_axis_off()
# The legend is placed in the last axes of the grid, using the handles of
# the first panel
handles, labels = axes[0].get_legend_handles_labels()
leg_title = "Model"
axes[-1].legend(handles, labels, loc="center", bbox_to_anchor=(0.5, 0.5), frameon=False,
                title=leg_title, borderaxespad=0.0, handlelength=1.5)
fig.tight_layout()
fig.savefig(os.path.join(panels_folder, "supp_jaccard_vs_newconc_alldims.pdf"),
            transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Supplementary figure: distance to the new odor versus OSN space dimension,
# one panel per new odor concentration. Uses average_conc from the panel B cell.
n_new_concs = 4
keep_conc = np.sort(all_dists_stats.index.get_level_values("new_conc").unique())[0:n_new_concs]
ns_range = np.sort(all_dists_stats.index.get_level_values("N_S").unique())
fig, axes = plt.subplots(2, n_new_concs // 2, sharex=True, sharey=True)
axes = [axes] if n_new_concs == 1 else axes.flatten()
base_width, base_height = plt.rcParams["figure.figsize"]
fig.set_size_inches(base_width*1.75, base_height*1.75)

# Order models according to the line order (best first)
show_models = ["optimal", "orthogonal", "ibcm", "biopca", "avgsub", "none", "random"]
model_zorder = ["none", "avgsub", "random", "optimal", "orthogonal",  "biopca", "ibcm"]
model_linestyles = {m: neuron_styles[i % 6] for i, m in enumerate(show_models)}
# IBCM gets the solid line; the optimal model takes IBCM's previous style
model_linestyles["optimal"] = model_linestyles["ibcm"]
model_linestyles["ibcm"] = "-"

# Shaded bands of +/- one standard deviation, in reverse model order
# NOTE(review): the band is clipped to [0, 1] as in the Jaccard plots,
# although the quantity here is a distance -- confirm the upper clip is intended.
for m in reversed(show_models):
    for i in range(n_new_concs):
        new_conc = keep_conc[i]
        mean = all_dists_stats.loc[(m, ns_range, new_conc), "mean"]
        std = np.sqrt(all_dists_stats.loc[(m, ns_range, new_conc), "var"])
        lower = (mean - std).clip(lower=0.0)
        upper = (mean + std).clip(upper=1.0)
        axes[i].fill_between(ns_range, lower, upper,
                             color=model_colors.get(m), alpha=0.25)
# Mean lines, stacked according to model_zorder
for m in show_models:
    for i in range(n_new_concs):
        new_conc = keep_conc[i]
        axes[i].plot(
            ns_range,
            all_dists_stats.loc[(m, ns_range, new_conc), "mean"],
            label=model_nice_names.get(m, m),
            color=model_colors.get(m),
            alpha=1.0,
            ls=model_linestyles[m],
            zorder=model_zorder.index(m) + 20,
        )
# Labeling the graphs: log-log axes, x labels for panel indices 2 and above
for i in range(n_new_concs):
    conc_multiple = keep_conc[i] / average_conc
    axes[i].set_title(r"New conc.$= {:.1f} \langle c \rangle$".format(conc_multiple), y=1.0)
    if i >= 2:
        axes[i].set_xlabel(r"OSN space dimensionality, $N_\mathrm{S}$")
    axes[i].set_ylabel(r"Mean dist. $\langle\|y_{\mathrm{new}} - y_{\mathrm{mix}}\|\rangle$")
    axes[i].set_xscale("log")
    axes[i].set_yscale("log")
leg_title = "Model"
axes[-1].legend(loc="lower left",  frameon=False, ncols=2, title=leg_title,
                borderaxespad=0.0, handlelength=1.5, alignment="left")
# Vertical dotted lines and name labels at the dimensions listed in animals_ns
for ani, ani_ns in animals_ns.items():
    for i in range(n_new_concs):
        axes[i].axvline(ani_ns, ls=":", color="k", lw=0.5, zorder=0)
        axes[i].annotate(ani, (ani_ns*0.95, 1.05), ha="right", va="bottom", fontsize=6)

fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "supp_distance_vs_dimension_allconcs.pdf"),
#            transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Supplementary panel: background inhibition for various models
Which color set to use? The model colors (IBCM, BioPCA, average subtraction, etc.). So IBCM here is turquoise. 

Which metric to plot? I think we should just plot the activity norm, $\sqrt{\vec{s}^T \vec{s}}$
Which metric to plot? I think we should just plot $\sqrt{\langle \vec{s}^T \vec{s} \rangle}$, the RMS norm, compared to the background's, $\sqrt{\langle \vec{x}_B^T \vec{x}_B \rangle}$, to illustrate both the reduction in the average and the reduction in the fluctuations. This is better than plotting the average norm ($\langle \sqrt{\vec{s}^T \vec{s}} \rangle$). 

In [None]:
# Time series of the activity norm for each model, and their moving st. dev.
# NOTE(review): tser_example and dt_u are not defined in the visible part of
# this notebook -- confirm they are set before this cell runs.
sser_norm_path = os.path.join(data_folder, "sser_norm_turbulent_model_comparison.npz")
ex_s = np.load(sser_norm_path)
show_models = ["none", "avgsub", "biopca", "ibcm", "ideal"]
# Standard deviation in a sliding time window: square root of moving_var
std_options = {"kernelsize": 1500, "boundary": "free"}
std_series = {
    model: np.sqrt(moving_var(ex_s[model], **std_options))
    for model in show_models
}
# For reference, the averaging time window in minutes
step_size = tser_example[1] - tser_example[0]
avg_time_min = std_options["kernelsize"] * dt_u / 1000 / 60 * step_size
print("Sliding time window length:", avg_time_min, "min")

In [None]:
# Time series of the PN activity norm, one thin line per habituation model
fig, ax = plt.subplots()
time_min = tser_example*dt_u/1000/60  # x axis, labeled in minutes
for mod in show_models:
    ax.plot(time_min, ex_s[mod], label=model_nice_names[mod],
            color=model_colors[mod], lw=0.5)
ax.set_xlabel("Time (min)")
ax.set_ylabel(r"PN activity norm, $\|\vec{s}\|$", labelpad=4)
# Raise the upper limit by 20 %
y_low, y_high = ax.get_ylim()
ax.set_ylim([y_low, y_high*1.2])
ax.legend(frameon=False, title="Habituation model", ncol=2)
#fig.savefig(os.path.join(panels_folder, "sser_norm_turbulent_model_comparison.pdf"),
#            transparent=True, bbox_inches="tight")
fig.tight_layout()
plt.show()
plt.close()

In [None]:
# Moving standard deviation of the PN activity norm, one line per model
fig, ax = plt.subplots()
time_min = tser_example*dt_u/1000/60  # x axis, labeled in minutes
for mod in show_models:
    ax.plot(time_min, std_series[mod], label=model_nice_names[mod],
            color=model_colors[mod])
ax.set_xlabel("Time (min)")
ax.set_ylabel(r"PN norm st. dev., $\sigma_{\|\vec{s}\|}$", labelpad=4)
# Raise the upper limit by 60 %
y_low, y_high = ax.get_ylim()
ax.set_ylim([y_low, y_high*1.6])
ax.legend(frameon=False, title="Habituation model", ncol=2)
fig.tight_layout()
#fig.savefig(os.path.join(panels_folder, "sser_norm_stdev_turbulent_model_comparison.pdf"),
#            transparent=True, bbox_inches="tight")
plt.show()
plt.close()