# Non-orthogonality of odor vectors

Illustrate that the background vectors and the resulting mixtures are non-negative and non-orthogonal; also compute the average dot product between two vectors whose elements are exponentially distributed and which are subsequently normalized to unit norm. 

## Imports

In [None]:
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import pandas as pd
from time import perf_counter
import os, json, sys
if ".." not in sys.path:
    sys.path.insert(1, "..")
from os.path import join as pj

from modelfcts.average_sub import integrate_inhib_average_sub_skip

from modelfcts.ideal import (
    find_projector, 
    find_parallel_component, 
    ideal_linear_inhibitor, 
    compute_ideal_factor
)
from modelfcts.checktools import (
    analyze_pca_learning, 
    check_conc_samples_powerlaw_exp1
)
from modelfcts.backgrounds import (
    update_powerlaw_times_concs,
    sample_ss_conc_powerlaw,
    sample_ss_mixed_concs_powerlaw,
    generate_odorant, 
    generate_gamma_odorant
)
from utils.statistics import seed_from_gen
from modelfcts.distribs import (
    truncexp1_average,
    powerlaw_cutoff_inverse_transform
)
from utils.metrics import jaccard, l2_norm

### Aesthetic parameters

In [None]:
# Whether figures produced in this notebook are written to disk
do_save_plots = True

root_dir = pj("..")
panels_folder = pj(root_dir, "figures", "powerlaw")
params_folder = pj(root_dir, "results", "common_params")


def _load_json_params(filename):
    """Return the parsed content of the JSON file `filename` in params_folder.

    The file is opened with an explicit UTF-8 encoding, so the result
    does not depend on the platform's default locale encoding.
    """
    with open(pj(params_folder, filename), "r", encoding="utf-8") as f:
        return json.load(f)


# rcParams: default figure size first, then the shared project settings
plt.rcParams["figure.figsize"] = (4.5, 3.0)
new_rcParams = _load_json_params("olfaction_rcparams.json")
plt.rcParams.update(new_rcParams)

# color maps
all_back_colors = _load_json_params("back_colors.json")
back_color = all_back_colors["back_color"]
back_color_samples = all_back_colors["back_color_samples"]
back_palette = all_back_colors["back_palette"]

orn_colors = _load_json_params("orn_colors.json")

neuron_colors = np.asarray(
    _load_json_params("inhibitory_neuron_two_colors.json"))
neuron_colors_full24 = np.asarray(
    _load_json_params("inhibitory_neuron_full_colors.json"))
# Here, 32 neurons, need to make a new palette with same parameters
neuron_colors_full = np.asarray(
    sns.husl_palette(n_colors=32, h=0.01, s=0.9, l=0.4, as_cmap=False))

model_colors = _load_json_params("model_colors.json")
model_nice_names = _load_json_params("model_nice_names.json")

# Model identifiers are the keys of the color dictionary
models = list(model_colors.keys())
print(models)

### Initialization

In [None]:
# Initialize common simulation parameters
# NOTE: the statements below draw from rgen_meta in a fixed order (odor
# vectors, simulation seed, initial concentrations, initial times);
# reordering them changes every generated value.
n_dimensions = 50  # Fly dimensions
n_components = 4  # Number of background odors

inhib_rates = [5e-5, 1e-5]  # alpha, beta  [0.00025, 0.00005]

# Simulation duration
duration = 360000.0
deltat = 1.0
n_chunks = 1
# Skip factor scaled with the time step (50 steps when deltat = 1.0)
skp = 50 * int(1.0 / deltat)

# Common model options
activ_function = "identity"  # "ReLU"

# Background process
update_fct = update_powerlaw_times_concs

# Choose randomly generated background vectors
# This seed gave nicely spread out odors easier to learn 0xe329714605b83365e67b44ed7e001ec
# Another random seed: 0xb7bf767bbad297aeeee19d0ccdc3647e
rgen_meta = np.random.default_rng(seed=0x47cf767aaab807aeeee19d0cfdc3629c)
# One row per background odor, filled in one odor at a time
back_components = np.zeros([n_components, n_dimensions])
for i in range(n_components):
    back_components[i] = generate_odorant(n_dimensions, rgen_meta, lambda_in=0.1)
# Rescale each row by its norm (l2_norm presumably returns one norm per row -- TODO confirm)
back_components = back_components / l2_norm(back_components).reshape(-1, 1)

# Seed for background simulation, to make sure all models are the same
simul_seed = seed_from_gen(rgen_meta)

# Turbulent background parameters: same rates and constants for all odors
back_params = [
    np.asarray([1.0] * n_components),        # whiff_tmins
    np.asarray([500.] * n_components),       # whiff_tmaxs
    np.asarray([1.0] * n_components),        # blank_tmins
    np.asarray([800.0] * n_components),      # blank_tmaxs
    np.asarray([0.6] * n_components),        # c0s
    np.asarray([0.5] * n_components),        # alphas
]

# Compute mean of independent underlying variables, 
# to determine the mean and target covariance of mixed variables
# Blank bounds are entries 2 and 3 of back_params, whiff bounds entries 0 and 1
tblo, tbhi, twlo, twhi = back_params[2], back_params[3], back_params[0], back_params[1]
whiffprob = np.mean(1.0 / (1.0 + np.sqrt(tblo*tbhi/twlo/twhi)))
# Average whiff concentration computed from (c0s, alphas)
avg_whiff_conc = np.mean(truncexp1_average(*back_params[4:6]))
mean_conc = whiffprob * avg_whiff_conc  # average time in whiffs vs blanks * average whiff conc
print("Analytical mean conc:", mean_conc)
#print("Numerical mean conc:", mean_conc_empirical)

# Then add background odor vectors last to that list
back_params.append(back_components)

# Initial values of background process variables (t, c for each variable)
# back_params[:-1] leaves out the odor vectors appended just above
init_concs = sample_ss_conc_powerlaw(*back_params[:-1], size=1, rgen=rgen_meta)
# Initial times: uniform random numbers passed through the inverse
# transform, with the blank bounds (entries 2 and 3) as arguments
init_times = powerlaw_cutoff_inverse_transform(
                rgen_meta.random(size=n_components), *back_params[2:4])
# Column 0: times, column 1: concentrations, one row per background odor
tc_init = np.stack([init_times, init_concs.squeeze()], axis=1)

# Initial background vector: concentration-weighted sum of the odor vectors
init_bkvec = tc_init[:, 1].dot(back_components)
# The (time, concentration) array comes first in the list of initial
# background variables, followed by the initial background vector
init_back_list = [tc_init, init_bkvec]

## Background process example

In [None]:
# Dense run of the background process (skp=1 is passed, rather than the
# coarser `skp` defined with the simulation parameters) to extract the
# background time series. The average-subtraction model is only a vehicle
# here: it gets a dummy all-zero synaptic weight row.
avg_options = dict(activ_fct=activ_function)
init_synapses_avg = np.zeros((1, n_dimensions))

sim_avg_res = integrate_inhib_average_sub_skip(
    init_synapses_avg,
    update_fct,
    init_back_list,
    [],
    inhib_rates,
    back_params,
    duration,
    deltat,
    seed=simul_seed,
    noisetype="uniform",
    skp=1,
    **avg_options,
)

# Of the five returned items, only the second and third are kept
_, bkser_avg, bkvecser_avg, _, _ = sim_avg_res
del sim_avg_res

In [None]:
# Scatter panels of the background vector time series, two consecutive
# OSNs per panel, with the scaled background odor vectors drawn as
# segments from the origin
tslice = slice(0, 50000, 200)
n_cols = 6
n_plots = n_dimensions // 4  # Two OSNs per panel: only the first 24 OSNs
n_rows = -(-n_plots // n_cols)  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, sharex=True, sharey=True)
fig.set_size_inches(n_cols*1.25, n_rows*1.25)
for i, ax in enumerate(axes.flat[:n_plots]):
    osn_x, osn_y = 2*i + 1, 2*i  # 0-based OSN indices on x and y axes
    ax.scatter(
        bkvecser_avg[tslice, osn_x],
        bkvecser_avg[tslice, osn_y],
        s=9, alpha=0.5, color="k",
    )
    for j in range(n_components):
        # Segment from the origin to 3x the odor vector in this OSN plane
        ax.plot(
            (0.0, 3.0*back_components[j, osn_x]),
            (0.0, 3.0*back_components[j, osn_y]),
            lw=2.0,
        )
    # Axis labels use 1-based OSN numbers
    ax.set(xlabel=f"OSN {osn_x + 1}", ylabel=f"OSN {osn_y + 1}")
# Hide the unused panels of the grid
for ax in axes.flat[n_plots:]:
    ax.set_axis_off()
fig.tight_layout()
if do_save_plots:
    fig.savefig(
        pj("..", "figures", "correlation", "osn_background_vectors.pdf"),
        transparent=True, bbox_inches="tight",
    )
plt.show()
plt.close()

# Recognition of odor-specific tag elements vs distance to background

Reanalyzing the simulation results with a different metric, which measures the fraction of new odor tag elements (Kenyon cells) not present in the background sample's tag which are preserved in the tag for the response to the mixture. Habituation should emphasize these tag KCs while potentially removing elements common with background vectors. 

$$ F(z_\mathrm{mix}, z_\mathrm{new}; z_\mathrm{back}) = \frac{\mathrm{card}\left(z_\mathrm{mix} \cap (z_\mathrm{new} \setminus z_\mathrm{back})\right)}{\mathrm{card}\left(z_\mathrm{new} \setminus z_\mathrm{back}\right)} $$

where $x \setminus y$ denotes the set difference, i.e. the elements of $x$ that are not in $y$. 

In [None]:
# Load computed F across models, backgrounds, new odor samples.
data_folder = pj("..", "results", "for_plots")
# This archive is deliberately left open: its per-model arrays are read
# from it afterwards, one key at a time.
all_frac_tags_file = np.load(pj(data_folder, "frac_distinct_tag_elements_identity.npz"))
available_models = list(all_frac_tags_file.keys())

# Plot as a function of Euclidean distance between new odor and background
# Read the array, then close the archive right away
with np.load(pj(data_folder, "new_back_distances_identity.npz")) as dists_file:
    new_back_dists = dists_file["new_back_distances"]
del dists_file

# New odor concentrations
with np.load(pj(data_folder, "new_mix_distances_identity.npz")) as all_dists:
    new_concs = all_dists["new_concs"]
# Concentrations relative to the second entry of new_concs
rel_new_concs = [a/new_concs[1] for a in new_concs]
del all_dists

# Sanity check: axis 3 of the score arrays indexes new odor concentrations
assert len(new_concs) == all_frac_tags_file["ibcm"].shape[3]

In [None]:
# Models to display; every one must be present in the loaded archive
show_models = ["none", "avgsub", "biopca", "ibcm", "optimal"]
assert all(a in available_models for a in show_models), "missing model"

# Per the shape note that accompanies the data, the stored scores are
# indexed [n_background, n_new_odors, n_times, n_new_concs, n_back_samples]
# and new_back_dists is indexed [background, new_odor].
# Taking the median over axes 2 (times) and 4 (background samples)
# leaves one score per [background, new_odor, new_conc] triplet.
scores_per_triplet = {}
for m in show_models:
    model_scores = all_frac_tags_file[m]
    scores_per_triplet[m] = np.median(model_scores, axis=(2, 4))

In [None]:
# Fraction of distinct tag elements versus new odor-background distance,
# one panel per new odor concentration.
# Warning: with kdeplot, this figure is slow to generate, taking around 1 min
plot_type = "kde"  # or "scatter"
if plot_type not in ("kde", "scatter"):
    # Fail early instead of silently producing an empty figure
    raise ValueError("Unknown plot_type: {!r}".format(plot_type))

# One panel per concentration; squeeze=False keeps a 2d array of axes
# whatever the number of panels (0.9 width factor per panel, i.e. 1.8 for two)
n_panels = len(rel_new_concs)
fig, axes = plt.subplots(1, n_panels, squeeze=False)
axes = axes[0]
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*(0.9*n_panels),
                    plt.rcParams["figure.figsize"][1]*1.1)

# Quantities shared by all panels
title_fmt = r"New conc. $= {:.1f} \langle c \rangle$"
flat_dists = new_back_dists.flatten()
model_nice_colors = {model_nice_names[m]:model_colors[m] for m in show_models}

if plot_type == "kde":
    for i, c in enumerate(rel_new_concs):
        ax = axes[i]
        # Long-format table: one (score, distance) row per odor pair,
        # with the model name as outer index level
        data = pd.concat(
            {m: pd.DataFrame(
                    np.stack([scores_per_triplet[m][:, :, i].flatten(),
                              flat_dists], axis=1),
                    columns=pd.Index(["jaccard", "new-back"]))
             for m in show_models},
            names=["Model"])
        data = data.rename(model_nice_names, level="Model")
        g = sns.kdeplot(data=data.reset_index(), x="new-back", y="jaccard", hue="Model",
                palette=model_nice_colors, ax=ax, fill=True, alpha=0.5, legend=(i==0))
        g.set_title(title_fmt.format(c), y=0.98)
        if i == 0:
            # Rebuild the legend with entries in reverse order
            old_legend = g.get_legend()
            handles = old_legend.legend_handles
            labels = [t.get_text() for t in old_legend.get_texts()]
            g.legend(handles[::-1], labels[::-1], title=old_legend.get_title().get_text())
            sns.move_legend(g, frameon=False, loc="upper left")

else:  # plot_type == "scatter"
    for i, c in enumerate(rel_new_concs):
        ax = axes[i]
        for m in show_models:
            scores_m_i = scores_per_triplet[m][:, :, i]
            xy = np.stack([flat_dists, scores_m_i.flatten()], axis=0)
            xy = np.unique(xy, axis=1)  # Find unique (x, y) pairs
            ax.scatter(xy[0], xy[1], color=model_colors[m], label=model_nice_names[m], s=0.9, alpha=0.1)
        # Title set once per panel, same wording as the kde version
        ax.set_title(title_fmt.format(c), y=0.98)
    # Legend on the last panel, with opaque markers replacing the faint dots
    handles, labels = axes[-1].get_legend_handles_labels()
    handles_new = [
        mpl.lines.Line2D([0], [0], marker='o', color=h.get_facecolor(),
                         markerfacecolor=h.get_facecolor(), markersize=4, alpha=1.0, ls="none")
        for h in handles
    ]
    axes[-1].legend(handles_new[::-1], labels[::-1], fontsize=6, frameon=False, handlelength=1.0,
                    handletextpad=0.3, borderaxespad=0.3, labelspacing=0.3)


# y label on the first panel only, x label on every panel
xlbl = r"New odor orthogonal part, $\|\mathbf{s}_\mathrm{new, \perp}\|$"
axes[0].set(xlabel=xlbl, ylabel="Fraction distinct tag elements\n"
                   + r"$F(z_{\mathrm{mix}}, z_{\mathrm{new}}; z_{\mathrm{back}})$")
for other_ax in axes[1:]:
    other_ax.set(xlabel=xlbl, ylabel=None)


fig.tight_layout()
if do_save_plots:
    fig.savefig(pj(panels_folder, "frac_distinct_kc_new_odor_distance_{}.pdf".format(plot_type)),
                dpi=300, transparent=True, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# One plot across concentrations: for each (background, new odor) pair,
# the score shown is the median over new odor concentrations.
# Warning: with kdeplot, this figure is slow to generate, taking around 1 min
plot_type = "scatter"  # or "kde"
if plot_type not in ("kde", "scatter"):
    # Fail early instead of silently producing an empty figure
    raise ValueError("Unknown plot_type: {!r}".format(plot_type))

fig, ax = plt.subplots()
fig.set_size_inches(plt.rcParams["figure.figsize"][0]*0.9, plt.rcParams["figure.figsize"][1]*1.1)

# Computed once per model: median over the last axis of scores_per_triplet
# (new odor concentrations), flattened to line up with the distances
flat_dists = new_back_dists.flatten()
median_scores = {m: np.median(scores_per_triplet[m], axis=2).flatten()
                 for m in show_models}
# The data pool all concentrations, so the title must not quote one of them
plot_title = "Median over new odor concentrations"

if plot_type == "kde":
    data = pd.concat(
        {m: pd.DataFrame(np.stack([median_scores[m], flat_dists], axis=1),
                         columns=pd.Index(["jaccard", "new-back"]))
         for m in show_models},
        names=["Model"])
    data = data.rename(model_nice_names, level="Model")
    model_nice_colors = {model_nice_names[m]:model_colors[m] for m in show_models}
    g = sns.kdeplot(data=data.reset_index(), x="new-back", y="jaccard", hue="Model",
            palette=model_nice_colors, ax=ax, fill=True, alpha=0.5)
    g.set_title(plot_title, y=0.98)
    # Rebuild the legend with entries in reverse order
    old_legend = g.get_legend()
    handles = old_legend.legend_handles
    labels = [t.get_text() for t in old_legend.get_texts()]
    g.legend(handles[::-1], labels[::-1], title=old_legend.get_title().get_text())
    sns.move_legend(g, frameon=False, loc="upper left")

else:  # plot_type == "scatter"
    for m in show_models:
        xy = np.stack([flat_dists, median_scores[m]], axis=0)
        xy = np.unique(xy, axis=1)  # Find unique (x, y) pairs
        ax.scatter(xy[0], xy[1], color=model_colors[m], label=model_nice_names[m], s=0.9, alpha=0.1)
    ax.set_title(plot_title, y=0.98)
    # Legend with opaque markers replacing the faint scatter dots
    handles, labels = ax.get_legend_handles_labels()
    handles_new = [
        mpl.lines.Line2D([0], [0], marker='o', color=h.get_facecolor(),
                         markerfacecolor=h.get_facecolor(), markersize=4, alpha=1.0, ls="none")
        for h in handles
    ]
    ax.legend(handles_new[::-1], labels[::-1], fontsize=6, frameon=False, handlelength=1.0,
              handletextpad=0.3, borderaxespad=0.3, labelspacing=0.3)


# Single panel: both axis labels are set once and left in place
xlbl = r"New odor orthogonal part, $\|\mathbf{s}_\mathrm{new, \perp}\|$"
ax.set(xlabel=xlbl, ylabel="Fraction distinct tag elements\n"
                   + r"$F(z_{\mathrm{mix}}, z_{\mathrm{new}}; z_{\mathrm{back}})$")


fig.tight_layout()
# Saving is intentionally disabled for this exploratory figure
#if do_save_plots:
#    fig.savefig(pj(panels_folder, "frac_distinct_kc_new_odor_distance_oneplot_{}.pdf".format(plot_type)), 
#                dpi=300, transparent=True, bbox_inches="tight")
plt.show()
plt.close()

# Average dot product between random odors
Generate a bunch of odors and bootstrap the average dot product

In [None]:
# Bootstrap estimate of the mean dot product: generate one pool of
# normalized odor vectors, then repeatedly draw two index sets with
# replacement and average the row-wise dot products s_i.dot(s_j)
n_samp = int(1e5)
samp_size = int(1e4)
n_boot = 10000
boot_dots = np.zeros(n_boot)

start_t = perf_counter()
odors = generate_odorant((n_samp, n_dimensions), rgen_meta)
odors = odors / l2_norm(odors, axis=1)[:, np.newaxis]
for i in range(n_boot):
    # First index set is drawn before the second one
    od_choice_i = rgen_meta.choice(n_samp, size=samp_size, replace=True)
    od_choice_j = rgen_meta.choice(n_samp, size=samp_size, replace=True)
    dotprods = (odors[od_choice_i] * odors[od_choice_j]).sum(axis=1)
    boot_dots[i] = dotprods.mean()
end_t = perf_counter()
print(f"Finished {n_boot} bootstrap repeats with {samp_size} samples each "
      f"in {end_t - start_t:.2f} s")

In [None]:
# Summary of the bootstrap repeats: mean and sample standard deviation
mean_dot = boot_dots.mean()
vari_dot = boot_dots.var(ddof=1)  # ddof=1: unbiased variance estimator
print("Mean dot product:", mean_dot)
print("Standard dev.:", np.sqrt(vari_dot))

In [None]:
# Comparison estimate: instead of resampling a fixed pool, generate two
# fresh sets of odor vectors at every repeat (half as many repeats and
# half the sample size of the bootstrap)
n_boot2 = n_boot // 2
samp_size2 = samp_size // 2
boot_dots2 = np.zeros(n_boot2)
start_t = perf_counter()
for i in range(n_boot2):
    # Both sets are generated first, in the order i then j, then normalized
    raw_i = generate_odorant((samp_size2, n_dimensions), rgen_meta)
    raw_j = generate_odorant((samp_size2, n_dimensions), rgen_meta)
    odors_sample_i = raw_i / l2_norm(raw_i, axis=1)[:, np.newaxis]
    odors_sample_j = raw_j / l2_norm(raw_j, axis=1)[:, np.newaxis]
    dotprods = (odors_sample_i * odors_sample_j).sum(axis=1)
    boot_dots2[i] = dotprods.mean()
end_t = perf_counter()
print(f"Finished {n_boot2} bootstrap repeats with {samp_size2} samples each "
      f"in {end_t - start_t:.2f} s")

In [None]:
# Summary of the fresh-sample repeats: mean and sample standard deviation
mean_dot2 = boot_dots2.mean()
vari_dot2 = boot_dots2.var(ddof=1)  # ddof=1: unbiased variance estimator
print("Mean dot product:", mean_dot2)
print("Standard dev. of estimate:", np.sqrt(vari_dot2))

# Same drill, but for gamma-distributed vector elements

In [None]:
# Gamma-distributed odor vectors: two fresh sets are generated at every
# repeat. The vectors are not rescaled here; their average norm is
# recorded next to the average dot product.
start_t = perf_counter()

boot_dots_gam = np.zeros(n_boot2)
mean_norms_gam = np.zeros(n_boot2)
samp_size3 = samp_size2 // 2
for i in range(n_boot2):
    odors_sample_i = generate_gamma_odorant((samp_size3, n_dimensions), rgen_meta)
    odors_sample_j = generate_gamma_odorant((samp_size3, n_dimensions), rgen_meta)
    # Row-wise dot products between the two sets
    dotprods = (odors_sample_i * odors_sample_j).sum(axis=1)
    boot_dots_gam[i] = dotprods.mean()
    # Norms of all vectors from both sets, pooled before averaging
    norms_ij = np.concatenate(
        [l2_norm(odors_sample_i, axis=1), l2_norm(odors_sample_j, axis=1)]
    )
    mean_norms_gam[i] = np.mean(norms_ij)
end_t = perf_counter()
print(f"Finished {n_boot2} bootstrap repeats with {samp_size3} samples each "
      f"in {end_t - start_t:.2f} s")

In [None]:
# Summary for gamma-distributed vectors: dot products first
mean_dot_gam = boot_dots_gam.mean()
vari_dot_gam = boot_dots_gam.var(ddof=1)  # ddof=1: unbiased variance estimator
print("Mean dot product:", mean_dot_gam)
print("Standard dev. of estimate:", np.sqrt(vari_dot_gam))

# Then the average vector norm, for comparison
mean_norm_gam = mean_norms_gam.mean()
vari_norm_gam = mean_norms_gam.var(ddof=1)
print("Mean vector norm:", mean_norm_gam)
print("Standard dev. of estimate:", np.sqrt(vari_norm_gam))