In [None]:
import numpy as np
import pandas as pd
import sys
if not "../" in sys.path:
    sys.path.insert(0, "../")
from modelfcts.ideal import find_projector, find_parallel_component
from utils.metrics import l2_norm

import matplotlib.pyplot as plt

In [None]:
# Value that every column's mean is rescaled to.
set_mean = 10
indata = pd.read_csv(
    '../data/hallem2006_TableS1_source.csv',
    index_col=0,
    header=0,
    encoding='utf-8',
)
# Work on the transposed table: shift each column so that its minimum is zero,
# rescale each column so that its mean equals set_mean, then discard the
# 'spontaneous firing rate' column.
transposed = indata.T
shifted = transposed - transposed.min()
normed = (set_mean * shifted / shifted.mean()).drop(columns=['spontaneous firing rate'])
normed

# Distribution of elements
The distribution looks exponential. However, the elements are almost certainly correlated across odors: the dot products computed below do not at all follow the distribution of dot products between vectors made of i.i.d. exponential elements. 

In [None]:
# Histogram of all entries of `normed`, pooled into a single 1D sample.
fig, ax = plt.subplots()
pooled_elements = normed.to_numpy().ravel()
ax.hist(pooled_elements, bins="fd")
ax.set_xlabel("ORN activation")
ax.set_ylabel("Count")
plt.show()
plt.close()

# Study dot products

In [None]:
# Rescale every column of `normed` to unit Euclidean length, then form the
# matrix of dot products between all pairs of columns.
column_norms = np.sqrt(normed.pow(2).sum(axis=0))
unitnormed = normed.div(column_norms, axis="columns")
dot_products = unitnormed.T @ unitnormed

In [None]:
# Keep only the entries strictly above the diagonal: every pair of distinct
# columns appears once and the diagonal self-products are excluded.
n_vectors = dot_products.shape[0]
upper_rows, upper_cols = np.triu_indices(n=n_vectors, k=1)
dot_products_stats = dot_products.values[upper_rows, upper_cols]

In [None]:
# Histogram of the pairwise dot products between distinct unit-normed columns.
fig, ax = plt.subplots()
ax.hist(dot_products_stats, bins="fd")
ax.set_xlabel("Dot product")
ax.set_ylabel("Count")
plt.show()
plt.close()

# Conclusion: odors are not typically orthogonal! 
This is a big problem for the inhibition problem I have been considering, because the more orthogonal odors are, the better it works. 

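The reason is the non-negativity of those vectors: the dot product of two non-negative vectors is a sum of non-negative terms, so it vanishes only when the two vectors share no non-zero component. Random non-negative vectors therefore won't be orthogonal on average. And real odors tend to resemble each other because of chemical similarities, so it's even worse here. 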

Maybe by combining odors with Gautam's model, we can circumvent that problem, but for now, it's a serious issue. 

# Check random non-negative vectors
Uniformly random, then exponential random. 

In [None]:
# Fixed seed so that every random draw made from `rgen` is reproducible.
RNG_SEED = 0x5296381dd940500416b73afc46718ddf
rgen = np.random.default_rng(RNG_SEED)

In [None]:
# Match the shape of the data matrix (rows = dimensions, columns = odors).
n_dims, n_odors = normed.shape
# Draw i.i.d. uniform entries and rescale each column to unit Euclidean length.
uniform_draws = rgen.random(size=(n_dims, n_odors))
uniform_col_norms = np.sqrt((uniform_draws**2).sum(axis=0, keepdims=True))
random_uniform_nn_vectors = uniform_draws / uniform_col_norms
# Dot products between all pairs of columns; keep the strict upper triangle
# so each pair of distinct vectors is counted once.
dot_products_uniform = random_uniform_nn_vectors.T @ random_uniform_nn_vectors
uniform_rows, uniform_cols = np.triu_indices(n=n_odors, k=1)
dot_products_uniform_stats = dot_products_uniform[uniform_rows, uniform_cols]

In [None]:
# Draw i.i.d. exponential entries (default scale) with the same shape as the
# data matrix, and rescale each column to unit Euclidean length.
expo_draws = rgen.exponential(size=(n_dims, n_odors))
expo_col_norms = np.sqrt((expo_draws**2).sum(axis=0, keepdims=True))
random_expo_nn_vectors = expo_draws / expo_col_norms
# Dot products between all pairs of columns; keep the strict upper triangle
# so each pair of distinct vectors is counted once.
dot_products_expo = random_expo_nn_vectors.T @ random_expo_nn_vectors
expo_rows, expo_cols = np.triu_indices(n=n_odors, k=1)
dot_products_expo_stats = dot_products_expo[expo_rows, expo_cols]

In [None]:
# Log-normal: depends a lot on the sigma parameter. 
# Small sigma: all vectors look alike, dot products are close to 1
# Large sigma: each vector has a few large components from the long tail, dot products are close to 0
#random_lognorm_nn_vectors = rgen.lognormal(sigma=1.0, size=(n_dims, n_odors))
#random_lognorm_nn_vectors /= np.sqrt(np.sum(random_lognorm_nn_vectors**2, axis=0, keepdims=True))
#dot_products_lognorm = random_lognorm_nn_vectors.T.dot(random_lognorm_nn_vectors)
#dot_products_lognorm_stats = dot_products_lognorm[np.triu_indices(n=n_odors, k=1)]

In [None]:
# Overlay the dot-product histograms of the data and of the two random
# ensembles on one axis. The plotting order fixes the default colour cycle.
fig, ax = plt.subplots()
labelled_samples = (
    (dot_products_stats, "Data"),
    (dot_products_uniform_stats, "Uniform"),
    (dot_products_expo_stats, "Exponential"),
    #(dot_products_lognorm_stats, "Log-normal"),
)
for hist_sample, hist_label in labelled_samples:
    ax.hist(hist_sample, bins="fd", label=hist_label, alpha=0.7)
ax.set_xlabel("Dot product")
ax.set_ylabel("Count")
ax.legend()
plt.show()
plt.close()

# Distance between background and new odor 
Form backgrounds at random by choosing $n_B = 6$ odors, and compute the distance of new odors to that background (i.e. the length of the orthogonal component of the new odor). 

In [None]:
# Simulation sizes.
n_backs = 250                 # number of random backgrounds to draw
n_new = 40                    # number of new odors tested against each background
n_b = 6                       # number of odors forming one background
n_r = unitnormed.shape[0]     # number of rows (dimensions) of the odor vectors

# One row per background, one column per new odor.
all_distances = np.zeros((n_backs, n_new))
all_distances_rand = np.zeros((n_backs, n_new))

for i in range(n_backs):
    # Sample background and new odors in a single draw without replacement,
    # so that no odor appears both in the background and among the new odors.
    all_choices = rgen.choice(unitnormed.shape[1], size=n_b + n_new, replace=False)
    back_idx = all_choices[:n_b]
    new_idx = all_choices[n_b:]
    back_vecs = unitnormed.iloc[:, back_idx]
    projector = find_projector(back_vecs)
    x_new = unitnormed.iloc[:, new_idx].values

    # Synthetic counterpart: exponential random vectors, each column divided
    # by its l2_norm, used as background and as new odors.
    back_vecs_rand = rgen.exponential(size=[n_r, n_b])
    back_vecs_rand = back_vecs_rand / l2_norm(back_vecs_rand, axis=0)
    projector_rand = find_projector(back_vecs_rand)
    x_new_rand = rgen.exponential(size=[n_r, n_new])
    x_new_rand = x_new_rand / l2_norm(x_new_rand, axis=0)

    for j in range(n_new):
        # Distance = norm of what remains of the new odor after subtracting
        # its component parallel to the background.
        x_j = x_new[:, j]
        x_par = find_parallel_component(x_j, back_vecs, projector)
        all_distances[i, j] = l2_norm(x_j - x_par, axis=0)

        x_j_rand = x_new_rand[:, j]
        x_par_rand = find_parallel_component(x_j_rand, back_vecs_rand, projector_rand)
        all_distances_rand[i, j] = l2_norm(x_j_rand - x_par_rand, axis=0)

In [None]:
# Compare the distribution of background-to-new-odor distances for the data
# and for the exponential random vectors, both normalized as densities.
fig, ax = plt.subplots()
shared_hist_kwargs = dict(density=True, bins="doane")
ax.hist(all_distances.ravel(), label="Data", **shared_hist_kwargs)
ax.hist(all_distances_rand.ravel(), color="green", label="Exponential", **shared_hist_kwargs)
ax.set_xlabel("Distance background-new odor")
ax.set_ylabel("Probability density")
ax.legend()
plt.show()
plt.close()