# Information content of IBCM vs PCA neurons
Simulate a background with, say, $n_B = 3$ odors, and compute the mutual information (MI) between odor concentrations and inhibitory neuron activities, $MI(\vec{\nu}, \vec{c})$. Select $n_B$ neurons: for IBCM, one specific to each odor, and for PCA, one per principal component (PC). Then test the impact of removing some neurons, including keeping only one neuron (which also allows a comparison with sklearn's implementation of MI estimation).

Using my own implementation of the Kraskov et al., 2004, binless MI estimator. 

## Imports

In [None]:
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import os, json
import sys
sys.path.insert(1, "../")

from modelfcts.ibcm import (
    integrate_inhib_ibcm_network_options,
    compute_mbars_cgammas_cbargammas
)
from modelfcts.ibcm_analytics import (
    fixedpoint_thirdmoment_exact, 
    ibcm_fixedpoint_w_thirdmoment, 
)
from modelfcts.biopca import (
    integrate_inhib_ifpsp_network_skip,
    build_lambda_matrix,
)
from modelfcts.checktools import (
    analyze_pca_learning
)
from modelfcts.backgrounds import (
    update_powerlaw_times_concs, 
    sample_ss_conc_powerlaw, 
    generate_odorant
)
from utils.statistics import seed_from_gen
from modelfcts.distribs import (
    powerlaw_cutoff_inverse_transform
)
from simulfcts.plotting import (
    plot_cbars_gamma_series, 
    plot_w_matrix, 
    plot_background_norm_inhibition, 
    plot_background_neurons_inhibition, 
    plot_pca_results, 
)
from simulfcts.analysis import compute_back_reduction_stats
from utils.metrics import l2_norm

from utils.continuous_mi_estimator import (
    kraskov_continuous_info, 
    kraskov_differential_entropy
)
from utils.random_matrices import random_orthogonal_mat

### Aesthetic parameters

In [None]:
# Optional dark theme, currently disabled
#plt.style.use(['dark_background'])
# Default figure size (width, height), in inches, for the plots below
plt.rcParams["figure.figsize"] = (4.5, 3.0)

In [None]:
# Keys of the models referenced in this notebook; the two dictionaries below
# are keyed by these names, in this order.
models = ["ibcm", "biopca", "avgsub", "ideal", "orthogonal", "none"]

# Display name of each model
model_nice_names = dict(zip(
    models,
    ["IBCM", "BioPCA", "Average", "Ideal", "Orthogonal", "None"],
))

# Color name assigned to each model
model_colors = dict(zip(
    models,
    [
        "xkcd:turquoise",
        "xkcd:orangey brown",
        "xkcd:navy blue",
        "xkcd:powder blue",
        "xkcd:pale rose",
        "grey",
    ],
))

### Initialization

In [None]:
# Initialize common simulation parameters
n_dimensions = 25  # Half the real number for faster simulations
n_components = 3  # Number of background odors

inhib_rates = [0.0001, 0.00002]  # alpha, beta  [0.00025, 0.00005]

# Simulation duration
duration = 600000.0  # long simulations need more samples
deltat = 1.0
n_chunks = 10
# Stride passed as skp= to the integration functions; time indices further
# down are divided by skp, so presumably one sample is saved every skp steps
skp = 10 * int(1.0 / deltat)

# Common model options
activ_function = "identity"  #"ReLU"

# Background process
update_fct = update_powerlaw_times_concs

# Choose randomly generated background vectors
# NOTE: rgen_meta is consumed sequentially below and in later cells, so the
# order of the calls that draw from it determines all the random values
rgen_meta = np.random.default_rng(seed=0x8896ce0154295ba29df7e93dc277af2d)
#rgen_meta = np.random.default_rng(seed=0x85dfce01542492a29df7e93dc277ad2d)
back_components = np.zeros([n_components, n_dimensions])
for i in range(n_components):
    back_components[i] = generate_odorant(n_dimensions, rgen_meta, lambda_in=0.1)
# Divide each odor vector (row) by the value l2_norm returns for it
back_components = back_components / l2_norm(back_components).reshape(-1, 1)

# Seed for background simulation, to make sure all models are the same
simul_seed = seed_from_gen(rgen_meta)

# Turbulent background parameters: same rates and constants for all odors
back_params = [
    np.asarray([1.0] * n_components),        # whiff_tmins
    np.asarray([500.] * n_components),       # whiff_tmaxs
    np.asarray([1.0] * n_components),        # blank_tmins
    np.asarray([800.0] * n_components),      # blank_tmaxs
    np.asarray([0.6] * n_components),        # c0s
    np.asarray([0.5] * n_components),        # alphas
]
# The odor vectors are appended last; back_params[:-1] are the scalar parameters
back_params.append(back_components)

# Initial values of background process variables (t, c for each variable)
init_concs = sample_ss_conc_powerlaw(*back_params[:-1], size=1, rgen=rgen_meta)
# Initial times are drawn using back_params[2:4], i.e. blank_tmins and blank_tmaxs
init_times = powerlaw_cutoff_inverse_transform(
                rgen_meta.random(size=n_components), *back_params[2:4])
# One row per odor: column 0 is the time, column 1 the concentration
tc_init = np.stack([init_times, init_concs.squeeze()], axis=1)

# Initial background vector: concentration-weighted sum of the odor vectors
init_bkvec = tc_init[:, 1].dot(back_components)
# nus are first in the list of initial background params
init_back_list = [tc_init, init_bkvec]

### Pairwise similarity between background odors
Determines how well-posed the PCA is and how easy it is for the IBCM model to disentangle odors


## IBCM habituation
### IBCM simulation

In [None]:
# IBCM model parameters
n_i_ibcm = 12  # Number of inhibitory neurons for IBCM case

# Model rates
learnrate_ibcm = 0.0008   #5e-5
tau_avg_ibcm = 1600  # 2000
coupling_eta_ibcm = 0.6/n_i_ibcm
ssat_ibcm = 50.0
k_c2bar_avg = 0.1
decay_relative_ibcm = 0.005
lambd_ibcm = 1.0
# Passed as a single positional list to the IBCM integration function,
# so the order of the entries matters
ibcm_rates = [
    learnrate_ibcm, 
    tau_avg_ibcm, 
    coupling_eta_ibcm, 
    lambd_ibcm,
    ssat_ibcm, 
    k_c2bar_avg,
    decay_relative_ibcm 
]
# Forwarded as keyword arguments (**ibcm_options) to the integration function
ibcm_options = {
    "activ_fct": activ_function, 
    "saturation": "tanh", 
    "variant": "law", 
    "decay": True
}

# Initial synaptic weights: zero-mean Gaussian noise (positive and negative
# values) with standard deviation 0.3*lambd_ibcm, drawn from rgen_meta
init_synapses_ibcm = 0.3*rgen_meta.standard_normal(size=[n_i_ibcm, n_dimensions])*lambd_ibcm

In [None]:
def recompute_theta_series(cbser, tau, dt):
    """Rebuild the running average of squared activities from a series.

    The first row is initialized to ``cbser[0]**2``; every following row is
    obtained from the previous one with the explicit update
    ``theta[k+1] = theta[k] + dt/tau * (cbser[k]**2 - theta[k])``.

    Args:
        cbser: array whose first axis indexes the successive steps of the
            recurrence (presumably time x neurons -- confirm with callers).
        tau: averaging time scale dividing ``dt`` in the update.
        dt: step size of the update.

    Returns:
        Float array of shape ``(cbser.shape[0], cbser.shape[1])``.
    """
    n_steps, n_units = cbser.shape[0], cbser.shape[1]
    theta = np.zeros([n_steps, n_units])
    theta[0] = cbser[0]**2
    for nxt in range(1, n_steps):
        prev = nxt - 1
        squared = cbser[prev]*cbser[prev]
        theta[nxt] = theta[prev] + dt/tau*(squared - theta[prev])
    return theta

In [None]:
# Run the IBCM simulation, using the shared background seed (simul_seed)
# and the sampling stride skp defined in the initialization cell
sim_results = integrate_inhib_ibcm_network_options(
            init_synapses_ibcm, update_fct, init_back_list, 
            ibcm_rates, inhib_rates, back_params, duration, 
            deltat, seed=simul_seed, noisetype="uniform",  
            skp=skp, **ibcm_options
)

# Unpack the eight returned series; the names follow the order of the
# returned sequence
[tser_ibcm,
 nuser_ibcm,
 bkvecser_ibcm,
 mser_ibcm,
 cbarser_ibcm,
 thetaser_ibcm,
 wser_ibcm,
 yser_ibcm] = sim_results

### IBCM habituation analysis

In [None]:
# Steady-state window: skip the first 5/6 of the simulation, expressed as an
# index into the saved series
transient = int(5/6*duration / deltat) // skp

# Dot products \bar{c}_{\gamma} = \bar{\vec{m}} \cdot \vec{x}_{\gamma}
mbarser, c_gammas, cbars_gamma = compute_mbars_cgammas_cbargammas(
    mser_ibcm, coupling_eta_ibcm, back_components
)
sums_cbars_gamma = np.sum(cbars_gamma, axis=2)
sums_cbars_gamma2 = np.sum(cbars_gamma*cbars_gamma, axis=2)

# Exact analytical prediction needs the moments of nu, which are easiest to
# estimate numerically from the simulated concentrations.
conc_ser = nuser_ibcm[:, :, 1]
# Odors are all iid, so the moments are pooled over all odors
mean_conc = np.mean(conc_ser)
sigma2_conc = np.var(conc_ser)
thirdmom_conc = np.mean((conc_ser - mean_conc)**3)
moments_conc = [mean_conc, sigma2_conc, thirdmom_conc]

# Analytical fixed point: keep the first two returned values
res = fixedpoint_thirdmoment_exact(moments_conc, 1, n_components-1, lambd=lambd_ibcm)
cs_cn = res[:2]
c_specif, c_nonspecif = cs_cn

# Count how many steady-state dot products lie on each side of the split
# value cbar = 2.0 * lambd_ibcm.
split_val = 2.0 * lambd_ibcm
cbars_gamma_mean = np.mean(cbars_gamma[transient:], axis=0)
n_above = int(np.count_nonzero(cbars_gamma_mean > split_val))
n_below = int(np.count_nonzero(cbars_gamma_mean <= split_val))
cgammas_bar_counts = {"above": n_above, "below": n_below}
print(cgammas_bar_counts)

# For each neuron, the odor with the largest steady-state mean dot product
specif_gammas = np.argmax(cbars_gamma_mean, axis=1)
print(specif_gammas)

# Analytical W
analytical_w = ibcm_fixedpoint_w_thirdmoment(
    inhib_rates, moments_conc, back_components, cs_cn, specif_gammas
)

In [None]:
# One grey shade per IBCM neuron
neurons_cmap = sns.color_palette("Greys", n_colors=n_i_ibcm)

# Plot the theta series of every neuron against time in thousands of steps
fig, ax = plt.subplots()
for i, shade in enumerate(neurons_cmap):
    ax.plot(tser_ibcm/1000, thetaser_ibcm[:, i], lw=0.5, color=shade)
ax.set_xlabel("Time (x1000 steps)")
ax.set_ylabel(r"$\bar{\Theta} = \bar{c}^2$ moving average")
plt.show()
plt.close()

In [None]:
fig, ax, _ = plot_cbars_gamma_series(
    tser_ibcm, cbars_gamma, skp=20, transient=320000 // skp
)

# Overlay the exact analytical fixed-point values as dashed reference lines
#ax.set_xlim([350, 360])
analytical_levels = (
    (c_specif, r"Analytical $\bar{c}_{\gamma=\mathrm{specific}}$"),
    (c_nonspecif, r"Analytical $\bar{c}_{\gamma=\mathrm{non}}$"),
)
for level, level_label in analytical_levels:
    ax.axhline(level, ls="--", color="grey", label=level_label)
fig.tight_layout()
# Legend placed outside the axes, anchored to their upper right corner
leg = ax.legend(loc="upper left", bbox_to_anchor=(1., 1.))

#fig.savefig("figures/powerlaw/cbargammas_series_turbulent_background_example.pdf", 
#            transparent=True, bbox_inches="tight", bbox_extra_artists=(leg,))
plt.show()
plt.close()

In [None]:
# Correlation between nu's and c's, to see if some neurons are specific to odors.
# If a neuron's response correlates with a single concentration,
# that neuron is specific to that odor.
# Both series are centered with their means over the steady-state window
# (samples from index transient onwards).
cbarser_norm_centered = cbarser_ibcm - np.mean(cbarser_ibcm[transient:], axis=0)
conc_ser_centered = (nuser_ibcm[:, :, 1] 
                     - np.mean(nuser_ibcm[transient:, :, 1], axis=0))
# Time-averaged product for every (neuron, odor) pair: rows index neurons,
# columns index odors
correl_c_nu = np.mean(cbarser_norm_centered[transient:, :, None] 
                      * conc_ser_centered[transient:, None, :], axis=0)

# Show the transposed matrix so that neurons run along x and components along y
fig, ax = plt.subplots()
img = ax.imshow(correl_c_nu.T)
ax.set(ylabel=r"Component $\gamma$", xlabel=r"Neuron $i$")
fig.colorbar(img, label=r"$\langle (\bar{c}^i - \langle \bar{c}^i \rangle)"
             r"(\nu_{\gamma} - \langle \nu_{\gamma} \rangle) \rangle$", 
            location="top")
fig.tight_layout()
plt.show()
plt.close()

# Check if each component has at least one neuron: count the neurons whose
# mean dot product over the last 2000 saved samples exceeds 1.5*split_val
for comp in range(n_components):
    print("Number of neurons specific to component {}: {}".format(
            comp, np.sum(np.mean(cbars_gamma[-2000:, :, comp], axis=0) > split_val*1.5)))

In [None]:
# Plot the IBCM background and output series; the plotting function also
# returns the two norm series used for the statistics below
fig, ax, bknorm_ser, ynorm_ser = plot_background_norm_inhibition(
                                tser_ibcm, bkvecser_ibcm, yser_ibcm, skp=1)

# Compute noise reduction factor, annotate
# NOTE: this reassigns the notebook-level transient used by earlier cells
transient = 100000 // skp
norm_stats = compute_back_reduction_stats(bknorm_ser, ynorm_ser, trans=transient)

print("Mean activity norm reduced to "
      + "{:.1f} % of input".format(norm_stats['avg_reduction'] * 100))
print("Standard deviation of activity norm reduced to "
      + "{:.1f} % of input".format(norm_stats['std_reduction'] * 100))
# Write the standard deviation reduction in the top right corner of the axes
ax.annotate("St. dev. reduced to {:.1f} %".format(norm_stats['std_reduction'] * 100), 
           xy=(0.98, 0.98), xycoords="axes fraction", ha="right", va="top")

ax.legend(loc="center right", bbox_to_anchor=(1.0, 0.8))
fig.tight_layout()
plt.show()
plt.close()

In [None]:
# Plot the IBCM W series (wser_ibcm), with skp=100 passed to the plotting helper
fig, axes = plot_w_matrix(tser_ibcm, wser_ibcm, skp=100)
fig.tight_layout()
plt.show()
plt.close()

## BioPCA simulation
### BioPCA habituation simulation

Also try to choose a $\Lambda$ which creates blow-ups in $W$. 

Note that in the PCA model, as in IBCM, it is entirely $M$ which is scaled by $\Lambda$. The matrix $L' = L^{-1}$, which is returned by my integration function, has the PCA's principal values on its diagonal, irrespective of $\Lambda$. Meanwhile, $M$ is scaled with $\Lambda$ and $L'$, so $\Lambda^{-1} L'^{-1}M = U$, the matrix of orthonormal eigenvectors from the PCA (vectors are the rows of $U$), while the projector $LM$ still has scale $\Lambda_i$ for the eigenvector in row $i$.

In BioPCA, $\vec{\overline{c}} = LM \vec{x}$, so it also has scale $\Lambda$ in it. 

In [None]:
# BioPCA model parameters
n_i_pca = n_components*2  # Number of inhibitory neurons for BioPCA case

# Model rates
learnrate_pca = 1e-4  # Learning rate of M
# Choose Lambda diagonal matrix as advised in Minden et al., 2018
# but scale it up to counteract W regularization
lambda_range_pca = 0.5
lambda_max_pca = 12.0
# Learning rate of L, relative to learnrate. Adjusted to Lambda in the integration function
rel_lrate_pca = 2.0  #  / lambda_max_pca**2 
lambda_mat_diag = build_lambda_matrix(lambda_max_pca, lambda_range_pca, n_i_pca)

# Rate for the input average, set equal to the learning rate of M
xavg_rate_pca = learnrate_pca
# Forwarded as keyword arguments (**pca_options) to the integration function
pca_options = {
    "activ_fct": activ_function, 
    "remove_lambda": False, 
    "remove_mean": True, 
    "w_norms": (2, 2)
}
# Passed as a single positional list, so the order of the entries matters
biopca_rates = [learnrate_pca, rel_lrate_pca, lambda_max_pca, lambda_range_pca, xavg_rate_pca]


# Initial synaptic weights: zero-mean Gaussian noise (positive and negative
# values), drawn from a dedicated generator so rgen_meta is left untouched
rgen_pca = np.random.default_rng(seed=0x8b6664612cfeda4a121436fcfbbca449)
# NOTE(review): init_synapses_pca is not passed to the simulation call in the
# next cell (only ml_inits_pca is); it still advances rgen_pca before M is drawn
init_synapses_pca = rgen_pca.standard_normal(size=[n_i_pca, n_dimensions]) / np.sqrt(n_i_pca)
init_mmat_pca = rgen_pca.standard_normal(size=[n_i_pca, n_dimensions]) / np.sqrt(n_dimensions)
init_lmat_pca = np.eye(n_i_pca, n_i_pca)  # Supposed to be near-identity, start as identity
ml_inits_pca = [init_mmat_pca, init_lmat_pca]

In [None]:
# Run the BioPCA simulation with the same background seed (simul_seed),
# background parameters and stride skp as the IBCM simulation
sim_results = integrate_inhib_ifpsp_network_skip(
                ml_inits_pca, update_fct, init_back_list, biopca_rates, 
                inhib_rates, back_params, duration, deltat, 
                seed=simul_seed, noisetype="uniform", skp=skp, **pca_options)
# Unpack the nine returned series; the names follow the order of the
# returned sequence
(tser_pca, 
 nuser_pca, 
 bkvecser_pca, 
 mser_pca, 
 lser_pca, 
 xser_pca, 
 cbarser_pca, 
 wser_pca, 
 yser_pca) = sim_results

### BioPCA simulation analysis

In [None]:
# Analyze the BioPCA learning from the saved background, M and L series;
# demean follows the remove_mean option used in the simulation
res = analyze_pca_learning(bkvecser_pca, mser_pca, lser_pca, 
                           lambda_mat_diag, demean=pca_options["remove_mean"])
# Unpack the five returned quantities, in the order they are returned
true_pca, learnt_pca, fser, off_diag_l_avg_abs, align_error_ser = res

In [None]:
# Plot the PCA analysis results against time in thousands of steps
fig, axes = plot_pca_results(tser_pca/1000, true_pca, learnt_pca, align_error_ser, 
                             off_diag_l_avg_abs, skp=20)
axes[-1].set_xlabel("Time (x1000 steps)")
# Keep the current figure width and set the height to 3*2.5 inches
fig.set_size_inches(fig.get_size_inches()[0], 3*2.5)
plt.show()
plt.close()

In [None]:
# Plot the BioPCA background and output series; the plotting function also
# returns the two norm series used for the statistics below
# NOTE: bknorm_ser, ynorm_ser and norm_stats overwrite the IBCM values
fig, ax, bknorm_ser, ynorm_ser = plot_background_norm_inhibition(
                                tser_pca, bkvecser_pca, yser_pca, skp=10)

# Compute noise reduction factor, annotate
transient = 100000 // skp
norm_stats = compute_back_reduction_stats(bknorm_ser, ynorm_ser, trans=transient)

print("Mean activity norm reduced to "
      + "{:.1f} % of input".format(norm_stats['avg_reduction'] * 100))
print("Standard deviation of activity norm reduced to "
      + "{:.1f} % of input".format(norm_stats['std_reduction'] * 100))
# Write the standard deviation reduction in the top right corner of the axes
ax.annotate("St. dev. reduced to {:.1f} %".format(norm_stats['std_reduction'] * 100), 
           xy=(0.98, 0.98), xycoords="axes fraction", ha="right", va="top")

ax.legend(loc="center right", bbox_to_anchor=(1.0, 0.8))
fig.tight_layout()
plt.show()
plt.close()

In [None]:
# Plot the BioPCA background and output series with the neuron-level helper;
# the third returned value is not used
fig, axes, _ = plot_background_neurons_inhibition(tser_pca, bkvecser_pca, yser_pca, skp=10)
plt.show()
plt.close()

In [None]:
# Plot the BioPCA W series (wser_pca), with skp=10 passed to the plotting helper
fig, axes = plot_w_matrix(tser_pca, wser_pca, skp=10)
plt.show()
plt.close()

## Compare to constant random orthogonal projections
Since the projection matrix $M$, as an orthogonal rotation matrix, would be invertible and constant, no information should be lost about the concentration. This should hence give perfect MI and thus tell us the optimum that could be achieved, which we can compare to PCA and IBCM. 

But of course, in reality, the olfactory system only has access to $\vec{x}$ and not to the concentrations themselves, so it cannot start with constant weights in the right subspace; this is why learning models are necessary.

In [None]:
# Constant random matrix from random_orthogonal_mat, drawn with rgen_meta
# (this advances the shared generator)
mmat = random_orthogonal_mat(n_components, rng=rgen_meta)
# Project the concentrations of the IBCM run (index 1 of the last axis of
# nuser_ibcm) with this matrix: one projection per column of c_projs
c_projs = nuser_ibcm[:, :, 1].dot(mmat.T)

## MI estimation for both models
Select one neuron specific to each odor for IBCM; select the first $n_B$ PCA neurons for BioPCA. 

Don't forget to whiten concentrations and $\bar{c}$s so they have comparable scales. 

In [None]:
# Steady-state window for the MI estimates: skip the first 200000 // skp
# entries of the saved series
transient = 200000 // skp


def _standardize(series):
    """Return (z-scored series, means, stds), with statistics over axis 0."""
    means = np.mean(series, axis=0)
    stds = np.std(series, axis=0)
    return (series - means) / stds, means, stds


# IBCM selected neurons: for each odor i, the first neuron whose preferred
# odor (specif_gammas) is i. np.argmax on a boolean mask returns the index of
# the first True entry; np.argmin would return the first neuron that is NOT
# specific to odor i.
specif_masks = [specif_gammas == i for i in range(n_components)]
for i, mask in enumerate(specif_masks):
    if not np.any(mask):
        # np.argmax of an all-False mask is 0: warn instead of failing silently
        print("Warning: no IBCM neuron is specific to odor {}; "
              "neuron 0 is used instead".format(i))
first_specif_neurons = [np.argmax(mask) for mask in specif_masks]
cser_ibcm_ss = cbarser_ibcm[transient:, first_specif_neurons]
cser_ibcm_normed, c_ibcm_means, c_ibcm_stds = _standardize(cser_ibcm_ss)

# Concentrations of the IBCM run
concser_ibcm_ss = nuser_ibcm[transient:, :, 1]
concser_ibcm_normed, conc_ibcm_means, conc_ibcm_stds = _standardize(concser_ibcm_ss)

# BioPCA selected neurons: the first n_components neurons
cser_biopca_ss = cbarser_pca[transient:, :n_components]
cser_biopca_normed, c_biopca_means, c_biopca_stds = _standardize(cser_biopca_ss)

# Concentrations of the BioPCA run
concser_biopca_ss = nuser_pca[transient:, :, 1]
concser_biopca_normed, conc_biopca_means, conc_biopca_stds = _standardize(concser_biopca_ss)

# Constant projection matrix
c_projs_ss = c_projs[transient:]
c_projs_normed, c_projs_mean, c_projs_std = _standardize(c_projs_ss)

In [None]:
# Options forwarded as keyword arguments to every estimator call below
estim_args = {
    "k": 10, 
    "version": 1, 
    "base": 2.0
}
# NOTE(review): kparam and ver_choice are not used in this cell; the calls
# below take k and version from estim_args (k = 10, not 20). Confirm which
# value is intended.
kparam = 20
ver_choice = 1
# MI between the standardized neuron series and the standardized concentrations
print("Computing IBCM MI...")
mi_ibcm = kraskov_continuous_info(cser_ibcm_normed, concser_ibcm_normed, **estim_args)
print("Computing BioPCA MI...")
mi_biopca = kraskov_continuous_info(cser_biopca_normed, concser_biopca_normed, **estim_args)
# The constant projections were computed from the IBCM run's concentrations,
# so they are paired with concser_ibcm_normed
print("Computing MI with constant projections...")
mi_projs = kraskov_continuous_info(c_projs_normed, concser_ibcm_normed, **estim_args)

In [None]:
# Compare to entropy estimate of concentrations, too
print("Computing IBCM concentration entropy...")
conc_entropy_ibcm = kraskov_differential_entropy(concser_ibcm_normed, **estim_args)
print("Computing BioPCA concentration entropy...")
conc_entropy_biopca = kraskov_differential_entropy(concser_biopca_normed, **estim_args)

In [None]:
# Report each model's MI next to the MI of the constant projections, which
# serves as the reference value
for tag, mi_value in (("IBCM MI:", mi_ibcm), ("BioPCA MI:", mi_biopca)):
    print(tag, mi_value, "bits, compared to optimal MI:", mi_projs, "bits")
if mi_biopca > mi_ibcm:
    print("Well, that's disappointing, but expected, since M fluctuates more in IBCM")