In [None]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import sys
sys_byteorder = sys.byteorder

from sklearn.decomposition import FastICA, PCA

if ".." not in sys.path:
    sys.path.insert(1, "..")
from utils.statistics import seed_from_gen
from modelfcts.distribs import truncexp1_inverse_transform, truncexp1_density
from modelfcts.backgrounds import (update_tc_odor, logof10, sample_background_powerlaw,
                        sample_ss_conc_powerlaw, decompose_nonorthogonal_basis, update_alternating_inputs)
from modelfcts.checktools import check_conc_samples_powerlaw_exp1

In [None]:
# Problem size
n_dimensions = 4  # Number of ORNs = number of recorded mixtures
n_components = 3  # Number of independent components

# Random number generator shared by the stochastic steps of this notebook
rgen_meta = np.random.default_rng(seed=0xcd905cb60bb4152747d486201735b136)

# Whiff/blank statistics, identical for every background odor ("symmetric" case).
# Each parameter vector becomes one column, so the array is indexed
# [component, parameter] and back_params_sym[j] holds the six parameters of odor j.
back_params_sym = np.stack(
    [
        np.full(n_components, 1.0),      # whiff_tmins
        np.full(n_components, 100.0),    # whiff_tmaxs
        np.full(n_components, 2.0),      # blank_tmins
        np.full(n_components, 200.0),    # blank_tmaxs
        np.full(n_components, 0.3),      # c0s
        np.full(n_components, 0.5),      # alphas
    ],
    axis=1,
)

# Background odor vectors, stored one per ROW (indexed [component, ORN]):
# a 0.2 baseline on every ORN, 0.8 on one distinct ORN per odor, then each
# row is rescaled to unit L2 norm.
back_components_sym = np.full([n_components, n_dimensions], 0.2)
#back_components_sym = rgen_meta.exponential(size=[n_components, n_dimensions])
diag_idx = np.arange(n_components)
back_components_sym[diag_idx, diag_idx] = 0.8
back_components_sym /= np.sqrt(np.sum(back_components_sym**2, axis=1, keepdims=True))
# Alias (same array object), used to mix concentrations into ORN activity
mixing_matrix = back_components_sym

In [None]:
# Display the mixing matrix: each row is one unit-norm background odor vector
mixing_matrix

In [None]:
# Generate time series of odor concentrations
n_steps = 100000
deltat = 1.0

# Initial conditions: one row per odor. Column 1 is the concentration that gets
# recorded in c_series; column 0 is presumably the time left in the current
# whiff/blank -- confirm against update_tc_odor.
tc_current = np.asarray([[2.0, 0.0], [1.0, 1.0], [0.0, 0.0]])

# c_series is indexed [time, odor]; row 0 holds the initial concentrations and
# row i+1 the concentrations after the i-th update, hence n_steps + 1 rows.
c_series = np.zeros([n_steps+1, n_components])
c_series[0] = tc_current[:, 1]

# Draw all uniform random numbers up front: two per odor per time step
unif_noises = rgen_meta.uniform(size=[n_steps, n_components, 2])
for i in range(n_steps):
    for j in range(n_components):
        tc_current[j] = update_tc_odor(tc_current[j], deltat, unif_noises[i, j], *back_params_sym[j])
    # Store the state reached AFTER step i at index i+1. Writing to index i
    # would overwrite the initial condition and leave the last row at zero.
    c_series[i+1] = tc_current[:, 1]

In [None]:
# Check that the generated time series have the right distribution.
# The project helper receives the concentrations transposed (one row per odor)
# followed by the six per-odor parameter vectors, and returns the figure it drew.
fig, axes = check_conc_samples_powerlaw_exp1(c_series.T, *(back_params_sym.T))
plt.show()
plt.close()

In [None]:
# Plot a 2000-step excerpt of the concentration time series, one panel per odor
fig, axes = plt.subplots(n_components, sharex=True)
tslice = slice(80000, 82000, 1)
trange = range(tslice.start, tslice.stop, tslice.step)
for ax, conc_trace in zip(axes, c_series[tslice].T):
    ax.plot(trange, conc_trace, color="k")
    ax.set(xlabel="Time", ylabel=r"Concentration $\nu$")
plt.show()
plt.close()

## Independent component analysis of the mixtures at the ORNs


In [None]:
# ORN activity: every time sample is the concentration-weighted sum of the
# background odor vectors. Indexed [time, ORN].
mixtures = c_series @ mixing_matrix

In [None]:
# Fit FastICA on the recorded mixtures. Its random state is drawn from the
# notebook-level generator so the fit is reproducible.
ica_seed = seed_from_gen(rgen_meta, nbits=32)
ica_model = FastICA(
    n_components=n_components,
    algorithm="parallel",
    whiten="unit-variance",
    random_state=ica_seed,
)
fast_ica = ica_model.fit(mixtures)

In [None]:
# Apply the learnt unmixing matrix (indexed [source, ORN]) to the transposed
# mixtures (indexed [ORN, time]) to recover the sources, indexed [source, time].
unmix_results = fast_ica.components_ @ mixtures.T
# Show the unmixing matrix itself as the cell output
fast_ica.components_

In [None]:
# Recovered components: normalize and compensate unmixing results.
# ICA only determines each source up to a scale and a sign, so each recovered
# odor vector is rescaled to unit L2 norm (like the true odor vectors) and the
# inverse factor is applied to the matching source time series.

# Recompute the raw unmixing results here instead of rescaling the array from
# the previous cell in place: the cell can then be re-run without compounding
# the scale factors and sign flips.
unmix_results = fast_ica.components_.dot(mixtures.T)  # [source, time]
recovered_components = fast_ica.mixing_.T.copy()  # [source, ORN]

comp_norms = np.sqrt(np.sum(recovered_components**2, axis=1))
# Orient each component so that its entries sum to a non-negative value.
# This agrees with the former "all entries negative" rule whenever a row has a
# single sign, and also handles rows where estimation noise pushed a few small
# entries across zero.
comp_signs = np.where(np.sum(recovered_components, axis=1) < 0, -1.0, 1.0)

recovered_components *= (comp_signs / comp_norms)[:, np.newaxis]
unmix_results *= (comp_signs * comp_norms)[:, np.newaxis]

print(recovered_components)
print(back_components_sym)

In [None]:
def find_permutation(shuffled, reference):
    """ Find permutation indices that reorder shuffled into reference.
    I.e. find the indices where each row of reference is in shuffled,
    so calling shuffled[indices] puts it back in order.

    The rows are matched one-to-one by minimizing the total squared
    difference between each reference row and the shuffled row assigned
    to it (optimal assignment). Unlike an independent nearest-row search
    per reference row, this always returns a valid permutation, even when
    the reconstruction is not very accurate and two reference rows share
    the same closest shuffled row.

    Args:
        shuffled (np.ndarray): array whose first axis indexes the items
            to reorder; remaining axes (if any) hold each item's entries.
        reference (np.ndarray): array with the items in the desired order.
            Only its first shuffled.shape[0] items are used.

    Returns:
        indices (np.ndarray of int): 1d array of length shuffled.shape[0],
            such that shuffled[indices] best matches reference.

    Raises:
        ValueError: if reference has fewer items than shuffled.
    """
    # Local import: the notebook only imports the top-level scipy package
    from scipy.optimize import linear_sum_assignment

    shuffled = np.asarray(shuffled)
    reference = np.asarray(reference)
    n_items = shuffled.shape[0]
    if reference.shape[0] < n_items:
        raise ValueError("reference must have at least as many rows as shuffled")
    reference = reference[:n_items]

    # cost[i, j] = squared distance between reference item i and shuffled item j
    diffs = shuffled[np.newaxis, ...] - reference[:, np.newaxis, ...]
    sum_axes = tuple(range(2, diffs.ndim))
    cost = np.sum(diffs**2, axis=sum_axes)

    # Rows of the cost matrix come back sorted, so the returned column indices
    # are directly "position in shuffled of reference item i".
    _, indices = linear_sum_assignment(cost)
    return indices.astype(int)

In [None]:
# Match each true background odor to the recovered component closest to it,
# then put the recovered vectors and their source time series in that order.
permut_indices = find_permutation(recovered_components, back_components_sym)
recovered_components_ordered = np.take(recovered_components, permut_indices, axis=0)
unmix_results_ordered = np.take(unmix_results, permut_indices, axis=0)
print(recovered_components_ordered)

In [None]:
# OK, so ICA seems to recover the components and the concentrations over time, up to a sign.
# Can I find out how it recovers the components (fast_ica.mixing_ matrix) and apply
# the same strategy to IBCM demixing?
# Overlay, on a 2000-step excerpt, the ICA-recovered sources (purple) and the
# true concentrations (dotted grey), one panel per odor.
fig, axes = plt.subplots(n_components, sharex=True)
tslice = slice(80000, 82000, 1)
trange = range(tslice.start, tslice.stop, tslice.step)
for i, ax in enumerate(axes):
    recovered_trace = unmix_results_ordered[i, tslice]
    true_trace = c_series[tslice, i]
    ax.plot(trange, recovered_trace, color="xkcd:purple")
    ax.plot(trange, true_trace, color="xkcd:grey", ls=":")
    ax.set(xlabel="Time", ylabel=r"Concentration $\nu$")
plt.show()
plt.close()

# ICA works extremely well to decompose the background
This is true for independent odors, probably not for odors with some correlation (but I'm not sure how to code that for turbulent environments anyways for the moment). 

Two questions:
- Can it work equally well online? In real-time, can it identify the current concentration of each source?
    - Answer: yes, just apply the demixing matrix learnt up to now. 
- What does it do when a new source is presented?
    - It tries to interpret it as part of the signal of the background. But the demixing matrix is not going to project into the orthogonal subspace: it only reconstructs a projection in the background subspace. So it should achieve what we want. 
    
## How to use this
Two options. 
 1. Figure out how, from IBCM, we can extract the mixing matrix A for the concentration-like IBCM activations c found from the IBCM network. 
 2. Use someone else's algorithm, BioNICA, to inhibit the olfactory background.

Ideally, compare the two, and also online PCA. Hopefully, IBCM can work best when starting to combine odors non-linearly and with some correlation between them. 

# Detection of a new odor

- Learn an unmixing matrix from a long time series of background. 
- Then, try to unmix new samples that combine the background with a new odor. Also project back to the mixed space. 
- Compare to a projection in the background subspace: they should agree. 
- This means that by subtracting the demixed-remixed background+new odor, we are subtracting only the component in the background subspace. This is what we wanted to achieve. 

In [None]:
# Generate mixtures of the existing background with a new odor.
# The new odor is the first background vector cyclically shifted by one ORN,
# which should give a vector that is not one of the background odors.
new_odor = np.roll(back_components_sym[0], shift=-1)
# Concentration scale: mean of the c0s column times mean of the alphas column
mean_c0 = np.mean(back_params_sym[:, -2])
mean_alpha = np.mean(back_params_sym[:, -1])
typical_conc = mean_c0 * mean_alpha

# Background-only samples, indexed [n_samples, n_orn]
background_samples, _ = sample_background_powerlaw(
    back_components_sym, *back_params_sym.T, size=1000, rgen=rgen_meta
)
# Add the new odor at 0.2 times the typical concentration to every sample
mix_samples = background_samples + 0.2 * new_odor[np.newaxis, :] * typical_conc

In [None]:
# Try demixing and projecting back with the ICA matrices
# Unmixing: [source, ORN] x [ORN, n_sample] -> indexed [source, n_sample]
demixed_concs = fast_ica.components_ @ mix_samples.T
# Remixing: [ORN, source] x [source, n_sample] -> indexed [ORN, n_sample]
recomposed_mixes = fast_ica.mixing_ @ demixed_concs

# Inhibit: subtract a fraction of the demixed-remixed signal from each sample
inhib_fraction = 5/6
inhibited_mix = mix_samples - inhib_fraction*recomposed_mixes.T
print(mix_samples)
print(inhibited_mix)
print(0.2*new_odor*typical_conc)
#print(recomposed_mixes)

# Check how much of the first background odor is left in the first inhibited
# sample. Only the orthogonal part is left when inhib_fraction = 1; with a
# smaller fraction some of the background component should remain.
print(inhibited_mix[0].dot(back_components_sym[0]))

In [None]:
def l2_norm(vecs):
    """ Euclidean (l2) norm of vectors laid out along the last axis of vecs.

    Args:
        vecs (np.ndarray): a single vector (1d), or an array of vectors of
            any dimension whose last axis indexes the elements of each vector.

    Returns:
        norms: for vecs of shape (K x L x ... x M x N), an array of norms
            of shape (K x L x ... x M); a scalar for a 1d vecs.
    """
    sum_of_squares = np.sum(vecs**2, axis=-1)
    return np.sqrt(sum_of_squares)

def l1_norm(vecs):
    r""" l1 norm, |x| = \sum_i |x_i|, of vectors stored along the last axis.

    Args:
        vecs (np.ndarray): a single vector (1d) or an array of vectors,
            where the last axis indexes elements of each vector.

    Returns:
        norms: sum of absolute values over the last axis, so the result
            has the shape of vecs without its last axis.
    """
    # Raw docstring above: "\s" is not a valid escape sequence in a plain string
    return np.sum(np.abs(vecs), axis=-1)

def linf_norm(vecs):
    """ Max norm, |x| = max_i(|x_i|), of vectors stored along the last axis.

    Returns an array with the shape of vecs minus its last axis.
    """
    magnitudes = np.abs(vecs)
    return np.max(magnitudes, axis=-1)

def cosine_dist(x, y):
    r""" Cosine distance, d(x, y) = 1 - (x \cdot y)/(|x| |y|), between every
    vector in x and every vector in y.

    Args:
        x (np.ndarray): a vector (1d) or an array of vectors stored along
            the last axis.
        y (np.ndarray): a vector (1d) or an array of vectors stored along
            the last axis, with the same last-axis length as x.

    Returns:
        dists: array of shape x.shape[:-1] + y.shape[:-1], whose entry
            [i..., j...] is the cosine distance between x[i...] and y[j...].
            For a 1d x this is simply y.shape[:-1] (a scalar if y is 1d too).
    """
    x, y = np.asarray(x), np.asarray(y)
    xnorm, ynorm = l2_norm(x), l2_norm(y)
    # Contract the last axis of both arrays, whatever their number of
    # dimensions; a plain .dot would contract the second-to-last axis of a
    # y with 3 or more dimensions.
    dot_products = np.tensordot(x, y, axes=([-1], [-1]))
    # The norms of x index the leading axes of dot_products: append singleton
    # axes so they broadcast against x's axes rather than y's.
    xnorm = np.reshape(xnorm, np.shape(xnorm) + (1,) * (y.ndim - 1))
    return 1.0 - dot_products / xnorm / ynorm

def distance_panel_target(mixes, target):
    """ Evaluate four distance metrics between a pure (target) new odor and
    a collection of mixtures (which can be without inhibition, with average
    inhibition, IBCM inhibition, etc.).

    The metrics are, in order along the last axis of the result:
    l2, l1, linf, cosine_dist.

    Args:
        mixes (np.ndarray): mixtures of odors to compare to the target;
            the last axis must have the length of target, the other
            axes are arbitrary.
        target (np.1darray): target odor vector, same length as the
            last axis of mixes.

    Returns:
        dist_panel (np.ndarray): shape of mixes, except the last axis,
            which has length 4 (one entry per distance metric).
    """
    # A 1d target broadcasts along the last axis of mixes, which indexes the
    # elements of each vector, so no axis needs to be added to it.
    residuals = target - mixes
    # The metric index is the last axis of the output
    dist_panel = np.zeros(list(mixes.shape[:-1]) + [4])
    dist_panel[..., 0] = l2_norm(residuals)
    dist_panel[..., 1] = l1_norm(residuals)
    dist_panel[..., 2] = linf_norm(residuals)
    dist_panel[..., 3] = cosine_dist(target, mixes)
    return dist_panel

In [None]:
# Statistics: distance of every sample to the new odor at the concentration
# it was added with, without inhibition and after ICA-based inhibition.
new_odor_target = 0.2 * typical_conc * new_odor
dist_pure_inhib_none = distance_panel_target(mix_samples, new_odor_target)
#dist_pure_inhib_avg = distance_panel_target(inhib_avg_samples, new_odor)
dist_pure_inhib_ica = distance_panel_target(inhibited_mix, new_odor_target)

# Median over samples (axis 0), leaving one value per distance metric
median_distances_none = np.median(dist_pure_inhib_none, axis=0)
#median_distances_avg = np.median(dist_pure_inhib_avg, axis=0)
median_distances_ica = np.median(dist_pure_inhib_ica, axis=0)

In [None]:
# Histogram of distance to pure odor, one panel per distance metric.
# Each panel overlays the mixtures without inhibition and with ICA inhibition;
# dashed vertical lines mark the medians.
fig, axes = plt.subplots(2, 2)
axes = axes.flatten()
clr_none = "xkcd:navy blue"
clr_ibcm = "xkcd:turquoise"
clr_ica = "xkcd:cherry"
clr_avg = "xkcd:orangey brown"
dist_names = [r"$L^2$ distance", r"$L^1$ distance", r"$L^{\infty}$ distance", "Cosine distance"]
# (distances, medians, legend label, color) for each condition, in drawing order.
# An "Average inhibition" entry (dist_pure_inhib_avg, median_distances_avg,
# clr_avg) can be added here once those samples are computed.
hist_conditions = [
    (dist_pure_inhib_none, median_distances_none, "No inhibition", clr_none),
    (dist_pure_inhib_ica, median_distances_ica, "ICA inhibition", clr_ica),
]
for i, ax in enumerate(axes):
    for cond_dists, cond_medians, cond_label, cond_clr in hist_conditions:
        ax.hist(cond_dists[:, i], label=cond_label, facecolor=cond_clr, alpha=0.6,
            edgecolor=cond_clr, density=True)
        ax.axvline(cond_medians[i], color=cond_clr, ls="--", lw=2.0)
    ax.set(xlabel="Distance to new odor", ylabel="Probability density", title=dist_names[i])
axes[0].legend()
fig.tight_layout()
plt.show()
plt.close()

## Conclusion
ICA, offline, works really well. It does exactly what I expected.
Offline, other algorithms would probably work in the same manner: it's easy to compute some basis for the background vector subspace, and then decompose the incoming odor on that subspace. PCA would do the same; any orthogonal basis of a set of samples would do the same, really. 
    
What isn't obvious is how to then decompose the incoming odor on the basis; we need the 'demixing' matrix. Offline, that's a matrix inversion; what biological algorithm can do that online? 

If I can find a way, even approximate, to find the basis (A matrix, to project back) and the demixing matrix (basically the inverse of A), with IBCM, this is a worthy finding. 

Then, I could compare to BioNICA in terms of performance. I'm sure IBCM isn't going to do great, because it is not very efficient when there are long temporal correlations, it tends to diverge, it takes a long time to converge, etc. But at least in principle, it would be a meaningful finding. 



# Compare to PCA
PCA probably does not learn the odor components, but I'm curious to see how it can be used anyways to inhibit a background. Test the offline case in this notebook. For an online version, could use IncrementalPCA from sklearn. 


#### Assuming column vectors
Take the PCs as "odor" vectors, A. Then, "concentrations" of those odors are obtained by decomposing an ORN vector on that basis, which is done by inverting the equation 
$$ \vec{x} = A \vec{c} \Rightarrow \vec{c} = A^+ \vec{x} $$

Then, the decomposition can be reassembled on the basis of PCs by taking the dot product with $A$ again; in one step, this means that a vector $\vec{x}$ can be projected to the vector subspace spanned by $A$ using the projection $P = AA^+$: $A^+$ decomposes into 'concentrations' and $A$ combines back the basis vectors. 

#### With row vectors
Matrices act from the right, so reverse the order of matrices. Projector is $A^+ A$, so the row vector corresponding to the projection of a column vector $\vec{x}$ is obtained by taking $\vec{x}^T A^+ A$. 

In [None]:
# Fit PCA on the background mixtures and build the matrices used to project
# onto the learnt subspace. Vectors are stored as rows here, so the matrices
# A and A^+ act from the right, e.g. conc.dot(A) assembles odors.
pca_model = PCA(n_components='mle')
pca_obj = pca_model.fit(mixtures)
# Mixing matrix A, indexed [component, orn]: each row is a basis vector
pca_basis = pca_obj.components_
# Demixing matrix: pseudo-inverse A^+ of A, indexed [orn, component]
pca_demixing = np.linalg.pinv(pca_basis)
# Projector A^+ A, acting from the right on row vectors; shape [orn, orn]
pca_projector = pca_demixing @ pca_basis

In [None]:
# Try demixing and projecting back with PCA
# Demixing: [n_sample, orn] x [orn, component] -> indexed [n_sample, component]
demixed_pca_concs = mix_samples @ pca_demixing
# Remixing: [n_sample, component] x [component, orn] -> indexed [n_sample, orn]
recomposed_pca_mixes = demixed_pca_concs @ pca_basis

# Inhibit: subtract a fraction of the demixed-remixed signal from each sample
inhibited_pca_mix = mix_samples - inhib_fraction*recomposed_pca_mixes
print(inhibited_pca_mix)
print(0.2*new_odor*typical_conc)
#print(recomposed_mixes)

# Check if only orthogonal part left: yes indeed if inhib_fraction=1 !
print(inhibited_pca_mix[0].dot(back_components_sym[0]))

In [None]:
# Statistics: distances of the PCA-inhibited samples to the new odor at the
# concentration it was added with, and their median over samples per metric.
pca_target = 0.2 * typical_conc * new_odor
dist_pure_inhib_pca = distance_panel_target(inhibited_pca_mix, pca_target)
median_distances_pca = np.median(dist_pure_inhib_pca, axis=0)

In [None]:
# Histogram of distance to pure odor, one panel per distance metric.
# Each panel overlays the mixtures without inhibition and with PCA inhibition;
# dashed vertical lines mark the medians.
fig, axes = plt.subplots(2, 2)
axes = axes.flatten()
clr_pca = "xkcd:green"
dist_names = [r"$L^2$ distance", r"$L^1$ distance", r"$L^{\infty}$ distance", "Cosine distance"]
for i, ax in enumerate(axes):
    ax.hist(dist_pure_inhib_none[:, i], label="No inhibition", facecolor=clr_none, alpha=0.6, 
        edgecolor=clr_none, density=True)
    ax.axvline(median_distances_none[i], color=clr_none, ls="--", lw=2.0)
    # These are the PCA-inhibited samples, so the legend entry must say PCA
    # (it used to read "ICA inhibition", copied from the ICA figure).
    ax.hist(dist_pure_inhib_pca[:, i], label="PCA inhibition", facecolor=clr_pca, alpha=0.6, 
        edgecolor=clr_pca, density=True) 
    ax.axvline(median_distances_pca[i], color=clr_pca, ls="--", lw=2.0)
    ax.set(xlabel="Distance to new odor", ylabel="Probability density", title=dist_names[i])
axes[0].legend()
fig.tight_layout()
plt.show()
plt.close()

## Conclusion on PCA
Both PCA and ICA give identical background inhibition in the learnt subspace. However, they learn different bases: PCA learns an orthogonal basis (principal components, obviously), while ICA can actually recover odor vectors and their concentrations, at least in the symmetric case and with independent concentrations, even though this basis is not orthogonal. Therefore, ICA is more interesting for its concrete relation to the problem studied, while PCA is mathematically simpler but more abstract. 

The big problem: both require computation of $A^+$ from $A$, or vice-versa, to demix or remix the concentrations into ORN activity. That's not a trivial operation to perform online with a neural network. 
But with the IBCM model, we would also need to do the same with the basis of synaptic weight vectors $\vec{m}$. We easily obtain concentrations by projecting $\vec{x}$ on the various $\vec{m}$, corresponding to rows of the matrix $W = A^+$, but we would need to invert to find the actual vector components to recombine in order to inhibit odors in ORN space. 

So, this means we need neurons computing the pseudo-inverse of the synaptic weights $\vec{m}$: not clear how. That's what I was trying with my $\vec{w}$ vectors (notational mishap: my $\vec{w}$ is really the $A$ matrix in Hyvarinen and Oja, not the $W$ matrix; their $W$ is my $M$, really. )