# Spatial Pooler autorecurrence test

Here I test how the SP reacts to recurrent input propagation.

In this test I take one $N \times N$ SP and, starting from a random input, repeatedly feed its output back as its input in order to find out:

- the conditions under which it is prone to converge to a static pattern or to pattern oscillations
- how to measure this property

In [None]:
import gym
import gym_minigrid as mg
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from time import sleep

%matplotlib inline

import random
import numpy as np
import sys

from htm.bindings.algorithms import SpatialPooler, Classifier
from htm.bindings.sdr import SDR, Metrics

from htm_rl.htm_plugins.temporal_memory import TemporalMemory
from htm_rl.common.int_sdr_encoder import IntSdrEncoder

In [None]:
from collections import deque, Counter

def count_similar(x, a, threshold=None):
    """Tell whether `x` is "almost the same" as at least one SDR in `a`.

    Despite the name, the result is a bool rather than a count; callers add
    it to a running sum, where it contributes 0 or 1.

    Args:
        x: set-like SDR (must support `&`), e.g. a frozenset of active indices.
        a: iterable of set-like SDRs to compare `x` against.
        threshold: minimal overlap (number of shared elements) for two SDRs
            to be considered similar. When omitted, the module-level
            `sim_threshold` is read at call time.

    Returns:
        True if the overlap of `x` with any element of `a` reaches the
        threshold, False otherwise (including when `a` is empty).
    """
    if threshold is None:
        threshold = sim_threshold
    return any(len(x & y) >= threshold for y in a)

def update_similar_score(sparse_sdr):
    """Push one SP output into the sliding-window statistics.

    Updates these module-level trackers in place:

    - `timeline`: bounded deque with the most recent outputs (the window)
    - `uniques`: Counter of how many times each output occurs in the window
    - `uniques_hist`: history of `len(uniques)` after each step
    - `similars`: bounded deque of per-step "similar output seen" flags
    - `similars_ma`: history of the running sum of the flags in `similars`

    Args:
        sparse_sdr: iterable of active bit indices of the new output.
    """
    sparse_sdr = frozenset(sparse_sdr)
    # Compare against the window *before* the new output joins it.
    similar_cnt = count_similar(sparse_sdr, timeline)

    uniques[sparse_sdr] += 1
    # `append` evicts the leftmost element only once the deque is full, so
    # only then may its count be released. Decrementing on every call would
    # repeatedly discount the first output during warm-up and leave a stale
    # negative entry in `uniques`, skewing `len(uniques)`.
    if timeline and len(timeline) == timeline.maxlen:
        leftmost = timeline[0]
        uniques[leftmost] -= 1
        if uniques[leftmost] == 0:
            del uniques[leftmost]
    timeline.append(sparse_sdr)
    uniques_hist.append(len(uniques))

    # Running sum over the `similars` window: drop the flag that falls out
    # of the window, add the new one.
    ma = similars_ma[-1]
    if len(similars) == similars.maxlen:
        ma -= similars.popleft()
    ma += similar_cnt
    similars.append(similar_cnt)
    similars_ma.append(ma)

# --- Experiment 1: SP with n = 32*32 inputs and columns, fed with its own output ---
n_ = 32  # side of the square layout
n = n_**2  # total number of bits; used for both SP input and column dimensions
act_threshold = 18  # target number of active bits (defines `sparsity` below)
sim_threshold = 17  # min overlap for two outputs to count as "almost the same"
sparsity = act_threshold / n
seed = 42
np.random.seed(seed)

sdr = SDR(n)
# Input and output have the same size `n`, so the output can be fed back as input.
# dutyCyclePeriod is sqrt(act_threshold) times 1/sparsity steps.
sp = SpatialPooler(
    inputDimensions=[n], columnDimensions=[n], globalInhibition=True, potentialPct=.4, localAreaDensity=sparsity,
    stimulusThreshold=1, synPermConnected=.4, synPermActiveInc=.1, synPermInactiveDec=.01, boostStrength=4.,
    dutyCyclePeriod=int(np.sqrt(act_threshold)/sparsity), minPctOverlapDutyCycle=sparsity/2,
    seed=seed, 
)
# Random starting pattern with the target sparsity.
sdr.randomize(sparsity=sparsity, seed=seed)
# Dense buffer of n zeros; not used further in this part of the script.
dense_sdr = np.zeros(n, dtype=np.uint8)

# Keep the starting pattern in `initial_sdr` and continue with a fresh SDR
# object initialised from the same active indices.
initial_sdr = sdr
sdr = SDR(n)
sdr.sparse = initial_sdr.sparse

t = n  # sliding window length T
print(f'T={t}')

# NOTE(review): presumably Metrics tracks `sdr` on every update with an
# averaging period of t*10 -- confirm against the htm.core documentation.
sdr_stats = Metrics(sdr, t*10)
hist_len = max(1000, t)
# Trackers updated by update_similar_score():
#   timeline / similars         -- windows bounded to t elements
#   similars_ma / uniques_hist  -- histories bounded to hist_len elements
#   uniques                     -- Counter of outputs
timeline, similars, similars_ma, uniques, uniques_hist = deque([], t), deque([], t), deque([0], hist_len), Counter(), deque([], hist_len)
# Warm-up: t*20 recurrent steps (same `sdr` object is input and output), no statistics collected.
for i in range(t*20):
    sp.compute(sdr, learn=True, output=sdr)
    
# Measured phase: hist_len*3 recurrent steps, each output goes into the window statistics.
for i in range(hist_len*3):
    sp.compute(sdr, learn=True, output=sdr)
    update_similar_score(sdr.sparse)
    
print(sdr_stats.activationFrequency)

# Moving avg probability to see almost the same output for sliding window T. Almost the same == overlap score at least `sim_threshold`
_, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 9))

ax1.set_title(f'Moving avg probability to see almost the same output (=overlap score {sim_threshold} out of {act_threshold}) for sliding window T={t}')
xs = np.arange(len(similars_ma))
# similars_ma holds running sums over a window of t flags; scale to percent.
ys = np.array(similars_ma) * 100 / t
ax1.plot(xs, ys)
ax1.set_ylim(-10, 110)
print(f'Similar count mean, %: {ys.mean()}')

# Ratio of exactly unique outputs for sliding window T
ax2.set_title(f'Ratio of exactly unique outputs for sliding window T={t}')
xs = np.arange(len(uniques_hist))
# uniques_hist holds the number of distinct outputs tracked per step; scale to percent of t.
ys = np.array(uniques_hist) * 100 / t
ax2.set_ylim(-10, 110)
_ = ax2.plot(xs, ys)

In [None]:
def to_square(dense_sdr):
    """Return `dense_sdr` reshaped to (n_, n_), with `n_` taken from module scope."""
    side = n_
    return dense_sdr.reshape(side, side)

def plot_vec(sparse_sdr, ax):
    """Draw the SDR given by its active indices as an image on `ax`.

    The module-level `dense_sdr` array serves as a scratch buffer: the
    active positions are set to 1, the buffer is shown as a square image
    with the axes hidden, and the same positions are reset to 0 afterwards.

    Args:
        sparse_sdr: indices of active bits, used to index `dense_sdr`.
        ax: matplotlib axes to draw on.
    """
    active = sparse_sdr
    dense_sdr[active] = 1
    image = to_square(dense_sdr)
    ax.imshow(image)
    ax.set_axis_off()
    # Leave the shared buffer clean for the next call.
    dense_sdr[active] = 0

def plot_timeline(nrows=6, ncols=12, learn=False):
    """Run `nrows * ncols` more recurrent SP steps and plot every output.

    Each step feeds the module-level `sdr` through the module-level `sp`
    with the same object as input and output, then draws the result with
    `plot_vec`. The grid is filled row by row.

    Args:
        nrows: number of rows in the grid of plots.
        ncols: number of columns in the grid of plots.
        learn: passed to `sp.compute`; whether the SP learns during these steps.
    """
    # squeeze=False always yields a 2-D array of axes; with the default,
    # a single row or column collapses the array (or returns a bare Axes)
    # and 2-D indexing fails.
    _, axes = plt.subplots(
        nrows=nrows, ncols=ncols, figsize=(ncols*2, nrows*2), squeeze=False
    )

    for row in range(nrows):
        for col in range(ncols):
            sp.compute(sdr, learn=learn, output=sdr)
            plot_vec(sdr.sparse, ax=axes[row][col])

# --- Experiment 2: SP with n = 15*15 inputs and columns, fed with its own output ---
n_ = 15  # side of the square layout (read by to_square)
n = n_**2  # total number of bits; used for both SP input and column dimensions
act_threshold = 10  # target number of active bits (defines `sparsity` below)
sim_threshold = 8  # min overlap for two outputs to count as "almost the same"
sparsity = act_threshold / n
seed = 42
np.random.seed(seed)

sdr = SDR(n)
# Input and output have the same size `n`, so the output can be fed back as input.
# dutyCyclePeriod is sqrt(act_threshold) times 1/sparsity steps.
sp = SpatialPooler(
    inputDimensions=[n], columnDimensions=[n], globalInhibition=True, potentialPct=.4, localAreaDensity=sparsity,
    stimulusThreshold=1, synPermConnected=.4, synPermActiveInc=.1, synPermInactiveDec=.01, boostStrength=2.,
    dutyCyclePeriod=int(np.sqrt(act_threshold)/sparsity), minPctOverlapDutyCycle=sparsity/5,
    seed=seed, 
)
# Random starting pattern with the target sparsity.
sdr.randomize(sparsity=sparsity, seed=seed)
# Scratch buffer of n zeros used by plot_vec.
dense_sdr = np.zeros(n, dtype=np.uint8)

# Keep the starting pattern in `initial_sdr` and continue with a fresh SDR
# object initialised from the same active indices.
initial_sdr = sdr
sdr = SDR(n)
sdr.sparse = initial_sdr.sparse

t = n  # sliding window length T
print(f'T={t}')

# NOTE(review): presumably Metrics tracks `sdr` on every update with an
# averaging period of t*10 -- confirm against the htm.core documentation.
sdr_stats = Metrics(sdr, t*10)
hist_len = max(1000, t)
# Trackers updated by update_similar_score():
#   timeline / similars         -- windows bounded to t elements
#   similars_ma / uniques_hist  -- histories bounded to hist_len elements
#   uniques                     -- Counter of outputs
timeline, similars, similars_ma, uniques, uniques_hist = deque([], t), deque([], t), deque([0], hist_len), Counter(), deque([], hist_len)
# Warm-up: t*20 recurrent steps (same `sdr` object is input and output), no statistics collected.
for i in range(t*20):
    sp.compute(sdr, learn=True, output=sdr)
#     sdr.addNoise(1.2 / act_threshold)
    
# Measured phase: hist_len*3 recurrent steps, each output goes into the window statistics.
for i in range(hist_len*3):
    sp.compute(sdr, learn=True, output=sdr)
    update_similar_score(sdr.sparse)
    
print(sdr_stats.activationFrequency)

# Moving avg probability to see almost the same output for sliding window T. Almost the same == overlap score at least `sim_threshold`
_, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 9))

ax1.set_title(f'Moving avg probability to see almost the same output (=overlap score {sim_threshold} out of {act_threshold}) for sliding window T={t}')
xs = np.arange(len(similars_ma))
# similars_ma holds running sums over a window of t flags; scale to percent.
ys = np.array(similars_ma) * 100 / t
ax1.plot(xs, ys)
ax1.set_ylim(-10, 110)
print(f'Similar count mean, %: {ys.mean()}')

# Ratio of exactly unique outputs for sliding window T
ax2.set_title(f'Ratio of exactly unique outputs for sliding window T={t}')
xs = np.arange(len(uniques_hist))
# uniques_hist holds the number of distinct outputs tracked per step; scale to percent of t.
ys = np.array(uniques_hist) * 100 / t
ax2.set_ylim(-10, 110)
_ = ax2.plot(xs, ys)

# Show the next 4*10 recurrent outputs as images, with learning switched off.
plot_timeline(nrows=4, ncols=10, learn=False)

Some observations:

- sparsity
  - important, but I didn't figure out the exact relation
  - less is better (KO)


- `potentialPct`
  - sparsity of the potential synapses at initialization
  - very important, less is better until X, also faster
  - for this particular case
    - X loosely depends on stimulusThreshold
    - I think 0.2-0.4 is a very promising range
  - but before I used 0.8 and everything was ok too


- `synPermActiveInc`/`synPermInactiveDec`
  - delta for learning
  - high decrement can break things
 

- `boostStrength`
  - important only if the setup is unstable
  - otherwise 2-4 is ok,.. even 1, which is "no boost"
  

- `dutyCyclePeriod`, `minPctOverlapDutyCycle`
  - `dutyCyclePeriod` affects max delay for entropy bumping, more important
  - `minPctOverlapDutyCycle` affects how many cols are affected, less important
  - equation for the 1st looks creepy, but it should be read like this:
    - how frequently do we want to re-check entropy?
    - ideally every col fires once every $1 / sparsity$ steps
    - e.g. 400 cells, 2% sparsity (=8 cells), then with perfectly random output all cells will be fired after 400/8 = 50 steps = 1 / (2%)
    - but in reality input is not uniformly random ==> output is not perfectly random
    - so it's highly likely that after 50 steps there are a lot of cells that haven't fired, i.e. the statistics are probably skewed
    - that's why it's wise to increase this period x2-x10. Sqrt(activation_threshold) is a balanced value in between.


- actual parameters used in this notebook __are good only for this special case__
  - they can be used as starting point in real cases
  - these params are optimised for forcing SP to raise entropy to the max
  - I haven't measured how it's achieved
    - either by breaking static patterns and short oscillations, i.e. SP learns a function with high order of periodicity
    - or by continually changing synapses permanence bumping low-firing cols
  - but I'm almost sure that the effect of the latter is not negligible
  - so in reality you should adapt parameters in order to reduce high-entropy pressure