# Implement Spatial Pooler

Данная тетрадь содержит задачу реализации Spatial Pooler'а.

Для начала посмотри эпизоды 0–8 видеогайда [HTMSchool](https://www.youtube.com/watch?v=XMB0ri4qgwc&list=PL3yXMgtrZmDqhsFQzwUC9V8MeeVOQ7eZ9).

## 01. Getting ready

Данная секция содержит:

- [опционально] установка `htm.core`
- импорт необходимых пакетов (убедись, что все они установлены)
- загрузка датасета

### HTM.Core

Если у тебя не установлен пакет `htm.core`, раскомментируй и запусти следующую ячейку. В случае проблем обратись к официальной странице пакета на [гитхабе](https://github.com/htm-community/htm.core#installation) и проверь требуемые зависимости.

In [None]:
# !python -m pip install -i https://test.pypi.org/simple/ htm.core

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from scipy.sparse import csr_matrix

from htm.bindings.algorithms import SpatialPooler
from htm.bindings.sdr import SDR, Metrics

%matplotlib inline
    
# Shared RNG seed; later cells pass it to np.random.seed for reproducible runs.
seed = 1337

### Load data

Следующая ячейка загружает датасет MNIST (займет порядка 10-20 сек).

In [None]:
def load_ds(name, num_test, shape=None):
    """
    Fetch a dataset from openML.org and split it into train/test parts.

    The last ``num_test`` samples form the test split and everything before
    them forms the train split; no shuffling happens here.

    @param name - ID on openML (eg. 'mnist_784')
    @param num_test - num. samples to take as test
    @param shape - optional new shape of a single data point (ie data['data'][0])
                   as a sequence, eg. [28, 28] for MNIST. The passed sequence is
                   not modified.
    @return tuple (train_labels, train_images, test_labels, test_images)
    """
    data = fetch_openml(name, version=1)
    sz = data['target'].shape[0]

    X = data['data']
    if shape is not None:
        # Build a fresh shape rather than calling shape.insert(0, sz): insert
        # returns None and silently mutated the caller's list.
        new_shape = (sz, *shape)
        # Convert to a plain ndarray first so the reshape does not depend on
        # the container type returned by fetch_openml.
        X = np.reshape(np.asarray(X), new_shape)

    y = data['target'].astype(np.int32)

    # split to train/test data: the tail of the dataset is the test part
    n_train = sz - num_test
    train_labels, test_labels = y[:n_train], y[n_train:]
    train_images, test_images = X[:n_train], X[n_train:]

    return train_labels, train_images, test_labels, test_images


def shuffle_data(x, y):
    """
    Shuffle samples and labels with one shared random order.

    Uses NumPy's global RNG, so seed it beforehand for reproducibility.
    Returns new arrays (x_shuffled, y_shuffled); pairs stay aligned.
    """
    order = np.arange(len(y))
    np.random.shuffle(order)
    return np.array(x)[order], np.array(y)[order]


# Fetch MNIST; the last 10000 samples become the test split and every sample is reshaped to 28x28.
train_labels, train_images, test_labels, test_images = load_ds('mnist_784', 10000, shape=[28,28])

# Seed NumPy's global RNG so both shuffles below are reproducible.
np.random.seed(seed)
train_images, train_labels = shuffle_data(train_images, train_labels)
test_images, test_labels = shuffle_data(test_images, test_labels)

n_train_samples = train_images.shape[0]
n_test_samples = test_images.shape[0]
image_shape = train_images[0].shape
# Only the first dimension is read, so image_size is right for square images only.
image_side = image_shape[0]
image_size = image_side ** 2


# Bare expression: the notebook shows the four array shapes as the cell output.
train_images.shape, train_labels.shape, test_images.shape, test_labels.shape

Пример формата данных датасета

In [None]:
# Show the first training image, then print its label, shape and middle pixel row.
plt.imshow(train_images[0])
print(f'Label: {train_labels[0]}')
print(f'Image shape: {image_shape}')
print(f'Image middle row: {train_images[0][image_side//2]}')

Перекодируем датасет в бинарные изображения и дальше будем работать с бинарными данными.

In [None]:
def plot_flatten_image(flatten_image, image_height=28):
    """Reshape a flat image to `image_height` rows (width inferred) and draw it with plt.imshow."""
    image_2d = flatten_image.reshape((image_height, -1))
    plt.imshow(image_2d)

def to_binary_flatten_images(images):
    """
    Flatten every image to a vector and binarise it against its own mean.

    A pixel becomes 1 when it is >= the mean of its image, otherwise 0.
    Returns an int8 array of shape (n_samples, pixels_per_image).
    """
    flat = images.reshape((images.shape[0], -1))
    # keepdims keeps one mean per row so the comparison broadcasts per image
    per_image_mean = flat.mean(axis=1, keepdims=True)
    return (flat >= per_image_mean).astype(np.int8)


# Replace both splits with their flattened binary versions, then preview the first training image.
train_images = to_binary_flatten_images(train_images)
test_images = to_binary_flatten_images(test_images)
plot_flatten_image(train_images[0])

## 02. Baseline: classifier on raw input

In [None]:
%%time

def test_bare_classification(x_tr,  y_tr, x_tst, y_tst):
    """
    Baseline: fit a multinomial logistic regression on the raw inputs.

    Prints and returns the fraction of test samples predicted correctly.
    """
    classifier = LogisticRegression(
        tol=.001, max_iter=100, multi_class='multinomial',
        penalty='l2', solver='lbfgs', n_jobs=3,
    )
    classifier.fit(x_tr, y_tr)

    hits = classifier.predict(x_tst) == y_tst
    score = hits.mean()
    print('Score:', 100 * score, '%')
    return score

# Evaluate on the first n samples of each split.
n = 1000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

# Author's recorded run: 87.3; 888ms (presumably accuracy in % and wall time)
test_bare_classification(x_tr, y_tr, x_tst, y_tst)

## 03. Spatial Pooler: skeleton

In [None]:
class NoOpSpatialPooler:
    """Pass-through pooler: output size equals input size and nothing is learned."""

    def __init__(self, input_size):
        self.input_size = self.output_size = input_size

    def compute(self, dense_sdr, learn):
        """Return the indices of non-zero entries of `dense_sdr`; `learn` is ignored."""
        nonzero_per_axis = np.nonzero(dense_sdr)
        return nonzero_per_axis[0]
        

# Smoke test: encode one image with the pass-through pooler.
np.random.seed(seed)
sp = NoOpSpatialPooler(train_images[0].size)
sparse_sdr = sp.compute(train_images[0], True)

print(sparse_sdr.size, sp.output_size)
# Sanity check: the number of active indices must be smaller than the output size.
assert sparse_sdr.size < sp.output_size
# Bare expression: the notebook displays the active indices.
sparse_sdr

## 04. Train/test SP performance

In [None]:
%%time

def pretrain_sp(sp, images, n_samples):
    """Run `sp.compute` with learning enabled on the first `n_samples` images; outputs are discarded."""
    warmup_batch = images[:n_samples]
    for sample in warmup_batch:
        sp.compute(sample, True)
    
def encode_to_csr_with_sp(images, sp, learn):
    """
    Encode every image with `sp` and pack the results into one CSR matrix.

    Row i holds ones at the indices returned by `sp.compute(images[i], learn)`.
    The matrix shape is (images.shape[0], sp.output_size).
    """
    column_indices = []
    row_ends = []
    for image in images:
        column_indices += list(sp.compute(image, learn))
        # CSR row pointer: where this row's entries end in the flat index list
        row_ends.append(len(column_indices))

    values = np.ones(len(column_indices))
    matrix_shape = (images.shape[0], sp.output_size)
    return csr_matrix((values, column_indices, [0] + row_ends), shape=matrix_shape)

def test_classification_with_sp(x_tr,  y_tr, x_tst, y_tst, sp):
    """
    Train and score a logistic regression on pooler-encoded images.

    The pooler is warmed up on the first 1000 training images, then keeps
    learning while the training set is encoded. Test images are encoded with
    learning switched off. Prints and returns the test accuracy (fraction).
    """
    # warm-up pass before the encodings are actually used
    pretrain_sp(sp, x_tr, n_samples=1000)

    # training features: the pooler still learns during this pass
    train_features = encode_to_csr_with_sp(x_tr, sp, learn=True)

    classifier = LogisticRegression(
        tol=.001, max_iter=100, multi_class='multinomial',
        penalty='l2', solver='lbfgs', n_jobs=3,
    )
    classifier.fit(train_features, y_tr)

    # test features: pooler frozen
    test_features = encode_to_csr_with_sp(x_tst, sp, False)
    score = (classifier.predict(test_features) == y_tst).mean()
    print('Score:', 100 * score, '% for n =', len(x_tr))
    return score

# Run the SP pipeline with the pass-through pooler on the first n samples of each split.
n = 1000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]
my_sp = NoOpSpatialPooler(train_images[0].size)

# Author's recorded run: 87.3; 1.16s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, my_sp)

## 05. Spatial Pooler: learning

In [None]:
class LearnableSpatialPooler:
    """
    Spatial pooler with top-k column selection and permanence learning.

    Each of the `output_size` columns owns a random receptive field over the
    input bits and one permanence value per potential synapse. `compute`
    selects the columns with the highest overlap and, when learning, moves the
    permanences of the selected columns towards the current input.
    """

    def __init__(
        self, input_size, output_size,
        permanence_threshold, sparsity_level, synapse_permanence_deltas, min_activation_threshold
    ):
        """
        @param input_size - number of bits in an input SDR
        @param output_size - number of columns
        @param permanence_threshold - a synapse counts as connected when its permanence is >= this value
        @param sparsity_level - fraction of columns selected per input
        @param synapse_permanence_deltas - pair (increment, decrement) applied while learning
        @param min_activation_threshold - minimal overlap a selected column needs to stay active
        """
        self.input_size = input_size
        self.output_size = output_size

        self.sparsity_level = sparsity_level
        self.n_active_bits = int(self.output_size * sparsity_level)

        self.permanence_threshold = permanence_threshold
        self.synapse_permanence_increment, self.synapse_permanence_decrement = synapse_permanence_deltas
        self.min_activation_threshold = min_activation_threshold

        # Rows are columns of the pooler, columns are input bits.
        self.joint_shape = (output_size, input_size)
        # 1 marks a potential synapse; each input bit is included with probability .8.
        self.receptive_fields = np.random.choice(2, size=self.joint_shape, p=[.2, .8])
        # Permanence is zero outside the receptive field.
        self.connections_permanence = np.random.uniform(size=self.joint_shape) * self.receptive_fields

        # Scratch buffer holding the per-input-bit permanence delta.
        # Builtin float replaces the np.float alias removed from NumPy.
        self.dp = np.empty(input_size, dtype=float)

    def compute(self, dense_sdr, learn):
        """
        Encode a dense binary input.

        @param dense_sdr - 1-D array-like of length input_size; non-zero means active
        @param learn - when truthy, permanences of the returned columns are updated
        @return array of active column indices (at most n_active_bits, unordered)
        """
        # Builtin bool replaces the np.bool alias removed from NumPy.
        dense_sdr = np.asarray(dense_sdr).astype(bool)
        connected_active = self.connections_permanence[:, dense_sdr] >= self.permanence_threshold
        overlaps = np.count_nonzero(connected_active, axis=-1)

        activated_cols = self._top_k(overlaps, self.n_active_bits)
        # Drop winners whose overlap is too weak to count as an activation.
        activated_cols = activated_cols[overlaps[activated_cols] >= self.min_activation_threshold]

        if learn:
            self._update_permanence(dense_sdr, activated_cols)

        return activated_cols

    @staticmethod
    def _top_k(scores, k):
        """Return indices of the k largest scores (unordered); all indices when k >= len(scores)."""
        if k >= scores.size:
            # argpartition requires kth < size, so handle full selection explicitly.
            return np.arange(scores.size)
        return np.argpartition(-scores, k)[:k]

    def _update_permanence(self, sdr, columns):
        """Reinforce synapses to active input bits and weaken the others, for `columns` only."""
        dp = self.dp
        dp[sdr] = self.synapse_permanence_increment
        dp[~sdr] = -self.synapse_permanence_decrement
        updated = self.connections_permanence[columns] + dp * self.receptive_fields[columns]
        # Indexing with an index array yields a copy, so the clipped values must
        # be assigned back explicitly; otherwise the update is lost.
        self.connections_permanence[columns] = np.clip(updated, 0, 1)

        
# Smoke test: encode one image with a small pooler (output_size of 100).
np.random.seed(seed)
sp = LearnableSpatialPooler(
    input_size=train_images[0].size,
    output_size=10**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .02),
    min_activation_threshold=4
)
sparse_sdr = sp.compute(train_images[0], True)

print(sparse_sdr.size, sp.output_size, sp.n_active_bits)
# Holds only when every selected column reaches min_activation_threshold;
# compute returns fewer indices otherwise.
assert sparse_sdr.size == sp.n_active_bits
# Bare expression: the notebook displays the active column indices.
sparse_sdr

In [None]:
%%time

# Small-scale run: first n samples of each split, output_size of 900.
n = 1000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = LearnableSpatialPooler(
    input_size=train_images[0].size, 
    output_size=30**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4
)
# Author's recorded run: 80.2; 2.98 s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

In [None]:
class BoostedSpatialPooler:
    """
    Spatial pooler with permanence learning and activity-based boosting.

    Column overlaps are multiplied by a boost factor derived from a running
    average of each column's activity, so rarely active columns get a better
    chance of being selected.
    """

    def __init__(
        self, input_size, output_size,
        permanence_threshold, sparsity_level, synapse_permanence_deltas, min_activation_threshold=1, potenrial_synapses_p=.8,
        max_boost_factor=1.5, boost_sliding_window=(1000, 1000)
    ):
        """
        @param input_size - number of bits in an input SDR
        @param output_size - number of columns
        @param permanence_threshold - a synapse counts as connected when its permanence is >= this value
        @param sparsity_level - fraction of columns selected per input
        @param synapse_permanence_deltas - pair (increment, decrement) applied while learning
        @param min_activation_threshold - minimal boosted overlap a selected column needs to stay active
        @param potenrial_synapses_p - probability that an input bit belongs to a column's receptive field
                                      (name kept as is because callers pass it by keyword)
        @param max_boost_factor - scale inside the exponent of the boost formula; boosts are not clipped
        @param boost_sliding_window - pair (activity window, overlap window) for the running averages
        """
        self.input_size = input_size
        self.output_size = output_size
        # Rows are columns of the pooler, columns are input bits.
        self.joint_shape = (output_size, input_size)

        self.sparsity_level = sparsity_level
        self.n_active_bits = int(self.output_size * sparsity_level)

        self.permanence_threshold = permanence_threshold
        self.syn_perm_inc, self.syn_perm_dec = synapse_permanence_deltas
        self.min_activation_threshold = min_activation_threshold

        self.max_boost_factor = max_boost_factor
        self.activity_duty_cycle, self.overlap_duty_cycle = boost_sliding_window

        # 1 marks a potential synapse.
        self.receptive_fields = np.random.choice(
            2, size=self.joint_shape, p=[1 - potenrial_synapses_p, potenrial_synapses_p]
        )
        # Permanence is zero outside the receptive field.
        self.connections_permanence = np.random.uniform(size=self.joint_shape) * self.receptive_fields
        # Builtin float replaces the np.float alias removed from NumPy.
        self.time_avg_activity = np.full(self.output_size, self.sparsity_level, dtype=float)
        self.time_avg_overlap = np.ones(self.output_size, dtype=float)
        # Scratch buffer holding the per-input-bit permanence delta.
        self.dp = np.empty(input_size, dtype=float)
        self.boost = self._compute_boost()

    def compute(self, dense_sdr, learn):
        """
        Encode a dense binary input.

        @param dense_sdr - 1-D array-like of length input_size; non-zero means active
        @param learn - when truthy, permanences and the activity boost are updated
        @return array of active column indices (at most n_active_bits, unordered)
        """
        # Builtin bool replaces the np.bool alias removed from NumPy.
        dense_sdr = np.asarray(dense_sdr).astype(bool)
        connected_active = self.connections_permanence[:, dense_sdr] >= self.permanence_threshold
        overlaps = np.count_nonzero(connected_active, axis=-1) * self.boost

        activated_cols = self._top_k(overlaps, self.n_active_bits)
        # Drop winners whose boosted overlap is too weak to count as an activation.
        activated_cols = activated_cols[overlaps[activated_cols] >= self.min_activation_threshold]

        if learn:
            self._update_permanence(dense_sdr, activated_cols)
            self._update_activity_boost(activated_cols)
            # NOTE: _update_overlap_boost is deliberately not called here; the
            # original code had the call disabled.

        return activated_cols

    @staticmethod
    def _top_k(scores, k):
        """Return indices of the k largest scores (unordered); all indices when k >= len(scores)."""
        if k >= scores.size:
            # argpartition requires kth < size, so handle full selection explicitly.
            return np.arange(scores.size)
        return np.argpartition(-scores, k)[:k]

    def _update_permanence(self, dense_sdr, activated_cols):
        """Reinforce synapses to active input bits and weaken the others, for `activated_cols` only."""
        dp = self.dp
        dp[dense_sdr] = self.syn_perm_inc
        dp[~dense_sdr] = -self.syn_perm_dec
        updated = self.connections_permanence[activated_cols] + dp * self.receptive_fields[activated_cols]
        # Indexing with an index array yields a copy, so the clipped values must
        # be assigned back explicitly; otherwise the update is lost.
        self.connections_permanence[activated_cols] = np.clip(updated, 0, 1)

    def _update_activity_boost(self, activated_cols):
        """Decay the running activity average, credit the active columns and refresh the boost."""
        window = self.activity_duty_cycle
        self.time_avg_activity *= (window - 1) / window
        self.time_avg_activity[activated_cols] += 1 / window
        self.boost = self._compute_boost()

    def _update_overlap_boost(self, x, rows, cols, overlaps):
        """
        Raise permanences of the columns with the lowest running-average overlap.

        Not called by `compute`. `x`, `rows` and `cols` are unused and kept only
        for signature compatibility.

        @param overlaps - per-column overlap values for the current input
        """
        self.time_avg_overlap += (overlaps - self.time_avg_overlap) / self.overlap_duty_cycle
        # Boost the weakest 5% of columns.
        k = int(.05 * self.output_size)
        to_boost_indices = np.argpartition(self.time_avg_overlap, k)[:k]
        # Only potential synapses are raised, as in _update_permanence.
        boosted = (
            self.connections_permanence[to_boost_indices]
            + .1 * self.permanence_threshold * self.receptive_fields[to_boost_indices]
        )
        # Assign back: the indexed selection above is a copy.
        self.connections_permanence[to_boost_indices] = np.clip(boosted, 0, 1)

    def _compute_boost(self):
        """Return per-column factors exp(-max_boost_factor * (activity - mean activity))."""
        centered_activity = self.time_avg_activity - self.time_avg_activity.mean()
        return np.exp(-self.max_boost_factor * centered_activity)
        

# Smoke test: encode one image with a small boosted pooler (output_size of 100).
# Uses the notebook-wide `seed` instead of repeating the literal 1337.
np.random.seed(seed)
my_sp = BoostedSpatialPooler(train_images[0].size, 10**2, .5, .04, (.1, .02), 4, potenrial_synapses_p=.8)
# Bare expression: the notebook displays the active column indices.
my_sp.compute(train_images[0], True)

In [None]:
%%time

# Small-scale run: first n samples of each split, boosted pooler with output_size of 900.
n = 1000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = BoostedSpatialPooler(
    input_size=train_images[0].size, 
    output_size=30**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4,
    max_boost_factor=3
)
# Author's recorded run: 84.0; 3.24 s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

In [None]:
%%time

# Same small-scale run, but with a smaller receptive field (potenrial_synapses_p=.4).
n = 1000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = BoostedSpatialPooler(
    input_size=train_images[0].size, 
    output_size=30**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4,
    potenrial_synapses_p=.4,
    max_boost_factor=3
)
# 84.0; 3.24 s
# NOTE(review): identical to the figure recorded for the run with the default
# potenrial_synapses_p; possibly a stale copy - re-run to confirm.
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

___

## TESTING

In [None]:
%%time

def test_bare_classification(x_tr,  y_tr, x_tst, y_tst):
    """
    Fit a multinomial logistic regression on raw inputs and score it.

    Prints the accuracy in percent and returns it as a fraction.
    """
    model = LogisticRegression(
        tol=.001, max_iter=100, multi_class='multinomial',
        penalty='l2', solver='lbfgs', n_jobs=3,
    )
    model.fit(x_tr, y_tr)

    is_correct = model.predict(x_tst) == y_tst
    score = is_correct.mean()
    print('Score:', 100 * score, '%')
    return score

# n exceeds the 10000 test samples requested from load_ds, so the test slice is
# the whole split (presumably the train slice too - confirm the dataset size).
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

# Author's recorded run: 92.11; 38s (presumably accuracy in % and wall time)
test_bare_classification(x_tr, y_tr, x_tst, y_tst)

In [None]:
%%time

# Full-scale run of the learnable pooler with output_size of 900.
# n exceeds the 10000-sample test split, so the test slice is the whole split.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = LearnableSpatialPooler(
    input_size=train_images[0].size, 
    output_size=30**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4
)
# Author's recorded run: 89.3; 72 s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

In [None]:
%%time

# Full-scale run of the learnable pooler with a larger output_size of 2500.
# n exceeds the 10000-sample test split, so the test slice is the whole split.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = LearnableSpatialPooler(
    input_size=train_images[0].size, 
    output_size=50**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4
)
# Author's recorded run: 93.15; 221 s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

In [None]:
%%time

# Full-scale run of the boosted pooler with output_size of 900.
# n exceeds the 10000-sample test split, so the test slice is the whole split.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = BoostedSpatialPooler(
    input_size=train_images[0].size, 
    output_size=30**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4,
    max_boost_factor=3
)
# Author's recorded run: 91.15; 86 s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

In [None]:
%%time

# Full-scale run of the boosted pooler with a larger output_size of 2500.
# n exceeds the 10000-sample test split, so the test slice is the whole split.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = BoostedSpatialPooler(
    input_size=train_images[0].size, 
    output_size=50**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4,
    max_boost_factor=3
)
# Author's recorded run: 94.44; 238 s (presumably accuracy in % and wall time)
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)

In [None]:
%%time

# Full-scale run of the boosted pooler with a smaller receptive field (potenrial_synapses_p=.4).
# n exceeds the 10000-sample test split, so the test slice is the whole split.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

sp = BoostedSpatialPooler(
    input_size=train_images[0].size, 
    output_size=30**2,
    permanence_threshold=.5,
    sparsity_level=.04,
    synapse_permanence_deltas=(.1, .03),
    min_activation_threshold=4,
    potenrial_synapses_p=.4,
    max_boost_factor=3
)
# 84.0; 3.24 s
# NOTE(review): same figure as the n = 1000 cells although this run uses
# n = 100000; likely a stale copy - re-run to confirm.
test_classification_with_sp(x_tr, y_tr, x_tst, y_tst, sp)