In [None]:
import gym
import gym_minigrid as mg
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from time import sleep

%matplotlib inline

import random
import numpy as np
import sys

# fetch datasets from www.openML.org/ 
from sklearn.datasets import fetch_openml

from htm.bindings.algorithms import SpatialPooler, Classifier
from htm.bindings.sdr import SDR, Metrics


def load_ds(name, num_test, shape=None):
    """
    Fetch a dataset from openML.org and split it into train/test parts.

    The split is positional: the last `num_test` samples form the test set and
    everything before them forms the training set (no shuffling happens here).

    @param name - ID on openML (eg. 'mnist_784')
    @param num_test - num. samples to take as test
    @param shape - new shape of a single data point (ie data['data'][0]) as a
                   list or tuple. Eg. [28,28] for MNIST. The object passed by
                   the caller is left untouched.
    @return tuple (train_labels, train_images, test_labels, test_images)
    """
    data = fetch_openml(name, version=1)
    sz = data['target'].shape[0]

    X = data['data']
    if shape is not None:
        # Build the full target shape as a NEW tuple. The previous code called
        # shape.insert(0, sz), which mutated the caller's list (so a second
        # call with the same list reshaped to the wrong rank).
        # np.asarray makes the reshape independent of the container type
        # returned by fetch_openml.
        X = np.reshape(np.asarray(X), (sz, *shape))

    y = data['target'].astype(np.int32)

    # split to train/test data
    split_at = sz - num_test
    train_labels = y[:split_at]
    train_images = X[:split_at]
    test_labels  = y[split_at:]
    test_images  = X[split_at:]

    return train_labels, train_images, test_labels, test_images

def encode(data, out):
    """
    Binarize the (image) data and store the result in an SDR.
    @param data - raw data
    @param out  - SDR receiving the encoded data; its `dense` value is overwritten
    """
    # Greyscale -> B/W: a pixel is "on" when it is at least the mean intensity.
    threshold = np.mean(data)
    out.dense = data >= threshold
    #TODO improve. have a look in htm.vision etc. For MNIST this is ok, for fashionMNIST it already loses too much information


# Default hyper-parameters for the HTM SpatialPooler used by the run_* functions
# below; each key is forwarded to the SpatialPooler constructor argument of the
# same name ('stimulusThreshold' and 'dutyCyclePeriod' are rounded to int there).
#
# These parameters can be improved using parameter optimization,
# see py/htm/optimization/ae.py
# For more explanation of relations between the parameters, see 
# src/examples/mnist/MNIST_CPP.cpp 
default_parameters = {
    'potentialRadius': 7,
    'boostStrength': 7.0,
    'columnDimensions': (79, 79),
    'dutyCyclePeriod': 1402,
    'localAreaDensity': 0.1,
    'minPctOverlapDutyCycle': 0.2,
    'potentialPct': 0.1,
    'stimulusThreshold': 6,
    'synPermActiveInc': 0.14,
    'synPermConnected': 0.5,
    'synPermInactiveDec': 0.02
}

## Load data

In [None]:
# Load the dataset, keeping the last 10000 samples as the test set and
# reshaping every sample to 28x28. The trailing figures are the accuracies the
# author recorded for the HTM pipeline on each dataset.
# Swap the commented line in to run the notebook on Fashion-MNIST instead.
train_labels, train_images, test_labels, test_images = load_ds('mnist_784', 10000, shape=[28,28]) # HTM: ~95.6%
# train_labels, train_images, test_labels, test_images = load_ds('Fashion-MNIST', 10000, shape=[28,28]) # HTM baseline: ~83%

def shuffle_data(x, y):
    """
    Shuffle samples and labels with one shared random permutation.

    Uses NumPy's global RNG, so seed it (np.random.seed) for reproducibility.

    @param x - samples (array-like, first axis indexes the samples)
    @param y - labels, same length as x
    @return tuple (x_shuffled, y_shuffled) as numpy arrays
    """
    order = np.arange(len(y))
    np.random.shuffle(order)
    shuffled_x = np.array(x)[order]
    shuffled_y = np.array(y)[order]
    return shuffled_x, shuffled_y

# Fix NumPy's global RNG so both shuffles below are reproducible.
np.random.seed(1337)

# Train and test sets are shuffled independently (each within itself).
train_images, train_labels = shuffle_data(train_images, train_labels)
test_images, test_labels = shuffle_data(test_images, test_labels)

## Bare HTM classifier

In [None]:
%%time

def run_bare_classifier(parameters=default_parameters, argv=None, verbose=True):
    """
    Baseline: feed the binarized images straight into the HTM Classifier
    (no SpatialPooler in between) and report the accuracy on the test set.

    Reads the notebook-level train_images/train_labels/test_images/test_labels.

    @param parameters - unused here; kept for a signature parallel to run_htm_sp
    @param argv       - unused
    @param verbose    - unused
    @return fraction of correctly classified test samples
    """
    training_data = list(zip(train_images, train_labels))
    test_data     = list(zip(test_images, test_labels))
    random.shuffle(training_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    # Metrics object is bound to `enc` before any data is written to it.
    columns_stats = Metrics( enc, 99999999 )
    sdrc = Classifier()

    # Training Loop
    for img, lbl in training_data:
        encode(img, enc)
        sdrc.learn( enc, lbl )

    print(columns_stats)

    # Testing Loop
    correct = 0
    for img, lbl in test_data:
        encode(img, enc)
        predicted = np.argmax( sdrc.infer( enc ) )
        if lbl == predicted:
            correct += 1
    score = correct / len(test_data)

    print('Score:', 100 * score, '%')
    return score

# Run the baseline; the returned score is shown as the cell output.
run_bare_classifier()

## HTM SP

In [None]:
%%time

def run_htm_sp(parameters=default_parameters, argv=None, verbose=True):
    """
    Train an HTM SpatialPooler + HTM Classifier on the training set and report
    the accuracy on the test set.

    Reads the notebook-level train_images/train_labels/test_images/test_labels.

    @param parameters - dict of SpatialPooler settings (see default_parameters)
    @param argv       - unused
    @param verbose    - unused
    @return fraction of correctly classified test samples
    """
    training_data = list(zip(train_images, train_labels))
    test_data     = list(zip(test_images, test_labels))
    random.shuffle(training_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    sp_config = dict(
        inputDimensions        = enc.dimensions,
        columnDimensions       = parameters['columnDimensions'],
        potentialRadius        = parameters['potentialRadius'],
        potentialPct           = parameters['potentialPct'],
        globalInhibition       = True,
        localAreaDensity       = parameters['localAreaDensity'],
        stimulusThreshold      = int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec     = parameters['synPermInactiveDec'],
        synPermActiveInc       = parameters['synPermActiveInc'],
        synPermConnected       = parameters['synPermConnected'],
        minPctOverlapDutyCycle = parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod        = int(round(parameters['dutyCyclePeriod'])),
        boostStrength          = parameters['boostStrength'],
        # this is important, 0="random" seed which changes on each invocation
        seed                   = 0,
        spVerbosity            = 99,
        wrapAround             = False,
    )
    sp = SpatialPooler(**sp_config)
    columns = SDR( sp.getColumnDimensions() )
    # Metrics object is bound to `columns` before any data is written to it.
    columns_stats = Metrics( columns, 99999999 )
    sdrc = Classifier()

    # Training Loop: the SP learns on every sample, the classifier learns the SP output.
    for img, lbl in training_data:
        encode(img, enc)
        sp.compute( enc, True, columns )
        sdrc.learn( columns, lbl )

    print(sp)
    print(columns_stats)

    # Testing Loop: SP learning is switched off.
    correct = 0
    for img, lbl in test_data:
        encode(img, enc)
        sp.compute( enc, False, columns )
        predicted = np.argmax( sdrc.infer( columns ) )
        if lbl == predicted:
            correct += 1
    score = correct / len(test_data)

    print('Score:', 100 * score, '%')
    return score

# Run SP + HTM classifier with default_parameters; the score is the cell output.
run_htm_sp()

## Bare Sklearn classifier

In [None]:
%%time

from sklearn.linear_model import LogisticRegression
from scipy.sparse import csr_matrix

def encode_to_csr(x, enc: SDR):
    """
    Encode every image in `x` and pack the results into one sparse matrix.

    Row i of the result has a 1.0 at every index listed in `enc.sparse` after
    image i was encoded into `enc`.

    @param x   - array of images (needs `.shape[0]` = number of samples)
    @param enc - SDR used as scratch buffer for the encoding; overwritten
    @return scipy csr_matrix of shape (len(x), enc.size)
    """
    col_indices = []
    row_ptr = [0]
    for image in x:
        encode(image, enc)
        col_indices.extend(enc.sparse)
        # CSR row pointer: where the next row starts in col_indices.
        row_ptr.append(len(col_indices))

    values = np.ones(len(col_indices))
    n_rows = x.shape[0]
    return csr_matrix((values, col_indices, row_ptr), shape=(n_rows, enc.size))

def run_bare_sklearn_classifier(x_tr, y_tr, x_tst, y_tst, parameters=default_parameters):
    """
    Baseline: multinomial logistic regression on the binarized images
    (no SpatialPooler). Prints and returns the test accuracy.

    @param x_tr, y_tr   - training images / labels
    @param x_tst, y_tst - test images / labels
    @param parameters   - unused here
    @return mean accuracy on (x_tst, y_tst)
    """
    enc = SDR(x_tr[0].shape)

    train_features = encode_to_csr(x_tr, enc)
    classifier = LogisticRegression(
        tol=.001, max_iter=100, multi_class='multinomial',
        penalty='l2', solver='lbfgs', n_jobs=3,
    )
    classifier.fit(train_features, y_tr)

    test_features = encode_to_csr(x_tst, enc)
    hits = classifier.predict(test_features) == y_tst
    score = hits.mean()
    print('Score:', 100 * score, '%')
    return score

# Upper bound on the number of samples used; slicing with [:n] keeps the whole
# array whenever it holds fewer than n samples (the test set has 10000 by
# construction in the load cell).
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

run_bare_sklearn_classifier(x_tr, y_tr, x_tst, y_tst)

## HTM SP + Sklearn classifier

In [None]:
from sklearn.linear_model import LogisticRegression
from scipy.sparse import csr_matrix

def encode_to_csr_sp(x, enc: SDR, sp, columns, learn):
    """
    Encode every image in `x`, run it through the SpatialPooler and pack the
    SP outputs into one sparse matrix.

    @param x       - array of images (needs `.shape[0]` = number of samples)
    @param enc     - SDR scratch buffer for the encoded image; overwritten
    @param sp      - SpatialPooler instance
    @param columns - SDR scratch buffer for the SP output; overwritten
    @param learn   - forwarded to sp.compute; True lets the SP keep learning
    @return scipy csr_matrix of shape (len(x), columns.size) with 1.0 at every
            index listed in `columns.sparse` for the corresponding image
    """
    col_indices = []
    row_ptr = [0]
    for image in x:
        encode(image, enc)
        sp.compute( enc, learn, columns )
        col_indices.extend(columns.sparse)
        # CSR row pointer: where the next row starts in col_indices.
        row_ptr.append(len(col_indices))

    values = np.ones(len(col_indices))
    n_rows = x.shape[0]
    return csr_matrix((values, col_indices, row_ptr), shape=(n_rows, columns.size))

In [None]:
%%time
def run_htm_sp_sklearn(x_tr,  y_tr, x_tst, y_tst, parameters=default_parameters):
    """
    HTM SpatialPooler as feature extractor + multinomial logistic regression.

    Steps:
      1. warm up the SP on the first 1000 training images (learning on),
      2. encode the whole training set through the SP (learning still on) and
         fit the logistic regression on those features,
      3. encode the test set with SP learning off and score the predictions.

    @param x_tr, y_tr   - training images / labels
    @param x_tst, y_tst - test images / labels
    @param parameters   - dict of SpatialPooler settings (see default_parameters)
    @return mean accuracy on (x_tst, y_tst)
    """
    # Size the encoder from the data actually passed in, not from the
    # notebook-level `train_images` global.
    enc = SDR(x_tr[0].shape)
    sp = SpatialPooler(
        inputDimensions            = enc.dimensions,
        columnDimensions           = parameters['columnDimensions'],
        potentialRadius            = parameters['potentialRadius'],
        potentialPct               = parameters['potentialPct'],
        globalInhibition           = True,
        localAreaDensity           = parameters['localAreaDensity'],
        stimulusThreshold          = int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec         = parameters['synPermInactiveDec'],
        synPermActiveInc           = parameters['synPermActiveInc'],
        synPermConnected           = parameters['synPermConnected'],
        minPctOverlapDutyCycle     = parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod            = int(round(parameters['dutyCyclePeriod'])),
        boostStrength              = parameters['boostStrength'],
        seed                       = 0, # this is important, 0="random" seed which changes on each invocation
        spVerbosity                = 0,
        wrapAround                 = False)
    columns = SDR( sp.getColumnDimensions() )

    # train SP (warm-up on the first 1000 samples)
    for img in x_tr[:1000]:
        encode(img, enc)
        sp.compute( enc, True, columns )

    # train linreg; the SP keeps learning while the training features are produced
    csr = encode_to_csr_sp(x_tr, enc, sp, columns, True)

    linreg = LogisticRegression(tol=.001, max_iter=100, multi_class='multinomial', penalty='l2', solver='lbfgs', n_jobs=3)
    linreg.fit(csr, y_tr)

    # evaluate with SP learning switched off
    csr = encode_to_csr_sp(x_tst, enc, sp, columns, False)
    score = linreg.predict(csr) == y_tst
    score = score.mean()
    print('Score:', 100 * score, '% for n =', len(x_tr))
    return score

# Upper bound on the number of samples used; slicing with [:n] keeps the whole
# array whenever it holds fewer than n samples.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

run_htm_sp_sklearn(x_tr, y_tr, x_tst, y_tst)

## HTM SP + Sklearn classifier small size

In [None]:
%%time
def run_htm_sp_sklearn_small(x_tr,  y_tr, x_tst, y_tst, output_dim, parameters=default_parameters):
    """
    Same pipeline as run_htm_sp_sklearn, but the SpatialPooler output size is
    given explicitly instead of being read from parameters['columnDimensions'].

    Steps:
      1. warm up the SP on the first 1000 training images (learning on),
      2. encode the whole training set through the SP (learning still on) and
         fit the logistic regression on those features,
      3. encode the test set with SP learning off and score the predictions.

    @param x_tr, y_tr   - training images / labels
    @param x_tst, y_tst - test images / labels
    @param output_dim   - SP columnDimensions, eg. (50, 50)
    @param parameters   - dict with the remaining SpatialPooler settings
    @return mean accuracy on (x_tst, y_tst)
    """
    # Size the encoder from the data actually passed in, not from the
    # notebook-level `train_images` global.
    enc = SDR(x_tr[0].shape)
    sp = SpatialPooler(
        inputDimensions            = enc.dimensions,
        columnDimensions           = output_dim,
        potentialRadius            = parameters['potentialRadius'],
        potentialPct               = parameters['potentialPct'],
        globalInhibition           = True,
        localAreaDensity           = parameters['localAreaDensity'],
        stimulusThreshold          = int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec         = parameters['synPermInactiveDec'],
        synPermActiveInc           = parameters['synPermActiveInc'],
        synPermConnected           = parameters['synPermConnected'],
        minPctOverlapDutyCycle     = parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod            = int(round(parameters['dutyCyclePeriod'])),
        boostStrength              = parameters['boostStrength'],
        seed                       = 0, # this is important, 0="random" seed which changes on each invocation
        spVerbosity                = 0,
        wrapAround                 = False)
    columns = SDR( sp.getColumnDimensions() )

    # train SP (warm-up on the first 1000 samples)
    for img in x_tr[:1000]:
        encode(img, enc)
        sp.compute( enc, True, columns )

    # train linreg; the SP keeps learning while the training features are produced
    csr = encode_to_csr_sp(x_tr, enc, sp, columns, True)

    linreg = LogisticRegression(tol=.001, max_iter=100, multi_class='multinomial', penalty='l2', solver='lbfgs', n_jobs=3)
    linreg.fit(csr, y_tr)

    # evaluate with SP learning switched off
    csr = encode_to_csr_sp(x_tst, enc, sp, columns, False)
    score = linreg.predict(csr) == y_tst
    score = score.mean()
    print('Score:', 100 * score, '% for n =', len(x_tr))
    return score

# Upper bound on the number of samples used; slicing with [:n] keeps the whole
# array whenever it holds fewer than n samples.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

# Single run with a 50x50 SP output (default_parameters uses 79x79).
d = 50
run_htm_sp_sklearn_small(x_tr, y_tr, x_tst, y_tst, (d, d))

In [None]:
# Sweep the side length d of a square (d, d) SP output and record the test
# accuracy for each size. Every run builds and trains a fresh SpatialPooler.
output_dims = list(range(20, 50, 5))
scores = [run_htm_sp_sklearn_small(x_tr, y_tr, x_tst, y_tst, (d, d)) for d in output_dims]

# Label the figure so it can be read on its own; the trailing ';' hides the
# matplotlib object repr in the cell output.
plt.plot(output_dims, scores)
plt.xlabel('SP output side length d (columnDimensions = (d, d))')
plt.ylabel('Test accuracy')
plt.title('HTM SP + LogisticRegression: accuracy vs. SP output size');

## My SP implementation

In [None]:
# NOTE: this re-declares default_parameters with exactly the same values as the
# dict defined in the first cell of the notebook.
default_parameters = {
    'potentialRadius': 7,
    'boostStrength': 7.0,
    'columnDimensions': (79, 79),
    'dutyCyclePeriod': 1402,
    'localAreaDensity': 0.1,
    'minPctOverlapDutyCycle': 0.2,
    'potentialPct': 0.1,
    'stimulusThreshold': 6,
    'synPermActiveInc': 0.14,
    'synPermConnected': 0.5,
    'synPermInactiveDec': 0.02
}

In [None]:
def encode_img(img):
    """
    Binarize an image: 1 where the pixel is at least the image mean, else 0.

    @param img - numpy array
    @return int8 array of the same shape holding 0/1
    """
    above_mean = img >= img.mean()
    return above_mean.astype(np.int8)

# One binarized training image, used further down in this cell to smoke-test MySpatialPooler.
sample = train_images[0]
sample = encode_img(sample)

class MySpatialPooler:
    """
    Small NumPy implementation of a spatial pooler with a 2-D output.

    Per output column (addressed by its 2-D position) the pooler keeps:
      * receptive_fields       - random 0/1 mask over the input (1 with p=0.8)
      * connections_permanence - synapse permanences in [0, 1]; entries outside
                                 the receptive field are kept at 0
      * time_avg_activity      - running average of how often the column was activated
      * time_avg_overlap       - running average of the column's overlap
    `boost` is a per-column multiplier derived from time_avg_activity.

    Random initialisation uses NumPy's global RNG (np.random).
    """

    def __init__(self, input_shape, output_shape, permanence_threshold, sparsity_level, syn_perm_deltas, min_activation_threshold=1, max_boost_factor=1.5, boost_sliding_window=(1000, 1000)):
        """
        @param input_shape  - tuple, shape of one (binary) input
        @param output_shape - tuple of two ints, shape of the column grid
        @param permanence_threshold - a synapse counts as connected when its
                                      permanence is >= this value
        @param sparsity_level - fraction of columns allowed to be active
        @param syn_perm_deltas - (increment, decrement) applied to permanences of
                                 active / inactive inputs of activated columns
        @param min_activation_threshold - minimal boosted overlap for a column
                                          to be activated
        @param max_boost_factor - scale used inside the exponent of the boost
        @param boost_sliding_window - (activity_duty_cycle, overlap_duty_cycle),
                                      periods of the two running averages
        """
        assert isinstance(input_shape, tuple) and isinstance(output_shape, tuple)
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.joint_shape = output_shape + input_shape
        self.output_size = output_shape[0] * output_shape[1]

        self.sparsity_level = sparsity_level
        self.n_active_bits = int(self.output_size * sparsity_level)

        self.permanence_threshold = permanence_threshold
        self.syn_perm_inc, self.syn_perm_dec = syn_perm_deltas
        self.min_activation_threshold = min_activation_threshold

        self.max_boost_factor = max_boost_factor
        self.activity_duty_cycle, self.overlap_duty_cycle = boost_sliding_window

        # init
        self.receptive_fields = np.random.choice(2, size=self.joint_shape, p=[.2, .8])
        self.connections_permanence = np.random.uniform(size=self.joint_shape) * self.receptive_fields
        # Builtin float/bool replace the np.float/np.bool aliases, which were
        # removed from NumPy (they were plain aliases of the builtins).
        self.time_avg_activity = np.full(self.output_shape, self.sparsity_level, dtype=float)
        self.time_avg_overlap = np.ones(self.output_shape, dtype=float)
        # Scratch buffer for the per-input permanence delta, reused on every update.
        self.dp = np.empty(input_shape, dtype=float)
        self._compute_boost()

    def compute(self, x, learn):
        """
        Compute the activated columns for one input.

        @param x     - binary input of shape input_shape (non-zero = active)
        @param learn - when True, update permanences and the activity boost
        @return 1-D array of flat indices (into the raveled output grid) of the
                activated columns; at most n_active_bits entries
        """
        x = x.astype(bool)
        # For every column: which of its synapses to currently active inputs are connected.
        connected = self.connections_permanence[:, :, x] >= self.permanence_threshold
        overlaps = (np.count_nonzero(connected, -1) * self.boost).ravel()
        # kth must be a valid index, so cap it for the n_active_bits == output_size case.
        kth = min(self.n_active_bits, overlaps.size - 1)
        activated_indices = np.argpartition(-overlaps, kth)[:self.n_active_bits]
        activated_indices = activated_indices[overlaps[activated_indices] >= self.min_activation_threshold]

        if learn:
            rows, cols = np.unravel_index(activated_indices, self.output_shape)
            self._update_permanence(x, rows, cols)
            self._update_activity_boost(rows, cols)
            # Overlap-based boosting is currently switched off:
            # self._update_overlap_boost(x, rows, cols, overlaps)

        return activated_indices

    def _update_permanence(self, x, rows, cols):
        """
        Reinforce synapses of the activated columns: +syn_perm_inc for active
        inputs, -syn_perm_dec for inactive ones, only inside the receptive
        field, clipped to [0, 1].

        @param x          - boolean input mask of shape input_shape
        @param rows, cols - grid coordinates of the activated columns
        """
        dp = self.dp
        # x and ~x together cover the whole buffer, so no stale values remain.
        dp[x] = self.syn_perm_inc
        dp[~x] = -self.syn_perm_dec
        updated = self.connections_permanence[rows, cols] + dp * self.receptive_fields[rows, cols]
        # Indexing with index arrays yields a copy, so the result has to be
        # written back explicitly; otherwise the update is silently lost.
        self.connections_permanence[rows, cols] = np.clip(updated, 0, 1)

    def _update_activity_boost(self, rows, cols):
        """
        Update the running activity average (decay for all columns, bump for
        the activated ones) and recompute the boost factors.
        """
        self.time_avg_activity *= (self.activity_duty_cycle - 1) / self.activity_duty_cycle
        self.time_avg_activity[rows, cols] += 1 / self.activity_duty_cycle
        self._compute_boost()

    def _update_overlap_boost(self, x, rows, cols, overlaps):
        """
        Update the running overlap average and raise the permanences of the 5%
        of columns with the lowest average overlap by 0.1 * permanence_threshold.

        @param x, rows, cols - unused; kept for a signature parallel to the
                               other update methods
        @param overlaps      - flat array of (boosted) overlaps, one per column
        """
        self.time_avg_overlap += (overlaps.reshape(self.output_shape) - self.time_avg_overlap) / self.overlap_duty_cycle
        k = int(.05 * self.output_size)
        to_boost_indices = np.argpartition(self.time_avg_overlap.ravel(), k)[:k]
        to_boost_indices = np.unravel_index(to_boost_indices, self.output_shape)
        # Only synapses inside the receptive field are raised, so permanences
        # outside of it stay at 0 (same masking as in _update_permanence).
        bump = .1 * self.permanence_threshold * self.receptive_fields[to_boost_indices]
        boosted = self.connections_permanence[to_boost_indices] + bump
        # Write back explicitly: the indexed read above is a copy.
        self.connections_permanence[to_boost_indices] = np.clip(boosted, 0, 1)

    def _compute_boost(self):
        """Boost > 1 for columns less active than the mean, < 1 for more active ones."""
        self.boost = np.exp(-self.max_boost_factor * (self.time_avg_activity - self.time_avg_activity.mean()))
        

# Smoke test on a single image. MySpatialPooler draws its initial state from
# NumPy's global RNG, so seed it first.
np.random.seed(1337)
# Positional arguments: 10x10 output, permanence_threshold=.5, sparsity_level=.04,
# syn_perm_deltas=(.1, .02), min_activation_threshold=4.
my_sp = MySpatialPooler(train_images[0].shape, (10, 10), .5, .04, (.1, .02), 4)
# Cell output: flat indices of the columns activated by `sample`.
my_sp.compute(sample, True)

In [None]:
from sklearn.linear_model import LogisticRegression
from scipy.sparse import csr_matrix

def encode_to_csr_my_sp(x, sp, learn=False):
    """
    Binarize every image in `x`, run it through `sp` and pack the activated
    column indices into one sparse matrix.

    @param x     - array of images (needs `.shape[0]` = number of samples)
    @param sp    - pooler exposing compute(img, learn) -> flat column indices
                   and an `output_size` attribute
    @param learn - forwarded to sp.compute
    @return scipy csr_matrix of shape (len(x), sp.output_size) with 1.0 at
            every activated column
    """
    col_indices = []
    row_ptr = [0]
    for image in x:
        binary = encode_img(image)
        col_indices.extend(sp.compute(binary, learn))
        # CSR row pointer: where the next row starts in col_indices.
        row_ptr.append(len(col_indices))

    values = np.ones(len(col_indices))
    n_rows = x.shape[0]
    return csr_matrix((values, col_indices, row_ptr), shape=(n_rows, sp.output_size))

In [None]:
%%time
def run_my_sp_sklearn_small(x_tr,  y_tr, x_tst, y_tst, sp):
    """
    Custom spatial pooler as feature extractor + multinomial logistic regression.

    Steps:
      1. warm up `sp` on the first 1000 training images (learning on),
      2. encode the whole training set through `sp` (learning still on) and
         fit the logistic regression on those features,
      3. encode the test set with learning off and score the predictions.

    `sp` is modified by the training.

    @param x_tr, y_tr   - training images / labels
    @param x_tst, y_tst - test images / labels
    @param sp           - pooler exposing compute(img, learn) and `output_size`
    @return mean accuracy on (x_tst, y_tst)
    """
    # The unused SDR scratch objects were dropped: this pipeline does not use
    # the htm bindings at all.

    # train SP (warm-up on the first 1000 samples)
    for img in x_tr[:1000]:
        img = encode_img(img)
        sp.compute(img, True)

    # train linreg; the pooler keeps learning while the training features are produced
    csr = encode_to_csr_my_sp(x_tr, sp, True)

    linreg = LogisticRegression(tol=.001, max_iter=100, multi_class='multinomial', penalty='l2', solver='lbfgs', n_jobs=3)
    linreg.fit(csr, y_tr)

    # evaluate with learning switched off
    csr = encode_to_csr_my_sp(x_tst, sp, False)
    score = linreg.predict(csr) == y_tst
    score = score.mean()
    print('Score:', 100 * score, '% for n =', len(x_tr))
    return score

# Upper bound on the number of samples used; slicing with [:n] keeps the whole
# array whenever it holds fewer than n samples.
n = 100000
x_tr, y_tr = train_images[:n], train_labels[:n]
x_tst, y_tst = test_images[:n], test_labels[:n]

# Positional arguments: 50x50 output, permanence_threshold=.5, sparsity_level=.04,
# syn_perm_deltas=(.1, .02), min_activation_threshold=4.
my_sp = MySpatialPooler(train_images[0].shape, (50, 50), .5, .04, (.1, .02), 4, max_boost_factor=5)
run_my_sp_sklearn_small(x_tr, y_tr, x_tst, y_tst, my_sp)