# Learn random sequences of agent observations

Здесь я просто пробую `minigrid`, учусь кодировать наблюдения агента в SDR и проверяю, способна ли TM запоминать последовательности наблюдений (а значит, и предсказывать возможные варианты наблюдений в будущем).

In [None]:
import gym
import gym_minigrid as minigirid
import matplotlib.pyplot as plt
import numpy as np
from time import sleep
from htm.bindings.sdr import SDR
from htm.algorithms import TemporalMemory as TM
from IPython.display import clear_output
import random

%matplotlib inline

In [None]:
def plot_prediction_accuracy(xs, ys, title):
    """Draw a line plot of prediction accuracy per timestep and show it.

    Args:
        xs: Values for the horizontal ("Timestep") axis.
        ys: Values for the vertical ("Prediction Accuracy") axis.
        title: Text used as the plot title.
    """
    # Pin the vertical range so a [0, 1] signal keeps a small margin on
    # both sides regardless of the data actually plotted.
    y_low, y_high = -0.1, 1.1
    plt.ylim([y_low, y_high])

    plt.plot(xs, ys)

    plt.title(title)
    plt.xlabel("Timestep")
    plt.ylabel("Prediction Accuracy")
    plt.show()

def extract_observation_data(raw_obs):
    """Return a cropped copy of channel 0 of an observation, with 8 mapped to 0.

    The crop keeps the last five rows and drops the first and last column,
    so a 7x7 view becomes a 5x5 array. The input array is left unmodified.

    Args:
        raw_obs: 3-D array indexed as ``[row, column, channel]``.

    Returns:
        A 2-D array holding the cropped first channel, where every value
        equal to 8 has been replaced by 0.
    """
    # Crop first and copy afterwards so the caller's array is never touched.
    window = raw_obs[-5:, 1:-1, 0].copy()

    # Fold code 8 into 0. Presumably this leaves only categories in [0, 2]
    # for the environment used here -- TODO confirm against the encoding.
    is_code_eight = window == 8
    window[is_code_eight] = 0
    return window

def merge_data(raw_obs, action, reward):
    """Flatten the cropped observation and append the action and the reward.

    Args:
        raw_obs: Raw observation, forwarded to ``extract_observation_data``.
        action: Scalar stored right after the flattened observation.
        reward: Scalar stored as the last element.

    Returns:
        A 1-D array: the flattened cropped observation followed by
        ``action`` and ``reward``.
    """
    # NOTE(review): the values are simply concatenated on the assumption
    # that observation cells, action and reward are all categorical on
    # [0, 2] -- confirm this really holds for the reward.
    flat_cells = extract_observation_data(raw_obs).ravel()
    extras = [action, reward]
    return np.concatenate((flat_cells, extras))

def encode_data(raw, active_bits=3):
    """Encode a vector of category values as a one-hot style 2-D SDR.

    Row ``k`` of the SDR corresponds to ``raw[k]``; within that row the bit
    at column ``raw[k]`` is set. Entries whose value is not one of
    ``0 .. active_bits - 1`` get no bit set.

    Args:
        raw: Array of category values (expected to be 1-D so that the
            boolean mask below selects rows).
        active_bits: Number of columns per row, i.e. how many distinct
            category values can be represented.

    Returns:
        An ``SDR`` with dimensions ``(raw.size, active_bits)``.
    """
    sdr = SDR((raw.size, active_bits))

    for category in range(active_bits):
        # Rows whose value equals this category get their matching bit set.
        matches = raw == category
        sdr.dense[matches, category] = 1

    # Assign the dense view back to itself. Presumably this is what makes
    # the SDR register the in-place edits above -- confirm in htm.core docs.
    sdr.dense = sdr.dense
    return sdr

In [None]:
# Temporal Memory instance that learns the encoded observation stream.
# Its column layout mirrors the encoder output: one row per input scalar
# (5 * 5 observation cells + action + reward) and 3 columns per row, which
# matches the default `active_bits=3` of `encode_data`.
tm = TM(
    columnDimensions = (5 * 5 + 2, 3),
    cellsPerColumn=8,
    initialPermanence=0.5,
    connectedPermanence=0.5,
    minThreshold=5,
    maxNewSynapseCount=50,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    activationThreshold=5,
    predictedSegmentDecrement=0.0001,
)

# Build the environment and wrap it with ImgObsWrapper; `env.reset()` and
# `env.step()` results are passed to `merge_data` as raw observations.
env = gym.make('MiniGrid-Empty-8x8-v0')
env = minigirid.wrappers.ImgObsWrapper(env)
# Seed both RNGs used in this notebook (the environment itself is not
# seeded here).
random.seed(1337)
np.random.seed(1337)

# Set to True to draw an RGB frame of the environment after every step.
render = False
# render = True

# Scripted action sequence and its cursor. They are only consumed by the
# commented-out line inside the loop; the active code picks random actions.
k = 3
actions, a_ind = [2, 0, 1, 1, 0] * k + [1, 2, 1] + [2, 0, 1, 1, 0] * k + [0, 2, 0], 0
obs = env.reset()
# Per-step prediction accuracy, stored as `1 - tm.anomaly`.
ys = []

# Run 800 environment steps, feeding each (observation, action, reward)
# triple to the TM with learning enabled.
for _ in range(800):
    # Uniformly random action out of {0, 1, 2}.
    action = np.random.choice(3)
#     action = actions[a_ind % len(actions)]
    a_ind += 1
    
    next_obs, reward, done, info = env.step(action)
    
    # Encode the observation seen *before* the step together with the action
    # taken from it and the reward that action produced.
    # NOTE(review): `encode_data` only sets a bit for values equal to
    # 0, 1 or 2 -- confirm the reward always falls in that set.
    obs_sdr = encode_data(merge_data(obs, action, reward))
    tm.compute(obs_sdr, learn=True)
    # Record `1 - anomaly` as this step's prediction accuracy.
    ys.append(1 - tm.anomaly)
    
    if done:
        # Episode finished: restart the environment and the scripted-action
        # cursor. The TM state is intentionally not reset (call is disabled).
        a_ind = 0
        next_obs = env.reset()
#         tm.reset()
    
    if render:
        # Replace the previous notebook output with the current frame.
        clear_output()

        plt.imshow(env.render('rgb_array'))
        plt.show()
        sleep(.1)
        
    obs = next_obs
    
# Close the render window (if any) and release the environment.
env.render(close=True)
env.close()

# One x value per recorded step; convert the accuracy list to an array.
xs = np.arange(len(ys))
ys = np.array(ys)

# Use the shared helper so the figure gets axis labels, a title and the
# fixed [-0.1, 1.1] y-range instead of a bare, unlabelled line plot.
plot_prediction_accuracy(xs, ys, "TM prediction accuracy (random actions)")