In [None]:
from random import randint, seed

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from htm_rl.agent.agent import Agent, AgentRunner
from htm_rl.agent.memory import Memory, TemporalMemory
from htm_rl.agent.planner import Planner
from htm_rl.common.base_sa import SaRelatedComposition, Sa, SaSuperposition
from htm_rl.common.int_sdr_encoder import IntSdrEncoder, IntRangeEncoder
from htm_rl.common.int_sdr_encoder import SequenceSdrEncoder
from htm_rl.common.sa_sdr_encoder import SaSdrEncoder, format_sa_superposition
from htm_rl.envs.gridworld_pomdp import GridWorld

In [None]:
def init_memory(pars, sa_encoder, start_indicator=None):
    """Create a fresh ``Memory`` backed by a newly constructed ``TemporalMemory``.

    Args:
        pars: mapping of keyword arguments forwarded verbatim to
            ``TemporalMemory(**pars)``.
        sa_encoder: state-action encoder; both the encoder itself and its
            ``format`` attribute are handed to ``Memory``.
        start_indicator: optional value forwarded to ``Memory`` under the
            keyword of the same name.

    Returns:
        The newly built ``Memory`` instance.
    """
    return Memory(
        TemporalMemory(**pars),
        sa_encoder,
        sa_encoder.format,
        format_sa_superposition,
        start_indicator=start_indicator,
    )

def learn_way(way, memory, environment, verbosity=1):
    """Train ``memory`` on one pass of the action sequence ``way``.

    The memory and the environment are both reset first. For every action the
    memory is trained on ``Sa(state, action)``, where ``state`` is the value
    observed *before* the action is applied; the environment is then stepped
    and only the new state is kept (the other three values returned by
    ``environment.step`` are discarded).

    Args:
        way: iterable of actions to replay.
        memory: object exposing ``reset()`` and ``train(sa, verbosity)``.
        environment: object exposing ``reset()``, ``step(action)`` (returning a
            4-tuple whose first item is the next state) and ``render()``.
        verbosity: forwarded to ``memory.train``; values above 1 additionally
            render the environment and print the current action and state.
    """
    memory.reset()
    current_state = environment.reset()
    for chosen_action in way:
        if verbosity > 1:
            environment.render()
            print(f'Action {chosen_action} State: {current_state}')
        memory.train(Sa(current_state, chosen_action), verbosity)
        current_state, _reward, _done, _info = environment.step(chosen_action)

def check_agent(memory, environment, goal_state, verbosity=1):
    """Run a planning agent once and report whether its last reward was positive.

    A ``Planner`` and an ``Agent`` are built around ``memory``, wrapped in an
    ``AgentRunner``, given ``goal_state`` as a planner goal and a planning
    horizon of 10, and then run.

    Note: this function reads the module-level ``max_steps`` variable, so that
    name must be defined before the function is called.

    Args:
        memory: trained memory handed to both the planner and the agent.
        environment: environment to run in; its ``n_actions`` is passed to
            the agent.
        goal_state: state registered with the planner via ``add_goal``.
        verbosity: forwarded as the last positional argument of ``AgentRunner``.

    Returns:
        ``True`` if the final entry of ``train_stats.rewards`` is greater than
        zero, ``False`` otherwise.
    """
    # NOTE(review): the literals (10, 1) for Planner and (1, ..., 0) for
    # AgentRunner are passed positionally; their meaning is defined by those
    # classes - confirm against their signatures before tuning.
    agent = Agent(memory, Planner(memory, 10, 1), environment.n_actions)
    runner = AgentRunner(agent, environment, 1, max_steps, 0, verbosity)
    runner.agent.planner.add_goal(goal_state)
    runner.agent.set_planning_horizon(10)
    runner.run()
    return bool(runner.train_stats.rewards[-1] > 0)

In [None]:
def random_way(max_steps, n_actions):
    """Draw a random action sequence.

    Args:
        max_steps: number of actions to generate.
        n_actions: number of distinct actions; each drawn action is an integer
            in ``[0, n_actions - 1]`` (both ends inclusive, via ``randint``).

    Returns:
        A list of ``max_steps`` randomly drawn action indices.
    """
    way = []
    for _ in range(max_steps):
        way.append(randint(0, n_actions - 1))
    return way

def run_way_after_experiments(pars, sa_encoder, goal_state,
                              start_indicator=None,
                              n_experiments=3,
                              verbosity=0,
                              learning_true_count=1):
    """Measure how many random action sequences can precede the true one
    before the agent stops reaching the goal.

    For each experiment the number of noise sequences starts at 0. Every trial
    builds a brand-new memory, trains it on that many random sequences, then
    ``learning_true_count`` times on the module-level ``actions`` sequence,
    and finally runs ``check_agent``. While the check succeeds the noise count
    is increased by one and the trial is repeated from scratch; the first
    failing count is recorded.

    Relies on the module-level names ``gw`` (environment), ``actions`` (true
    action sequence) and ``max_steps`` (length of each random sequence).

    Note: an experiment only ends when ``check_agent`` returns a falsy value,
    so it does not terminate if the agent never fails.

    Args:
        pars: temporal-memory keyword arguments, forwarded to ``init_memory``.
        sa_encoder: state-action encoder, forwarded to ``init_memory``.
        goal_state: goal handed to ``check_agent``.
        start_indicator: forwarded to ``init_memory``.
        n_experiments: number of independent experiments.
        verbosity: forwarded to ``check_agent``.
        learning_true_count: how many times the true sequence is learned per
            trial.

    Returns:
        A pair ``(results, ways_history)``: ``results`` lists, per experiment,
        the noise count at which the agent failed; ``ways_history`` is a dict
        of parallel lists (``'way'``, ``'steps'``, ``'experiment'``) recording
        the stringified last random sequence of the failing trial, that noise
        count and the experiment index. Nothing is recorded for an experiment
        that fails with zero noise sequences.
    """
    failure_counts = []
    ways_history = {'way': [], 'steps': [], 'experiment': []}
    for experiment in tqdm(range(n_experiments)):
        n_noise = 0  # number of noise action sequences
        agent_succeeds = True
        while agent_succeeds:
            # Each trial starts from an untrained memory.
            memory = init_memory(pars, sa_encoder, start_indicator=start_indicator)
            last_noise_way = None
            for _ in range(n_noise):
                last_noise_way = random_way(max_steps, gw.n_actions)
                learn_way(last_noise_way, memory, gw)
            for _ in range(learning_true_count):
                learn_way(actions, memory, gw)

            agent_succeeds = check_agent(memory, gw, goal_state, verbosity)
            if agent_succeeds:
                n_noise += 1
            elif last_noise_way is not None:
                # Only the final random sequence of the failing trial is kept.
                ways_history['way'].append(str(last_noise_way))
                ways_history['steps'].append(n_noise)
                ways_history['experiment'].append(experiment)
        failure_counts.append(n_noise)
    return failure_counts, ways_history

In [None]:
# --- Experiment configuration ---

# Seed for Python's `random` module, from which `random_way` draws the noise
# action sequences. Seeding here makes a Restart & Run All replay the same
# noise (assuming nothing else consumes the `random` stream in between).
SEED = 42
seed(SEED)

# Length of every random action sequence produced by `random_way`; also the
# value `check_agent` forwards to `AgentRunner`.
max_steps = 12

# The "true" action sequence the memory is trained on after the noise.
actions = [2, 2, 1, 2, 2, 1, 2, 2]

# Grid layout handed to `GridWorld`.
# NOTE(review): the meaning of the cell codes 0/1/2 is defined by GridWorld -
# confirm there before editing the map.
world_description = [[2,0,0],
                     [1,1,0],
                     [0,0,0]]

In [None]:
# Build the grid-world environment used by every experiment below. The agent
# starts at row 2, column 0, and only the 'distance' and 'surface' variables
# are requested as observations.
gw = GridWorld(
    world_description,
    (3, 3),
    agent_initial_position=dict(row=2, column=0),
    observable_vars=['distance', 'surface'],
)

In [None]:
# Visual sanity check of the freshly built environment.
gw.render()

In [None]:
# Display the environment's observable state next to its filtered observation.
gw.observable_state, gw.filtered_observation

In [None]:
# Number of SDR bits allotted to each encoded variable.
surface_bits = 10
distance_bits = 10
action_bits = 20

# The state is encoded as a sequence of two integer encoders, one per
# observable variable ('distance', then 'surface').
# NOTE(review): `3 + 1` for the surface encoder is presumably "number of
# surface codes + 1" - confirm against the values GridWorld can emit.
state_encoder = SequenceSdrEncoder(
    'state',
    encoders=[
        IntSdrEncoder('distance', gw.world_size[0] + 1, distance_bits, distance_bits - 3),
        IntSdrEncoder('surface', 3 + 1, surface_bits, surface_bits),
    ],
    size=2,
)

In [None]:
# Integer encoder for actions: one more value than the environment has
# actions, `action_bits` bits per value, activation threshold three bits lower.
action_encoder = IntSdrEncoder(
    'action',
    gw.n_actions + 1,
    value_bits=action_bits,
    activation_threshold=action_bits - 3,
)

In [None]:
# Combine the state and action encoders into a single state-action encoder.
sa_encoder = SaSdrEncoder(state_encoder, action_encoder)

In [None]:
# Inspect the encoder sizes that the temporal-memory parameters below reuse.
sa_encoder.total_bits, sa_encoder.value_bits, sa_encoder.activation_threshold

In [None]:
# Goal handed to the planner through `check_agent`.
# NOTE(review): presumably a (distance, surface) pair, following the order of
# the environment's observable_vars - TODO confirm.
goal_state = (0, 2)

In [None]:
# Temporal-memory configuration #1: 50 cells per column, with both thresholds
# taken from the encoder's own activation threshold. These entries are passed
# as keyword arguments to `TemporalMemory` by `init_memory`.
pars = {
    'n_columns': sa_encoder.total_bits,
    'cells_per_column': 50,
    'activation_threshold': sa_encoder.activation_threshold,
    'learning_threshold': sa_encoder.activation_threshold,
    'initial_permanence': 0.5,
    'connected_permanence': 0.5,
    'maxNewSynapseCount': sa_encoder.value_bits,
    'maxSynapsesPerSegment': sa_encoder.value_bits,
    'permanenceIncrement': 0.1,
    'permanenceDecrement': 0.05,
    'predictedSegmentDecrement': 0.025,
}

In [None]:
# Experiment 1: 100 runs with the currently bound `pars`, learning the true
# sequence once per trial. `results` is the (counts, ways_history) pair
# returned by `run_way_after_experiments`.
results = run_way_after_experiments(
    pars, sa_encoder, goal_state,
    start_indicator=Sa((3, 3), 3),
    n_experiments=100,
    learning_true_count=1,
)

In [None]:
# results[0] is the per-experiment list of noise counts at which the agent
# failed; show its mean and standard deviation. (Despite the name,
# `df_results` is a NumPy array, not a DataFrame.)
df_results = np.array(results[0])
df_results.mean(), df_results.std()

In [None]:
# Temporal-memory configuration #2. Compared with the earlier `pars` cell it
# uses 8 cells per column instead of 50, and sets both thresholds to the
# encoder's `value_bits` rather than its `activation_threshold`.
pars = {
    'n_columns': sa_encoder.total_bits,
    'cells_per_column': 8,
    'activation_threshold': sa_encoder.value_bits,
    'learning_threshold': sa_encoder.value_bits,
    'initial_permanence': 0.5,
    'connected_permanence': 0.5,
    'maxNewSynapseCount': sa_encoder.value_bits,
    'maxSynapsesPerSegment': sa_encoder.value_bits,
    'permanenceIncrement': 0.1,
    'permanenceDecrement': 0.05,
    'predictedSegmentDecrement': 0.025,
}

In [None]:
# Experiment 2: 100 runs with the currently bound `pars`, learning the true
# sequence ten times per trial. `results` is the (counts, ways_history) pair
# returned by `run_way_after_experiments`.
results = run_way_after_experiments(
    pars, sa_encoder, goal_state,
    start_indicator=Sa((3, 3), 3),
    n_experiments=100,
    learning_true_count=10,
)

In [None]:
# Mean and standard deviation of the per-experiment noise counts (results[0])
# from the run just above. `df_results` is a NumPy array, not a DataFrame.
df_results = np.array(results[0])
df_results.mean(), df_results.std()

In [None]:
# Experiment 3: 100 runs with the currently bound `pars`, learning the true
# sequence once per trial. `results` is the (counts, ways_history) pair
# returned by `run_way_after_experiments`.
results = run_way_after_experiments(
    pars, sa_encoder, goal_state,
    start_indicator=Sa((3, 3), 3),
    n_experiments=100,
    learning_true_count=1,
)

In [None]:
# Mean and standard deviation of the per-experiment noise counts (results[0])
# from the run just above. `df_results` is a NumPy array, not a DataFrame.
df_results = np.array(results[0])
df_results.mean(), df_results.std()