In [None]:
from htm_rl.agent.agent import Agent, AgentRunner
from htm_rl.agent.memory import Memory, TemporalMemory
from htm_rl.agent.planner import Planner
from htm_rl.envs.mdp import Mdp
from htm_rl.common.sa_sdr_encoder import SaSdrEncoder
from htm_rl.common.int_sdr_encoder import IntSdrEncoder, IntSdrFormatter

Basic example for a standard MDP environment.

In [None]:
# Build the toy MDP from a nested transition table.
# Presumably the outer key is a state and the inner dict maps
# action -> next state; state 4 has no outgoing table (None) -- TODO confirm
# against Mdp how a None entry is interpreted.
mdp = Mdp(
    transitions={
        0: {0: 4, 1: 1},
        1: {0: 1, 1: 2},
        2: {0: 2, 1: 3},
        3: {0: 3, 1: 0},
        4: None,
    }
)

In [None]:
# Display the number of states reported by the MDP (used below to size the state encoder).
mdp.n_states

In [None]:
# One integer SDR encoder per component of a (state, action) pair, each
# sized from the environment and sharing the same bit/threshold settings.
state_encoder = IntSdrEncoder(
    'state',
    mdp.n_states,
    value_bits=10,
    activation_threshold=7,
)

action_encoder = IntSdrEncoder(
    'action',
    mdp.n_actions,
    value_bits=10,
    activation_threshold=7,
)



In [None]:
# Combine the state and action encoders into a single (state, action) encoder.
sa_encoder = SaSdrEncoder(state_encoder, action_encoder)

In [None]:
# Show both derived values of the combined encoder. A cell only displays its
# final expression, so the two are combined into one tuple instead of being
# written as two separate bare expressions (the first would be discarded).
sa_encoder.total_bits, sa_encoder.activation_threshold

In [None]:
# Temporal memory sized to the combined encoder: one column per encoder bit.
# Note that learning_threshold is taken from the action encoder alone, while
# activation_threshold comes from the combined (state, action) encoder.
tm = TemporalMemory(
    n_columns=sa_encoder.total_bits,
    cells_per_column=2,
    activation_threshold=sa_encoder.activation_threshold,
    learning_threshold=action_encoder.activation_threshold,
    initial_permanence=0.49,  # just below connected_permanence
    connected_permanence=0.5,
)

In [None]:
# Formatter object handed to Memory below; constructed with default arguments.
sa_formatter = IntSdrFormatter()

In [None]:
# Wire the temporal memory, encoder and formatter together.
# The last argument is `sa_encoder.format` itself (passed uncalled).
memory = Memory(tm, sa_encoder, sa_formatter, sa_encoder.format)

In [None]:
# Planner operating on the memory above.
# NOTE(review): the meaning of the positional arguments 10 and 2 is not visible
# in this notebook -- confirm against the Planner signature.
planner = Planner(memory, 10, 2)

In [None]:
# Agent built from the memory and planner, told how many actions the MDP exposes.
agent = Agent(memory, planner, mdp.n_actions)

In [None]:
# Runner that drives the agent in the MDP environment.
# NOTE(review): the four trailing positional arguments (1, 10, 0, 1) are not
# named here -- confirm their meaning against the AgentRunner signature.
run = AgentRunner(agent, mdp, 1, 10, 0, 1)

In [None]:
# Execute the configured run for the MDP example.
run.run()

Example of an agent in a gridworld environment with a two-value state.

In [None]:
import sys

# Make the package directory importable so the bare `common.*` / `envs.*`
# imports in the following cells resolve. The membership check keeps the cell
# idempotent: re-running it does not stack duplicate sys.path entries.
if '../htm_rl/htm_rl/' not in sys.path:
    sys.path.append('../htm_rl/htm_rl/')

In [None]:
from common.int_sdr_encoder import SequenceSdrEncoder

In [None]:
from envs.mymdp import GridWorld

In [None]:
# 5x5 grid literal handed to GridWorld below.
# NOTE(review): the meaning of the cell codes 0 / 1 / 2 is defined by GridWorld
# and is not visible in this notebook -- confirm there.
world_description = [
    [2, 0, 0, 0, 0],
    [1, 1, 1, 0, 0],
    [0, 0, 0, 0, 1],
    [1, 1, 0, 0, 0],
    [0, 0, 0, 0, 0],
]

In [None]:
# Gridworld environment over the 5x5 description; (5, 5) matches the literal's
# dimensions, and the agent starts at row 4, column 0.
gw = GridWorld(world_description, (5, 5), agent_initial_position={'row': 4, 'column': 0})

In [None]:
# Display the world size; its first component sizes the 'distance' encoder below.
gw.world_size

In [None]:
# Two-component state: a 'distance' value and a 'surface' value, each with its
# own integer encoder, combined by SequenceSdrEncoder.
# NOTE(review): the trailing positional 10, 7 presumably correspond to
# value_bits / activation_threshold as in the keyword form used for the action
# encoder -- confirm the positional order in IntSdrEncoder.
state_encoder = SequenceSdrEncoder(
    'state',
    encoders=[
        IntSdrEncoder('distance', gw.world_size[0]+1, 10, 7),
        IntSdrEncoder('surface', 3, 10, 7),
    ],
    size=2,
)

In [None]:
# Inspect the composite state encoder. value_bits is printed explicitly because
# only the trailing expression (total_bits) is displayed automatically.
print(state_encoder.value_bits)
state_encoder.total_bits

In [None]:
# Size the action encoder from the environment instead of a literal 3, so it
# always agrees with the `gw.n_actions` passed to Agent for this same
# environment (and with how the later semantic-encoder section builds it).
action_encoder = IntSdrEncoder('action', gw.n_actions,
                               value_bits=10, activation_threshold=7)

In [None]:
# Combine the two-value state encoder with the action encoder.
sa_encoder = SaSdrEncoder(state_encoder, action_encoder)

In [None]:
# Temporal memory for the gridworld example, sized to the combined encoder.
# learning_threshold again comes from the action encoder alone, while
# activation_threshold comes from the combined encoder.
tm = TemporalMemory(
    n_columns=sa_encoder.total_bits,
    cells_per_column=2,
    activation_threshold=sa_encoder.activation_threshold,
    learning_threshold=action_encoder.activation_threshold,
    initial_permanence=0.49,  # just below connected_permanence
    connected_permanence=0.5,
)

In [None]:
# Fresh formatter instance for this example's Memory.
sa_formatter = IntSdrFormatter()

In [None]:
# Rebuild Memory around the gridworld temporal memory and encoder.
# `sa_encoder.format` is passed uncalled as the last argument.
memory = Memory(tm, sa_encoder, sa_formatter, sa_encoder.format)

In [None]:
# Planner for the gridworld memory, with the same positional settings as the MDP example.
# NOTE(review): meaning of 10 and 2 not visible here -- confirm against Planner.
planner = Planner(memory, 10, 2)

In [None]:
# Agent for the gridworld; the action count comes from the environment.
agent = Agent(memory, planner, gw.n_actions)

In [None]:
# Runner that drives the agent in the gridworld.
# NOTE(review): the trailing positional arguments (5, 10, 0, 1) are not named
# here -- confirm their meaning against the AgentRunner signature.
run = AgentRunner(agent, gw, 5, 10, 0, 1)

In [None]:
# Execute the configured run for the gridworld example.
run.run()

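Example of an agent in a two-value-state environment with a semantic encoder. Note: `IntSemanticSdrEncoder` is imported below, but the state encoder in this section is still assembled from `IntSdrEncoder` instances.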

In [None]:
import sys

# Make the package directory importable for the bare `common.*` / `envs.*`
# imports below. An earlier cell appends the same path, so the membership
# check prevents a duplicate sys.path entry when the notebook runs top to bottom.
if '../htm_rl/htm_rl/' not in sys.path:
    sys.path.append('../htm_rl/htm_rl/')

In [None]:
from common.int_sdr_encoder import SequenceSdrEncoder, IntSemanticSdrEncoder, IntSdrEncoder

In [None]:
from envs.mymdp import GridWorld

In [None]:
# Same 5x5 grid literal as the previous gridworld example.
# NOTE(review): the meaning of the cell codes 0 / 1 / 2 is defined by GridWorld
# and is not visible in this notebook -- confirm there.
world_description = [
    [2, 0, 0, 0, 0],
    [1, 1, 1, 0, 0],
    [0, 0, 0, 0, 1],
    [1, 1, 0, 0, 0],
    [0, 0, 0, 0, 0],
]

In [None]:
# Recreate the gridworld so this section starts from a fresh environment;
# (5, 5) matches the literal's dimensions, agent starts at row 4, column 0.
gw = GridWorld(world_description, (5, 5), agent_initial_position={'row': 4, 'column': 0})

In [None]:
# Display the world size; its first component sizes the 'distance' encoder below.
gw.world_size

In [None]:
# Two-component state encoder ('distance' and 'surface'), this time with the
# larger positional settings 20 and 15 on each sub-encoder.
# NOTE(review): 20, 15 presumably correspond to value_bits /
# activation_threshold as in the keyword form used for the action encoder --
# confirm the positional order in IntSdrEncoder.
state_encoder = SequenceSdrEncoder(
    'state',
    encoders=[
        IntSdrEncoder('distance', gw.world_size[0]+1, 20, 15),
        IntSdrEncoder('surface', 3, 20, 15),
    ],
    size=2,
)

In [None]:
# Inspect the composite state encoder. value_bits is printed explicitly because
# only the trailing expression (total_bits) is displayed automatically.
print(state_encoder.value_bits)
state_encoder.total_bits

In [None]:
# Action encoder sized from the environment, using the same bit/threshold
# settings (20 / 15) as the state sub-encoders in this section.
action_encoder = IntSdrEncoder(
    'action',
    gw.n_actions,
    value_bits=20,
    activation_threshold=15,
)

In [None]:
# Combine this section's state and action encoders.
sa_encoder = SaSdrEncoder(state_encoder, action_encoder)

In [None]:
# Temporal memory for the long-running experiment. It differs from the two
# earlier configurations in this notebook: a single cell per column, and an
# initial permanence equal to the connected threshold (0.5 rather than 0.49).
tm = TemporalMemory(
    n_columns=sa_encoder.total_bits,
    cells_per_column=1,
    activation_threshold=sa_encoder.activation_threshold,
    learning_threshold=action_encoder.activation_threshold,
    initial_permanence=0.5,
    connected_permanence=0.5,
)

In [None]:
# Fresh formatter instance for this section's Memory.
sa_formatter = IntSdrFormatter()

In [None]:
# Rebuild Memory around this section's temporal memory and encoder.
# `sa_encoder.format` is passed uncalled as the last argument.
memory = Memory(tm, sa_encoder, sa_formatter, sa_encoder.format)

In [None]:
# Planner with different positional settings (200, 1) from the earlier examples (10, 2).
# NOTE(review): meaning of these two arguments not visible here -- confirm against Planner.
planner = Planner(memory, 200, 1)

In [None]:
# Agent for this section; the action count comes from the environment.
agent = Agent(memory, planner, gw.n_actions)

In [None]:
# Runner for the long experiment, with much larger numeric settings than the earlier examples.
# NOTE(review): the trailing positional arguments (300, 500, 5000, 1) are not
# named here -- confirm their meaning against the AgentRunner signature.
run = AgentRunner(agent, gw, 300, 500, 5000, 1)

In [None]:
# Execute the long experiment; its statistics are plotted in the cells below.
run.run()

In [None]:
# run.n_episodes = 100
# run.max_steps = 1000
# run.run()

In [None]:
import numpy as np

import matplotlib.pyplot as plt

In [None]:
# Scatter the recorded step counts from the run against their position in the
# recorded sequence.
steps = np.array(run.train_stats.steps)
x_positions = np.arange(steps.size)

fig = plt.figure(figsize=(10, 7))
plt.plot(x_positions, steps, '.')

In [None]:
# Summary statistics of the recorded step counts: (mean, standard deviation).
steps.mean(), steps.std()

In [None]:
# Debug inspection of the planner's inter-episode goal memory.
# NOTE(review): `_set` is underscore-prefixed (private by convention), so this
# cell depends on an internal attribute that may change without notice.
agent.planner.inter_episode_goal_memory._set