In [91]:
import numpy as np


class History:
    """Sliding window over the most recent observation blocks.

    ``blocks`` is a list of lists ordered from oldest to newest.
    ``add_block`` drops the oldest block and appends a new one, while
    ``add_action`` and ``add_reward`` append a scalar to the newest block.

    Attributes:
        block_size: Number of (initially empty) slots the window starts with.
        blocks: The window itself; every slot is its own list object.
    """

    def __init__(self, block_size: int = 6) -> None:
        """Create a window of ``block_size`` empty blocks.

        Args:
            block_size: Number of blocks kept in the window.
        """
        self.block_size = block_size
        # One distinct list per slot. `[[]] * n` would repeat a single list
        # object n times, so an append to one slot would appear in all of them.
        self.blocks = [[] for _ in range(self.block_size)]

    def add_block(self, block: list) -> None:
        """Drop the oldest block and append ``block`` as the newest one.

        Args:
            block: Elements of the new block. A shallow copy is stored so that
                later ``add_action`` / ``add_reward`` calls do not mutate the
                caller's list. The elements themselves are still shared.
        """
        self.blocks = self.blocks[1:] + [list(block)]

    def add_action(self, action: int) -> None:
        """Append an action id to the newest block.

        Args:
            action: One of 0, 1, 2, 3, 4.

        Raises:
            AssertionError: If ``action`` is not one of the allowed ids.
        """
        assert action in [0, 1, 2, 3, 4]
        self.blocks[-1].append(action)

    def add_reward(self, reward: int) -> None:
        """Append a reward value to the newest block.

        Args:
            reward: Reward to record.
        """
        self.blocks[-1].append(reward)

    def __repr__(self) -> str:
        """Return the string form of the underlying list of blocks."""
        return str(self.blocks)

    # Defined last on purpose: once the name `list` is bound in the class body,
    # any later annotation such as `block: list` would resolve to this method
    # instead of the builtin type.
    def list(self) -> list:
        """Return the elements of all blocks as one flat list, oldest first.

        Only one level is flattened: an element that is itself a list stays a
        list in the result.
        """
        return [element for block in self.blocks for element in block]


# Module-level History instance with six blocks, used by the cells below.
history = History(6)

In [92]:
import gymnasium as gym

# Build the environment from its registered id and reset it; reset() is
# unpacked into the first observation dict and an info object.
env = gym.make("room_env:RoomEnv-v2")
observations, info = env.reset()
# Last expression of the cell: display the initial observation.
observations

{'room': [['room_000', 'north', 'wall', 0],
  ['room_000', 'east', 'room_001', 0],
  ['room_000', 'south', 'wall', 0],
  ['room_000', 'west', 'wall', 0],
  ['agent', 'atlocation', 'room_000', 0]],
 'questions': [['ind_002', 'atlocation', '?', 0],
  ['ind_000', 'atlocation', '?', 0],
  ['dep_003', 'atlocation', '?', 0],
  ['sta_003', 'atlocation', '?', 0],
  ['ind_000', 'atlocation', '?', 0],
  ['dep_002', 'atlocation', '?', 0],
  ['ind_001', 'atlocation', '?', 0],
  ['dep_002', 'atlocation', '?', 0],
  ['ind_000', 'atlocation', '?', 0],
  ['ind_002', 'atlocation', '?', 0]]}

In [93]:
# Push the "room" part of the current observation into the history window.
history.add_block(observations["room"])

In [94]:
# Display the window (rendered through History.__repr__).
history

[[], [], [], [], [], [['room_000', 'north', 'wall', 0], ['room_000', 'east', 'room_001', 0], ['room_000', 'south', 'wall', 0], ['room_000', 'west', 'wall', 0], ['agent', 'atlocation', 'room_000', 0]]]

In [98]:
# Advance the environment by one step and keep all five returned values.
# The action is a 2-tuple: a list of ten "foo" strings and the string "east".
# Presumably these are one placeholder answer per question (the reset
# observation lists ten questions) and the movement direction — TODO confirm
# against the RoomEnv-v2 action format.
observations, reward, done, truncated, info = env.step((["foo"] * 10, "east"))

In [119]:
# Push the "room" part of the latest observation into the history window.
history.add_block(observations["room"])

In [121]:
# Display the flattened window (all blocks concatenated, oldest first).
# NOTE(review): the recorded output repeats the same room_002 entries for
# every slot; presumably the add_block cell was re-run several times with the
# same observation — TODO confirm with Restart Kernel -> Run All.
history.list()

[['room_002', 'north', 'wall', 2],
 ['room_002', 'east', 'room_003', 2],
 ['room_002', 'south', 'wall', 2],
 ['room_002', 'west', 'room_001', 2],
 ['agent', 'atlocation', 'room_002', 2],
 ['room_002', 'north', 'wall', 2],
 ['room_002', 'east', 'room_003', 2],
 ['room_002', 'south', 'wall', 2],
 ['room_002', 'west', 'room_001', 2],
 ['agent', 'atlocation', 'room_002', 2],
 ['room_002', 'north', 'wall', 2],
 ['room_002', 'east', 'room_003', 2],
 ['room_002', 'south', 'wall', 2],
 ['room_002', 'west', 'room_001', 2],
 ['agent', 'atlocation', 'room_002', 2],
 ['room_002', 'north', 'wall', 2],
 ['room_002', 'east', 'room_003', 2],
 ['room_002', 'south', 'wall', 2],
 ['room_002', 'west', 'room_001', 2],
 ['agent', 'atlocation', 'room_002', 2],
 ['room_002', 'north', 'wall', 2],
 ['room_002', 'east', 'room_003', 2],
 ['room_002', 'south', 'wall', 2],
 ['room_002', 'west', 'room_001', 2],
 ['agent', 'atlocation', 'room_002', 2],
 ['room_002', 'north', 'wall', 2],
 ['room_002', 'east', 'room_00

In [74]:
# NOTE(review): `foo` is not defined in any cell visible here; this cell relies
# on kernel state from a cell that is not shown — TODO define it explicitly.
# Append action 0 to the newest block of `foo`, then display `foo`.
foo.add_action(0)
foo

[[['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 10], [['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0]], [['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 0]]

In [77]:
# NOTE(review): `foo` is not defined in any cell visible here; this cell relies
# on kernel state from a cell that is not shown — TODO define it explicitly.
# Append reward 10 to the newest block of `foo`, then display `foo`.
foo.add_reward(10)
foo

[[['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0]], [['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 0, 10], [['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 10]]

In [80]:
from explicit_memory.utils import ReplayBuffer

# Buffer created with size 4 and batch size 2.
replay_buffer = ReplayBuffer(observation_type="dict", size=4, batch_size=2)

# Store the same five-value record four times; with size=4 this presumably
# fills the buffer exactly — TODO confirm against ReplayBuffer.
# NOTE(review): `foo` is not defined in any cell visible here.
for _ in range(4):
    replay_buffer.store(foo.list(), 0, 10, foo.list(), False)



In [81]:
# Display the result of sample_batch(); the recorded output is a dict that
# contains (at least) 'obs' and 'next_obs' object arrays.
replay_buffer.sample_batch()

{'obs': array([list([['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 0, 10, ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 10]),
        list([['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 0, 10, ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 10])],
       dtype=object),
 'next_obs': array([list([['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 0, 10, ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 10]),
        list([['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], ['tae', 'atlocation', 'kitchen', 0], ['tae', 'atlocation', 'bedroom', 0], 0, 0, 10, ['tae', 'atlocation', 'kitchen

In [None]:
import logging

# Disable the root logger before the project imports below.
logger = logging.getLogger()
logger.disabled = True

# NOTE(review): random, deepcopy, tqdm and DQNMMAgent are not used in this
# cell; kept because other cells may rely on them.
import random

from copy import deepcopy
from tqdm.auto import tqdm
from agent.dqn import DQNMMAgent, DQNExploreAgent

# Environment settings; passed to the agent under params["env_config"].
config = {
    "question_prob": 1.0,
    "terminates_at": 99,
    "randomize_observations": "objects",
    "room_size": "l",
    "rewards": {"correct": 1, "wrong": 0, "partial": 0},
    "make_everything_static": False,
    "num_total_questions": 1000,
    "question_interval": 1,
    "include_walls_in_observations": True,
}

# 10_000; shared by the epsilon schedule, iteration count and buffer size.
# Presumably 100 episodes x 100 steps — TODO confirm.
step_budget = 100 * 100

# One more than the last step index; used for max_timesteps and max_strength.
horizon = config["terminates_at"] + 1

memory_capacity = {
    "episodic": 16,
    "episodic_agent": 0,
    "semantic": 16,
    "semantic_map": 0,
    "short": 1,
}

network_params = {
    "architecture": "lstm",
    "hidden_size": 64,
    "num_layers": 2,
    "embedding_dim": 64,
    "make_categorical_embeddings": False,
    "v1_params": None,
    "v2_params": {},
    "memory_of_interest": ["episodic", "semantic"],
    "fuse_information": "sum",
    "include_positional_encoding": True,
    "max_timesteps": horizon,
    "max_strength": horizon,
}

# Cartesian product of the baseline options: 3 x 1 x 2 x 2 = 12 entries.
handcrafted_baselines = [
    {
        "mm": mm,
        "qa": qa,
        "explore": explore,
        "pretrain_semantic": pretrain_semantic,
    }
    for mm in ["random", "episodic", "semantic"]
    for qa in ["episodic_semantic"]
    for explore in ["random", "avoid_walls"]
    for pretrain_semantic in [False, "exclude_walls"]
]

params = {
    "env_str": "room_env:RoomEnv-v2",
    "max_epsilon": 1.0,
    "min_epsilon": 0.1,
    "epsilon_decay_until": step_budget,
    "gamma": 0.9,
    "capacity": memory_capacity,
    "nn_params": network_params,
    "num_iterations": step_budget,
    "replay_buffer_size": step_budget,
    # NOTE(review): true division makes this the float 1000.0, not an int;
    # confirm the agent accepts a float here.
    "warm_start": step_budget / 10,
    "batch_size": 32,
    "target_update_interval": 10,
    "pretrain_semantic": False,
    "run_test": True,
    "num_samples_for_results": 10,
    "train_seed": 5,
    "plotting_interval": 10,
    "device": "cpu",
    "test_seed": 0,
    "mm_policy": "neural",
    "mm_agent_path": "trained-agents/lstm-mm/2023-12-28 18:13:03.001952/agent.pkl",
    "env_config": config,
    "qa_policy": "episodic_semantic",
    "ddqn": True,
    "dueling_dqn": True,
    "default_root_dir": "training_results/explore/TRASH",
    "run_handcrafted_baselines": handcrafted_baselines,
}


# Build the exploration agent from the parameters above and train it.
agent = DQNExploreAgent(**params)
agent.train()