## DQN baselines LSTM + MLP

In [None]:
import matplotlib

matplotlib.use("Agg")

import logging

logger = logging.getLogger()
logger.disabled = True

import os
import random
from agent import DQNLSTMMLPBaselineAgent
from tqdm.auto import tqdm
import itertools

# --- Environment / model sizing ------------------------------------------
# `room_size` and `terminates_at` are forwarded to the environment config;
# `embedding_dim` is reused for the LSTM and MLP hidden sizes.
room_size = "xl"
embedding_dim = 48
terminates_at = 99

# --- Training schedule ----------------------------------------------------
# Everything below scales with the total iteration count.
# NOTE(review): presumably 200 episodes of (terminates_at + 1) steps each --
# confirm against the environment's episode length.
num_iterations = 200 * (terminates_at + 1)
warm_start = num_iterations // 100
replay_buffer_size = num_iterations // 10
validation_starts_at = 0

# --- DQN optimisation settings -------------------------------------------
batch_size = 32
target_update_interval = 100
gamma_explore = 0.9  # handed to the agent under the "gamma" key


# Train one baseline agent for every (history block size, test seed) pair.
# `itertools.product` varies the last iterable fastest, so all five seeds run
# for a given history block size before moving on to the next size.
for history_block_size, test_seed in itertools.product(
    [2, 4, 8, 16, 32], range(5)
):
    # The results directory is keyed on whether the room layout name marks
    # non-uniform object probabilities.
    if "different-prob" in room_size:
        prob_type = "non-equal-object-probs"
    else:
        prob_type = "equal-object-probs"

    root_path = (
        f"./training-results/{prob_type}/baselines/room_size={room_size}/"
        f"history_block_size={history_block_size}/"
    )

    # Nested configs are rebuilt on every iteration so that each agent gets
    # its own fresh dict objects rather than sharing them across runs.
    lstm_params = {
        "hidden_size": embedding_dim,
        "num_layers": 2,
        "embedding_dim": embedding_dim,
        "bidirectional": False,
    }
    mlp_params = {
        "hidden_size": embedding_dim,
        "num_hidden_layers": 1,
        "dueling_dqn": True,
    }
    env_config = {
        "question_prob": 1.0,
        "terminates_at": terminates_at,
        "randomize_observations": "objects",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 10,
        "include_walls_in_observations": True,
        "deterministic_objects": False,
    }

    # Offsetting by 5 keeps the train seeds (5..9) disjoint from the test
    # seeds (0..4) used across the sweep.
    train_seed = test_seed + 5

    params_dict = {
        "env_str": "room_env:RoomEnv-v2",
        "num_iterations": num_iterations,
        "replay_buffer_size": replay_buffer_size,
        "validation_starts_at": validation_starts_at,
        "warm_start": warm_start,
        "batch_size": batch_size,
        "target_update_interval": target_update_interval,
        # Epsilon is configured to decay over the whole run.
        "epsilon_decay_until": num_iterations,
        "max_epsilon": 1.0,
        "min_epsilon": 0.01,
        "gamma": gamma_explore,
        "history_block_size": history_block_size,
        "lstm_params": lstm_params,
        "mlp_params": mlp_params,
        "num_samples_for_results": {"val": 5, "test": 10},
        "validation_interval": 1,
        "plotting_interval": 50,
        "train_seed": train_seed,
        "test_seed": test_seed,
        "device": "cpu",
        "env_config": env_config,
        "default_root_dir": root_path,
        "run_handcrafted_baselines": True,
    }

    agent = DQNLSTMMLPBaselineAgent(**params_dict)
    agent.train()