In [None]:
from agent import DQNMMAgent
import logging

# Disable the root logger for this cell's run.
logger = logging.getLogger()
logger.disabled = True

# Smoke-test configuration: a single short training run whose results are
# written under a "TRASH" directory. The nested sections are built first and
# then assembled into ``params`` in the order the agent receives them.

# Per-memory-type capacities.
capacity = {
    "episodic": 2,
    "episodic_agent": 0,
    "semantic": 2,
    "semantic_map": 0,
    "short": 1,
}

# Settings forwarded to the agent as "nn_params".
nn_params = {
    "architecture": "lstm",
    "hidden_size": 8,
    "num_layers": 2,
    "embedding_dim": 8,
    "make_categorical_embeddings": False,
    "v1_params": None,
    "v2_params": {},
    "memory_of_interest": ["episodic", "semantic", "short"],
    "fuse_information": "sum",
    "include_positional_encoding": True,
    "max_timesteps": 100,
    "max_strength": 100,
}

# Settings forwarded to the agent as "env_config".
env_config = {
    "question_prob": 1.0,
    "terminates_at": 9,
    "randomize_observations": "objects",
    "room_size": "xxs",
    "rewards": {"correct": 1, "wrong": 0, "partial": 0},
    "make_everything_static": False,
    "num_total_questions": 1000,
    "question_interval": 1,
    "include_walls_in_observations": True,
}

# Every combination of memory-management policy, QA policy, exploration
# policy and semantic pretraining option (3 * 1 * 2 * 2 = 12 entries).
handcrafted_baselines = [
    {
        "mm": mm,
        "qa": qa,
        "explore": explore,
        "pretrain_semantic": pretrain_semantic,
    }
    for mm in ["random", "episodic", "semantic"]
    for qa in ["episodic_semantic"]
    for explore in ["random", "avoid_walls"]
    for pretrain_semantic in [False, "exclude_walls"]
]

params = {
    "env_str": "room_env:RoomEnv-v2",
    "num_iterations": 10 * 2,
    "replay_buffer_size": 10 * 2,
    "warm_start": 10 * 1,
    "batch_size": 4,
    "target_update_interval": 10,
    "epsilon_decay_until": 10 * 2,
    "max_epsilon": 1.0,
    "min_epsilon": 0.1,
    "gamma": 0.9,
    "capacity": capacity,
    "pretrain_semantic": False,
    "nn_params": nn_params,
    "run_test": True,
    "num_samples_for_results": 10,
    "train_seed": 5,
    "test_seed": 0,
    "device": "cpu",
    "qa_policy": "episodic_semantic",
    "explore_policy": "avoid_walls",
    "env_config": env_config,
    "split_reward_training": False,
    "default_root_dir": "./training_results/DQN/mm/LSTM/TRASH",
    "run_handcrafted_baselines": handcrafted_baselines,
}

# Build the agent from the configuration and train it.
agent = DQNMMAgent(**params)
agent.train()

In [None]:
from agent import DQNMMAgent
import logging
from tqdm.auto import tqdm
import random

# Disable the root logger for this cell's runs.
logger = logging.getLogger()
logger.disabled = True

# Random search: draw 1000 independent hyperparameter configurations.
# NOTE(review): no random seed is set in this cell, so the sampled
# configurations differ from one execution to the next.
# NOTE(review): replay_buffer_size, warm_start and batch_size are sampled
# independently, so e.g. warm_start can exceed replay_buffer_size; presumably
# such combinations are rejected by DQNMMAgent -- confirm.
params_all = []
for _ in range(1000):
    # Draw the randomized values first, in the same order in which they
    # appear in the configuration below.
    replay_buffer_size = random.choice([10, 50, 100])
    warm_start = random.choice([10, 50, 100])
    batch_size = random.choice([4, 8, 16, 32, 64])
    target_update_interval = random.choice([5, 10, 20, 50])
    epsilon_decay_until = random.choice([10, 50, 100])
    gamma = random.uniform(0.5, 0.999)
    ddqn = random.choice([True, False])
    dueling_dqn = random.choice([True, False])
    split_reward_training = random.choice([True, False])

    # The nested sections are rebuilt on every iteration so that no two
    # configurations share a mutable object.
    capacity = {
        "episodic": 4,
        "episodic_agent": 0,
        "semantic": 4,
        "semantic_map": 0,
        "short": 1,
    }
    nn_params = {
        "architecture": "lstm",
        "hidden_size": 64,
        "num_layers": 2,
        "embedding_dim": 64,
        "make_categorical_embeddings": False,
        "v1_params": None,
        "v2_params": {},
        "memory_of_interest": ["episodic", "semantic", "short"],
        "fuse_information": "sum",
        "include_positional_encoding": True,
        "max_timesteps": 100,
        "max_strength": 100,
    }
    env_config = {
        "question_prob": 1.0,
        "terminates_at": 9,
        "randomize_observations": "objects",
        "room_size": "s",
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 1,
        "include_walls_in_observations": True,
    }
    # Every combination of memory-management policy, QA policy, exploration
    # policy and semantic pretraining option (12 entries).
    handcrafted_baselines = [
        {
            "mm": mm,
            "qa": qa,
            "explore": explore,
            "pretrain_semantic": pretrain_semantic,
        }
        for mm in ["random", "episodic", "semantic"]
        for qa in ["episodic_semantic"]
        for explore in ["random", "avoid_walls"]
        for pretrain_semantic in [False, "exclude_walls"]
    ]

    params = {
        "env_str": "room_env:RoomEnv-v2",
        "num_iterations": 10 * 10,
        "replay_buffer_size": replay_buffer_size,
        "warm_start": warm_start,
        "batch_size": batch_size,
        "target_update_interval": target_update_interval,
        "epsilon_decay_until": epsilon_decay_until,
        "max_epsilon": 1.0,
        "min_epsilon": 0.1,
        "gamma": gamma,
        "capacity": capacity,
        "pretrain_semantic": False,
        "nn_params": nn_params,
        "run_test": True,
        "num_samples_for_results": 10,
        "train_seed": 5,
        "test_seed": 0,
        "device": "cpu",
        "qa_policy": "episodic_semantic",
        "explore_policy": "avoid_walls",
        "env_config": env_config,
        "ddqn": ddqn,
        "dueling_dqn": dueling_dqn,
        "split_reward_training": split_reward_training,
        "default_root_dir": "./training_results/DQN/mm/LSTM/s",
        "run_handcrafted_baselines": handcrafted_baselines,
    }
    params_all.append(params)

# Train one agent per sampled configuration. This is a best-effort sweep: a
# configuration that fails is reported and skipped so the remaining runs
# still execute.
for run_idx, params in enumerate(tqdm(params_all)):
    try:
        agent = DQNMMAgent(**params)
        agent.train()
    except Exception as e:
        # str(e) is empty for exceptions raised without a message (e.g. a bare
        # ``assert``), so include the exception type, and the run index so the
        # failing configuration can be looked up as params_all[run_idx].
        print(f"[run {run_idx}] {type(e).__name__}: {e}")