In [None]:
from agent import HandcraftedAgent
from tqdm.auto import tqdm
import numpy as np
import random
import logging

# Flag the root logger as disabled for this notebook session.
# NOTE(review): only the root logger object is touched here; whether loggers
# created inside `agent` are silenced as well cannot be confirmed from this cell.
logger = logging.getLogger()
logger.disabled = True


# Capacity of each memory system, passed unchanged to every agent.
capacity = {"episodic": 32, "episodic_agent": 32, "semantic": 32, "short": 1}

# Environment settings. "seed" is overwritten in place before every run below,
# so after the sweep it holds the last seed that was used.
config = {
    "question_prob": 1.0,
    "seed": 0,
    "terminates_at": 99,
    "randomize_observations": True,
    "room_size": "m",
}

# Maps (mm_policy, qa_policy, explore_policy, pretrain_semantic) to a list with
# one (mean of agent.scores, std of agent.scores) tuple per seed.
results = {}

# Every policy combination to evaluate, in the same order nested loops would
# visit them. Each option list currently holds a single value.
sweep = [
    (mm, qa, explore, pretrain)
    for mm in ["random"]
    for qa in ["episodic_semantic"]
    for explore in ["avoid_walls"]
    for pretrain in [False]
]

for key in sweep:
    mm_policy, qa_policy, explore_policy, pretrain_semantic = key
    results.setdefault(key, [])
    print(key)

    for seed in tqdm([0, 1, 2, 3, 4]):
        config["seed"] = seed

        agent_kwargs = {
            "env_str": "room_env:RoomEnv-v2",
            "env_config": config,
            "mm_policy": mm_policy,
            "qa_policy": qa_policy,
            "explore_policy": explore_policy,
            "num_samples_for_results": 10,
            "capacity": capacity,
            "pretrain_semantic": pretrain_semantic,
        }
        agent = HandcraftedAgent(**agent_kwargs)

        agent.test()
        # Presumably deletes whatever test() wrote to disk -- confirm in `agent`.
        agent.remove_results_from_disk()

        # Summarise this seed's scores as (mean, std).
        to_append = (np.mean(agent.scores), np.std(agent.scores))
        results[key].append(to_append)

In [None]:
# Summary table: one row per policy combination found in `results`.
# Column layout as (heading, left-aligned field width).
columns = [
    ("memory mgmt", 20),
    ("qa", 20),
    ("explore", 20),
    ("pretrain_semantic", 20),
    ("mean", 10),
    ("std", 10),
]
print("".join(f"{heading:<{width}}" for heading, width in columns))
print("-" * 110)

for key, val in results.items():
    # Only the first element of each per-seed tuple (the mean score) is used;
    # the "std" column is therefore the spread of those per-seed means.
    seed_means = [v[0] for v in val]
    mean_cell = round(np.mean(seed_means), 2)
    std_cell = round(np.std(seed_means), 2)

    label_cells = "".join(f"{str(key[i]):<20}" for i in range(4))
    print(f"{label_cells}{mean_cell:<10}{std_cell:<10}")

In [None]:
from agent.dqn import DQNMMAgent
import random
import logging

# Flag the root logger as disabled for this notebook session.
logger = logging.getLogger()
logger.disabled = True

# Random hyperparameter search: each pass draws a fresh configuration and trains
# one DQNMMAgent on it. The loop has no exit condition, so it runs until the
# cell is interrupted. `random` is not seeded in this cell, so the sequence of
# draws is not fixed here.
while True:
    # Sizes are drawn as a chain: warm_start is a multiple of batch_size and
    # replay_buffer_size is a multiple of warm_start.
    multipliers = (1, 2, 4, 8)
    batch_size = random.choice([128, 256, 512, 1024])
    warm_start = random.choice([batch_size * m for m in multipliers])
    replay_buffer_size = random.choice([warm_start * m for m in multipliers])

    # Environment settings; only randomize_observations is sampled.
    config = {
        "question_prob": 1.0,
        "terminates_at": 99,
        "randomize_observations": random.choice([True, False]),
        "room_size": "s",
    }

    memory_capacity = {
        "episodic": 16,
        "episodic_agent": 16,
        "semantic": 16,
        "short": 1,
    }
    nn_params = {
        "hidden_size": 64,
        "num_layers": 2,
        "embedding_dim": 32,
        "v1_params": None,
        "v2_params": {},
        "memory_of_interest": ["episodic", "semantic", "short"],
    }

    # Sampled in this order so the stream of random draws is unchanged:
    # discount factor first, then the split-reward flag.
    gamma = random.random()
    split_reward_training = random.choice([True, False])

    all_params = {
        "env_str": "room_env:RoomEnv-v2",
        "max_epsilon": 1.0,
        "min_epsilon": 0.1,
        "epsilon_decay_until": 100 * 10,
        "gamma": gamma,
        "capacity": memory_capacity,
        "nn_params": nn_params,
        "num_iterations": 100 * 10,
        "replay_buffer_size": replay_buffer_size,
        "warm_start": warm_start,
        "batch_size": batch_size,
        "target_update_rate": 10,
        "pretrain_semantic": False,
        "run_test": True,
        "num_samples_for_results": 10,
        "train_seed": 5,
        "plotting_interval": 10,
        "device": "cpu",
        "test_seed": 0,
        "qa_policy": "episodic_semantic",
        "explore_policy": "avoid_walls",
        "env_config": config,
        "ddqn": True,
        "dueling_dqn": True,
        "split_reward_training": split_reward_training,
    }

    agent = DQNMMAgent(**all_params)
    agent.train()