## MM


In [None]:
from agent import PPOMMAgent
import logging
from tqdm.auto import tqdm
import random

# Disable the root logger while the sweep runs.
logger = logging.getLogger()
logger.disabled = True

# Number of random configurations drawn for the sweep.
NUM_TRIALS = 1000


def sample_mm_params():
    """Draw one random configuration for a ``PPOMMAgent`` training run.

    ``num_rollouts``, ``epoch_per_rollout`` and ``batch_size`` are picked from
    small discrete sets; ``gamma``, ``lam`` and ``entropy_weight`` are drawn
    uniformly from a range. Every other entry is fixed.

    Returns:
        dict: keyword arguments for ``PPOMMAgent``. The dict and all nested
        dicts/lists are newly created on every call, so configurations never
        share mutable state.
    """
    return {
        "env_str": "room_env:RoomEnv-v2",
        "num_episodes": 10,
        # --- randomly searched hyperparameters ---
        "num_rollouts": random.choice([5, 10, 20]),
        "epoch_per_rollout": random.choice([16, 32, 64]),
        "batch_size": random.choice([32, 64, 128]),
        "gamma": random.uniform(0.7, 0.999),
        "lam": random.uniform(0.7, 0.999),
        "epsilon": 0.2,
        "entropy_weight": random.uniform(0.001, 0.1),
        # --- fixed settings ---
        "capacity": {
            "episodic": 8,
            "episodic_agent": 0,
            "semantic": 8,
            "semantic_map": 0,
            "short": 1,
        },
        "pretrain_semantic": False,
        "nn_params": {
            "architecture": "lstm",
            "hidden_size": 64,
            "num_layers": 2,
            "embedding_dim": 64,
            "make_categorical_embeddings": False,
            "memory_of_interest": [
                "episodic",
                "semantic",
                "short",
            ],
            "fuse_information": "sum",
            "include_positional_encoding": True,
            "max_timesteps": 100,
            "max_strength": 100,
        },
        "run_test": True,
        "num_samples_for_results": 10,
        "train_seed": 5,
        "test_seed": 0,
        "device": "cpu",
        "qa_policy": "episodic_semantic",
        "explore_policy": "avoid_walls",
        "env_config": {
            "question_prob": 1.0,
            "terminates_at": 99,
            "randomize_observations": "objects",
            "room_size": "m",
            "rewards": {"correct": 1, "wrong": 0, "partial": 0},
            "make_everything_static": False,
            "num_total_questions": 1000,
            "question_interval": 1,
            "include_walls_in_observations": True,
        },
        "default_root_dir": "./training-results/PPO/mm/m/100",
        "run_handcrafted_baselines": False,
    }


# Running the cell (or the script) executes the sweep; merely importing this
# code does not launch 1000 training runs.
if __name__ == "__main__":
    params_all = [sample_mm_params() for _ in range(NUM_TRIALS)]

    for params in tqdm(params_all):
        try:
            agent = PPOMMAgent(**params)
            agent.train()
        except Exception as err:
            # Best-effort sweep: one failed configuration must not stop the
            # rest. Include the exception type, since str(err) alone can be
            # cryptic or empty (e.g. KeyError('x') prints just 'x').
            print(f"{type(err).__name__}: {err}")

## Explore


In [None]:
from agent import PPOExploreAgent
import logging
from tqdm.auto import tqdm
import random

# Disable the root logger while the sweep runs.
logger = logging.getLogger()
logger.disabled = True

# Number of random configurations drawn for the sweep.
NUM_TRIALS = 1000


def sample_explore_params():
    """Draw one random configuration for a ``PPOExploreAgent`` training run.

    ``num_rollouts``, ``epoch_per_rollout`` and ``batch_size`` are picked from
    small discrete sets; ``gamma``, ``lam`` and ``entropy_weight`` are drawn
    uniformly from a range. Every other entry is fixed, including the path of
    the pickled agent passed as ``mm_agent_path``.

    Returns:
        dict: keyword arguments for ``PPOExploreAgent``. The dict and all
        nested dicts/lists are newly created on every call, so configurations
        never share mutable state.
    """
    return {
        "env_str": "room_env:RoomEnv-v2",
        "num_episodes": 10,
        # --- randomly searched hyperparameters ---
        "num_rollouts": random.choice([5, 10, 20, 40, 100]),
        "epoch_per_rollout": random.choice([16, 32, 64]),
        "batch_size": random.choice([8, 16, 32]),
        "gamma": random.uniform(0.5, 0.999),
        "lam": random.uniform(0.5, 0.999),
        "epsilon": 0.2,
        "entropy_weight": random.uniform(0.001, 0.1),
        # --- fixed settings ---
        "capacity": {
            "episodic": 4,
            "episodic_agent": 0,
            "semantic": 4,
            "semantic_map": 0,
            "short": 1,
        },
        "pretrain_semantic": False,
        "nn_params": {
            "architecture": "lstm",
            "hidden_size": 64,
            "num_layers": 2,
            "embedding_dim": 64,
            "make_categorical_embeddings": False,
            "memory_of_interest": [
                "episodic",
                "semantic",
                "short",
            ],
            "fuse_information": "sum",
            "include_positional_encoding": True,
            "max_timesteps": 100,
            "max_strength": 100,
        },
        "run_test": True,
        "num_samples_for_results": 10,
        "train_seed": 5,
        "test_seed": 0,
        "device": "cpu",
        "mm_policy": "neural",
        "mm_agent_path": "./training-results/PPO/mm/s/2024-03-09 12:04:06.565272/agent.pkl",
        "qa_policy": "episodic_semantic",
        "env_config": {
            "question_prob": 1.0,
            "terminates_at": 49,
            "randomize_observations": "objects",
            "room_size": "s",
            "rewards": {"correct": 1, "wrong": 0, "partial": 0},
            "make_everything_static": False,
            "num_total_questions": 1000,
            "question_interval": 1,
            "include_walls_in_observations": True,
        },
        "default_root_dir": "./training-results/PPO/explore/s",
        # NOTE(review): the MM and LSTM-baseline sweeps in this file pass
        # False here; confirm that None is intended for PPOExploreAgent.
        "run_handcrafted_baselines": None,
    }


# Running the cell (or the script) executes the sweep; merely importing this
# code does not launch 1000 training runs.
if __name__ == "__main__":
    params_all = [sample_explore_params() for _ in range(NUM_TRIALS)]

    for params in tqdm(params_all):
        try:
            agent = PPOExploreAgent(**params)
            agent.train()
        except Exception as err:
            # Best-effort sweep: one failed configuration must not stop the
            # rest. Include the exception type, since str(err) alone can be
            # cryptic or empty (e.g. KeyError('x') prints just 'x').
            print(f"{type(err).__name__}: {err}")

## LSTM baseline

In [None]:
from agent import PPOLSTMBaselineAgent
import logging
from tqdm.auto import tqdm
import random

# Disable the root logger while the sweep runs.
logger = logging.getLogger()
logger.disabled = True

# Number of random configurations drawn for the sweep.
NUM_TRIALS = 1000


def sample_lstm_baseline_params():
    """Draw one random configuration for a ``PPOLSTMBaselineAgent`` run.

    ``num_rollouts``, ``epoch_per_rollout`` and ``batch_size`` are picked from
    small discrete sets; ``gamma``, ``lam`` and ``entropy_weight`` are drawn
    uniformly from a range. Every other entry is fixed.

    Returns:
        dict: keyword arguments for ``PPOLSTMBaselineAgent``. The dict and all
        nested dicts are newly created on every call, so configurations never
        share mutable state.
    """
    return {
        "env_str": "room_env:RoomEnv-v2",
        "num_episodes": 10,
        # --- randomly searched hyperparameters ---
        "num_rollouts": random.choice([5, 10, 20]),
        "epoch_per_rollout": random.choice([16, 32, 64]),
        "batch_size": random.choice([8, 16, 32]),
        "gamma": random.uniform(0.5, 0.999),
        "lam": random.uniform(0.5, 0.999),
        "epsilon": 0.2,
        "entropy_weight": random.uniform(0.001, 0.1),
        # --- fixed settings ---
        "history_block_size": 6,
        "nn_params": {
            "architecture": "lstm",
            "hidden_size": 64,
            "num_layers": 2,
            "embedding_dim": 64,
            "fuse_information": "sum",
            "include_positional_encoding": True,
            "max_timesteps": 100,
            "max_strength": 100,
        },
        "run_test": True,
        "num_samples_for_results": 10,
        "train_seed": 5,
        "test_seed": 0,
        "device": "cpu",
        "env_config": {
            "question_prob": 1.0,
            "terminates_at": 49,
            "randomize_observations": "objects",
            "room_size": "s",
            "rewards": {"correct": 1, "wrong": 0, "partial": 0},
            "make_everything_static": False,
            "num_total_questions": 1000,
            "question_interval": 1,
            "include_walls_in_observations": True,
        },
        "default_root_dir": "./training-results/PPO/baselines/s",
        "run_handcrafted_baselines": False,
    }


# Running the cell (or the script) executes the sweep; merely importing this
# code does not launch 1000 training runs.
if __name__ == "__main__":
    params_all = [sample_lstm_baseline_params() for _ in range(NUM_TRIALS)]

    for params in tqdm(params_all):
        try:
            agent = PPOLSTMBaselineAgent(**params)
            agent.train()
        except Exception as err:
            # Best-effort sweep: one failed configuration must not stop the
            # rest. Include the exception type, since str(err) alone can be
            # cryptic or empty (e.g. KeyError('x') prints just 'x').
            print(f"{type(err).__name__}: {err}")