## Train MM / explore with random sampling


In [None]:
import matplotlib

# Switch matplotlib to the non-interactive "Agg" backend; this is done up
# front, before the remaining imports of this cell run.
matplotlib.use("Agg")

import logging

# Grab the root logger and mark it disabled — presumably to keep the cell
# output quiet during the training runs below (TODO confirm this also silences
# the loggers used by the `agent` package).
logger = logging.getLogger()
logger.disabled = True

import os
import torch
import numpy as np

from agent import DQNAgent
from tqdm.auto import tqdm
import random
import itertools

# --- Sweep size -------------------------------------------------------------
# Number of hyperparameter tuples to draw later on. It also sets how many
# distinct test seeds enter the grid (see `test_seed_`).
num_combinations = 500

# --- Fixed settings ---------------------------------------------------------
room_size = "xl-different-prob"
capacity_max = 12
terminates_at = 99
num_iterations = 20 * (terminates_at + 1)
validation_starts_at = 0

# The results sub-folder depends on whether the room-size name contains
# "different-prob".
if "different-prob" in room_size:
    prob_type = "non-equal-object-probs"
else:
    prob_type = "equal-object-probs"

root_path = (
    f"./training-results/{prob_type}/dqn/"
    f"room_size={room_size}/capacity={capacity_max}/"
)
# Alternative output location for throwaway runs:
# root_path = f"training-results/TRASH/{room_size}"

# --- Search space: one list of candidate values per hyperparameter ----------
# Only `test_seed_` and `learning_rate_` hold more than one value, so the grid
# has len(test_seed_) * len(learning_rate_) entries.
test_seed_ = list(range(num_combinations))
learning_rate_ = [1e-3, 1e-4, 1e-5]

target_update_interval_ = [10]
gamma_ = [0.9]
semantic_decay_factor_ = [0.8]
pretrain_semantic_ = [False]
replay_buffer_size_ = [num_iterations // 10]
relu_between_gcn_layers_ = [True]
dropout_between_gcn_layers_ = [False]
num_layers_ = [2]
batch_size_ = [32]
embedding_dim_ = [32]
triple_qual_weight_ = [0.8]
intrinsic_reward_ = [0.5]

# Full Cartesian grid. The axis order below fixes the field order of every
# tuple in `params_all`; whoever unpacks a tuple must use the same order.
params_all = [
    *itertools.product(
        test_seed_,
        target_update_interval_,
        gamma_,
        semantic_decay_factor_,
        pretrain_semantic_,
        replay_buffer_size_,
        relu_between_gcn_layers_,
        dropout_between_gcn_layers_,
        num_layers_,
        batch_size_,
        embedding_dim_,
        triple_qual_weight_,
        intrinsic_reward_,
        learning_rate_,
    )
]

# Draw `num_combinations` distinct tuples uniformly at random (without
# replacement) from the full grid `params_all`.
# NOTE(review): `random` is not seeded here, so every execution of this cell
# trains on a different subset of the grid.
random_combinations = random.sample(params_all, num_combinations)

# tqdm wraps the list itself rather than `enumerate(...)`: an enumerate object
# has no length, so wrapping it would leave the progress bar without a total.
for params in tqdm(random_combinations):
    # Field order must match the axis order used to build `params_all`.
    (
        test_seed,
        target_update_interval,
        gamma,
        semantic_decay_factor,
        pretrain_semantic,
        replay_buffer_size,
        relu_between_gcn_layers,
        dropout_between_gcn_layers,
        num_layers,
        batch_size,
        embedding_dim,
        triple_qual_weight,
        intrinsic_reward,
        learning_rate,
    ) = params

    # Keyword arguments for one DQNAgent run. Sampled values come from
    # `params`; everything else is fixed for the whole sweep.
    params_dict = {
        "env_str": "room_env:RoomEnv-v2",
        "num_iterations": num_iterations,
        "replay_buffer_size": replay_buffer_size,
        "validation_starts_at": validation_starts_at,
        # Warm start is tied to the batch size in this sweep.
        "warm_start": batch_size,
        "batch_size": batch_size,
        "target_update_interval": target_update_interval,
        # Epsilon is decayed over the entire training run.
        "epsilon_decay_until": num_iterations,
        "max_epsilon": 1.0,
        "min_epsilon": 0.01,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "capacity": {"long": capacity_max, "short": 15},
        "pretrain_semantic": pretrain_semantic,
        "semantic_decay_factor": semantic_decay_factor,
        "dqn_params": {
            "gcn_layer_params": {
                "type": "stare",
                "embedding_dim": embedding_dim,
                "num_layers": num_layers,
                "gcn_drop": 0.1,
                "triple_qual_weight": triple_qual_weight,
            },
            "relu_between_gcn_layers": relu_between_gcn_layers,
            "dropout_between_gcn_layers": dropout_between_gcn_layers,
            # The MLP head reuses the sampled GCN layer count.
            "mlp_params": {"num_hidden_layers": num_layers, "dueling_dqn": True},
        },
        "num_samples_for_results": {"val": 5, "test": 10},
        "validation_interval": 1,
        "plotting_interval": 50,
        # Training seed is offset from the test seed by 5.
        "train_seed": test_seed + 5,
        "test_seed": test_seed,
        "device": "cpu",
        "qa_function": "latest_strongest",
        "env_config": {
            "question_prob": 1.0,
            "terminates_at": terminates_at,
            "randomize_observations": "all",
            "room_size": room_size,
            "rewards": {"correct": 1, "wrong": 0, "partial": 0},
            "make_everything_static": False,
            "num_total_questions": 1000,
            "question_interval": 1,
            "include_walls_in_observations": True,
        },
        "intrinsic_reward": intrinsic_reward,
        "ddqn": True,
        "default_root_dir": root_path,
    }

    agent = DQNAgent(**params_dict)
    agent.train()

In [None]:
# A single example batch: one sample holding seven
# [head, relation, tail, qualifiers] quadruples, all observed at time 0.
# Each row gets its own qualifier dict (nothing is shared between rows).
data_single = np.array(
    [
        [
            [head, relation, tail, {"current_time": 0}]
            for head, relation, tail in (
                ("dep_001", "atlocation", "room_000"),
                ("room_000", "south", "room_004"),
                ("room_000", "north", "wall"),
                ("agent", "atlocation", "room_000"),
                ("dep_007", "atlocation", "room_000"),
                ("room_000", "west", "wall"),
                ("room_000", "east", "room_001"),
            )
        ]
    ],
    dtype=object,
)

# Regular nesting, so this is a 3-D object array: (1 sample, 7 rows, 4 fields).
print(data_single.shape)

# Hand-written example batch of eight samples. Each sample is a list of
# [head, relation, tail, qualifiers] quadruples, where `qualifiers` is a dict
# carrying some subset of the keys 'current_time', 'strength' and 'timestamp'.
# The samples have different lengths (e.g. 7 rows in the first, 14 in the
# second), so with dtype=object this should come out as a 1-D array of Python
# lists rather than a rectangular array — expected shape (8,); verify with the
# print below.
data = np.array([list([['dep_007', 'atlocation', 'room_000', {'current_time': 2, 'strength': 1}], ['agent', 'atlocation', 'room_000', {'current_time': 2, 'strength': 1}], ['room_000', 'west', 'wall', {'current_time': 2, 'strength': 1}], ['room_000', 'north', 'wall', {'current_time': 2, 'strength': 1.8}], ['dep_001', 'atlocation', 'room_000', {'current_time': 2, 'timestamp': [0, 1]}], ['room_000', 'south', 'room_004', {'current_time': 2, 'timestamp': [1]}], ['room_000', 'east', 'room_001', {'current_time': 2, 'timestamp': [0], 'strength': 1}]]),
       list([['room_005', 'east', 'room_006', {'current_time': 5}], ['agent', 'atlocation', 'room_005', {'current_time': 5}], ['room_005', 'south', 'wall', {'current_time': 5}], ['room_005', 'north', 'room_001', {'current_time': 5}], ['room_005', 'west', 'room_004', {'current_time': 5}], ['room_000', 'east', 'room_001', {'timestamp': [0], 'strength': 1}], ['agent', 'atlocation', 'room_000', {'timestamp': [0, 2, 3], 'strength': 1}], ['dep_001', 'atlocation', 'room_000', {'timestamp': [0, 1], 'strength': 1.62}], ['dep_007', 'atlocation', 'room_000', {'strength': 1, 'timestamp': [1]}], ['room_000', 'west', 'wall', {'timestamp': [1, 2], 'strength': 1}], ['room_000', 'north', 'wall', {'strength': 1}], ['room_000', 'south', 'room_004', {'timestamp': [3]}], ['room_004', 'south', 'wall', {'timestamp': [4]}], ['room_004', 'north', 'room_000', {'timestamp': [4]}]]),
       list([['agent', 'atlocation', 'room_001', {'current_time': 9, 'strength': 1.3122000000000003, 'timestamp': [6, 7]}], ['room_001', 'west', 'room_000', {'current_time': 9, 'timestamp': [1, 6]}], ['room_001', 'south', 'room_005', {'current_time': 9, 'strength': 2.2680000000000002, 'timestamp': [5]}], ['room_001', 'north', 'wall', {'current_time': 9, 'timestamp': [1, 2], 'strength': 2.52}], ['room_001', 'east', 'wall', {'current_time': 9, 'strength': 2.1222000000000003, 'timestamp': [2, 6]}], ['dep_001', 'atlocation', 'room_000', {'strength': 1.0628820000000003, 'timestamp': [4]}], ['room_000', 'south', 'room_004', {'strength': 1, 'timestamp': [3, 4]}], ['room_000', 'east', 'room_001', {'timestamp': [0, 3]}], ['room_000', 'west', 'wall', {'timestamp': [0]}], ['agent', 'atlocation', 'room_000', {'timestamp': [0], 'strength': 1}], ['room_000', 'north', 'wall', {'timestamp': [0], 'strength': 1}], ['dep_007', 'atlocation', 'room_000', {'strength': 1}]]),
       list([['dep_001', 'atlocation', 'room_000', {'current_time': 4, 'strength': 1.8, 'timestamp': [2]}], ['room_000', 'south', 'room_004', {'current_time': 4, 'timestamp': [3]}], ['room_000', 'east', 'room_001', {'current_time': 4, 'timestamp': [0]}], ['dep_007', 'atlocation', 'room_000', {'current_time': 4, 'strength': 1, 'timestamp': [3]}], ['room_000', 'west', 'wall', {'current_time': 4, 'strength': 1, 'timestamp': [2]}], ['room_000', 'north', 'wall', {'current_time': 4, 'strength': 1}], ['agent', 'atlocation', 'room_000', {'current_time': 4, 'timestamp': [0, 2], 'strength': 1}], ['room_004', 'south', 'wall', {'timestamp': [1]}], ['room_004', 'east', 'room_005', {'strength': 1}]]),
       list([['agent', 'atlocation', 'room_004', {'current_time': 3, 'timestamp': [1, 2]}], ['room_004', 'north', 'room_000', {'current_time': 3, 'strength': 1}], ['room_004', 'south', 'wall', {'current_time': 3, 'strength': 1, 'timestamp': [2]}], ['room_004', 'west', 'wall', {'current_time': 3, 'timestamp': [1, 2]}], ['room_004', 'east', 'room_005', {'current_time': 3, 'timestamp': [1]}], ['room_000', 'west', 'wall', {'strength': 1}], ['room_000', 'south', 'room_004', {'timestamp': [0]}], ['room_000', 'north', 'wall', {'timestamp': [0]}], ['room_000', 'east', 'room_001', {'timestamp': [0]}], ['agent', 'atlocation', 'room_000', {'strength': 1}]]),
       list([['room_004', 'north', 'room_000', {'current_time': 9, 'strength': 1}], ['agent', 'atlocation', 'room_004', {'current_time': 9, 'timestamp': [7], 'strength': 1}], ['room_004', 'south', 'wall', {'current_time': 9, 'strength': 1, 'timestamp': [7]}], ['room_004', 'east', 'room_005', {'current_time': 9}], ['room_004', 'west', 'wall', {'current_time': 9}], ['room_000', 'south', 'room_004', {'strength': 1}], ['room_005', 'north', 'room_001', {'timestamp': [2, 5], 'strength': 1}], ['room_006', 'east', 'room_007', {'timestamp': [3]}], ['room_006', 'west', 'room_005', {'strength': 1.1809800000000004}], ['sta_004', 'atlocation', 'room_006', {'strength': 1.1809800000000004}], ['agent', 'atlocation', 'room_006', {'timestamp': [4]}], ['room_005', 'east', 'room_006', {'strength': 1.4580000000000002}], ['room_005', 'south', 'wall', {'timestamp': [6]}], ['room_005', 'west', 'room_004', {'timestamp': [6]}]]),
       list([['agent', 'atlocation', 'room_000', {'current_time': 4, 'strength': 1}], ['room_000', 'east', 'room_001', {'current_time': 4, 'timestamp': [0, 3]}], ['room_000', 'north', 'wall', {'current_time': 4, 'strength': 1.8}], ['room_000', 'west', 'wall', {'current_time': 4, 'strength': 1, 'timestamp': [1, 3]}], ['dep_001', 'atlocation', 'room_000', {'current_time': 4, 'timestamp': [0, 1, 3]}], ['dep_007', 'atlocation', 'room_000', {'current_time': 4, 'timestamp': [0], 'strength': 1}], ['room_000', 'south', 'room_004', {'current_time': 4, 'timestamp': [0, 1], 'strength': 1}], ['room_004', 'west', 'wall', {'strength': 1}], ['room_004', 'south', 'wall', {'timestamp': [2]}], ['room_004', 'north', 'room_000', {'timestamp': [2]}], ['room_004', 'east', 'room_005', {'strength': 1}]]),
       list([['room_001', 'south', 'room_005', {'current_time': 5}], ['agent', 'atlocation', 'room_001', {'current_time': 5}], ['room_001', 'north', 'wall', {'current_time': 5}], ['room_001', 'east', 'wall', {'current_time': 5}], ['room_001', 'west', 'room_000', {'current_time': 5}], ['agent', 'atlocation', 'room_000', {'strength': 1, 'timestamp': [3]}], ['dep_001', 'atlocation', 'room_000', {'strength': 1, 'timestamp': [1, 2, 3]}], ['room_000', 'south', 'room_004', {'strength': 1.4580000000000002, 'timestamp': [3, 4]}], ['room_000', 'north', 'wall', {'strength': 3.168}], ['room_000', 'west', 'wall', {'strength': 1.62, 'timestamp': [2, 4]}], ['dep_007', 'atlocation', 'room_000', {'strength': 1.8, 'timestamp': [2, 3]}], ['room_000', 'east', 'room_001', {'timestamp': [3], 'strength': 1}]])],
      dtype=object)

# Inspect how NumPy laid out the ragged batch.
print(data.shape)

## Run fixed combinations

In [None]:
import matplotlib

# Switch matplotlib to the non-interactive "Agg" backend; this is done up
# front, before the remaining imports of this cell run.
matplotlib.use("Agg")

import logging

# Grab the root logger and mark it disabled — presumably to keep the cell
# output quiet during the training runs below (TODO confirm this also silences
# the loggers used by the `agent` package).
logger = logging.getLogger()
logger.disabled = True

import os
from agent import DQNAgent
from tqdm.auto import tqdm
import random
import itertools


# --- Settings shared by every run in this cell ------------------------------
room_size = "xl-different-prob"
terminates_at = 99
num_iterations = 100 * (terminates_at + 1)
validation_starts_at = 0
batch_size = 32
target_update_interval = 20
gamma = 0.99
semantic_decay_factor = 0.8
embedding_dim = 80
num_layers = 2
triple_qual_weight = 0.8

# Sizes derived from the run length.
replay_buffer_size = num_iterations // 2
warm_start = num_iterations // 4

# The results sub-folder depends on whether the room-size name contains
# "different-prob"; this does not change between runs.
if "different-prob" in room_size:
    prob_type = "non-equal-object-probs"
else:
    prob_type = "equal-object-probs"

# One training run per (capacity, pretrain mode, seed) triple. The product
# iterates with the rightmost axis varying fastest, i.e. seeds innermost,
# then pretrain mode, then capacity.
for capacity_max, pretrain_semantic, test_seed in itertools.product(
    [12, 24, 48],
    [False, "include_walls", "exclude_walls"],
    [0, 1, 2, 3, 4],
):
    # Results are grouped per long-term capacity.
    root_path = f"./training-results/{prob_type}/dqn/room_size={room_size}/capacity={capacity_max}/"

    # Nested configs are rebuilt on every iteration so each agent receives
    # its own fresh dict objects.
    dqn_params = {
        "gcn_layer_params": {
            "type": "stare",
            "embedding_dim": embedding_dim,
            "num_layers": num_layers,
            "gcn_drop": 0.1,
            "triple_qual_weight": triple_qual_weight,
        },
        "relu_between_gcn_layers": True,
        "dropout_between_gcn_layers": False,
        # The MLP head uses the same layer count as the GCN.
        "mlp_params": {"num_hidden_layers": num_layers, "dueling_dqn": True},
    }
    env_config = {
        "question_prob": 1.0,
        "terminates_at": terminates_at,
        "randomize_observations": "all",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 1,
        "include_walls_in_observations": True,
    }

    params_dict = {
        "env_str": "room_env:RoomEnv-v2",
        "num_iterations": num_iterations,
        "replay_buffer_size": replay_buffer_size,
        "validation_starts_at": validation_starts_at,
        "warm_start": warm_start,
        "batch_size": batch_size,
        "target_update_interval": target_update_interval,
        # Epsilon is decayed over the entire training run.
        "epsilon_decay_until": num_iterations,
        "max_epsilon": 1.0,
        "min_epsilon": 0.1,
        "gamma": gamma,
        "capacity": {"long": capacity_max, "short": 15},
        "pretrain_semantic": pretrain_semantic,
        "semantic_decay_factor": semantic_decay_factor,
        "dqn_params": dqn_params,
        "num_samples_for_results": {"val": 5, "test": 10},
        "validation_interval": 5,
        "plotting_interval": 50,
        # Training seed is offset from the test seed by 5.
        "train_seed": test_seed + 5,
        "test_seed": test_seed,
        "device": "cpu",
        "qa_function": "latest_strongest",
        "env_config": env_config,
        "ddqn": True,
        "default_root_dir": root_path,
    }

    agent = DQNAgent(**params_dict)
    agent.train()