In [None]:
import pathlib
import sys
# Treat the parent of the current working directory as the repository root so
# that the `src.*` imports below resolve.
# NOTE(review): this depends on the kernel's working directory being exactly
# one level below the repository root — confirm if the notebook is moved.
root_path = pathlib.Path().absolute().parent
# Only register the path once: an unconditional append would add a duplicate
# sys.path entry every time this cell is re-run.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))

from src.train import run_experiment, OptimizerType
from src.models.dqn import ModelType
from src.env import FourRoomEnvWithTagging
from src.featurizers import FeaturizerType
from src.metrics import SusMetrics

import torch

# Tensor display settings for the whole notebook: three decimal places,
# no scientific notation, and lines up to 200 characters wide.
torch.set_printoptions(
    linewidth=200,
    sci_mode=False,
    precision=3,
)

In [None]:
# --- Environment population ---
N_CREW = 4
N_IMPOSTERS = 1
N_JOBS = 5

# --- Other run constants ---
# SEQUENCE_SIZE is the last argument handed to run_game in the final cell.
SEQUENCE_SIZE = 2
# NOTE(review): BUF_SIZE is not referenced by any visible cell; presumably a
# replay-buffer size — confirm before relying on it.
BUF_SIZE = 3000

env = FourRoomEnvWithTagging(
    n_imposters=N_IMPOSTERS,
    n_crew=N_CREW,
    n_jobs=N_JOBS,
    debug=False,
)


# Output folders: <root>/model_registry and its 'test' subfolder.
model_registry_path = root_path / 'model_registry'
tests_path = model_registry_path / 'test'

# Create parent before child, so a plain mkdir (no parents=True) is sufficient;
# exist_ok=True keeps the cell safe to re-run.
for _output_dir in (model_registry_path, tests_path):
    _output_dir.mkdir(exist_ok=True)

In [None]:
def _build_model_config(input_image_size, n_actions):
    """Build the model-argument dict used for both the imposter and the crew.

    The two agents share every hyperparameter except the size of their action
    space, so the shared values live here in one place instead of being
    copy-pasted into two dict literals that could silently drift apart.
    A new dict (containing new list objects) is returned on every call, so
    mutating one agent's config never affects the other's.

    Args:
        input_image_size: Value stored under "input_image_size".
        n_actions: Value stored under "n_actions".

    Returns:
        A dict with the same keys, key order and values as the original
        hand-written configs.
    """
    return {
        "input_image_size": input_image_size,
        # NOTE(review): 20 is hard-coded; presumably it has to match the
        # width of the featurizer's non-spatial output — confirm.
        "non_spatial_input_size": 20,
        "n_channels": [7, 5, 3],
        "strides": [1, 1],
        "paddings": [1, 1],
        "kernel_sizes": [3, 3],
        "rnn_layers": 3,
        "rnn_hidden_dim": 64,
        "rnn_dropout": 0.2,
        "mlp_hidden_layer_dims": [16, 16],
        "n_actions": n_actions,
        # No pretrained weights are loaded by default.
        "pretrained_model_path": None,
    }


imposter_model_config = _build_model_config(env.n_cols, env.n_imposter_actions)

crew_model_config = _build_model_config(env.n_cols, env.n_crew_actions)

In [None]:
# metrics = run_experiment(
#     env=env, 
#     num_steps=100000,
#     imposter_model_args=imposter_model_config,
#     crew_model_args={'n_actions': env.n_crew_actions},
#     imposter_model_type=ModelType.SPATIAL_DQN,
#     crew_model_type=ModelType.RANDOM,
#     featurizer_type=FeaturizerType.GLOBAL,
#     sequence_length = 6,
#     replay_buffer_size=100_000,
#     replay_prepopulate_steps=10_000,
#     batch_size=4,
#     gamma=0.99,
#     scheduler_start_eps=1,
#     scheduler_end_eps=0.05,
#     scheduler_time_steps=1_000_000,
#     train_imposter=True,
#     train_crew=False,
#     experiment_save_path=tests_path,
#     optimizer_type = OptimizerType.ADAM,
#     learning_rate=0.1,
#     train_step_interval=5,
#     num_checkpoint_saves=2,
# )

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np

# episodes = len(metrics[SusMetrics.AVG_CREW_RETURN])
# x = np.arange(episodes)

# # plt.plot(x, metrics[SusMetrics.AVG_CREW_RETURN], label='Crew Return')
# plt.plot(x, metrics[SusMetrics.AVG_IMPOSTER_RETURN], label='Imposter Return')
# plt.legend()

In [None]:
# run_experiment(
#     env=env, 
#     num_steps=1000,
#     imposter_model_args={'n_actions': env.n_imposter_actions},
#     crew_model_args=crew_model_config,
#     crew_model_type=ModelType.SPATIAL_DQN,
#     imposter_model_type=ModelType.RANDOM,
#     featurizer_type=FeaturizerType.GLOBAL,
#     sequence_length = 3,
#     replay_buffer_size=3000,
#     replay_prepopulate_steps=1_000,
#     batch_size=4,
#     gamma=0.99,
#     scheduler_start_eps=1,
#     scheduler_end_eps=0.05,
#     scheduler_time_steps=100_000,
#     train_imposter=False,
#     train_crew=True,
#     experiment_save_path=tests_path,
#     optimizer_type = OptimizerType.ADAM,
#     learning_rate=0.001,
#     train_step_interval=5,
#     num_checkpoint_saves=2,
# )

In [None]:
from src.featurizers import GlobalFeaturizer
from src.train import run_game
from src.models.dqn import RandomEquiprobable

# Play a single game in which both sides are driven by RandomEquiprobable
# models, using a newly constructed environment rather than the earlier one.
env = FourRoomEnvWithTagging(
    n_imposters=N_IMPOSTERS,
    n_crew=N_CREW,
    n_jobs=N_JOBS,
    debug=False,
)

# One model per role, each sized to that role's action count.
crew_model = RandomEquiprobable(env.n_crew_actions)
imposter_model = RandomEquiprobable(env.n_imposter_actions)

featurizer = GlobalFeaturizer(env)

# Left as the cell's final expression so the notebook displays the result.
run_game(
    env,
    imposter_model,
    crew_model,
    featurizer,
    SEQUENCE_SIZE,
)