In [None]:
import pathlib
import sys
# Project root = parent of the current working directory (pathlib.Path() is
# the cwd). The `src.*` imports below rely on this directory being importable,
# so the notebook presumably has to be started one level beneath the project
# root -- TODO confirm.
root_path = pathlib.Path().absolute().parent

# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))

from src.train import run_experiment, OptimizerType, run_game
from src.models.dqn import ModelType
from src.env import FourRoomEnvWithTagging
from src.featurizers import FeaturizerType, PerspectiveFeaturizer

import torch

# Notebook-wide tensor printing: three decimal places, no scientific
# notation, and up to 200 characters per line before wrapping.
torch.set_printoptions(
    linewidth=200,
    sci_mode=False,
    precision=3,
)

In [None]:
# --- Experiment constants ---------------------------------------------------
BUF_SIZE = 3000
N_IMPOSTERS, N_CREW = 1, 4
N_JOBS = 5
SEQUENCE_SIZE = 6

# Environment instance used by the configuration, game and training cells below.
env = FourRoomEnvWithTagging(
    n_imposters=N_IMPOSTERS,
    n_crew=N_CREW,
    n_jobs=N_JOBS,
    debug=False,
    time_step_reward=0,
)

# --- Output directories -----------------------------------------------------
# <root>/model_registry holds experiment artefacts; its 'test' sub-directory is
# what the training cells pass as `experiment_save_path`.
model_registry_path = root_path / 'model_registry'
tests_path = model_registry_path / 'test'

# Parent first: mkdir is called without parents=True, so 'test' can only be
# created once 'model_registry' exists.
model_registry_path.mkdir(exist_ok=True)
tests_path.mkdir(exist_ok=True)

In [None]:
# Keyword arguments forwarded to ModelType.build(...) for the imposter's
# SPATIAL_DQN model (see the build call in the next cell).
#
# NOTE(review): this config spells the key `kernel_size` and gives it 2
# entries, while the later training cell's config uses `kernel_sizes`, and the
# strides / paddings / dilations lists here have 4 entries each. Confirm the
# expected key name and list length against the model constructor.
imposter_model_config = dict(
    # Convolutional part
    input_image_size=env.n_cols,
    non_spatial_input_size=15,
    n_channels=[7, 16, 32, 64],
    strides=[1, 1, 1, 1],
    paddings=[1, 1, 1, 1],
    kernel_size=[3, 3],
    dilations=[1, 1, 2, 2],
    # Recurrent part
    rnn_layers=1,
    rnn_hidden_dim=16,
    rnn_dropout=0.2,
    # Output head
    mlp_hidden_layer_dims=[16, 16],
    n_actions=env.n_imposter_actions,
    # Alternative: start from a saved checkpoint instead of None.
    # pretrained_model_path=tests_path / 'imposter_spatial_dqn_100%.pt',
    pretrained_model_path=None,
)

# crew_model_config = {
#     "input_image_size": env.n_cols,
#     "non_spatial_input_size": 20,
#     "n_channels": [7, 2],
#     "strides": [2, ],
#     "paddings": [1,],
#     "kernel_size": [3,3],
#     "rnn_layers": 3,
#     "rnn_hidden_dim": 64,
#     "rnn_dropout": 0.2,
#     "mlp_hidden_layer_dims": [16, 16],
#     "n_actions": env.n_crew_actions,
#     "pretrained_model_path": None
# }

In [None]:
# imposter_model_config = {
#     "layer_dims": [(7 * 9 * 9 + 15) * 6, 256,16, 16, 16, env.n_imposter_actions],
#     "pretrained_model_path": tests_path / 'imposter_mlp_100%.pt'
# }

# imposter_model = ModelType.build(model_type=ModelType.MLP, **imposter_model_config)

# Imposter: spatial DQN built from the config dict defined above.
imposter_model = ModelType.build(
    model_type=ModelType.SPATIAL_DQN,
    **imposter_model_config,
)

# Crew: RANDOM model type, which only needs the size of the crew action space.
crew_model = ModelType.build(
    model_type=ModelType.RANDOM,
    n_actions=env.n_crew_actions,
)

# Play one game with a perspective featurizer bound to the same environment.
perspective_featurizer = PerspectiveFeaturizer(env)
run_game(env, imposter_model, crew_model, perspective_featurizer, SEQUENCE_SIZE)

In [None]:
# NOTE(review): presumably a deliberate barrier so that "Run All" halts here,
# before the run_experiment cell below -- confirm. The message makes the
# resulting AssertionError self-explanatory.
assert False, "Intentional stop: run the cells below manually."

In [None]:
# Settings for a run that trains only the imposter (spatial DQN) against a
# RANDOM crew model. Collected in a dict so the individual values are easy to
# inspect before the run is launched.
experiment_kwargs = dict(
    # Environment and run length
    env=env,
    num_steps=100000,
    # Models and featurizer
    imposter_model_args=imposter_model_config,
    crew_model_args={'n_actions': env.n_crew_actions},
    imposter_model_type=ModelType.SPATIAL_DQN,
    crew_model_type=ModelType.RANDOM,
    featurizer_type=FeaturizerType.PERPSECTIVE,
    sequence_length=6,
    # Replay buffer and batching
    replay_buffer_size=100_000,
    replay_prepopulate_steps=10_000,
    batch_size=4,
    gamma=0.99,
    # Epsilon schedule
    scheduler_start_eps=1,
    scheduler_end_eps=0.05,
    scheduler_time_steps=75_000,
    # Which side is trained
    train_imposter=True,
    train_crew=False,
    # Output location and optimisation
    experiment_save_path=tests_path,
    optimizer_type=OptimizerType.ADAM,
    learning_rate=0.1,
    train_step_interval=5,
    num_checkpoint_saves=2,
)

# The result is unpacked into two values; `losses` is consumed by the
# plotting cell that follows.
metrics, losses = run_experiment(**experiment_kwargs)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

print(len(losses))

# Each entry of `losses` is indexed as [0] -> imposter loss, [1] -> crew loss.
imposter_loss = [entry[0] for entry in losses]
crew_loss = [entry[1] for entry in losses]

# Size the x-axis from the recorded data instead of a hard-coded
# `100000 // 5`: that literal duplicated num_steps / train_step_interval from
# the training cell, and plot() fails if x and y lengths differ.
time_steps = np.arange(len(losses))

fig, axes = plt.subplots(1, 2, figsize=(12, 6))
ax = axes

ax[0].plot(time_steps, imposter_loss)
ax[0].set_title('Imposter Loss')
ax[0].set_xlabel('Recorded loss index')
ax[0].set_ylabel('Loss')

ax[1].plot(time_steps, crew_loss)
ax[1].set_title('Crew Loss')
ax[1].set_xlabel('Recorded loss index')
ax[1].set_ylabel('Loss')

plt.show()



In [None]:
# NOTE(review): presumably a deliberate barrier so that "Run All" halts here
# and the scratch / alternative-experiment cells below only run when executed
# by hand -- confirm. The message makes the AssertionError self-explanatory.
assert False, "Intentional stop: run the cells below manually."

In [None]:
import torch

# Scratch check of torch.gather semantics.
# 2 x 11 integer tensor with entries drawn from [0, 10].
action_values = torch.randint(low=0, high=11, size=(2, 11))

# One column index per row, stored as a (2, 1) column vector.
actions = torch.tensor([[1], [2]])
print(actions.shape)
print(actions)

# For each row, pick the entry at that row's index along dim 1.
action_values.gather(1, actions.reshape(-1, 1))

In [None]:
# Alternative imposter configuration. This REBINDS `imposter_model_config`
# from the earlier cell: two conv stages, a 3-layer RNN with hidden size 64.
#
# NOTE(review): the key here is `kernel_sizes`, while the earlier config uses
# `kernel_size` -- confirm which spelling the model constructor expects.
imposter_model_config = dict(
    input_image_size=env.n_cols,
    non_spatial_input_size=15,
    n_channels=[7, 5, 3],
    strides=[1, 1],
    paddings=[1, 1],
    kernel_sizes=[3, 3],
    rnn_layers=3,
    rnn_hidden_dim=64,
    rnn_dropout=0.2,
    mlp_hidden_layer_dims=[16, 16],
    n_actions=env.n_imposter_actions,
    pretrained_model_path=None,
)

# Same kind of run as the earlier training cell (imposter trained, RANDOM
# crew), but with batch_size=16 and a longer epsilon schedule.
alt_experiment_kwargs = dict(
    # Environment and run length
    env=env,
    num_steps=100000,
    # Models and featurizer
    imposter_model_args=imposter_model_config,
    crew_model_args={'n_actions': env.n_crew_actions},
    imposter_model_type=ModelType.SPATIAL_DQN,
    crew_model_type=ModelType.RANDOM,
    featurizer_type=FeaturizerType.PERPSECTIVE,
    sequence_length=6,
    # Replay buffer and batching
    replay_buffer_size=100_000,
    replay_prepopulate_steps=10_000,
    batch_size=16,
    gamma=0.99,
    # Epsilon schedule
    # NOTE(review): scheduler_time_steps is 10x num_steps -- confirm intended.
    scheduler_start_eps=1,
    scheduler_end_eps=0.05,
    scheduler_time_steps=1_000_000,
    # Which side is trained
    train_imposter=True,
    train_crew=False,
    # Output location and optimisation
    experiment_save_path=tests_path,
    optimizer_type=OptimizerType.ADAM,
    learning_rate=0.1,
    train_step_interval=5,
    num_checkpoint_saves=2,
)

# Left as the cell's final bare expression, so the notebook echoes whatever
# run_experiment returns.
run_experiment(**alt_experiment_kwargs)

In [None]:
# run_experiment(
#     env=env, 
#     num_steps=1000,
#     imposter_model_args={'n_actions': env.n_imposter_actions},
#     crew_model_args=crew_model_config,
#     crew_model_type=ModelType.SPATIAL_DQN,
#     imposter_model_type=ModelType.RANDOM,
#     featurizer_type=FeaturizerType.GLOBAL,
#     sequence_length = 3,
#     replay_buffer_size=3000,
#     replay_prepopulate_steps=1_000,
#     batch_size=4,
#     gamma=0.99,
#     scheduler_start_eps=1,
#     scheduler_end_eps=0.05,
#     scheduler_time_steps=100_000,
#     train_imposter=False,
#     train_crew=True,
#     experiment_save_path=tests_path,
#     optimizer_type = OptimizerType.ADAM,
#     learning_rate=0.001,
#     train_step_interval=5,
#     num_checkpoint_saves=2,
# )

In [None]:
from src.featurizers import GlobalFeaturizer
from src.train import run_game
from src.models.dqn import RandomEquiprobable

# Rebinds `env` to a fresh environment. Unlike the environment created near
# the top of the notebook, no `time_step_reward` is passed here.
env = FourRoomEnvWithTagging(
    n_imposters=N_IMPOSTERS,
    n_crew=N_CREW,
    n_jobs=N_JOBS,
    debug=False,
)

# Both sides use RandomEquiprobable, each sized by its own action count
# (crew first, then imposter).
crew_model, imposter_model = (
    RandomEquiprobable(env.n_crew_actions),
    RandomEquiprobable(env.n_imposter_actions),
)

# This game uses the global featurizer rather than the perspective one.
featurizer = GlobalFeaturizer(env)

run_game(env, imposter_model, crew_model, featurizer, SEQUENCE_SIZE)