In [1]:
import pathlib
import sys
# The notebook is expected to be launched from a sub-directory of the
# repository, so the parent of the current working directory is treated as the
# project root that contains the `src` package.
root_path = pathlib.Path.cwd().parent

# Guard the append so re-running this cell does not keep stacking duplicate
# entries onto sys.path.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))

from src.train import run_experiment, OptimizerType
from src.models.dqn import SpatialDQN, ModelType
from src.replay_memory import FastReplayBuffer
from src.env import FourRoomEnvWithTagging
from src.featurizers import FeaturizerType
from src.visualize import StateSequenceVisualizer

import torch

# Compact tensor printing for this notebook: three decimals, fixed-point
# notation (no scientific notation), and wide lines before wrapping.
torch.set_printoptions(
    linewidth=200,
    precision=3,
    sci_mode=False,
)



In [2]:
# --- Environment sizing -----------------------------------------------------
N_IMPOSTERS = 1
N_CREW = 4
N_JOBS = 5

# --- Replay / sequence sizing -----------------------------------------------
# NOTE(review): the run_experiment calls in cells 4 and 5 pass literal values
# for replay_buffer_size and sequence_length instead of these constants —
# confirm whether they are meant to be wired in or are used further down.
BUF_SIZE = 3000
SEQUENCE_SIZE = 2

env = FourRoomEnvWithTagging(
    n_crew=N_CREW,
    n_imposters=N_IMPOSTERS,
    n_jobs=N_JOBS,
    debug=False,
)

# --- Output locations -------------------------------------------------------
# Checkpoints from the runs below are written under <root>/model_registry/test.
model_registry_path = root_path / "model_registry"
tests_path = model_registry_path / "test"

# Create parent first, then child; both calls are no-ops when the directory
# already exists.
model_registry_path.mkdir(exist_ok=True)
tests_path.mkdir(exist_ok=True)

In [3]:
def make_spatial_dqn_config(input_image_size, n_actions):
    """Build the keyword-argument dict for one team's spatial DQN model.

    The imposter and crew configurations share every hyperparameter except the
    size of the action space, so both are produced from this single template
    instead of two hand-maintained copies.

    Args:
        input_image_size: Value stored under "input_image_size" (the notebook
            passes ``env.n_cols``).
        n_actions: Value stored under "n_actions" (number of actions available
            to the team the model controls).

    Returns:
        A new dict on every call. The list values are freshly created too, so
        changing one team's config never leaks into the other's.
    """
    return {
        "input_image_size": input_image_size,
        "non_spatial_input_size": 20,
        "n_channels": [7, 5, 3],
        "strides": [1, 1],
        "paddings": [1, 1],
        "kernel_sizes": [3, 3],
        "rnn_layers": 3,
        "rnn_hidden_dim": 64,
        "rnn_dropout": 0.2,
        "mlp_hidden_layer_dims": [16, 16],
        "n_actions": n_actions,
        "pretrained_model_path": None,  # None: no pretrained path is supplied
    }


# Passed to run_experiment as imposter_model_args / crew_model_args.
imposter_model_config = make_spatial_dqn_config(env.n_cols, env.n_imposter_actions)
crew_model_config = make_spatial_dqn_config(env.n_cols, env.n_crew_actions)

In [4]:
# Short run that trains the SpatialDQN imposter while the crew uses
# ModelType.RANDOM and is not trained. Checkpoints go to tests_path.
#
# NOTE(review): scheduler_time_steps (100_000) is 100x num_steps (1_000); if
# epsilon is annealed over scheduler_time_steps, exploration stays close to
# scheduler_start_eps for the whole run — confirm this is intended.
run_experiment(
    # Environment and observation encoding
    env=env,
    featurizer_type=FeaturizerType.GLOBAL,
    sequence_length=10,
    # Imposter team (trained)
    imposter_model_type=ModelType.SPATIAL_DQN,
    imposter_model_args=imposter_model_config,
    train_imposter=True,
    # Crew team (not trained)
    crew_model_type=ModelType.RANDOM,
    crew_model_args={"n_actions": env.n_crew_actions},
    train_crew=False,
    # Replay buffer
    replay_buffer_size=1000,
    replay_prepopulate_steps=1000,
    batch_size=4,
    # Optimisation
    optimizer_type=OptimizerType.ADAM,
    learning_rate=1e-3,
    gamma=0.99,
    num_steps=1000,
    train_step_interval=5,
    # Epsilon schedule
    scheduler_start_eps=1,
    scheduler_end_eps=0.05,
    scheduler_time_steps=100000,
    # Checkpointing
    experiment_save_path=tests_path,
    num_checkpoint_saves=2,
)

  actions = torch.tensor(batch.actions[team_samples, -1, agent_idx])
  done_mask = torch.tensor(batch.dones[team_samples, -1]).view(-1)
  rewards = torch.tensor(
  1%|          | 11/1000 [00:00<00:09, 99.23it/s]

Model checkpoint saved to /home/jhrudd/dev/Sus-Net/model_registry/test/imposter_spatial_dqn_0.0%.pt


Episode: 5 | Steps: 250 | Epsilon: 0.97 | Imposter Loss: 0.01 | Crew Loss: 0.00: 100%|██████████| 1000/1000 [00:07<00:00, 139.89it/s]

Model checkpoint saved to /home/jhrudd/dev/Sus-Net/model_registry/test/imposter_dqn_100%.pt
Average Metrics: {<SusMetrics.IMP_KILLED_CREW: 'imp_killed_crew'>: 0.8333333333333334, <SusMetrics.IMP_VOTED_OUT: 'imp_voted_out'>: 0.5, <SusMetrics.CREW_VOTED_OUT: 'crew_voted_out'>: 1.3333333333333333, <SusMetrics.SABOTAGED_JOBS: 'sabotaged_jobs'>: 0.0, <SusMetrics.COMPLETED_JOBS: 'completed_jobs'>: 1.5, <SusMetrics.TOTAL_STALEMATES: 'total_stalemates'>: 0.0, <SusMetrics.TOTAL_TIME_STEPS: 'total_time_steps'>: 158.33333333333334, <SusMetrics.IMPOSTER_WON: 'imposter_won'>: 0.5, <SusMetrics.CREW_WON: 'crew_won'>: 0.5}





In [5]:
# Short run that trains the SpatialDQN crew while the imposter uses
# ModelType.RANDOM and is not trained. Checkpoints go to tests_path.
#
# NOTE(review): scheduler_time_steps (100_000) is 100x num_steps (1_000); if
# epsilon is annealed over scheduler_time_steps, exploration stays close to
# scheduler_start_eps for the whole run — confirm this is intended.
run_experiment(
    # Environment and observation encoding
    env=env,
    featurizer_type=FeaturizerType.GLOBAL,
    sequence_length=3,
    # Crew team (trained)
    crew_model_type=ModelType.SPATIAL_DQN,
    crew_model_args=crew_model_config,
    train_crew=True,
    # Imposter team (not trained)
    imposter_model_type=ModelType.RANDOM,
    imposter_model_args={"n_actions": env.n_imposter_actions},
    train_imposter=False,
    # Replay buffer
    replay_buffer_size=3_000,
    replay_prepopulate_steps=1000,
    batch_size=4,
    # Optimisation
    optimizer_type=OptimizerType.ADAM,
    learning_rate=1e-3,
    gamma=0.99,
    num_steps=1000,
    train_step_interval=5,
    # Epsilon schedule
    scheduler_start_eps=1,
    scheduler_end_eps=0.05,
    scheduler_time_steps=100000,
    # Checkpointing
    experiment_save_path=tests_path,
    num_checkpoint_saves=2,
)

  2%|▏         | 21/1000 [00:00<00:04, 202.22it/s]

Model checkpoint saved to /home/jhrudd/dev/Sus-Net/model_registry/test/crew_spatial_dqn_0.0%.pt


Episode: 3 | Steps: 150 | Epsilon: 0.98 | Imposter Loss: 0.00 | Crew Loss: 1.36: 100%|██████████| 1000/1000 [00:04<00:00, 237.53it/s]

Model checkpoint saved to /home/jhrudd/dev/Sus-Net/model_registry/test/crew_dqn_100%.pt
Average Metrics: {<SusMetrics.IMP_KILLED_CREW: 'imp_killed_crew'>: 0.75, <SusMetrics.IMP_VOTED_OUT: 'imp_voted_out'>: 0.0, <SusMetrics.CREW_VOTED_OUT: 'crew_voted_out'>: 2.25, <SusMetrics.SABOTAGED_JOBS: 'sabotaged_jobs'>: 0.75, <SusMetrics.COMPLETED_JOBS: 'completed_jobs'>: 2.0, <SusMetrics.TOTAL_STALEMATES: 'total_stalemates'>: 0.0, <SusMetrics.TOTAL_TIME_STEPS: 'total_time_steps'>: 162.5, <SusMetrics.IMPOSTER_WON: 'imposter_won'>: 1.0, <SusMetrics.CREW_WON: 'crew_won'>: 0.0}





In [6]:

# Directly constructed SpatialDQN that the original author marked as a working
# model setup.
# NOTE(review): non_spatial_input_size (23) and the first n_channels entry (8)
# differ from imposter_model_config (20 and 7) — confirm which values match the
# featurizer in use.
imposter_model = SpatialDQN(
    # Inputs
    input_image_size=env.n_cols,
    non_spatial_input_size=23,
    # Convolutional stack
    n_channels=[8, 5, 3],
    kernel_sizes=[3, 3],
    strides=[1, 1],
    paddings=[1, 1],
    # Recurrent stack
    rnn_layers=3,
    rnn_hidden_dim=64,
    rnn_dropout=0.2,
    # Output head
    mlp_hidden_layer_dims=[16, 16],
    n_actions=env.n_imposter_actions,
)