In [None]:
import pathlib
import sys
# Parent of the notebook's current working directory; the `src.*` imports
# below are resolved relative to it, and later cells build paths from it.
root_path = pathlib.Path().absolute().parent
# Only register the path once: re-running this cell in a live kernel would
# otherwise append a duplicate entry to sys.path on every execution.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))

from src.train import run_experiment, OptimizerType
from src.models.dqn import ModelType
from src.environment.pred_prey import ImposterTrainingGround
from src.features.model_ready import FeaturizerType
from src.visualize import StateSequenceVisualizer

import torch

# Notebook-wide tensor display settings: 3 decimal places, no scientific
# notation, and wide (200-character) lines before wrapping.
torch.set_printoptions(precision=3, sci_mode=False, linewidth=200)

In [None]:
# --- Experiment constants -------------------------------------------------
# Population / task counts handed to the environment below.
N_CREW = 4
N_JOBS = 0
N_IMPOSTERS = 1  # NOTE(review): not passed to the environment in this cell
# Multiplier applied to the per-step feature width when sizing the model input.
SEQUENCE_SIZE = 1
# Same value as the replay_buffer_size used by the training cell.
BUF_SIZE = 300_000

# --- Environment ----------------------------------------------------------
env = ImposterTrainingGround(
    n_crew=N_CREW,
    n_jobs=N_JOBS,
    debug=False,
    kill_reward=3,
    sabotage_reward=1,
    end_of_game_reward=10,
    time_step_reward=0,
)

# --- Output directories ---------------------------------------------------
# <root>/model_registry/test ; created parent-first, tolerating reruns.
model_registry_path = root_path / 'model_registry'
tests_path = model_registry_path / 'test'
for _directory in (model_registry_path, tests_path):
    _directory.mkdir(exist_ok=True)

In [None]:
# Width of the first MLP layer: the flattened state plus, for every agent,
# (n_rows + n_cols) extra features, repeated once per step in the sequence.
# NOTE(review): the name `imput_dim` (sic) is kept as-is because other cells
# may reference it.
imput_dim = SEQUENCE_SIZE * (
    env.flattened_state_size
    + (env.n_rows + env.n_cols) * env.n_agents
)

# Layer sizes from input to output; the last layer has one unit per
# imposter action.
imposter_model_config = dict(
    layer_dims=[
        imput_dim,
        256,
        16,
        16,
        16,
        env.n_imposter_actions,
    ],
)

In [None]:
# Sanity check: display the flattened state size that feeds into the
# model's input dimension computed above.
env.flattened_state_size

In [None]:
# Launch training: only the imposter is trained (train_imposter=True,
# train_crew=False); the crew is given ModelType.RANDOM.
# Results/checkpoints are written under `tests_path`.
run_experiment(
    env=env,
    num_steps=1_000_000,
    # --- models / features ---
    imposter_model_args=imposter_model_config,
    crew_model_args={'n_actions': env.n_crew_actions},
    imposter_model_type=ModelType.MLP,
    crew_model_type=ModelType.RANDOM,
    featurizer_type=FeaturizerType.FLAT,
    sequence_length=SEQUENCE_SIZE,
    # --- replay buffer ---
    # Use the shared constant instead of repeating the literal 300_000 so
    # the configuration cell stays the single source of truth.
    replay_buffer_size=BUF_SIZE,
    replay_prepopulate_steps=50_000,
    batch_size=8,
    gamma=0.99,
    # --- exploration schedule ---
    # presumably epsilon is annealed from start to end over
    # `scheduler_time_steps` — confirm against src.train.
    scheduler_start_eps=1,
    scheduler_end_eps=0.05,
    scheduler_time_steps=500_000,
    # --- what to train / where to save ---
    train_imposter=True,
    train_crew=False,
    experiment_save_path=tests_path,
    # --- optimisation ---
    optimizer_type=OptimizerType.ADAM,
    learning_rate=0.001,
    train_step_interval=5,
    num_checkpoint_saves=2,
)