In [None]:
import pathlib
import sys
# Repository root, taken as the parent of the current working directory
# (i.e. the notebook is expected to run from a sub-folder of the repository).
root_path = pathlib.Path.cwd().parent

# Put the repository root on the import path so the `src.*` imports below
# resolve. Guarded so that re-running this cell does not append the same
# entry to sys.path again.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))

from src.train import run_experiment, OptimizerType, run_game
from src.models.dqn import ModelType
from src.environment.pred_prey import ImposterTrainingGround
from src.features.model_ready import FeaturizerType

import numpy as np
import torch

# Compact tensor printing for this notebook: three decimals, no scientific
# notation, and lines up to 200 characters wide.
torch.set_printoptions(
    precision=3,
    sci_mode=False,
    linewidth=200,
)

In [None]:
# --- Experiment configuration ------------------------------------------------

# Player / job counts. N_CREW and N_JOBS are passed to the environment below;
# N_IMPOSTERS is defined here but not passed to it in this cell.
N_CREW = 1
N_IMPOSTERS = 1
N_JOBS = 0

# Values reused by later cells: SEQUENCE_SIZE sizes the model input and is
# passed as sequence_length; BUF_SIZE is passed as replay_buffer_size.
SEQUENCE_SIZE = 1
BUF_SIZE = 300_000

# Training environment. kill_reward is the only non-zero reward configured.
env = ImposterTrainingGround(
    n_crew=N_CREW,
    n_jobs=N_JOBS,
    debug=False,
    kill_reward=-3,
    sabotage_reward=0,
    end_of_game_reward=0,
    time_step_reward=0,
)

# Output directories: <root>/model_registry/1v1_imposter_no_walls.
# Created parent-first, so each mkdir only ever has to create one level.
model_registry_path = root_path / 'model_registry'
experiment_path = model_registry_path / '1v1_imposter_no_walls'
for _output_dir in (model_registry_path, experiment_path):
    _output_dir.mkdir(exist_ok=True)

featurizer_type = FeaturizerType.FLAT


In [None]:
# Width of the imposter network's input: 4 values per step times the sequence
# length (presumably the per-step feature count of the FLAT featurizer —
# TODO confirm).
imput_dim = SEQUENCE_SIZE * 4

# Layer sizes from input to output: the input width, four hidden sizes, and
# finally env.n_imposter_actions.
_hidden_dims = [256, 128, 64, 16]
imposter_model_config = dict(
    layer_dims=[imput_dim, *_hidden_dims, env.n_imposter_actions],
)

In [None]:
# Train the imposter (ModelType.MLP) against a crew built with ModelType.RANDOM.
# All settings are gathered in one mapping, grouped by concern, and unpacked
# into run_experiment; the call stays the last expression of the cell.
# NOTE(review): replay_prepopulate_steps (50_000) and scheduler_time_steps
# (750_000) both exceed num_steps (1_000) — confirm this is intentional.
experiment_settings = {
    # Environment and run length.
    "env": env,
    "num_steps": 1_000,
    # Models and featurization.
    "imposter_model_args": imposter_model_config,
    "crew_model_args": {'n_actions': env.n_crew_actions},
    "imposter_model_type": ModelType.MLP,
    "crew_model_type": ModelType.RANDOM,
    "featurizer_type": featurizer_type,
    "sequence_length": SEQUENCE_SIZE,
    # Replay buffer and batching.
    "replay_buffer_size": BUF_SIZE,
    "replay_prepopulate_steps": 50_000,
    "batch_size": 8,
    # Discount factor and the scheduler's start / end / duration values.
    "gamma": 0.99,
    "scheduler_start_eps": 1,
    "scheduler_end_eps": 0.05,
    "scheduler_time_steps": 750_000,
    # Which side is trained.
    "train_imposter": True,
    "train_crew": False,
    # Output location, optimizer and bookkeeping.
    "experiment_base_dir": experiment_path,
    "optimizer_type": OptimizerType.ADAM,
    "learning_rate": 0.001,
    "train_step_interval": 5,
    "num_checkpoint_saves": 5,
}

run_experiment(**experiment_settings)

In [None]:
# Evaluate a trained imposter: load a saved checkpoint and play one game
# against a crew model built with ModelType.RANDOM.
#
# The checkpoint path is anchored at `experiment_path` (the directory passed
# to run_experiment as experiment_base_dir) rather than the working directory,
# so it resolves no matter where the notebook is launched from.
# PRETRAINED_RUN looks like a timestamped run folder from one particular
# training run — point it at a run that exists on your machine.
PRETRAINED_RUN = '2024-04-24_11-19-34'
pretrained_checkpoint = experiment_path / PRETRAINED_RUN / 'imposter_mlp_100%.pt'

imposter_model_config = {
    # Only the checkpoint path is supplied; no layer_dims are passed here.
    'pretrained_model_path': str(pretrained_checkpoint),
}

imposter_model = ModelType.build(ModelType.MLP, **imposter_model_config)
crew_model = ModelType.build(ModelType.RANDOM, n_actions=env.n_crew_actions)

# Reuse the featurizer type and sequence length configured for training so
# the evaluation set-up cannot drift from the training set-up.
featurizer = FeaturizerType.build(featurizer_type, env)

run_game(env, imposter_model, crew_model, featurizer, sequence_length=SEQUENCE_SIZE)



In [None]:
# Evenly spaced integer steps in [0, num_steps), excluding the end point.
# The values below copy num_steps=1_000 and num_checkpoint_saves=5 from the
# run_experiment call so this cell runs on a fresh kernel; keep them in sync.
# NOTE(review): presumably this mirrors how run_experiment spaces its
# checkpoint saves — confirm against src.train.
num_steps = 1_000
num_checkpoint_saves = 5
t_saves = np.linspace(0, num_steps, num_checkpoint_saves - 1, endpoint=False, dtype=int)

In [None]:
# Display the `t_saves` array built in the previous cell.
t_saves