Google Colab Setup
---


In [None]:
#@title << Setup Google Colab by running this cell {display-mode: "form"}
import sys
if 'google.colab' in sys.modules:
    # Clone GitHub repository
    !git clone --single-branch --branch flatland-model-evaluator https://github.com/YanickSchraner/rl-on-trains-workshop.git
        
    # Copy files required to run the code
    !cp -r "rl-on-trains-workshop/utils" "rl-on-trains-workshop/train.py" "rl-on-trains-workshop/tests.yaml" .
    
    # Install packages via pip
    !pip install flatland-rl==2.2.2  aicrowd-cli
    
    # Restart Runtime
    import os
    os.kill(os.getpid(), 9)

Tensorboard
---

Set up TensorBoard before starting the training.

In [None]:
"""Load the tensorboard IPython extension (needed for the %tensorboard magic)."""
%load_ext tensorboard

In [None]:
"""Start TensorBoard, pointing it at the `runs` log directory."""
%tensorboard --logdir runs

Random Network Distillation
---
Train a Flatland-RL policy using DQN and Random Network Distillation

In [None]:
import os
from argparse import Namespace
from pprint import pprint

from train import train_agent
from utils.dddqn_rnd_policy import DDDQNRNDPolicy

Set the policy class to `DDDQNRNDPolicy`, which combines DQN with Random Network Distillation.

In [None]:
# Policy class that is passed to train_agent as its first argument in the "Run Training" cell.
policy_class = DDDQNRNDPolicy

Training Parameters

In [None]:
# Settings for the training run. The dict is unpacked into an argparse.Namespace
# before it is handed to train_agent.
training_params = {
    # Episode counts and checkpoint cadence
    "n_episodes": 500,
    "n_evaluation_episodes": 50,
    "checkpoint_interval": 100,

    # Epsilon schedule
    "eps_start": 1.0,
    "eps_end": 0.01,
    "eps_decay": 0.9,

    # Replay buffer and optimisation settings
    "buffer_size": 100_000,
    "buffer_min_size": 0,
    "batch_size": 128,
    "gamma": 0.95,
    "tau": 1e-3,
    "learning_rate": 1e-4,
    "hidden_layers": [32, 32],
    "update_every": 8,

    # Runtime settings
    "use_gpu": True,
    "num_threads": 1,
    "render": False,  # won't work in notebook
    "load_policy": "",

    # RND parameters:
    "rnd_hidden_layers": [32, 32],
    "rnd_intrinsic_reward_weight": 1.0,
}

Environment Parameters

In [None]:
# Settings of the environment used for training. The dict is unpacked into an
# argparse.Namespace before it is handed to train_agent.
training_env_params = {
    "n_agents": 5,
    "x_dim": 25,
    "y_dim": 25,
    "n_cities": 2,
    "max_rails_between_cities": 2,
    "max_rails_in_city": 3,
    "malfunction_rate": 1 / 50,
    "seed": 0
}

# Evaluation uses the same settings, held in an independent copy: changing
# evaluation_env_params in a later cell must not silently change the training
# settings as well. A shallow copy is enough because every value is a scalar.
evaluation_env_params = dict(training_env_params)

Observation Parameters

In [None]:
# Observation settings. The dict is unpacked into an argparse.Namespace before
# it is handed to train_agent.
obs_params = dict(
    use_fast_tree_observation=True,
    observation_tree_depth=2,
    observation_radius=10,
    observation_max_path_depth=30,
)

Run Training

In [None]:
# Echo every settings group so the configuration is recorded in the cell output.
for section_title, section_params in (
    ("\nTraining parameters:", training_params),
    ("\nTraining environment parameters:", training_env_params),
    ("\nEvaluation environment parameters:", evaluation_env_params),
    ("\nObservation parameters:", obs_params),
):
    print(section_title)
    pprint(section_params)

# Export the configured thread count through the OMP_NUM_THREADS environment variable.
os.environ["OMP_NUM_THREADS"] = str(training_params['num_threads'])

# Each settings dict is unpacked into an argparse.Namespace and passed
# positionally, in this order, after the policy class.
train_agent(
    policy_class,
    *(
        Namespace(**settings)
        for settings in (
            training_params,
            training_env_params,
            evaluation_env_params,
            obs_params,
        )
    ),
)

Submit Agent
---

In [None]:
!aicrowd login --api-key $API_KEY
!aicrowd submission create -c flatland-workshop -f "runs/2021-04-12_18-46-22/ckpts/ckpt-500.pth"