Google Colab Setup
---

Make sure a GPU is selected under Runtime > Change runtime type > Hardware accelerator.

In [None]:
#@title << Run this to check your runtime is correct {display-mode: "form"}
# Pipes the output of `nvidia-smi` into `grep -q 'failed'` (quiet mode: grep prints
# nothing itself). The warning is echoed only when grep finds the word 'failed',
# because `&&` runs `echo` only if the preceding command succeeded.
!nvidia-smi | grep -q 'failed' && echo "STOP! You are using a runtime without a GPU. Change the runtime type before going further!"

In [None]:
#@title << Setup Google Colab by running this cell {display-mode: "form"}
import sys
# Everything below runs only when the `google.colab` module is loaded in this
# interpreter; elsewhere the cell does nothing beyond importing `sys`.
if 'google.colab' in sys.modules:
    # Clone GitHub repository
    # (only the `evaluation_setup` branch is fetched, via --single-branch --branch)
    !git clone --single-branch --branch evaluation_setup https://github.com/pacm/rl-workshop.git
        
    # Copy files required to run the code
    # (the three package directories are copied into the current working directory
    # so that `agents`, `env` and `rl_helpers` can be imported directly)
    !cp -r "rl-workshop/agents" "rl-workshop/env" "rl-workshop/rl_helpers" .
    
    # Install packages via pip
    !pip install -r "rl-workshop/colab-requirements.txt"
    
    # Restart Runtime
    # Sends signal 9 to the current process, terminating this kernel immediately.
    # NOTE(review): presumably Colab then starts a fresh runtime that picks up the
    # newly installed packages — nothing after this line in the cell will run.
    import os
    os.kill(os.getpid(), 9)

Prioritized Experience Replay
---

In [None]:
import datetime
import os
import numpy as np

from agents.dqn import DQNAgent, ConvQNetworkFactory, ConvQNetwork
from agents.curiosity import CuriosityDQNAgent
from agents.random import RandomAgent
from agents.logging import TensorBoardLogger, NoLogger
from agents.per import PERAgent
from env.env import WindowedGridView, DeliveryDrones
from rl_helpers.rl_helpers import MultiAgentTrainer, test_agents, plot_cumulative_rewards, plot_rolling_rewards, render_video

In [None]:
# Build the environment: a DeliveryDrones instance wrapped in a WindowedGridView (radius=3)
env = WindowedGridView(DeliveryDrones(), radius=3)

# Default parameters used for evaluation, merged into the environment's env_params
env.env_params.update(dict(
    charge=20,
    charge_reward=-0.1,
    crash_reward=-1,
    delivery_reward=1,
    discharge=10,
    drone_density=0.05,
    dropzones_factor=2,
    n_drones=10,
    packets_factor=3,
    pickup_reward=0,
    rgb_render_rescale=1.0,
    skyscrapers_factor=3,
    stations_factor=2,
))

In [None]:
# Create 2 DQN agents: identical training hyper-parameters, different Q-network layouts

# Agent 1: four conv layers with 32 output channels each (stride 2 first, then stride 1),
# followed by dense_layers=[256]
dqn_agent_1 = DQNAgent(
    env,
    ConvQNetworkFactory(
        env,
        conv_layers=[
            {'out_channels': 32, 'kernel_size': 3, 'stride': stride, 'padding': 1}
            for stride in (2, 1, 1, 1)
        ],
        dense_layers=[256],
    ),
    gamma=0.95,
    epsilon_start=1,
    epsilon_decay=0.99,
    epsilon_end=0.01,
    memory_size=10000,
    batch_size=64,
    target_update_interval=500,
)

# Agent 2: two conv layers (32 channels at stride 2, then 64 channels at stride 1),
# followed by dense_layers=[64, 64]
dqn_agent_2 = DQNAgent(
    env,
    ConvQNetworkFactory(
        env,
        conv_layers=[
            {'out_channels': channels, 'kernel_size': 3, 'stride': stride, 'padding': 1}
            for channels, stride in ((32, 2), (64, 1))
        ],
        dense_layers=[64, 64],
    ),
    gamma=0.95,
    epsilon_start=1,
    epsilon_decay=0.99,
    epsilon_end=0.01,
    memory_size=10000,
    batch_size=64,
    target_update_interval=500,
)

In [None]:
# Create 1 DQN agent with Prioritized Experience Replay
# NoLogger comes from the notebook's import cell (`from agents.logging import ... NoLogger`),
# so it is not imported a second time here.
# NOTE(review): the NoLogger class itself, not an instance, is passed as the logger —
# confirm PERAgent expects the class rather than `NoLogger()`.
per_logger = NoLogger

# Q-network factory: four conv layers with 32 output channels each, then dense_layers=[256]
conv_factory = ConvQNetworkFactory(env, conv_layers=[
        {'out_channels': 32, 'kernel_size': 3, 'stride': 2, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
    ], dense_layers=[256])

# Same gamma/epsilon/memory/batch/target-update values as the plain DQN agents, plus
# the PER-specific alpha and beta settings.
# NOTE(review): alpha/beta are presumably the prioritization and importance-sampling
# exponents — confirm against agents/per.py.
per_agent_1 = PERAgent(env, 
                       conv_factory, 
                       gamma=0.95, 
                       epsilon_start=1.0, 
                       epsilon_decay=0.99,
                       epsilon_end=0.01, 
                       memory_size=10000, 
                       batch_size=64, 
                       target_update_interval=500, 
                       alpha=0.6, 
                       beta=0.4, 
                       logger=per_logger)

In [None]:
# Reset environment with those parameters
env.reset()

# Start with a RandomAgent for every drone, keyed by the drone's index
agents = dict((drone.index, RandomAgent(env)) for drone in env.drones)

# Replace the entries for keys 0, 1 and 2 with the RL agents
agents.update({
    0: dqn_agent_1,
    1: dqn_agent_2,
    2: per_agent_1,
})

# Create trainer
trainer = MultiAgentTrainer(
    env,
    agents,
    reset_agents=True,
    seed=0,
)

In [None]:
# Run training; NOTE(review): 1000 is presumably the number of training steps —
# confirm against MultiAgentTrainer.train
trainer.train(1000)

# Plot the rolling rewards from the trainer's log, labelling the three RL drones
rewards = plot_rolling_rewards(
    trainer.rewards_log,
    drones_labels={
        0: 'DQN1',
        1: 'DQN2',
        2: 'PER',
    },
)

In [None]:
# Evaluation
# Run the agents in the environment with n_steps=1000 and keep the returned rewards log
rewards_log = test_agents(env, agents, n_steps=1000)
plot_cumulative_rewards(rewards_log, drones_labels={0: 'DQN1', 1: 'DQN2', 2: 'PER'})

# Print final evaluation scores
# The logged reward sequences are stacked and summed along axis 1, giving one total
# per entry of rewards_log. Each total is paired with its own key (keys() and values()
# iterate in the same order) rather than with a positional counter, so the printed
# label always matches the key the rewards were logged under.
for idx, score in zip(rewards_log.keys(), np.sum(list(rewards_log.values()), axis=1)):
    print("Agent {}: {}".format(idx, score))

In [None]:
# Optional: location where a rollout video would be written
path = os.path.join(
    'videos',
    'prioritized.mp4',
)
# Uncomment the following line to save the video:
#render_video(env, agents, path, n_steps=60, fps=1, seed=None)

In [None]:
# Save the agent (you can ignore the warnings)
# NOTE(review): the file is named 'per-agent-0.pt' while the variable is per_agent_1 —
# confirm this is the intended file name before submitting.
per_agent_1.save('per-agent-0.pt')

**Experiment a bit then submit to AIcrowd :D**

> https://www.aicrowd.com/challenges/droneracer