Google Colab Setup
---

Make sure to select a GPU under Runtime > Change runtime type > Hardware accelerator.

In [None]:
#@title << Setup Google Colab by running this cell {display-mode: "form"}
import sys
if 'google.colab' in sys.modules:
    # Clone GitHub repository
    !git clone https://github.com/pacm/rl-workshop.git
        
    # Copy files required to run the code
    !cp -r "rl-workshop/agents" "rl-workshop/env" "rl-workshop/helpers" .
    
    # Install packages via pip
    !pip install -r "rl-workshop/colab-requirements.txt"
    
    # Restart Runtime
    import os
    os.kill(os.getpid(), 9)

PER and curiosity
---

In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported modules are reloaded before each cell runs (no kernel restart
# needed while iterating on the agents/ package).
%load_ext autoreload
%autoreload 2

In [None]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Run the project scripts; %run makes the names they define available to the
# following cells. Later cells use names that are never imported in this
# notebook (e.g. WindowedGridView, DeliveryDrones, MultiAgentTrainer,
# test_agents, plot_rolling_rewards) - presumably they come from these two
# scripts; verify against env/env.py and helpers/rl-helpers.py.
%run env/env.py
%run helpers/rl-helpers.py

In [None]:
import datetime
import os

from agents.dqn import DQNAgent, ConvQNetworkFactory
from agents.random import RandomAgent
from agents.logging import TensorBoardLogger
from agents.curiosity import CuriosityDQNAgent

In [None]:
# Build the environment: a DeliveryDrones instance wrapped in a
# WindowedGridView with radius=3.
env = WindowedGridView(DeliveryDrones(), radius=3)

# Override a few environment parameters in place. The later env.reset() call
# is expected to apply them (see the "Reset environment" cell).
env.env_params.update({
    'n_drones': 11,
    'pickup_reward': 1,
    'discharge': 2,
    'rgb_render_rescale': 2.0,
})

In [None]:
"""DQN with conv. Q-network"""
dqn_agent_1_logger = TensorBoardLogger(os.path.join('logs', 'dqn1'), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
dqn_agent_1 = DQNAgent(
    env, ConvQNetworkFactory(env, conv_layers=[
        {'out_channels': 32, 'kernel_size': 3, 'stride': 2, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
    ], dense_layers=[256]),
    gamma=0.95, epsilon_start=1, epsilon_decay=0.99, epsilon_end=0.01, memory_size=10000, batch_size=64, 
    target_update_interval=500, logger=dqn_agent_1_logger)

dqn_agent_2_logger = TensorBoardLogger(os.path.join('logs', 'dqn2'), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
dqn_agent_2 = DQNAgent(
    env, ConvQNetworkFactory(env, conv_layers=[
        {'out_channels': 32, 'kernel_size': 3, 'stride': 2, 'padding': 1},
        {'out_channels': 64, 'kernel_size': 3, 'stride': 1, 'padding': 1}
    ], dense_layers=[256]),
    gamma=0.95, epsilon_start=1, epsilon_decay=0.99, epsilon_end=0.01, memory_size=10000, batch_size=64, 
    target_update_interval=500, logger=dqn_agent_2_logger)

In [None]:
"""DQN with intrinsic curiosity module)"""
dqn_factory = ConvQNetworkFactory(env, conv_layers=[
        {'out_channels': 32, 'kernel_size': 3, 'stride': 2, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1},
    ], dense_layers=[256])

curiosity_agent_1_logger = TensorBoardLogger(os.path.join('logs', 'curiosity1'), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
curiosity_agent_1 = CuriosityDQNAgent(env, dqn_factory, gamma=0.95, epsilon_start=1, epsilon_decay=0.99, 
                                      epsilon_end=0.01, memory_size=10000, batch_size=64, 
                                      target_update_interval=500, eta=0.1,logger=curiosity_agent_1_logger)

In [None]:
# Reset the environment so the parameters set earlier are in effect
env.reset()

# Give every drone a random agent first, then replace the agents at
# indices 0-2 with the three learning agents.
agents = {drone.index: RandomAgent(env) for drone in env.drones}
agents.update({
    0: dqn_agent_1,
    1: dqn_agent_2,
    2: curiosity_agent_1,
})

# Trainer driving all agents in the shared environment
trainer = MultiAgentTrainer(env, agents, reset_agents=True, seed=0)

In [None]:
# Number of steps handed to the trainer.
n_training_steps = 10000

# Training phase: switch is_greedy off on every agent (random ones included).
for agent in agents.values():
    agent.is_greedy = False

trainer.train(n_training_steps)

# Plot the rewards the trainer logged during training.
plot_rolling_rewards(trainer.rewards_log)

In [None]:
# Number of steps used for the evaluation run.
n_test_steps = 1000

# Evaluation phase: switch is_greedy on for every agent.
for agent in agents.values():
    agent.is_greedy = True

# Run the agents in the environment and plot the rewards they collected.
rewards_log = test_agents(env, agents, n_steps=n_test_steps)
plot_cumulative_rewards(rewards_log)