## Load the environment

---

In [1]:
from unityagents import UnityEnvironment
import numpy as np
from ppo_controller import PPOController
from ddpg_controller import DDPGController
from nstep_ddpg_controller import NStepDDPGController
import matplotlib.pyplot as plt
%matplotlib inline

Next, we will start the environment!  **_Before running the code cell below_**, change the `file_name` parameter to match the location of the Unity environment that you downloaded.

- **Mac**: `"path/to/Reacher.app"`
- **Windows** (x86): `"path/to/Reacher_Windows_x86/Reacher.exe"`
- **Windows** (x86_64): `"path/to/Reacher_Windows_x86_64/Reacher.exe"`
- **Linux** (x86): `"path/to/Reacher_Linux/Reacher.x86"`
- **Linux** (x86_64): `"path/to/Reacher_Linux/Reacher.x86_64"`
- **Linux** (x86, headless): `"path/to/Reacher_Linux_NoVis/Reacher.x86"`
- **Linux** (x86_64, headless): `"path/to/Reacher_Linux_NoVis/Reacher.x86_64"`

For instance, if you are using a Mac, then you downloaded `Reacher.app`.  If this file is in the same folder as the notebook, then the line below should appear as follows:
```
env = UnityEnvironment(file_name="Reacher.app")
```

In [2]:
# Location of the Unity Reacher build to launch; adjust to your platform
# (see the list of per-OS file names in the markdown cell above).
reacher_build = '../Reacher_20/Reacher.app'
env = UnityEnvironment(file_name=reacher_build)

# The first brain listed by the environment is used as the default one.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
def rolling_avg_scores(scores, window):
    """Return the means of every full-width sliding window over ``scores``.

    Args:
        scores: sequence of numbers supporting ``len`` and slicing.
        window: number of consecutive entries averaged per output value.

    Returns:
        A list with ``len(scores) - window + 1`` means, one per window
        position. When ``scores`` has at most ``window`` entries, a
        single-element list holding the mean of all of ``scores``.
    """
    total = len(scores)

    # Not enough data for more than one window: collapse to one overall mean.
    if total <= window:
        return [np.mean(scores)]

    averages = []
    for start in range(total - window + 1):
        averages.append(np.mean(scores[start:start + window]))
    return averages

def plot_results(series, names, window=20):
    """Show one figure per series: the raw values plus their rolling mean.

    Args:
        series: iterable of numeric sequences, one per metric to plot.
        names: iterable of y-axis labels, paired positionally with ``series``.
        window: width of the rolling-mean window. Defaults to 20, the value
            that used to be hard-coded.
    """
    for serie, name in zip(series, names):
        n_points = len(serie)
        plt.plot(np.arange(n_points), serie)

        if n_points > 0:
            smoothed = rolling_avg_scores(serie, window)
            # Right-align the rolling mean with the raw curve. Deriving the
            # x-range from len(smoothed) keeps x and y the same length even
            # when the series is shorter than the window (a fixed start of
            # window - 1 would yield an empty x against a one-element y).
            plt.plot(np.arange(n_points - len(smoothed), n_points), smoothed)

        plt.ylabel(name)
        plt.xlabel('Episode #')
        plt.show()

## PPO agent

---

In [None]:
class Config:
    """Hyperparameter bundle for the PPO run in this notebook.

    An instance is handed to ``PPOController``; how each field is interpreted
    is decided by that controller, whose code is not part of this notebook.
    """

    def __init__(self):
        # NOTE(review): field names suggest the usual PPO roles (discount
        # ``gamma``, clipping ``epsilon``, ...) — confirm in ppo_controller.
        vars(self).update(
            num_episodes=1200,
            std=0.5,
            epsilon_start=0.1,
            epsilon_decay=1,
            learning_rate=5e-4,
            train_iterations=4,
            gamma=0.99,
            mlp_specs=(200, 150, 150),
            update_every=200,
            max_memory=200,
            tau=1e-3,
        )

    def as_dict(self):
        """Return the live attribute dictionary of this instance (not a copy)."""
        return vars(self)
    

# Train the PPO agent; solve() hands back the per-episode scores and the
# surrogate values reported during training.
controller = PPOController(env, brain_name, Config())
ppo_scores, surrogates = controller.solve()

# plot_results takes a sequence of series and a parallel sequence of axis
# labels, so the two result lists are wrapped in a tuple and named explicitly
# (passing them bare would zip individual scores with individual surrogates).
plot_results((ppo_scores, surrogates), ('Score', 'Surrogate'))

Episode 1087/1200 | Average Score: 24.91 | Model surrogate: 0.42672   

## DDPG agent

---

In [None]:
class Config:
    """Hyperparameter bundle for the DDPG run in this notebook.

    An instance is handed to ``DDPGController``; how each field is interpreted
    is decided by that controller, whose code is not part of this notebook.
    """

    def __init__(self):
        # NOTE(review): max_memory is a float (1e5). If the controller uses it
        # as a container size it may need an int — confirm in ddpg_controller.
        vars(self).update(
            num_episodes=100,
            batch_size=64,
            learning_rate=5e-4,
            gamma=0.99,
            mlp_specs=(200, 150),
            update_every=4,
            max_memory=1e5,
            tau=1e-3,
        )

    def as_dict(self):
        """Return the live attribute dictionary of this instance (not a copy)."""
        return vars(self)
    

# Train the DDPG agent with the settings defined in the Config class above.
ddpg_config = Config()
controller = DDPGController(env, brain_name, ddpg_config)
ddpg_scores, surrogates, critic_losses = controller.solve()

# One figure per returned series, each labelled on its y-axis.
ddpg_series = (ddpg_scores, surrogates, critic_losses)
ddpg_labels = ('Score', 'Surrogate', 'Critic loss')
plot_results(ddpg_series, ddpg_labels)

## DDPG agent with n_step

---

In [None]:
class Config:
    """Hyperparameter bundle for the n-step DDPG run in this notebook.

    An instance is handed to ``NStepDDPGController``; how each field is
    interpreted is decided by that controller, whose code is not part of
    this notebook.
    """

    def __init__(self):
        # NOTE(review): max_memory is a float (1e5). If the controller uses it
        # as a container size it may need an int — confirm in
        # nstep_ddpg_controller.
        vars(self).update(
            num_episodes=50,
            batch_size=64,
            n_step=3,
            learning_rate=5e-4,
            gamma=0.99,
            mlp_specs=(200, 150),
            update_every=4,
            max_memory=1e5,
            tau=1e-3,
        )

    def as_dict(self):
        """Return the live attribute dictionary of this instance (not a copy)."""
        return vars(self)
    

# Train the n-step DDPG agent with the settings defined in the Config class above.
nstep_config = Config()
controller = NStepDDPGController(env, brain_name, nstep_config)
nstep_ddpg_scores, surrogates, critic_losses = controller.solve()

# One figure per returned series, each labelled on its y-axis.
nstep_series = (nstep_ddpg_scores, surrogates, critic_losses)
nstep_labels = ('Score', 'Surrogate', 'Critic loss')
plot_results(nstep_series, nstep_labels)

## Comparing results

---

In [None]:
# Overlay the per-episode scores of the three agents on a single figure.
fig, ax = plt.subplots(figsize=(12, 8))

runs = (
    ('PPO', ppo_scores),
    ('DDPG', ddpg_scores),
    ('N-step DDPG', nstep_ddpg_scores),
)
for run_label, run_scores in runs:
    # Take the x-range from the data itself: the runs train for different
    # numbers of episodes, so a fixed range would not match every series.
    ax.plot(np.arange(len(run_scores)), run_scores, label=run_label)

# Horizontal reference line at the score of 30 labelled as the win threshold.
ax.axhline(30, color='gray', linestyle='--', label='Win threshold')

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.legend()
plt.show()