# Navigation
---

### 1. Start the Environment

If the code cell below returns an error, please revisit the project instructions to double-check that you have installed [Unity ML-Agents](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md).

In [None]:
from unityagents import UnityEnvironment
from collections import defaultdict, deque

import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
%matplotlib inline

### 1.1 Starting the Unity environment

In [None]:
# Launch the Unity environment from the local "env/Banana.app" build.
# NOTE(review): the path looks like a macOS app bundle -- presumably it must be
# changed when running on another platform; confirm against the project setup.
env = UnityEnvironment(file_name="env/Banana.app")

### 2. Training the agents

The simulation contains a single agent that navigates a large environment.  At each time step, it has four actions at its disposal:
- `0` - walk forward 
- `1` - walk backward
- `2` - turn left
- `3` - turn right

The state space has `37` dimensions and contains the agent's velocity, along with ray-based perception of objects around the agent's forward direction.  A reward of `+1` is provided for collecting a yellow banana, and a reward of `-1` is provided for collecting a blue banana. 

Environments contain **_brains_** which are responsible for deciding the actions of their associated agents. By default, we'll control the first available brain from Python.

In [None]:
# Use the first brain exposed by the environment; it is the one driven from Python.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Restart the simulation with train_mode enabled and keep this brain's info.
env_info = env.reset(train_mode=True)[brain_name]

# Size of the action space as reported by the brain.
action_size = brain.vector_action_space_size

# Observation at index 0 and its dimensionality.
state = env_info.vector_observations[0]
state_size = len(state)

##### Deep Q-Learning

In [None]:
def step_env(action):
    """
    Advance the global Unity environment by one step with ``action``.

    Args:
        action: the action forwarded to ``env.step``

    Returns:
        tuple: ``(next_state, reward, done, env_info)`` where the first three
        values are read at index 0 of the brain info returned for
        ``brain_name`` and ``env_info`` is that brain info itself.
    """
    info = env.step(action)[brain_name]

    # Index 0 is used for the observation, the reward and the termination flag.
    return (
        info.vector_observations[0],
        info.rewards[0],
        info.local_done[0],
        info,
    )
    

def dqn(agent, n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995,
        target_score=13.0, window=100, checkpoint_dir="checkpoints"):

    """
    Run the agent in the global Unity environment following the Deep Q-Learning
    algorithm and return the score history.

    Training stops early once the mean score over a *full* window of the last
    ``window`` episodes reaches ``target_score``; the weights of
    ``agent.qnetwork_local`` are then saved to
    ``<checkpoint_dir>/<AgentClassName>_checkpoint.pth``.

    Relies on the notebook globals ``env`` and ``brain_name`` and on ``step_env``.

    Args:
        agent (DQNAgent): agent to be trained; must provide ``act(state, epsilon)``,
            ``step(state, action, reward, next_state, done)`` and ``qnetwork_local``
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        target_score (float): rolling-mean score at which the environment counts as solved
        window (int): number of most recent episodes used for the rolling statistics
        checkpoint_dir (str): directory where the checkpoint is written (created if missing)

    Returns:
        tuple: ``(scores, scores_window_mean, scores_window_std)`` -- three lists with
        one entry per played episode: the episode score, and the mean and standard
        deviation of the rolling window after that episode.
    """

    import os  # function-scope import: only needed to prepare the checkpoint folder

    scores = []                           # score of every episode
    scores_window = deque(maxlen=window)  # most recent `window` scores
    scores_window_mean = []               # rolling mean after each episode
    scores_window_std = []                # rolling std after each episode
    epsilon = eps_start                   # current exploration rate

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
        state = env_info.vector_observations[0]            # get the current state
        score = 0                                          # initialize the score

        for _ in range(max_t):
            action = agent.act(state, epsilon)             # select an action
            next_state, reward, done, _info = step_env(action)

            # Hand the transition to the agent (what it does with it is defined
            # by the agent's own step() implementation).
            agent.step(state, action, reward, next_state, done)

            state = next_state  # update the current state
            score += reward     # update the score

            if done:
                break

        scores_window.append(score)
        scores.append(score)
        w_score_mean = np.mean(scores_window)
        scores_window_mean.append(w_score_mean)
        scores_window_std.append(np.std(scores_window))

        epsilon = max(eps_end, eps_decay * epsilon)  # decrease epsilon

        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, w_score_mean), end="")

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, w_score_mean))

        # Only declare success on a full window: otherwise a few lucky early
        # episodes would be reported as an average over `window` episodes.
        if len(scores_window) == window and w_score_mean >= target_score:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode, w_score_mean))

            # Make sure the target folder exists so a long training run is not
            # lost to a missing-directory error at save time.
            os.makedirs(checkpoint_dir, exist_ok=True)
            filename = os.path.join(checkpoint_dir, "{}_checkpoint.pth".format(type(agent).__name__))
            torch.save(agent.qnetwork_local.state_dict(), filename)
            break

    return (scores, scores_window_mean, scores_window_std)

In [None]:
def plot_scores(scores, rolling_mean, rolling_std, title, filename='', threshold=13.):

    """
    Plot the per-episode scores together with their moving average and a
    +/- one-standard-deviation band, plus a horizontal threshold line.

    Args:
        scores (sequence of float): score of each episode
        rolling_mean (sequence of float): moving average of the scores
        rolling_std (sequence of float): moving standard deviation, same length
            as ``rolling_mean``
        title (str): title of the figure
        filename (str): if not empty, the figure is also saved to this path in
            PDF format (missing parent directories are created)
        threshold (float): height of the dashed "score threshold" line
    """

    import os  # function-scope import: only needed when saving the figure

    # Accept plain lists as well as arrays: the band below needs element-wise
    # addition/subtraction, which lists do not provide.
    rolling_mean = np.asarray(rolling_mean)
    rolling_std = np.asarray(rolling_std)
    episodes = np.arange(len(rolling_mean))

    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)

    ax.axhline(y=threshold, xmin=0.0, xmax=1.0, color='r', linestyle='--', alpha=0.9,
               label="Score threshold baseline")

    # Draw on the explicit axes rather than on pyplot's implicit "current" axes.
    ax.plot(np.arange(len(scores)), scores)
    ax.plot(episodes, rolling_mean, label='Moving average')
    ax.fill_between(episodes, rolling_mean + rolling_std, rolling_mean - rolling_std,
                    facecolor='green', alpha=0.4, label="Std of the moving average")
    ax.set_ylabel("Score")
    ax.set_xlabel("Episode #")
    ax.set_title(title)
    ax.legend(loc='upper left')

    if filename != '':
        out_dir = os.path.dirname(filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(filename, format='pdf')

#### 2.1 Training a DQN agent

In [None]:
from dqn.dqn_agent import DQNAgent

# Build a DQN agent sized to this environment (seed fixed to 42) and train it
# with the default arguments of dqn(). The three results are the per-episode
# scores and the rolling mean / rolling std returned by dqn().
agent = DQNAgent(state_size, action_size, seed=42)
scores_dqn, scores_window_mean_dqn, scores_window_std_dqn = dqn(agent)

In [None]:
# Plot the DQN training curve and save it as a PDF under report/figures/.
# The rolling statistics are passed as numpy arrays because plot_scores
# adds and subtracts them element-wise.
plot_scores(scores_dqn, np.array(scores_window_mean_dqn), np.array(scores_window_std_dqn), 
            title='DQN', filename='report/figures/dqn_score.pdf');

#### 2.2 Training a DDQN agent

In [None]:
from dqn.dqn_agent import DDQNAgent

# Build a DDQN agent with the same sizes and seed (42) as the DQN run and train
# it through the same dqn() loop with its default arguments.
ddqn_agent = DDQNAgent(state_size, action_size, seed=42)
scores_ddqn, scores_window_mean_ddqn, scores_window_std_ddqn = dqn(ddqn_agent)

In [None]:
# Plot the DDQN training curve and save it as a PDF under report/figures/.
# The rolling statistics are passed as numpy arrays because plot_scores
# adds and subtracts them element-wise.
plot_scores(scores_ddqn, 
            np.array(scores_window_mean_ddqn), 
            np.array(scores_window_std_ddqn), 
           title='DDQN', filename='report/figures/ddqn_score.pdf');

#### Close the environment

In [None]:
# Close the environment object created at the start of the notebook.
env.close()