# Banana collector

---

In this notebook, we will train a DQN agent to pick up yellow bananas while avoiding blue ones in a Unity ML-Agents environment.

### 1. Start the Environment

We begin by importing some necessary packages.  If the code cell below returns an error, please revisit the project instructions to double-check that you have installed the necessary packages.

In [None]:
import sys
sys.path.append('../')

from unityagents import UnityEnvironment
#from agents import *
from collections import deque

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import config 
import pprint
import torch

In [None]:
# Create the Unity environment from the Linux build at Banana_Linux/Banana.x86_64
env = UnityEnvironment(file_name="Banana_Linux/Banana.x86_64")

# Use the first brain listed by the environment as the default one; its name is
# the key used to index the per-brain results of env.reset() and env.step()
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset the environment in training mode and keep the default brain's info
env_info = env.reset(train_mode=True)[brain_name]


### 2. Configure and create an instance of the DQN agent

The DQN agent's hyperparameters are saved in and loaded from the config.py file. The current values are the result of coarse hyperparameter tuning. Feel free to try different hyperparameter values.

In [None]:
from agents.ddqn_agent import DDQN

# Load the agent hyperparameters ('DDQN' entry) and the training settings
# ('BananaCollector' entry) from config.py
hparams = config.HYPERPARAMS['DDQN']
params = config.TRAINPARAMS['BananaCollector']

# Create the agent from the hyperparameters and print them for reference
agent = DDQN(hparams)
print("Created agent with following hyperparameter values:")
pprint.pprint(hparams)

### 3. Train DQN agent!

In [None]:
# Training settings, all read from the `params` entry loaded from config.py
n_episodes = params['n_episodes']                # maximum number of training episodes
window_size = params['scores_window_size']       # length of the moving-average window
solve_score = params['solve_score']              # moving average that counts as "solved"

# Exploration rate for the e-greedy policy; decayed once per episode
epsilon = params['epsilon_start']
# One [score, moving average] row per episode; saved and plotted by the next cell
scores = []
# Sliding window holding the most recent `window_size` episode scores
scores_window = deque(maxlen=window_size)
# Moving average of the window; NaN until the first episode has finished
average_score = float('nan')

# Base name shared by the saved model, score table and figure.
# Built up front so that a missing hyperparameter fails before training starts.
run_name = "{:s}_lr{:.1E}_batch{:d}_model{:d}x{:d}".format(
    hparams['name'],
    hparams['learning_rate'],
    hparams['batch_size'],
    hparams['fc1_units'],
    hparams['fc2_units'],
)

# Train loop
for i_episode in range(1, n_episodes + 1):
    # Reset environment and observe the initial state
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]

    # Reset score and done flag
    score = 0
    done = False

    # Run one episode until the environment reports it is finished
    while not done:
        # Select action with e-greedy policy
        action = agent.act(state, epsilon)

        # Take the action and read back next state, reward and done flag
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]

        # Pass the transition to the agent
        agent.step(state, action, reward, next_state, done)

        # State transition and score bookkeeping
        state = next_state
        score += reward

    # Record the episode score together with the current moving average
    scores_window.append(score)
    average_score = np.mean(scores_window)
    scores.append([score, average_score])

    # Decay epsilon, never going below the configured floor
    epsilon = max(params['epsilon_final'], params['epsilon_decay'] * epsilon)

    # Print learning progress: overwrite the same line, and keep a permanent
    # line once every `window_size` episodes
    print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average_score),
          end="\n" if i_episode % window_size == 0 else "")

    # Only a full window is a meaningful average; without this guard a lucky
    # early episode would stop training and report a negative episode count
    if len(scores_window) == window_size and average_score >= solve_score:
        # The window that reached the target started `window_size` episodes ago
        solved_after = i_episode - window_size
        print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(solved_after, average_score))
        filename = "{:s}_solved{:d}".format(run_name, solved_after)
        torch.save(agent.qnetwork_local.state_dict(), 'models/{:s}.pth'.format(filename))
        break
else:
    # Episode budget exhausted without reaching the target score. `filename`
    # is still defined so the scores of this run can be saved and plotted.
    filename = "{:s}_unsolved{:d}".format(run_name, n_episodes)
    print('\nEnvironment not solved after {:d} episodes.\tAverage Score: {:.2f}'.format(n_episodes, average_score))


### 4. Plot score

In [None]:
# Save the per-episode scores and their moving average next to the model,
# using the run's `filename` set by the training cell
df = pd.DataFrame(scores, columns=['scores', 'average_scores'])
df.to_csv('scores/{:s}.csv'.format(filename))

# Plot raw scores (faint) and the moving average (bold) per episode
fig, ax = plt.subplots(figsize=(10, 5))
# Target line: read from the training settings so the figure always matches
# the threshold the training loop actually used
ax.axhline(params['solve_score'], color='red', lw=1, alpha=0.3)
ax.plot(df.index, df['scores'], color='lime', lw=1, label="score", alpha=0.4)
ax.plot(df.index, df['average_scores'], color='green', lw=2, label="average score")

# Set labels and legends
n_points = len(df.index)
ax.set_xlabel('Episode')
ax.set_xlim(0, n_points)
# One tick every 50 episodes, from 0 up to the last multiple of 50 in range
ax.set_xticks(np.arange(0, n_points + 1, 50))
ax.set_ylabel('Score')
# Ticks at 0, 3, ..., 21
ax.set_yticks(3 * np.arange(8))
ax.set_title('DQN agent')
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend()

# Save figure
fig.savefig('graphs/{:s}.png'.format(filename), bbox_inches='tight')
plt.show()

In [None]:
# Close the Unity environment; this is the notebook's last cell, after
# training and plotting are finished
env.close()