# Navigation Train

In this notebook we train a DQN agent to collect yellow bananas in a Unity environment.

## 1. Import all dependencies

In [2]:
from agent import Agent
from collections import deque
import numpy as np
import torch
import matplotlib.pyplot as plt
%matplotlib inline
from unityagents import UnityEnvironment

## 2. Training function

This function trains a DQN agent for `num_episodes` episodes and returns the trained agent together with the list of per-episode scores.

In [3]:
def dqn_train(env, num_episodes=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Train a DQN agent on a Unity environment using epsilon-greedy exploration.

    Args:
        env: Unity environment; must expose `brain_names`, `brains`,
            `reset(train_mode=...)` and `step(action)`.
        num_episodes (int): Number of episodes to train for.
        eps_start (float): Epsilon used during the first episode.
        eps_end (float): Floor below which epsilon is never decayed.
        eps_decay (float): Factor epsilon is multiplied by after every episode.

    Returns:
        tuple: `(agent, scores)` where `scores` is a list holding the total
        reward collected in each episode, in order.

    Note:
        The agent is constructed with the notebook-level `seed` variable,
        which must be defined before this function is called.
    """
    # Look the brain up on the environment itself so the function does not
    # rely on `brain_name`, `state_size` and `action_size` having been
    # defined by another notebook cell.
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    agent = Agent(brain.vector_observation_space_size,
                  brain.vector_action_space_size,
                  seed)

    scores = []
    # Rolling window used only for the progress report (last 100 episodes)
    scores_window = deque(maxlen=100)
    eps = eps_start

    for i in range(1, num_episodes + 1):
        # Reset the environment and get the initial state
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        # Initialize the score
        score = 0

        # Step until the environment reports the episode as finished
        while True:
            # Pass the current epsilon so that the decay schedule maintained
            # below actually influences action selection (previously `eps`
            # was computed but never used).
            # NOTE(review): assumes Agent.act accepts epsilon as its second
            # positional argument -- confirm against agent.py.
            action = agent.act(state, eps)
            # Send the action to the environment
            env_info = env.step(action)[brain_name]
            # Get the resulting state and reward
            reward = env_info.rewards[0]
            next_state = env_info.vector_observations[0]
            done = env_info.local_done[0]
            # Update score for this episode
            score += reward

            # Hand the transition to the agent; presumably it stores the
            # experience and learns once enough data is available -- see
            # Agent.step in agent.py.
            agent.step(state, action, reward, next_state, done)

            # Update state
            state = next_state

            if done:
                break

        # Update epsilon
        eps = max(eps_end, eps_decay * eps)
        # Update scores
        scores.append(score)
        scores_window.append(score)

        # Print progress: overwrite the same line each episode and keep a
        # permanent line every 100 episodes.
        progress = '\rEpisode {}\tAverage Score: {:.2f}'.format(i, np.mean(scores_window))
        print(progress, end="")
        if i % 100 == 0:
            print(progress)

    return agent, scores

## 3. Load the Unity Environment

In [4]:
# Seed that dqn_train passes to the Agent constructor
seed = 42

# Start the Banana environment from the local build
env = UnityEnvironment(file_name="environment/Banana.app")

# Use the first brain exposed by the environment
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Observation and action dimensions as reported by that brain
state_size, action_size = (
    brain.vector_observation_space_size,
    brain.vector_action_space_size,
)

Mono path[0] = '/Users/alonso/Desktop/DRL_Course/code/drl_navigation/environment/Banana.app/Contents/Resources/Data/Managed'
Mono config path = '/Users/alonso/Desktop/DRL_Course/code/drl_navigation/environment/Banana.app/Contents/MonoBleedingEdge/etc'


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


## 4. Train the agent

After training we plot the scores obtained across all episodes and save the trained model and scores to disk.

In [5]:
# Train the agent. The environment is closed in `finally` so the Unity
# process is released even if training is interrupted or raises.
try:
    agent, scores = dqn_train(env)
finally:
    # Close the environment
    env.close()

# Save the model
torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
np.savetxt('scores.csv', scores, delimiter=',')

# Plot the per-episode scores on an explicit Axes object
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

Episode 52	Average Score: -0.10

KeyboardInterrupt: 