# Navigation

In [1]:
# Import all dependencies
from agent import Agent
from collections import deque
import numpy as np

import torch
import matplotlib.pyplot as plt
%matplotlib inline

from unityagents import UnityEnvironment

In [2]:
# Start the Banana environment from the local Unity build
env = UnityEnvironment(
    file_name="/Users/alonso/Desktop/DRL_Course/code/drl_navigation/Banana.app"
)

# Seed handed to every Agent constructed in this notebook
seed = 42

# Use the first brain the environment exposes and read the sizes of its
# observation vector and action space
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
state_size, action_size = (
    brain.vector_observation_space_size,
    brain.vector_action_space_size,
)

Mono path[0] = '/Users/alonso/Desktop/DRL_Course/code/drl_navigation/Banana.app/Contents/Resources/Data/Managed'
Mono config path = '/Users/alonso/Desktop/DRL_Course/code/drl_navigation/Banana.app/Contents/MonoBleedingEdge/etc'


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [None]:
# Run length and epsilon schedule (start value, floor, per-episode factor)
num_episodes = 1000
eps_start, eps_end, eps_decay = 1.0, 0.01, 0.995

# Create the agent that will be trained
agent = Agent(state_size, action_size, seed)

# Training bookkeeping: the full score history, a rolling window holding
# the 100 most recent scores, and the current epsilon value
scores, scores_window = [], deque(maxlen=100)
eps = eps_start

# Train the agent: each pass of the outer loop plays one full episode.
for i in range(1, num_episodes + 1):
    # Reset the environment in training mode and read the initial state
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    # Sum of rewards collected during this episode
    score = 0

    # Keep stepping until the environment flags the episode as done
    while True:
        # Select an action using the current epsilon, so that the decay
        # schedule applied after each episode actually influences action
        # selection (previously `eps` was computed but never used).
        # NOTE(review): assumes Agent.act accepts epsilon as its second
        # positional argument -- confirm against agent.py.
        action = agent.act(state, eps)
        # Send the action to the environment
        env_info = env.step(action)[brain_name]
        # Read the reward, the resulting state and the termination flag
        reward = env_info.rewards[0]
        next_state = env_info.vector_observations[0]
        done = env_info.local_done[0]
        # Update the score for this episode
        score += reward

        # Hand the transition to the agent (storage and any learning
        # happen inside Agent.step)
        agent.step(state, action, reward, next_state, done)

        # Continue from the resulting state
        state = next_state

        if done:
            break

    # Decay epsilon once per episode, never dropping below eps_end
    eps = max(eps_end, eps_decay * eps)
    # Record the episode score in the history and the rolling window
    scores.append(score)
    scores_window.append(score)

    # Overwrite the progress line in place; every 100 episodes print it
    # again with a newline so that line is kept in the output
    progress = '\rEpisode {}\tAverage Score: {:.2f}'.format(i, np.mean(scores_window))
    print(progress, end="")
    if i % 100 == 0:
        print(progress)
    

# Persist the local Q-network's weights and the per-episode scores
torch.save(obj=agent.qnetwork_local.state_dict(), f='checkpoint.pth')
np.savetxt(fname='scores.csv', X=scores, delimiter=',')

# Draw the per-episode score curve on an explicit axes object
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

In [3]:
# Evaluate the saved weights: build a fresh agent and load the checkpoint
# into its local Q-network
num_test_runs = 10
test_agent = Agent(state_size, action_size, seed)
test_agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

for i in range(1, num_test_runs + 1):
    # Start a new episode with train_mode switched off
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]
    score = 0
    done = False
    while not done:
        # Act on the current state (no epsilon is passed here), then
        # advance the environment by one step
        action = test_agent.act(state)
        env_info = env.step(action)[brain_name]
        # Accumulate the reward and move on to the resulting state
        reward = env_info.rewards[0]
        score += reward
        state = env_info.vector_observations[0]
        done = env_info.local_done[0]
    print("Test episode {} Score: {}".format(i, score))
    

Test episode 1 Score: 20.0
Test episode 2 Score: 13.0
Test episode 3 Score: 20.0
Test episode 4 Score: 12.0
Test episode 5 Score: 20.0
Test episode 6 Score: 17.0
Test episode 7 Score: 4.0
Test episode 8 Score: 23.0
Test episode 9 Score: 24.0
Test episode 10 Score: 16.0


In [4]:
# Close the Unity environment handle (`env`) that the training and test
# cells above were stepping.
env.close()