# Navigation

---

In this notebook, we load a trained agent from a checkpoint and visualize how it performs while picking up bananas.

### 1. Start the Environment

In [1]:
import matplotlib
%matplotlib inline

from unityagents import UnityEnvironment
import numpy as np

In [2]:
# Path of the Unity Banana executable handed to UnityEnvironment.
banana_binary = "Banana_Linux/Banana.x86_64"
env = UnityEnvironment(file_name=banana_binary)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Select the first brain the environment exposes; its name indexes both
# env.brains and the result of env.reset().
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
print("Num brains: ", len(env.brain_names))
print("Brain name: ", brain_name)

# Reset once so there is an initial observation to inspect.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the sizes first, then report them together.
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]
state_size = len(state)

print('Number of agents:', len(env_info.agents))
print('Number of actions:', action_size)
print('States have length:', state_size)

Num brains:  1
Brain name:  BananaBrain
Number of agents: 1
Number of actions: 4
States have length: 37


In [4]:
import time

from dqn_agent import Agent

# Checkpoint passed to agent.load() below.
checkpoint_file = 'checkpoint.pth'

# Pause before building the agent so the Unity environment can finish initializing.
print("Waiting a few seconds for the Unity environment to initialize...")
time.sleep(10)

# Size the agent from the observation length and the brain's action count.
n_actions = brain.vector_action_space_size
agent = Agent(state_size=state_size, action_size=n_actions, seed=0)

print("Loading agent parameters from checkpoint: ", checkpoint_file)
agent.load(checkpoint_file)

DEVICE being used ==> cuda:0
Waiting a few seconds for the Unity environment to initialize...
QNetwork(
  (fc1): Linear(in_features=37, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=4, bias=True)
)
Loading agent parameters from checkpoint:  checkpoint.pth


In [6]:
import time

# Legend for the progress markers printed while the episode runs.
print("### '.' implies a yellow banana and 'x' indicates a blue banana.")

# Begin a new episode and read the first observation for agent 0.
env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations[0]
score = 0
done = False

while not done:
    # Let the agent choose an action, then advance the environment one step.
    action = agent.act(state)
    env_info = env.step(action)[brain_name]

    # Unpack the outcome of the step for agent 0.
    next_state = env_info.vector_observations[0]
    reward = env_info.rewards[0]
    done = env_info.local_done[0]

    score += reward
    state = next_state

    # Positive reward prints '.', negative prints 'x', anything else prints nothing.
    marker = "." if reward > 0 else ("x" if reward < 0 else "")
    if marker:
        print(marker, end="")

    # Pace the playback between steps; no pause after the final step.
    if not done:
        time.sleep(0.25)

print()
print("Score: {}".format(score))

..........x.....
Score: 14.0


In [7]:
# Close the Unity environment now that the episode has been played.
env.close()