# Play with Smart Agent

In this notebook, we load an agent that was trained with Double DQN and let it play the game.

In [7]:
from unityagents import UnityEnvironment
import numpy as np
import random
import torch
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Start the Unity environment from the local "Banana.app" build with a fixed seed.
# NOTE(review): "Banana.app" looks like a macOS build; presumably file_name must be
# changed on other platforms — confirm.
env = UnityEnvironment(file_name="Banana.app", worker_id=21, seed=1)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [5]:
# get the default brain: the first brain name the environment reports,
# and the brain object stored under that name
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

Examine the environment: the number of agents, the number of actions, and what a state looks like.

In [6]:
# Reset the environment (training mode) and keep the info for the default brain.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the sizes the rest of the notebook needs: the size of the action
# space, the first agent's observation vector, and that vector's length.
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]
state_size = len(state)

# Report what was found.
print('Number of agents:', len(env_info.agents))
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

Number of agents: 1
Number of actions: 4
States look like: [0.         1.         0.         0.         0.27946243 0.
 1.         0.         0.         0.74556106 0.         0.
 1.         0.         0.48277503 0.         1.         0.
 0.         0.30341193 0.         0.         1.         0.
 0.46494457 0.         0.         1.         0.         0.08466676
 0.         1.         0.         0.         0.95967758 0.
 0.        ]
States have length: 37


In [8]:
# Reset the environment again (training mode) and keep the info for the default brain.
# NOTE(review): playGame resets the environment itself at the start of every episode,
# so this extra reset looks redundant — confirm before removing.
env_info = env.reset(train_mode=True)[brain_name]

In [11]:
from double_dqn_agent import Agent
from model import QNetwork

# load saved network

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Rebuild the Q-network with the environment's state/action sizes and move it to `device`.
# NOTE(review): the third positional argument (0) is presumably a seed — confirm
# against model.QNetwork.
network = QNetwork(state_size, action_size, 0).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# written on a GPU machine also loads on a CPU-only machine (without it,
# torch.load tries to restore the tensors to the device they were saved from).
network.load_state_dict(torch.load('double_agent_checkpoint.pth', map_location=device))

# Wrap the restored network in an agent.
smart_agent = Agent(state_size=state_size, action_size=action_size, seed=0, network=network)

yes


In [12]:
# function to play game

def playGame(agent, n_episodes=100, max_t=1000, eps_start=.01, eps_end=0.001, eps_decay=0.995):
    """Let ``agent`` play episodes in the Unity environment and report the scores.

    Relies on the notebook-level globals ``env`` and ``brain_name``.

    Args:
        agent: object exposing ``act(state, eps)`` and
            ``step(state, action, reward, next_state, done)``.
        n_episodes: number of episodes to play.
        max_t: maximum number of steps per episode.
        eps_start: epsilon passed to ``agent.act`` during the first episode.
        eps_end: lower bound for epsilon.
        eps_decay: factor epsilon is multiplied by after every episode.

    Returns:
        list: the total reward of each episode, in playing order.
    """
    eps = eps_start
    scores = []
    for i_episode in range(1, n_episodes+1):
        # train_mode=False is requested for every reset in the play loop.
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]

            next_state = env_info.vector_observations[0]   # get the next state
            reward = env_info.rewards[0]                   # get the reward
            done = env_info.local_done[0]                  # see if the episode has finished

            # NOTE(review): agent.step is still called for every transition. Whether
            # that updates the network depends on the Agent implementation, which is
            # not visible here — confirm this is intended while only playing.
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward

            if done:
                break
        print("Score at episode %s : %s" % (i_episode, score))
        scores.append(score)
        # Decay epsilon once per episode, never going below eps_end.
        eps = max(eps_end, eps_decay * eps)
    return scores

In [14]:
# Play 100 episodes with the agent built from the saved checkpoint;
# playGame prints the score of every episode.
playGame(smart_agent, n_episodes=100)