In [None]:
from unityagents import UnityEnvironment
import numpy as np

In [None]:
# Create the UnityEnvironment from the Banana executable at this relative path.
# NOTE(review): the path points at a Windows x86_64 build under ../../Desktop;
# it has to be adjusted on any machine where the build lives elsewhere.
env = UnityEnvironment(file_name = "../../Desktop/Banana_Windows_x86_64/Banana.exe")

In [None]:
# Inspect the environment: pick the default brain, reset once, and record the
# action/state sizes that the later cells rely on.

# The first brain exposed by the environment is used as the default brain;
# every env.reset()/env.step() result below is looked up under this name.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset the environment (train_mode=True) and keep this brain's info object.
env_info = env.reset(train_mode=True)[brain_name]

# Number of agents in the environment
print('Number of agents:', len(env_info.agents))

# Number of actions available to the agent
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# Examine the state space through the first agent's observation vector
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)
# Full observation container (all agents), printed for reference
print('Vector Observation:', env_info.vector_observations)

## Taking random actions

In [None]:
# Play one episode with uniformly random actions and report the total reward.
env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations[0]   # first agent's observation
score = 0                                 # running episode return
done = False
while not done:
    # Sample an action index in [0, action_size) and advance the environment.
    action = np.random.randint(action_size)
    env_info = env.step(action)[brain_name]

    # Read back the transition for the first agent.
    reward = env_info.rewards[0]
    done = env_info.local_done[0]
    next_state = env_info.vector_observations[0]

    score += reward
    state = next_state

print("Score:", score)

## Training the agent with DQN

In [None]:
# Train a DQN agent on the environment with an epsilon-greedy schedule.
#
# Relies on names defined by earlier cells: `env`, `brain_name`, `state_size`,
# `action_size` and `np`.
import torch
from collections import deque
import matplotlib.pyplot as plt
# Only dqn_agent.Agent is imported: the original cell also had
# `from agent import Agent`, which was immediately shadowed by this import.
from dqn_agent import Agent

# HYPERPARAMETERS (the cell used to define these twice; the values that took
# effect -- the later ones -- are kept)
n_episodes = 4000          # maximum number of training episodes
max_t = 1000               # maximum number of steps per episode
eps_start = 1.0            # initial epsilon handed to agent.act
eps_end = 0.01             # lower bound for epsilon
eps_decay = 0.999          # multiplicative epsilon decay applied once per episode
score_window_size = 50     # number of most recent episodes averaged for reporting
solved_score = 13.0        # average score over the window that ends training
checkpoint_path = 'checkpoint.pth'

# Sizes come from the environment inspection cell instead of literals 37 / 4.
agent = Agent(state_size=state_size, action_size=action_size, seed=0)
scores = []                                      # score of every episode, in order
scores_window = deque(maxlen=score_window_size)  # most recent scores only
eps = eps_start

for i_episode in range(n_episodes):
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    score = 0
    for t in range(max_t):
        action = agent.act(state, eps)
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]

        # Hand the transition to the agent (its learning hook).
        agent.step(state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done:
            break

    scores_window.append(score)
    scores.append(score)
    eps = max(eps_end, eps_decay * eps)  # decrease epsilon
    mean_score = np.mean(scores_window)
    print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score), end="")

    # Every 100 episodes keep the progress line and write a checkpoint.
    if i_episode % 100 == 0:
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score))
        torch.save(agent.qnetwork_local.state_dict(), checkpoint_path)

    # Only declare success once the averaging window is full; otherwise a single
    # lucky early episode would stop training. The reported episode count is the
    # number of episodes completed before the averaged window began.
    if len(scores_window) == score_window_size and mean_score >= solved_score:
        print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
            i_episode + 1 - score_window_size, mean_score))
        torch.save(agent.qnetwork_local.state_dict(), checkpoint_path)
        break

### Plot the scores

In [None]:
# Plot the per-episode scores recorded in `scores` by the training cell.
# The axes object is used directly, and `scores` is read as-is so the scalar
# `score` used by the other cells is not rebound to a list.
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode Number')
plt.show()

## Watch a trained agent

In [None]:
# Watch the trained agent: load the saved weights and run a few episodes
# with train_mode=False, printing the total reward of each one.
#
# Relies on `env`, `brain_name`, `state_size` and `action_size` from the
# earlier cells; torch is imported here so the cell also works when the
# training cell was skipped and only the checkpoint file is available.
import torch
from dqn_agent import Agent

n_watch_episodes = 5     # episodes to play back; raise to watch for longer
max_watch_steps = 200    # step cap per watched episode

agent = Agent(state_size=state_size, action_size=action_size, seed=0)
agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

for i in range(n_watch_episodes):
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]
    score = 0  # reset once per episode so rewards accumulate over all steps
    for j in range(max_watch_steps):
        action = agent.act(state)
        env_info = env.step(int(action))[brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]

        # agent.step() is deliberately not called here: it is the hook the
        # training loop feeds transitions to, and playback should not keep
        # updating the loaded policy.
        state = next_state
        score += reward
        if done:
            break

    print("Episode {}\tScore: {}".format(i + 1, score))

In [None]:
# Close the Unity environment once the notebook is finished with it.
env.close()