In [None]:

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
def plot_training_scores(file_name, title, lim, window=100, ylim=(-5, 25)):
    """Plot the scores stored in a .npy file together with their trailing mean.

    Args:
        file_name: Path of the array file passed to ``np.load``.
        title: Title shown above the plot.
        lim: Upper bound of the x-axis (the lower bound is 0).
        window: Number of consecutive scores averaged for the red curve.
        ylim: ``(low, high)`` bounds of the y-axis.

    Raises:
        ValueError: If ``window`` is smaller than 1.

    Note:
        The font sizes are applied through the global matplotlib rcParams,
        so they also affect figures created after this call.
    """
    if window < 1:
        raise ValueError('window must be at least 1, got {}'.format(window))

    scores = np.load(file_name)
    n_scores = scores.shape[0]

    # The point plotted at x = end is the mean of scores[end - window:end].
    # The range stops at n_scores inclusive so that the window covering the
    # most recent scores is part of the curve as well.
    window_ends = list(range(window, n_scores + 1))
    rolling_means = [np.mean(scores[end - window:end]) for end in window_ends]

    # Set the text sizes before the axes exist so every label picks them up.
    plt.rcParams.update({
        'font.size': 20,         # default text size
        'axes.titlesize': 20,    # axes title
        'axes.labelsize': 20,    # x and y labels
        'xtick.labelsize': 20,   # x tick labels
        'ytick.labelsize': 20,   # y tick labels
        'legend.fontsize': 20,   # legend entries
        'figure.titlesize': 20,  # figure title
    })

    _, ax = plt.subplots(figsize=(12, 6))
    ax.set_xlim(0, lim)
    ax.set_ylim(*ylim)
    ax.plot(scores)
    # With fewer than `window` scores there is no complete window to average;
    # the raw scores are still drawn instead of failing on an empty array.
    if rolling_means:
        ax.plot(window_ends, rolling_means, 'r')
    ax.grid(True)
    ax.set_xlabel('Episode')
    ax.set_ylabel('Score')
    ax.set_title(title)


In [None]:
# One figure per saved score file: (scores file, plot title, x-axis limit).
for scores_file, plot_title, x_limit in (
    ('DQN_scores.npy', 'DQN', 1000),
    ('Double_DQN_scores.npy', 'DDQN', 1500),
    ('Dueling_DDQN_scores.npy', 'Dueling-DDQN', 1000),
):
    plot_training_scores(scores_file, plot_title, x_limit)


In [None]:
from agent import Agent
import torch
import time
from unityagents import UnityEnvironment

# Launch the Unity environment from the executable next to the notebook.
env = UnityEnvironment(file_name="Banana.exe")

# Checkpoint file that belongs to each learning algorithm. Keeping the pairs
# in one table means the algorithm and its weights cannot get out of sync.
checkpoint_paths = {
    'deep_Q_learning': 'DQN_checkpoint.pth',
    'double_deep_Q_learning': 'Double_DQN_checkpoint.pth',
    'dueling_deep_Q_learning': 'Dueling_DDQN_checkpoint.pth',
}

# Agent variant to load; must be one of the keys of checkpoint_paths.
test_alg = 'double_deep_Q_learning'
test_network_path = checkpoint_paths[test_alg]

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# NOTE(review): state_size=37 / action_size=4 presumably match the Banana
# environment's observation vector and action count - confirm against env.
agent = Agent(state_size=37, action_size=4, seed=0, learning_alg=test_alg)

# map_location='cpu' lets a checkpoint saved from CUDA tensors load on a
# machine without a GPU; load_state_dict then copies the values into the
# network's existing parameters.
agent.qnetwork_local.load_state_dict(
    torch.load(test_network_path, map_location='cpu')
)


In [None]:
# Playback settings.
n_episodes = 10      # number of episodes to run
max_t = 100000       # upper bound on steps per episode
step_delay = 0.05    # seconds to sleep after every step

# NOTE(review): eps is presumably the exploration rate used by Agent.act,
# with 0 meaning no random actions - confirm against the Agent class.
eps = 0.

try:
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = bool(env_info.local_done[0])
            state = next_state
            score += reward
            # '\r' rewrites the same output line while the episode is running.
            print('\rEpisode {}\tScore: {:.2f}'.format(i_episode, score), end="")
            time.sleep(step_delay)
            if done:
                break
        # End the progress line so the next episode does not overwrite this
        # episode's final score.
        print()
finally:
    # Close the environment even if the loop fails or is interrupted.
    env.close()
