In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

from collections import deque

from ignite.engine import Engine, Events
from torch.utils.tensorboard import SummaryWriter
import datetime

from unityagents import UnityEnvironment

In [2]:
from dqn_per_dueling_agent import Agent

seed = 0
timesteps = list(range(10000))

# env = gym.make('LunarLander-v2')
# env.seed(seed)
env = UnityEnvironment(file_name="../Banana_Windows_x86_64/Banana.exe", no_graphics=True)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

action_size = brain.vector_action_space_size
env_info = env.reset(train_mode=True)[brain_name]
state_size = len(env_info.vector_observations[0])

torch.manual_seed(seed)

eps_start = 1.
eps_end = 0.01
eps_decay = 0.9  # 309 ep # 0.95 prev, 322

now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# writer = SummaryWriter("logs/unity-dueling/{}".format(now))

agent = Agent(state_size=state_size, action_size=action_size, seed=0) #, writer=writer)

scores = []
mean_scores = [] # for logging porpoises

cuda


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
EPISODE_STARTED = Events.EPOCH_STARTED
EPISODE_COMPLETED = Events.EPOCH_COMPLETED

def run_single_timestep(engine, timestep):
    eps = engine.state.eps
    state = engine.state.current_state
    #  (np.int32 because https://github.com/xkiwilabs/DQN-using-PyTorch-and-ML-Agents/issues/2)
    action = agent.act(state, eps).astype(np.int32)
    
#     next_state, reward, done, _ = env.step(action)
    env_info = env.step(action)[brain_name]
    next_state = env_info.vector_observations[0]   # get the next state
    reward = env_info.rewards[0]                   # get the reward
    done = env_info.local_done[0]
    
    agent.step(state, action, reward, next_state, done)
    
    engine.state.current_state = next_state
    engine.state.score += reward
    
    if done:
        engine.terminate_epoch()
        engine.state.timestep = timestep

trainer = Engine(run_single_timestep)

@trainer.on(Events.STARTED)
def initialize(engine):
    # lists containing scores from each episode
    engine.state.scores = []                        
    engine.state.scores_window = deque(maxlen=100)
    engine.state.eps = eps_start


@trainer.on(EPISODE_STARTED)
def reset_environment_state(engine):
#     engine.state.current_state = env.reset()
    env_info = env.reset(train_mode=False)[brain_name]
    engine.state.current_state = env_info.vector_observations[0]
    engine.state.score = 0

@trainer.on(EPISODE_COMPLETED)
def update_model(engine):
    engine.state.eps = max(eps_end, eps_decay*engine.state.eps) # decrease epsilon
    
    score = engine.state.score
    engine.state.scores.append(score)
    engine.state.scores_window.append(score)

    scores.append(score)
    mean_scores.append(np.mean(engine.state.scores_window))
    
# @trainer.on(EPISODE_COMPLETED(every=10))
# def log_episode_to_tensorboard(engine):
#     i = engine.state.epoch
# #     writer.add_scalar('running reward', engine.state.running_reward, i_episode)
#     writer.add_scalar('Average episode score', np.mean(engine.state.scores_window), i)
#     writer.add_scalar('Average environment score', np.mean(engine.state.scores), i)

@trainer.on(EPISODE_COMPLETED)
def should_finish_training(engine):
    if np.mean(engine.state.scores_window)>=13.0:
        print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(engine.state.epoch, np.mean(engine.state.scores_window)))
        torch.save(agent.qnetwork_local.state_dict(), 'checkpoint-per.pth')
        np.save('mean_scores.npy', np.array(mean_scores))
        np.save('scores.npy', np.array(scores))
        engine.should_terminate = True

In [4]:
trainer.run(timesteps, max_epochs=2000)

INFO:ignite.engine.engine.Engine:Engine run starting with max_epochs=2000.
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[1] Complete. Time taken: 00:00:56
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[2] Complete. Time taken: 00:00:56
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[3] Complete. Time taken: 00:00:56
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[4] Complete. Time taken: 00:00:56
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled.

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[39] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[40] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[41] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[42] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.

INFO:ignite.engine.engine.Engine:Epoch[77] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[78] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[79] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[80] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[81] Complete. Time taken: 00:00:32
INFO:ignite.engine.engine.Engine:Terminate current epoch is signal

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[116] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[117] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[118] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[119] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finis

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[154] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[155] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[156] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[157] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finis

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[192] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[193] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[194] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[195] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finis

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[230] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[231] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[232] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[233] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finis

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[268] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[269] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[270] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[271] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finis

INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[306] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[307] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[308] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Terminate current epoch is signaled. Current epoch iteration will stop after current iteration is finished.
INFO:ignite.engine.engine.Engine:Epoch[309] Complete. Time taken: 00:00:31
INFO:ignite.engine.engine.Engine:Engine run complete. Time taken: 02:41:04



Environment solved in 309 episodes!	Average Score: 13.02


State:
	iteration: 92700
	epoch: 309
	epoch_length: 10000
	max_epochs: 2000
	output: <class 'NoneType'>
	batch: 299
	metrics: <class 'dict'>
	dataloader: <class 'list'>
	seed: <class 'NoneType'>
	times: <class 'dict'>
	scores: <class 'list'>
	scores_window: <class 'collections.deque'>
	eps: 0.01
	current_state: <class 'numpy.ndarray'>
	score: 18.0
	timestep: 299