# Nature DQN to Play Atari

Unofficial implementation of
- V. Minh, et. al, “Human-level control through deep reinforcement learning”, Nature 2015.

In [1]:
%matplotlib inline
import os
import sys
import time
import itertools
import logging

import numpy as np
np.random.seed(0)
import pandas as pd
import gym
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import matplotlib.pyplot as plt

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
        format='%(asctime)s [%(levelname)s] %(message)s')

2019-02-13 08:50:51,770 [DEBUG] Loaded backend module://ipykernel.pylab.backend_inline version unknown.


In [2]:
# env_spec_id = 'BreakoutDeterministic-v4'
# env_spec_id = 'PongDeterministic-v4'
# env_spec_id = 'SeaquestDeterministic-v4'
env_spec_id = 'SpaceInvadersDeterministic-v4'
# env_spec_id = 'BeamRiderDeterministic-v4'
env = gym.make(env_spec_id)
print('observation_space = {}'.format(env.observation_space))
print('action_space = {}'.format(env.action_space))
print(env.__dict__)
env.seed(0)

  result = entry_point.load(False)


observation_space = Box(210, 160, 3)
action_space = Discrete(6)
{'env': <gym.envs.atari.atari_env.AtariEnv object at 0x0000027E9B79A4E0>, 'action_space': Discrete(6), 'observation_space': Box(210, 160, 3), 'reward_range': (-inf, inf), 'metadata': {'render.modes': ['human', 'rgb_array']}, '_max_episode_seconds': None, '_max_episode_steps': 100000, '_elapsed_steps': 0, '_episode_started_at': None}


[0, 592379725]

In [3]:
class DQNReplayer:
    def __init__(self, capacity):
        self.memory = pd.DataFrame(index=range(capacity),
                columns=['observation', 'action', 'reward',
                'next_observation', 'done'])
        self.i = 0
        self.count = 0
        self.capacity = capacity
    
    def store(self, *args):
        self.memory.loc[self.i] = args
        self.i = (self.i + 1) % self.capacity
        self.count = min(self.count + 1, self.capacity)
        
    def sample(self, size):
        indices = np.random.choice(self.count, size=size)
        return tuple(np.stack(self.memory.loc[indices, field]) for \
                field in self.memory.columns)


class DQNAgent:
    def __init__(self, env, input_shape, learning_rate=0.00025,
            load_path=None, gamma=0.99,
            replay_memory_size=1000000, batch_size=32,
            replay_start_size=0,
            epsilon=1., epsilon_decrease_rate=9e-7, min_epsilon=0.1,
            random_inital_steps=0,
            clip_reward=True, rescale_state=True,
            update_freq=1, target_network_update_freq=1):
        
        self.action_n = env.action_space.n
        self.gamma = gamma
        
        self.replay_memory_size = replay_memory_size
        self.replay_start_size = replay_start_size
        self.batch_size = batch_size
        
        self.img_shape = (input_shape[-1], input_shape[-2])
        self.img_stack = input_shape[-3]
        
        self.epsilon = epsilon
        self.epsilon_decrease_rate = epsilon_decrease_rate
        self.min_epsilon = min_epsilon
        self.random_inital_steps = random_inital_steps
        
        self.clip_reward = clip_reward
        self.rescale_state = rescale_state
        
        self.update_freq = update_freq
        self.target_network_update_freq = target_network_update_freq
        
        self.replayer = DQNReplayer(replay_memory_size)
        
        self.step = 0
        self.fit_count = 0

        self.evaluate_net = self.build_network(
                input_shape=input_shape, output_size=self.action_n,
                conv_activation=tf.nn.relu,
                fc_hidden_sizes=[512,], fc_activation=tf.nn.relu,
                learning_rate=learning_rate, load_path=load_path)
        self.evaluate_net.summary() # print summary
        
        self.target_net = self.build_network(
                input_shape=input_shape, output_size=self.action_n,
                conv_activation=tf.nn.relu,
                fc_hidden_sizes=[512,], fc_activation=tf.nn.relu,
                )
        
        self.update_target_network()
        

    def build_network(self, input_shape, output_size, conv_activation,
            fc_hidden_sizes, fc_activation, output_activation=None,
            learning_rate=0.001, load_path=None):
        # network input: (samples, channels, rows, cols)
        model = keras.models.Sequential()
        # change from (channel, width, height) -> (width, height, channel)
        model.add(keras.layers.Permute((2, 3, 1), input_shape=input_shape))
        
        model.add(keras.layers.Conv2D(32, 8, strides=(4, 4),
                activation=conv_activation))
        model.add(keras.layers.Conv2D(64, 4, strides=(2, 2),
                activation=conv_activation))
        model.add(keras.layers.Conv2D(64, 3, strides=(1, 1),
                activation=conv_activation))
        
        model.add(keras.layers.Flatten())
        
        for hidden_size in fc_hidden_sizes:
            model.add(keras.layers.Dense(hidden_size,
                    activation=fc_activation))
        model.add(keras.layers.Dense(output_size,
                activation=output_activation))

        if load_path is not None:
            logging.info('Loading weights from file {}.'.format(load_path))
            model.load_weights(load_path)

        try: # tf2
            optimizer = keras.optimizers.RMSprop(learning_rate, 0.95,
                    momentum=0.95, epsilon=0.01)
        except: # tf1
            optimizer = tf.train.RMSPropOptimizer(learning_rate, 0.95,
                    momentum=0.95, epsilon=0.01)
        model.compile(loss=keras.losses.mse, optimizer=optimizer)
        return model
        
    def get_next_state(self, state=None, observation=None):
        img = Image.fromarray(observation, 'RGB') 
        img = img.resize(self.img_shape).convert('L') # To gray-scale and Resize
        img = np.asarray(img.getdata(), dtype=np.uint8
                ).reshape(img.size[1], img.size[0]) # Convert image to array
        
        if state is None:
            next_state = np.array([img,] * self.img_stack)
        else:
            next_state = np.append(state[1:], [img,], axis=0)
        return next_state
    
    def decide(self, state, test=False, step=None):
        if step is not None and step < self.random_inital_steps:
            epsilon = 1.
        elif test:
            epsilon = 0.05
        else:
            epsilon = self.epsilon
        if np.random.rand() < epsilon:
            action = np.random.choice(self.action_n)
        else:
            if self.rescale_state:
                state = state / 128. - 1.
            q_values = self.evaluate_net.predict(state[np.newaxis])[0]
            action = np.argmax(q_values)
        return action

    def learn(self, state, action, reward, next_state, done):
        self.replayer.store(state, action, reward, next_state, done)

        self.step += 1
        
        if self.step % self.update_freq == 0 and \
                self.replayer.count >= self.replay_start_size:

            states, actions, rewards, next_states, dones = \
                    self.replayer.sample(self.batch_size)

            if self.rescale_state:
                states = states / 128. - 1.
                next_states = next_states / 128. - 1.
            if self.clip_reward:
                rewards = np.clip(rewards, -1., 1.)
            
            next_qs = self.target_net.predict(next_states)
            next_max_qs = next_qs.max(axis=-1)
            targets = self.evaluate_net.predict(states)
            targets[range(self.batch_size), actions] = rewards + \
                    self.gamma * next_max_qs * (1. - dones)

            h = self.evaluate_net.fit(states, targets, verbose=0)
            self.fit_count += 1
            
            if self.fit_count % 100 == 0:
                logging.info(('fit-count : {}, epsilon = {}, ' +
                        'memory = {}, loss = {}').format(self.fit_count,
                        self.epsilon, self.replayer.count,
                        h.history['loss'][0]))
            
            if self.fit_count % self.target_network_update_freq == 0:
                self.update_target_network()
        
        # update_epsilon : Linear epsilon annealing
        # Decreases the probability of picking a random action,
        # to improve exploitation.
        if self.step >= self.replay_start_size:
            self.epsilon = max(self.epsilon - self.epsilon_decrease_rate,
                               self.min_epsilon)

    def update_target_network(self):
        logging.info('target network updated.')
        self.target_net.set_weights(self.evaluate_net.get_weights())

    def save_network(self, path):
        dirname = os.path.dirname(save_path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
            logging.info('create directory = {}'.format(dirname))
        self.evaluate_net.save_weights(path)
        logging.info('model saves to {}'.format(path))

In [4]:
def test(env, agent, episodes=100, render=False, verbose=True):
    steps, episode_rewards = [], []
    for episode in range(episodes):
        episode_reward = 0
        observation = env.reset()
        state = agent.get_next_state(None, observation)
        for step in itertools.count():
            if render:
                env.render()
            action = agent.decide(state, test=True, step=step)
            observation, reward, done, info = env.step(action)
            state = agent.get_next_state(state, observation)
            episode_reward += reward
            if done:
                break
        step += 1
        steps.append(step)
        episode_rewards.append(episode_reward)
        logging.info('[test] episode {}: step {}, reward {}, total_step {}'
                .format(episode, step, episode_reward, np.sum(steps)))
            
    if verbose:
        logging.info(('[test summary] step: mean = {}, min = {}, max = {}.'
                + ' episode reward: mean = {}, min = {}, max = {}').format(
                np.mean(steps), np.min(steps), np.max(steps),
                np.mean(episode_rewards), np.min(episode_rewards),
                np.max(episode_rewards)))
    return episode_rewards, steps

In [None]:
render = False
load_path = None
save_path = './output/' + env.unwrapped.spec.id + '-' + \
        time.strftime('%Y%m%d-%H%M%S') + '/model.h5'

"""
Large-scale parameters: designated by Nature DQN
"""
input_shape = (4, 110, 84) # input_shape of network
batch_size = 32
replay_memory_size = 1000000
target_network_update_freq = 10000
gamma = 0.99
update_freq = 4 # frequency (number of steps) with which to train the DQN
learning_rate = 0.00025 # learning rate for optimizer
epsilon = 1. # initial exploration rate for the agent
min_epsilon = 0.1 # final exploration rate for the agent
epsilon_decrease = 9e-7 # rate at which to linearly decrease epsilon
replay_start_size = 50000 # minimum transitions before starting training
random_inital_steps = 30 # number of random actions to be performed by the agent at the beginning of each episode
frames = 50000000 # maximum number of frames during the whole algorithm
test_freq = 50000 # number of frames to test the agent's performance
test_episodes = 100

"""
Median-scale parameters : finish in days
"""
batch_size = 32
replay_memory_size = 100000
target_network_update_freq = 5000
replay_start_size = 20000
random_inital_steps = 30
frames = 1000000
test_freq = 40000
test_episodes = 100

"""
Small-scale parameters : finish in minutes
"""
# batch_size = 6
# replay_memory_size = 5000
# target_network_update_freq = 80
# replay_start_size = 500
# random_inital_steps = 30
# frames = 7500
# test_freq = 2500
# test_episodes = 10


agent = DQNAgent(env, input_shape=input_shape, batch_size=batch_size,
        replay_memory_size=replay_memory_size,        
        learning_rate=learning_rate, gamma=gamma,
        epsilon=epsilon, epsilon_decrease_rate=epsilon_decrease,
        min_epsilon=min_epsilon,
        random_inital_steps=random_inital_steps,
        load_path=load_path,
        update_freq=update_freq,
        target_network_update_freq=target_network_update_freq)

logging.info("training starts")

frame = 0
max_mean_episode_reward = float("-inf")
for episode in itertools.count():
    observation = env.reset()
    episode_reward = 0
    state = agent.get_next_state(None, observation)
    for step in itertools.count():
        if render:
            env.render()
        frame += 1
        action = agent.decide(state, step=step)
        observation, reward, done, _ = env.step(action)
        next_state = agent.get_next_state(state, observation)
        episode_reward += reward
        agent.learn(state, action, reward, next_state, done)
        
        if frame % test_freq == 0 or \
                (done and (frame + 1) % test_freq == 0):
            test_episode_rewards, test_steps = test(env=env,
                    agent=agent, episodes=test_episodes, render=render)
            if max_mean_episode_reward < np.mean(test_episode_rewards):
                max_mean_episode_reward = np.mean(test_episode_rewards)
                agent.save_network(save_path)
                path = save_path[:-2] + str(agent.fit_count) + '.h5'
                agent.save_network(path)
        
        if done:
            step += 1
            frame += 1
            break
        state = next_state
    
    logging.info("episode {}, step {}, reward {}, frame {}".format(
            episode, step, episode_reward, frame))
    
    if frame > frames:
        break

logging.info("training ends")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute (Permute)            (None, 110, 84, 4)        0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 20, 32)        8224      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 12, 9, 64)         32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 7, 64)         36928     
_________________________________________________________________
flatten (Flatten)            (None, 4480)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               2294272   
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 3078      
Total para

2019-02-13 09:15:11,148 [INFO] fit-count : 4100, epsilon = 0.9852409000004859, memory = 16400, loss = 0.00013710367784369737
2019-02-13 09:15:36,347 [INFO] episode 25, step 680, reward 120.0, frame 16708
2019-02-13 09:15:47,273 [INFO] fit-count : 4200, epsilon = 0.9848809000004978, memory = 16800, loss = 0.0001744408073136583
2019-02-13 09:16:23,246 [INFO] fit-count : 4300, epsilon = 0.9845209000005096, memory = 17200, loss = 0.0052605741657316685
2019-02-13 09:16:44,947 [INFO] episode 26, step 759, reward 165.0, frame 17468
2019-02-13 09:16:59,957 [INFO] fit-count : 4400, epsilon = 0.9841609000005215, memory = 17600, loss = 0.0001078589993994683
2019-02-13 09:17:36,624 [INFO] fit-count : 4500, epsilon = 0.9838009000005333, memory = 18000, loss = 0.005181307904422283
2019-02-13 09:17:48,668 [INFO] episode 27, step 687, reward 135.0, frame 18156
2019-02-13 09:18:13,851 [INFO] fit-count : 4600, epsilon = 0.9834409000005452, memory = 18400, loss = 7.745139009784907e-05
2019-02-13 09:18:36

2019-02-13 09:47:49,404 [INFO] fit-count : 9100, epsilon = 0.9672409000010785, memory = 36400, loss = 0.005364275071769953
2019-02-13 09:48:32,543 [INFO] fit-count : 9200, epsilon = 0.9668809000010904, memory = 36800, loss = 0.00013514110469259322
2019-02-13 09:49:05,191 [INFO] episode 51, step 1442, reward 430.0, frame 37155
2019-02-13 09:49:15,577 [INFO] fit-count : 9300, epsilon = 0.9665209000011022, memory = 37200, loss = 0.004842420108616352
2019-02-13 09:49:57,998 [INFO] fit-count : 9400, epsilon = 0.9661609000011141, memory = 37600, loss = 0.005250522401183844
2019-02-13 09:50:13,653 [INFO] episode 52, step 642, reward 105.0, frame 37798
2019-02-13 09:50:41,135 [INFO] fit-count : 9500, epsilon = 0.965800900001126, memory = 38000, loss = 0.00022713519865646958
2019-02-13 09:51:23,188 [INFO] episode 53, step 644, reward 135.0, frame 38443
2019-02-13 09:51:24,546 [INFO] fit-count : 9600, epsilon = 0.9654409000011378, memory = 38400, loss = 0.0001077071501640603
2019-02-13 09:52:07,

2019-02-13 10:27:52,901 [INFO] [test] episode 75: step 902, reward 155.0, total_step 63245
2019-02-13 10:28:30,540 [INFO] [test] episode 76: step 1400, reward 455.0, total_step 64645
2019-02-13 10:28:55,644 [INFO] [test] episode 77: step 951, reward 110.0, total_step 65596
2019-02-13 10:29:21,069 [INFO] [test] episode 78: step 951, reward 390.0, total_step 66547
2019-02-13 10:29:36,580 [INFO] [test] episode 79: step 581, reward 50.0, total_step 67128
2019-02-13 10:30:00,254 [INFO] [test] episode 80: step 885, reward 230.0, total_step 68013
2019-02-13 10:30:19,931 [INFO] [test] episode 81: step 733, reward 80.0, total_step 68746
2019-02-13 10:30:47,070 [INFO] [test] episode 82: step 1043, reward 315.0, total_step 69789
2019-02-13 10:31:05,049 [INFO] [test] episode 83: step 665, reward 160.0, total_step 70454
2019-02-13 10:31:21,415 [INFO] [test] episode 84: step 617, reward 125.0, total_step 71071
2019-02-13 10:31:38,500 [INFO] [test] episode 85: step 641, reward 110.0, total_step 71712

2019-02-13 10:58:13,887 [INFO] episode 74, step 610, reward 155.0, frame 52763
2019-02-13 10:58:25,490 [INFO] fit-count : 13200, epsilon = 0.9524809000015645, memory = 52800, loss = 0.0001735777477733791
2019-02-13 10:59:04,429 [INFO] episode 75, step 481, reward 80.0, frame 53245
2019-02-13 10:59:07,900 [INFO] fit-count : 13300, epsilon = 0.9521209000015763, memory = 53200, loss = 0.00012492078531067818
2019-02-13 10:59:50,077 [INFO] fit-count : 13400, epsilon = 0.9517609000015882, memory = 53600, loss = 0.0002642504405230284
2019-02-13 11:00:17,299 [INFO] episode 76, step 681, reward 155.0, frame 53927
2019-02-13 11:00:33,497 [INFO] fit-count : 13500, epsilon = 0.9514009000016, memory = 54000, loss = 0.009859294630587101
2019-02-13 11:01:16,376 [INFO] fit-count : 13600, epsilon = 0.9510409000016119, memory = 54400, loss = 0.00016405939823016524
2019-02-13 11:01:29,873 [INFO] episode 77, step 675, reward 180.0, frame 54603
2019-02-13 11:01:58,964 [INFO] fit-count : 13700, epsilon = 0.

2019-02-13 11:36:15,521 [INFO] episode 101, step 965, reward 265.0, frame 72231
2019-02-13 11:36:50,749 [INFO] fit-count : 18100, epsilon = 0.9348409000021453, memory = 72400, loss = 0.00016553049499634653
2019-02-13 11:37:11,847 [INFO] episode 102, step 442, reward 35.0, frame 72674
2019-02-13 11:37:41,698 [INFO] fit-count : 18200, epsilon = 0.9344809000021571, memory = 72800, loss = 0.00013588719593826681
2019-02-13 11:38:18,771 [INFO] episode 103, step 517, reward 110.0, frame 73192
2019-02-13 11:38:33,600 [INFO] fit-count : 18300, epsilon = 0.934120900002169, memory = 73200, loss = 0.005464256275445223
2019-02-13 11:39:25,351 [INFO] fit-count : 18400, epsilon = 0.9337609000021808, memory = 73600, loss = 8.008220174815506e-05
2019-02-13 11:39:42,400 [INFO] episode 104, step 638, reward 120.0, frame 73831
2019-02-13 11:40:19,653 [INFO] fit-count : 18500, epsilon = 0.9334009000021927, memory = 74000, loss = 0.00014075220678932965
2019-02-13 11:40:53,310 [INFO] episode 105, step 533, r

2019-02-13 13:15:12,779 [INFO] [test] episode 55: step 1147, reward 260.0, total_step 43507
2019-02-13 13:17:07,739 [INFO] [test] episode 56: step 950, reward 355.0, total_step 44457
2019-02-13 13:18:59,682 [INFO] [test] episode 57: step 925, reward 220.0, total_step 45382
2019-02-13 13:21:01,229 [INFO] [test] episode 58: step 1019, reward 245.0, total_step 46401
2019-02-13 13:22:58,942 [INFO] [test] episode 59: step 1005, reward 310.0, total_step 47406
2019-02-13 13:24:33,182 [INFO] [test] episode 60: step 773, reward 240.0, total_step 48179
2019-02-13 13:25:21,224 [INFO] [test] episode 61: step 401, reward 65.0, total_step 48580
2019-02-13 13:27:14,864 [INFO] [test] episode 62: step 911, reward 195.0, total_step 49491
2019-02-13 13:28:49,300 [INFO] [test] episode 63: step 775, reward 180.0, total_step 50266
2019-02-13 13:30:46,929 [INFO] [test] episode 64: step 953, reward 110.0, total_step 51219
2019-02-13 13:33:22,826 [INFO] [test] episode 65: step 1251, reward 305.0, total_step 52

2019-02-13 14:44:56,168 [INFO] fit-count : 22200, epsilon = 0.9200809000026312, memory = 88800, loss = 0.0048592728562653065
2019-02-13 14:45:57,089 [INFO] fit-count : 22300, epsilon = 0.9197209000026431, memory = 89200, loss = 4.4431792048271745e-05
2019-02-13 14:46:12,452 [INFO] episode 127, step 529, reward 105.0, frame 89429
2019-02-13 14:46:59,290 [INFO] fit-count : 22400, epsilon = 0.9193609000026549, memory = 89600, loss = 7.145700510591269e-05
2019-02-13 14:47:52,069 [INFO] episode 128, step 636, reward 110.0, frame 90066
2019-02-13 14:48:01,814 [INFO] fit-count : 22500, epsilon = 0.9190009000026668, memory = 90000, loss = 9.062470780918375e-05
2019-02-13 14:49:03,433 [INFO] fit-count : 22600, epsilon = 0.9186409000026786, memory = 90400, loss = 0.005112146493047476
2019-02-13 14:49:09,949 [INFO] episode 129, step 503, reward 110.0, frame 90570
2019-02-13 14:50:04,106 [INFO] fit-count : 22700, epsilon = 0.9182809000026905, memory = 90800, loss = 0.0012901739683002234
2019-02-13

2019-02-13 15:37:43,345 [INFO] fit-count : 26900, epsilon = 0.9031609000031883, memory = 100000, loss = 0.00016081274952739477
2019-02-13 15:38:19,489 [INFO] episode 156, step 631, reward 155.0, frame 107959
2019-02-13 15:38:56,135 [INFO] fit-count : 27000, epsilon = 0.9028009000032001, memory = 100000, loss = 6.123675120761618e-05
2019-02-13 15:40:07,784 [INFO] episode 157, step 591, reward 60.0, frame 108551
2019-02-13 15:40:09,172 [INFO] fit-count : 27100, epsilon = 0.902440900003212, memory = 100000, loss = 6.827740435255691e-05
2019-02-13 15:41:21,228 [INFO] fit-count : 27200, epsilon = 0.9020809000032238, memory = 100000, loss = 5.6124677939806134e-05
2019-02-13 15:42:24,319 [INFO] episode 158, step 759, reward 135.0, frame 109311
2019-02-13 15:42:33,231 [INFO] fit-count : 27300, epsilon = 0.9017209000032357, memory = 100000, loss = 0.004717187490314245
2019-02-13 15:43:44,556 [INFO] fit-count : 27400, epsilon = 0.9013609000032475, memory = 100000, loss = 0.00014222023310139775
2

2019-02-13 17:38:55,892 [INFO] [test] episode 34: step 787, reward 135.0, total_step 25400
2019-02-13 17:40:54,749 [INFO] [test] episode 35: step 799, reward 165.0, total_step 26199
2019-02-13 17:41:52,015 [INFO] [test] episode 36: step 394, reward 30.0, total_step 26593
2019-02-13 17:42:50,033 [INFO] [test] episode 37: step 391, reward 80.0, total_step 26984
2019-02-13 17:44:24,813 [INFO] [test] episode 38: step 644, reward 55.0, total_step 27628
2019-02-13 17:46:10,192 [INFO] [test] episode 39: step 715, reward 55.0, total_step 28343
2019-02-13 17:47:07,568 [INFO] [test] episode 40: step 391, reward 80.0, total_step 28734
2019-02-13 17:48:04,794 [INFO] [test] episode 41: step 391, reward 80.0, total_step 29125
2019-02-13 17:49:02,970 [INFO] [test] episode 42: step 405, reward 50.0, total_step 29530
2019-02-13 17:49:59,742 [INFO] [test] episode 43: step 391, reward 80.0, total_step 29921
2019-02-13 17:51:32,063 [INFO] [test] episode 44: step 628, reward 55.0, total_step 30549
2019-02-

2019-02-13 19:45:29,719 [INFO] fit-count : 31200, epsilon = 0.8876809000036979, memory = 100000, loss = 0.00016899986076168716
2019-02-13 19:46:54,009 [INFO] fit-count : 31300, epsilon = 0.8873209000037098, memory = 100000, loss = 0.0001109539734898135
2019-02-13 19:47:37,413 [INFO] episode 179, step 838, reward 180.0, frame 125581
2019-02-13 19:48:20,839 [INFO] fit-count : 31400, epsilon = 0.8869609000037216, memory = 100000, loss = 7.18183146091178e-05
2019-02-13 19:49:01,458 [INFO] episode 180, step 381, reward 35.0, frame 125963
2019-02-13 19:49:47,733 [INFO] fit-count : 31500, epsilon = 0.8866009000037335, memory = 100000, loss = 0.005158647429198027
2019-02-13 19:51:11,444 [INFO] fit-count : 31600, epsilon = 0.8862409000037453, memory = 100000, loss = 9.409078484168276e-05
2019-02-13 19:52:07,465 [INFO] episode 181, step 874, reward 240.0, frame 126838
2019-02-13 19:52:39,834 [INFO] fit-count : 31700, epsilon = 0.8858809000037572, memory = 100000, loss = 8.41118089738302e-05
2019

2019-02-13 20:49:47,117 [INFO] fit-count : 35900, epsilon = 0.870760900004255, memory = 100000, loss = 7.38703238312155e-05
2019-02-13 20:50:59,986 [INFO] fit-count : 36000, epsilon = 0.8704009000042668, memory = 100000, loss = 7.546898268628865e-05
2019-02-13 20:51:32,435 [INFO] episode 207, step 655, reward 120.0, frame 144383
2019-02-13 20:52:13,689 [INFO] fit-count : 36100, epsilon = 0.8700409000042787, memory = 100000, loss = 0.005216426681727171
2019-02-13 20:53:11,636 [INFO] episode 208, step 546, reward 60.0, frame 144930
2019-02-13 20:53:25,508 [INFO] fit-count : 36200, epsilon = 0.8696809000042905, memory = 100000, loss = 0.006697160191833973
2019-02-13 20:54:38,441 [INFO] fit-count : 36300, epsilon = 0.8693209000043024, memory = 100000, loss = 0.0049988338723778725
2019-02-13 20:55:36,142 [INFO] episode 209, step 793, reward 210.0, frame 145724
2019-02-13 20:55:51,070 [INFO] fit-count : 36400, epsilon = 0.8689609000043143, memory = 100000, loss = 0.005228124093264341
2019-02

2019-02-13 22:06:53,196 [INFO] [test] episode 12: step 667, reward 90.0, total_step 11702
2019-02-13 22:08:44,404 [INFO] [test] episode 13: step 708, reward 30.0, total_step 12410
2019-02-13 22:09:38,631 [INFO] [test] episode 14: step 339, reward 5.0, total_step 12749
2019-02-13 22:12:21,635 [INFO] [test] episode 15: step 963, reward 315.0, total_step 13712
2019-02-13 22:15:25,091 [INFO] [test] episode 16: step 1073, reward 220.0, total_step 14785
2019-02-13 22:16:30,834 [INFO] [test] episode 17: step 405, reward 65.0, total_step 15190
2019-02-13 22:18:18,175 [INFO] [test] episode 18: step 685, reward 110.0, total_step 15875
2019-02-13 22:20:05,532 [INFO] [test] episode 19: step 695, reward 175.0, total_step 16570
2019-02-13 22:22:03,072 [INFO] [test] episode 20: step 703, reward 120.0, total_step 17273
2019-02-13 22:25:23,372 [INFO] [test] episode 21: step 1168, reward 450.0, total_step 18441
2019-02-13 22:28:05,307 [INFO] [test] episode 22: step 962, reward 195.0, total_step 19403
20

2019-02-14 01:11:28,005 [INFO] episode 232, step 339, reward 40.0, frame 160002
2019-02-14 01:12:17,785 [INFO] fit-count : 40000, epsilon = 0.8560009000047409, memory = 100000, loss = 0.005310897249728441
2019-02-14 01:12:17,797 [INFO] target network updated.
2019-02-14 01:13:43,421 [INFO] fit-count : 40100, epsilon = 0.8556409000047528, memory = 100000, loss = 8.88607683009468e-05
2019-02-14 01:14:25,786 [INFO] episode 233, step 828, reward 225.0, frame 160831
2019-02-14 01:15:10,301 [INFO] fit-count : 40200, epsilon = 0.8552809000047646, memory = 100000, loss = 5.045244324719533e-05
2019-02-14 01:16:34,669 [INFO] fit-count : 40300, epsilon = 0.8549209000047765, memory = 100000, loss = 0.005045093595981598
2019-02-14 01:17:58,033 [INFO] fit-count : 40400, epsilon = 0.8545609000047883, memory = 100000, loss = 8.783515659160912e-05
2019-02-14 01:19:25,784 [INFO] fit-count : 40500, epsilon = 0.8542009000048002, memory = 100000, loss = 0.00530151417478919
2019-02-14 01:19:32,920 [INFO] ep

2019-02-14 02:21:21,469 [INFO] fit-count : 44800, epsilon = 0.8387209000053099, memory = 100000, loss = 0.010597466491162777
2019-02-14 02:21:35,422 [INFO] episode 259, step 892, reward 485.0, frame 179535
2019-02-14 02:22:34,179 [INFO] fit-count : 44900, epsilon = 0.8383609000053217, memory = 100000, loss = 0.00017876453057397157
2019-02-14 02:23:31,829 [INFO] episode 260, step 643, reward 105.0, frame 180179
2019-02-14 02:23:46,868 [INFO] fit-count : 45000, epsilon = 0.8380009000053336, memory = 100000, loss = 0.00031478257733397186
2019-02-14 02:23:46,877 [INFO] target network updated.
2019-02-14 02:25:01,502 [INFO] fit-count : 45100, epsilon = 0.8376409000053454, memory = 100000, loss = 4.572648322209716e-05
2019-02-14 02:25:38,474 [INFO] episode 261, step 680, reward 135.0, frame 180860
2019-02-14 02:26:16,073 [INFO] fit-count : 45200, epsilon = 0.8372809000053573, memory = 100000, loss = 0.00492597371339798
2019-02-14 02:27:33,520 [INFO] fit-count : 45300, epsilon = 0.83692090000

2019-02-14 03:21:49,520 [INFO] fit-count : 49700, epsilon = 0.8210809000058906, memory = 100000, loss = 0.00021463852317538112
2019-02-14 03:21:59,995 [INFO] episode 285, step 794, reward 210.0, frame 199142
2019-02-14 03:23:03,384 [INFO] fit-count : 49800, epsilon = 0.8207209000059025, memory = 100000, loss = 0.004946551751345396
2019-02-14 03:24:17,281 [INFO] fit-count : 49900, epsilon = 0.8203609000059143, memory = 100000, loss = 0.00011008008732460439
2019-02-14 03:24:20,712 [INFO] episode 286, step 765, reward 70.0, frame 199908
2019-02-14 03:26:17,730 [INFO] [test] episode 0: step 798, reward 185.0, total_step 798
2019-02-14 03:28:08,480 [INFO] [test] episode 1: step 877, reward 255.0, total_step 1675
2019-02-14 03:31:03,794 [INFO] [test] episode 2: step 1370, reward 735.0, total_step 3045
2019-02-14 03:32:52,697 [INFO] [test] episode 3: step 874, reward 210.0, total_step 3919
2019-02-14 03:34:38,223 [INFO] [test] episode 4: step 784, reward 385.0, total_step 4703
2019-02-14 03:3

2019-02-14 05:56:17,357 [INFO] [test] episode 85: step 789, reward 140.0, total_step 61330
2019-02-14 05:57:40,645 [INFO] [test] episode 86: step 642, reward 125.0, total_step 61972
2019-02-14 05:59:23,675 [INFO] [test] episode 87: step 791, reward 160.0, total_step 62763
2019-02-14 06:00:25,959 [INFO] [test] episode 88: step 496, reward 70.0, total_step 63259
2019-02-14 06:01:45,375 [INFO] [test] episode 89: step 626, reward 125.0, total_step 63885
2019-02-14 06:03:43,074 [INFO] [test] episode 90: step 922, reward 240.0, total_step 64807
2019-02-14 06:05:22,231 [INFO] [test] episode 91: step 783, reward 360.0, total_step 65590
2019-02-14 06:07:08,243 [INFO] [test] episode 92: step 821, reward 410.0, total_step 66411
2019-02-14 06:09:39,674 [INFO] [test] episode 93: step 955, reward 245.0, total_step 67366
2019-02-14 06:11:50,247 [INFO] [test] episode 94: step 805, reward 200.0, total_step 68171
2019-02-14 06:12:51,426 [INFO] [test] episode 95: step 390, reward 85.0, total_step 68561
2

2019-02-14 07:18:27,081 [INFO] fit-count : 53900, epsilon = 0.8059609000063884, memory = 100000, loss = 0.0101300198584795
2019-02-14 07:19:18,752 [INFO] episode 310, step 654, reward 120.0, frame 216138
2019-02-14 07:19:57,250 [INFO] fit-count : 54000, epsilon = 0.8056009000064003, memory = 100000, loss = 8.681006147526205e-05
2019-02-14 07:21:27,037 [INFO] fit-count : 54100, epsilon = 0.8052409000064121, memory = 100000, loss = 0.005542210768908262
2019-02-14 07:21:46,296 [INFO] episode 311, step 661, reward 115.0, frame 216800
2019-02-14 07:22:55,712 [INFO] fit-count : 54200, epsilon = 0.804880900006424, memory = 100000, loss = 0.0001635618828004226
2019-02-14 07:23:04,338 [INFO] episode 312, step 352, reward 75.0, frame 217153
2019-02-14 07:24:25,700 [INFO] fit-count : 54300, epsilon = 0.8045209000064358, memory = 100000, loss = 0.00013699055125471205
2019-02-14 07:24:36,058 [INFO] episode 313, step 405, reward 30.0, frame 217559
2019-02-14 07:25:54,171 [INFO] fit-count : 54400, ep

2019-02-14 08:25:09,751 [INFO] fit-count : 58700, epsilon = 0.7886809000069573, memory = 100000, loss = 9.6558389486745e-05
2019-02-14 08:25:28,257 [INFO] episode 336, step 825, reward 160.0, frame 235235
2019-02-14 08:26:25,118 [INFO] fit-count : 58800, epsilon = 0.7883209000069692, memory = 100000, loss = 0.00013911753194406629
2019-02-14 08:26:41,675 [INFO] episode 337, step 393, reward 35.0, frame 235629
2019-02-14 08:27:40,720 [INFO] fit-count : 58900, epsilon = 0.787960900006981, memory = 100000, loss = 8.940051338868216e-05
2019-02-14 08:28:53,322 [INFO] fit-count : 59000, epsilon = 0.7876009000069929, memory = 100000, loss = 0.00015458415145985782
2019-02-14 08:30:05,384 [INFO] episode 338, step 1095, reward 220.0, frame 236725
2019-02-14 08:30:08,468 [INFO] fit-count : 59100, epsilon = 0.7872409000070048, memory = 100000, loss = 9.759663953445852e-05
2019-02-14 08:31:22,087 [INFO] fit-count : 59200, epsilon = 0.7868809000070166, memory = 100000, loss = 0.0002955244272015989
20

2019-02-14 10:30:49,978 [INFO] [test] episode 66: step 709, reward 150.0, total_step 48479
2019-02-14 10:31:51,410 [INFO] [test] episode 67: step 435, reward 95.0, total_step 48914
2019-02-14 10:34:43,476 [INFO] [test] episode 68: step 1190, reward 445.0, total_step 50104
2019-02-14 10:36:54,535 [INFO] [test] episode 69: step 904, reward 355.0, total_step 51008
2019-02-14 10:38:40,419 [INFO] [test] episode 70: step 710, reward 110.0, total_step 51718
2019-02-14 10:41:33,428 [INFO] [test] episode 71: step 1164, reward 210.0, total_step 52882
2019-02-14 10:43:10,197 [INFO] [test] episode 72: step 658, reward 120.0, total_step 53540
2019-02-14 10:44:19,192 [INFO] [test] episode 73: step 482, reward 35.0, total_step 54022
2019-02-14 10:45:46,422 [INFO] [test] episode 74: step 608, reward 30.0, total_step 54630
2019-02-14 10:47:57,442 [INFO] [test] episode 75: step 931, reward 225.0, total_step 55561
2019-02-14 10:49:35,174 [INFO] [test] episode 76: step 834, reward 360.0, total_step 56395


2019-02-14 12:02:34,478 [INFO] fit-count : 62700, epsilon = 0.7742809000074314, memory = 100000, loss = 0.005142488516867161
2019-02-14 12:04:05,456 [INFO] fit-count : 62800, epsilon = 0.7739209000074433, memory = 100000, loss = 5.298672112985514e-05
2019-02-14 12:04:26,053 [INFO] episode 363, step 615, reward 160.0, frame 251659
2019-02-14 12:05:33,072 [INFO] fit-count : 62900, epsilon = 0.7735609000074551, memory = 100000, loss = 8.156571857398376e-05
2019-02-14 12:07:05,617 [INFO] fit-count : 63000, epsilon = 0.773200900007467, memory = 100000, loss = 0.00035572724300436676
2019-02-14 12:08:35,316 [INFO] episode 364, step 1099, reward 440.0, frame 252759
2019-02-14 12:08:36,930 [INFO] fit-count : 63100, epsilon = 0.7728409000074788, memory = 100000, loss = 0.005175672937184572
2019-02-14 12:10:05,229 [INFO] fit-count : 63200, epsilon = 0.7724809000074907, memory = 100000, loss = 3.831936192000285e-05
2019-02-14 12:11:16,913 [INFO] episode 365, step 723, reward 135.0, frame 253483
20

2019-02-14 13:14:42,899 [INFO] episode 388, step 517, reward 115.0, frame 270529
2019-02-14 13:15:40,216 [INFO] fit-count : 67600, epsilon = 0.7566409000080122, memory = 100000, loss = 0.00017011714226100594
2019-02-14 13:17:07,707 [INFO] episode 389, step 654, reward 135.0, frame 271184
2019-02-14 13:17:09,388 [INFO] fit-count : 67700, epsilon = 0.7562809000080241, memory = 100000, loss = 0.00016813381807878613
2019-02-14 13:18:38,897 [INFO] fit-count : 67800, epsilon = 0.7559209000080359, memory = 100000, loss = 0.0050561572425067425
2019-02-14 13:19:37,354 [INFO] episode 390, step 663, reward 50.0, frame 271848
2019-02-14 13:20:08,434 [INFO] fit-count : 67900, epsilon = 0.7555609000080478, memory = 100000, loss = 0.014901885762810707
2019-02-14 13:21:35,667 [INFO] fit-count : 68000, epsilon = 0.7552009000080596, memory = 100000, loss = 5.7130073400912806e-05
2019-02-14 13:23:03,357 [INFO] episode 391, step 938, reward 405.0, frame 272787
2019-02-14 13:23:04,996 [INFO] fit-count : 68

2019-02-14 15:22:38,682 [INFO] [test] episode 47: step 683, reward 175.0, total_step 43392
2019-02-14 15:25:59,013 [INFO] [test] episode 48: step 1281, reward 445.0, total_step 44673
2019-02-14 15:28:56,109 [INFO] [test] episode 49: step 1059, reward 160.0, total_step 45732
2019-02-14 15:31:57,642 [INFO] [test] episode 50: step 1130, reward 225.0, total_step 46862
2019-02-14 15:34:57,814 [INFO] [test] episode 51: step 1099, reward 220.0, total_step 47961
2019-02-14 15:37:56,614 [INFO] [test] episode 52: step 1131, reward 260.0, total_step 49092
2019-02-14 15:40:58,102 [INFO] [test] episode 53: step 1123, reward 320.0, total_step 50215
2019-02-14 15:43:32,978 [INFO] [test] episode 54: step 974, reward 165.0, total_step 51189
2019-02-14 15:46:02,170 [INFO] [test] episode 55: step 1023, reward 400.0, total_step 52212
2019-02-14 15:47:36,784 [INFO] [test] episode 56: step 665, reward 150.0, total_step 52877
2019-02-14 15:49:09,510 [INFO] [test] episode 57: step 639, reward 105.0, total_ste

2019-02-14 17:38:25,093 [INFO] fit-count : 71800, epsilon = 0.74152090000851, memory = 100000, loss = 0.00514769833534956
2019-02-14 17:38:57,518 [INFO] episode 411, step 944, reward 240.0, frame 287754
2019-02-14 17:39:53,431 [INFO] fit-count : 71900, epsilon = 0.7411609000085219, memory = 100000, loss = 6.798323738621548e-05
2019-02-14 17:41:23,518 [INFO] fit-count : 72000, epsilon = 0.7408009000085337, memory = 100000, loss = 7.74208820075728e-05
2019-02-14 17:42:06,282 [INFO] episode 412, step 845, reward 220.0, frame 288600
2019-02-14 17:42:55,940 [INFO] fit-count : 72100, epsilon = 0.7404409000085456, memory = 100000, loss = 0.010122999548912048
2019-02-14 17:43:59,084 [INFO] episode 413, step 505, reward 30.0, frame 289106
2019-02-14 17:44:22,868 [INFO] fit-count : 72200, epsilon = 0.7400809000085574, memory = 100000, loss = 4.7120778617681935e-05
2019-02-14 17:45:52,927 [INFO] fit-count : 72300, epsilon = 0.7397209000085693, memory = 100000, loss = 0.009928001090884209
2019-02-

2019-02-14 18:48:12,922 [INFO] fit-count : 76500, epsilon = 0.7246009000090671, memory = 100000, loss = 0.00010335652041248977
2019-02-14 18:48:39,740 [INFO] episode 439, step 603, reward 80.0, frame 306582
2019-02-14 18:49:27,432 [INFO] fit-count : 76600, epsilon = 0.7242409000090789, memory = 100000, loss = 0.005285081919282675
2019-02-14 18:50:42,941 [INFO] fit-count : 76700, epsilon = 0.7238809000090908, memory = 100000, loss = 0.00017630195361562073
2019-02-14 18:51:32,224 [INFO] episode 440, step 930, reward 260.0, frame 307513
2019-02-14 18:51:55,942 [INFO] fit-count : 76800, epsilon = 0.7235209000091026, memory = 100000, loss = 0.008038320578634739
2019-02-14 18:52:45,566 [INFO] episode 441, step 395, reward 75.0, frame 307909
2019-02-14 18:53:09,135 [INFO] fit-count : 76900, epsilon = 0.7231609000091145, memory = 100000, loss = 0.0001825130166253075
2019-02-14 18:54:22,707 [INFO] fit-count : 77000, epsilon = 0.7228009000091263, memory = 100000, loss = 0.005097773391753435
2019

2019-02-14 20:05:26,985 [INFO] [test] episode 26: step 933, reward 210.0, total_step 19320
2019-02-14 20:06:39,122 [INFO] [test] episode 27: step 643, reward 110.0, total_step 19963
2019-02-14 20:07:40,085 [INFO] [test] episode 28: step 543, reward 65.0, total_step 20506
2019-02-14 20:09:01,755 [INFO] [test] episode 29: step 719, reward 140.0, total_step 21225
2019-02-14 20:10:46,626 [INFO] [test] episode 30: step 934, reward 205.0, total_step 22159
2019-02-14 20:11:58,387 [INFO] [test] episode 31: step 649, reward 110.0, total_step 22808
2019-02-14 20:12:53,271 [INFO] [test] episode 32: step 501, reward 60.0, total_step 23309
2019-02-14 20:14:55,610 [INFO] [test] episode 33: step 1087, reward 230.0, total_step 24396
2019-02-14 20:17:39,960 [INFO] [test] episode 34: step 1471, reward 520.0, total_step 25867
2019-02-14 20:18:42,847 [INFO] [test] episode 35: step 581, reward 65.0, total_step 26448
2019-02-14 20:20:33,444 [INFO] [test] episode 36: step 991, reward 425.0, total_step 27439


2019-02-14 21:28:31,063 [INFO] fit-count : 80700, epsilon = 0.7094809000095649, memory = 100000, loss = 7.851310510886833e-05
2019-02-14 21:29:10,436 [INFO] episode 463, step 996, reward 305.0, frame 323537
2019-02-14 21:29:29,920 [INFO] fit-count : 80800, epsilon = 0.7091209000095767, memory = 100000, loss = 9.65730578172952e-05
2019-02-14 21:30:15,659 [INFO] episode 464, step 413, reward 80.0, frame 323951
2019-02-14 21:30:32,533 [INFO] fit-count : 80900, epsilon = 0.7087609000095886, memory = 100000, loss = 4.8420326493214816e-05
2019-02-14 21:31:22,102 [INFO] episode 465, step 453, reward 80.0, frame 324405
2019-02-14 21:31:31,475 [INFO] fit-count : 81000, epsilon = 0.7084009000096004, memory = 100000, loss = 0.004957431927323341
2019-02-14 21:32:34,172 [INFO] fit-count : 81100, epsilon = 0.7080409000096123, memory = 100000, loss = 6.546131044160575e-05
2019-02-14 21:33:14,439 [INFO] episode 466, step 724, reward 120.0, frame 325130
2019-02-14 21:33:34,388 [INFO] fit-count : 81200,

2019-02-14 22:17:20,770 [INFO] fit-count : 85500, epsilon = 0.6922009000101338, memory = 100000, loss = 0.009548219852149487
2019-02-14 22:18:22,290 [INFO] fit-count : 85600, epsilon = 0.6918409000101456, memory = 100000, loss = 0.00010915363964159042
2019-02-14 22:18:29,267 [INFO] episode 490, step 727, reward 380.0, frame 342936
2019-02-14 22:19:23,728 [INFO] fit-count : 85700, epsilon = 0.6914809000101575, memory = 100000, loss = 0.0001984296104637906
2019-02-14 22:19:33,086 [INFO] episode 491, step 415, reward 15.0, frame 343352
2019-02-14 22:20:24,620 [INFO] fit-count : 85800, epsilon = 0.6911209000101693, memory = 100000, loss = 0.00016907541430555284
2019-02-14 22:20:58,568 [INFO] episode 492, step 566, reward 45.0, frame 343919
2019-02-14 22:21:24,992 [INFO] fit-count : 85900, epsilon = 0.6907609000101812, memory = 100000, loss = 0.00020372447033878416
2019-02-14 22:22:25,847 [INFO] fit-count : 86000, epsilon = 0.690400900010193, memory = 100000, loss = 8.292730490211397e-05
20

2019-02-14 23:10:48,183 [INFO] [test] episode 10: step 533, reward 45.0, total_step 7294
2019-02-14 23:12:01,932 [INFO] [test] episode 11: step 812, reward 230.0, total_step 8106
2019-02-14 23:12:32,285 [INFO] [test] episode 12: step 436, reward 40.0, total_step 8542
2019-02-14 23:13:51,210 [INFO] [test] episode 13: step 965, reward 545.0, total_step 9507
2019-02-14 23:14:53,267 [INFO] [test] episode 14: step 644, reward 110.0, total_step 10151
2019-02-14 23:15:40,733 [INFO] [test] episode 15: step 634, reward 85.0, total_step 10785
2019-02-14 23:16:25,780 [INFO] [test] episode 16: step 583, reward 60.0, total_step 11368
2019-02-14 23:17:09,573 [INFO] [test] episode 17: step 720, reward 180.0, total_step 12088
2019-02-14 23:18:01,942 [INFO] [test] episode 18: step 835, reward 210.0, total_step 12923
2019-02-14 23:18:51,693 [INFO] [test] episode 19: step 824, reward 150.0, total_step 13747
2019-02-14 23:19:27,127 [INFO] [test] episode 20: step 610, reward 215.0, total_step 14357
2019-02

2019-02-15 00:49:34,005 [INFO] episode 513, step 455, reward 135.0, frame 360002
2019-02-15 00:49:54,223 [INFO] fit-count : 89900, epsilon = 0.6763609000106553, memory = 100000, loss = 8.591924415668473e-05
2019-02-15 00:51:08,523 [INFO] fit-count : 90000, epsilon = 0.6760009000106671, memory = 100000, loss = 0.0033695793244987726
2019-02-15 00:51:08,523 [INFO] target network updated.
2019-02-15 00:52:24,459 [INFO] fit-count : 90100, epsilon = 0.675640900010679, memory = 100000, loss = 0.004771546460688114
2019-02-15 00:52:31,511 [INFO] episode 514, step 954, reward 230.0, frame 360957
2019-02-15 00:53:30,860 [INFO] fit-count : 90200, epsilon = 0.6752809000106909, memory = 100000, loss = 0.0002791171136777848
2019-02-15 00:54:34,843 [INFO] fit-count : 90300, epsilon = 0.6749209000107027, memory = 100000, loss = 0.0003514678101055324
2019-02-15 00:54:38,945 [INFO] episode 515, step 785, reward 335.0, frame 361743
2019-02-15 00:55:43,723 [INFO] fit-count : 90400, epsilon = 0.674560900010

2019-02-15 08:15:35,760 [INFO] fit-count : 94600, epsilon = 0.6594409000112124, memory = 100000, loss = 0.00019968929700553417
2019-02-15 08:16:35,585 [INFO] fit-count : 94700, epsilon = 0.6590809000112242, memory = 100000, loss = 0.009698327630758286
2019-02-15 08:16:51,559 [INFO] episode 541, step 585, reward 90.0, frame 379446
2019-02-15 08:17:35,071 [INFO] fit-count : 94800, epsilon = 0.6587209000112361, memory = 100000, loss = 0.00014453852782025933
2019-02-15 08:18:27,557 [INFO] episode 542, step 636, reward 110.0, frame 380083
2019-02-15 08:18:36,094 [INFO] fit-count : 94900, epsilon = 0.6583609000112479, memory = 100000, loss = 8.638619328849018e-05
2019-02-15 08:19:36,569 [INFO] fit-count : 95000, epsilon = 0.6580009000112598, memory = 100000, loss = 0.005077006760984659
2019-02-15 08:19:36,579 [INFO] target network updated.
2019-02-15 08:19:49,224 [INFO] episode 543, step 544, reward 70.0, frame 380628
2019-02-15 08:20:34,595 [INFO] fit-count : 95100, epsilon = 0.657640900011

2019-02-15 09:00:30,915 [INFO] fit-count : 99300, epsilon = 0.6425209000117694, memory = 100000, loss = 0.00010384533379692584
2019-02-15 09:01:27,441 [INFO] fit-count : 99400, epsilon = 0.6421609000117813, memory = 100000, loss = 0.00481942156329751
2019-02-15 09:01:27,579 [INFO] episode 569, step 953, reward 230.0, frame 398172
2019-02-15 09:02:22,427 [INFO] fit-count : 99500, epsilon = 0.6418009000117931, memory = 100000, loss = 4.737111885333434e-05
2019-02-15 09:03:04,681 [INFO] episode 570, step 699, reward 160.0, frame 398872
2019-02-15 09:03:18,144 [INFO] fit-count : 99600, epsilon = 0.641440900011805, memory = 100000, loss = 0.000373795279301703
2019-02-15 09:04:12,243 [INFO] fit-count : 99700, epsilon = 0.6410809000118168, memory = 100000, loss = 0.00033525656908750534
2019-02-15 09:04:53,029 [INFO] episode 571, step 798, reward 175.0, frame 399671
2019-02-15 09:05:06,812 [INFO] fit-count : 99800, epsilon = 0.6407209000118287, memory = 100000, loss = 0.00011608750355662778
20

2019-02-15 10:00:45,169 [INFO] [test] episode 80: step 583, reward 140.0, total_step 69024
2019-02-15 10:01:32,385 [INFO] [test] episode 81: step 1016, reward 285.0, total_step 70040
2019-02-15 10:02:04,703 [INFO] [test] episode 82: step 703, reward 180.0, total_step 70743
2019-02-15 10:02:45,999 [INFO] [test] episode 83: step 859, reward 235.0, total_step 71602
2019-02-15 10:03:10,820 [INFO] [test] episode 84: step 530, reward 115.0, total_step 72132
2019-02-15 10:03:44,955 [INFO] [test] episode 85: step 745, reward 190.0, total_step 72877
2019-02-15 10:04:37,780 [INFO] [test] episode 86: step 1140, reward 380.0, total_step 74017
2019-02-15 10:05:25,410 [INFO] [test] episode 87: step 1020, reward 295.0, total_step 75037
2019-02-15 10:06:02,616 [INFO] [test] episode 88: step 794, reward 80.0, total_step 75831
2019-02-15 10:06:34,457 [INFO] [test] episode 89: step 682, reward 110.0, total_step 76513
2019-02-15 10:07:15,158 [INFO] [test] episode 90: step 855, reward 290.0, total_step 773

2019-02-15 10:45:23,481 [INFO] fit-count : 103400, epsilon = 0.6277609000122554, memory = 100000, loss = 0.0001762400643201545
2019-02-15 10:46:18,330 [INFO] fit-count : 103500, epsilon = 0.6274009000122672, memory = 100000, loss = 0.00020108360331505537
2019-02-15 10:46:41,819 [INFO] episode 591, step 946, reward 225.0, frame 414764
2019-02-15 10:47:12,868 [INFO] fit-count : 103600, epsilon = 0.6270409000122791, memory = 100000, loss = 0.005166370887309313
2019-02-15 10:48:04,262 [INFO] episode 592, step 611, reward 155.0, frame 415376
2019-02-15 10:48:06,597 [INFO] fit-count : 103700, epsilon = 0.6266809000122909, memory = 100000, loss = 7.840039324946702e-05
2019-02-15 10:49:01,162 [INFO] fit-count : 103800, epsilon = 0.6263209000123028, memory = 100000, loss = 0.004803222604095936
2019-02-15 10:49:18,619 [INFO] episode 593, step 545, reward 110.0, frame 415922
2019-02-15 10:49:54,349 [INFO] fit-count : 103900, epsilon = 0.6259609000123146, memory = 100000, loss = 0.0049898284487426

2019-02-15 11:28:48,306 [INFO] episode 616, step 993, reward 350.0, frame 433448
2019-02-15 11:29:39,142 [INFO] fit-count : 108300, epsilon = 0.6101209000128361, memory = 100000, loss = 0.0003221447695977986
2019-02-15 11:30:16,046 [INFO] episode 617, step 642, reward 155.0, frame 434091
2019-02-15 11:30:32,543 [INFO] fit-count : 108400, epsilon = 0.609760900012848, memory = 100000, loss = 0.004598892759531736
2019-02-15 11:31:26,658 [INFO] fit-count : 108500, epsilon = 0.6094009000128598, memory = 100000, loss = 0.004485447891056538
2019-02-15 11:31:36,138 [INFO] episode 618, step 595, reward 105.0, frame 434687
2019-02-15 11:32:21,096 [INFO] fit-count : 108600, epsilon = 0.6090409000128717, memory = 100000, loss = 0.005537000019103289
2019-02-15 11:32:42,823 [INFO] episode 619, step 493, reward 55.0, frame 435181
2019-02-15 11:33:15,232 [INFO] fit-count : 108700, epsilon = 0.6086809000128836, memory = 100000, loss = 0.005218787584453821
2019-02-15 11:34:02,328 [INFO] episode 620, ste

2019-02-15 12:21:03,321 [INFO] [test] episode 60: step 884, reward 145.0, total_step 49232
2019-02-15 12:21:45,532 [INFO] [test] episode 61: step 929, reward 150.0, total_step 50161
2019-02-15 12:22:27,206 [INFO] [test] episode 62: step 909, reward 140.0, total_step 51070
2019-02-15 12:23:05,143 [INFO] [test] episode 63: step 837, reward 120.0, total_step 51907
2019-02-15 12:23:46,685 [INFO] [test] episode 64: step 904, reward 155.0, total_step 52811
2019-02-15 12:24:12,431 [INFO] [test] episode 65: step 557, reward 100.0, total_step 53368
2019-02-15 12:24:43,580 [INFO] [test] episode 66: step 675, reward 45.0, total_step 54043
2019-02-15 12:25:28,349 [INFO] [test] episode 67: step 975, reward 350.0, total_step 55018
2019-02-15 12:26:12,158 [INFO] [test] episode 68: step 903, reward 250.0, total_step 55921
2019-02-15 12:26:31,567 [INFO] [test] episode 69: step 440, reward 60.0, total_step 56361
2019-02-15 12:27:36,626 [INFO] [test] episode 70: step 1383, reward 455.0, total_step 57744


2019-02-15 13:11:09,040 [INFO] fit-count : 112500, epsilon = 0.5950009000133339, memory = 100000, loss = 0.00022661533148493618
2019-02-15 13:11:28,542 [INFO] episode 640, step 617, reward 155.0, frame 450787
2019-02-15 13:12:03,486 [INFO] fit-count : 112600, epsilon = 0.5946409000133458, memory = 100000, loss = 0.00021523433679249138
2019-02-15 13:12:58,465 [INFO] fit-count : 112700, epsilon = 0.5942809000133576, memory = 100000, loss = 0.00028066791128367186
2019-02-15 13:13:20,834 [INFO] episode 641, step 813, reward 160.0, frame 451601
2019-02-15 13:13:54,951 [INFO] fit-count : 112800, epsilon = 0.5939209000133695, memory = 100000, loss = 0.00010858719178941101
2019-02-15 13:14:50,912 [INFO] fit-count : 112900, epsilon = 0.5935609000133814, memory = 100000, loss = 0.00017611570365261286
2019-02-15 13:15:38,006 [INFO] episode 642, step 977, reward 315.0, frame 452579
2019-02-15 13:15:46,527 [INFO] fit-count : 113000, epsilon = 0.5932009000133932, memory = 100000, loss = 0.0001299098

2019-02-15 13:52:53,719 [INFO] episode 667, step 804, reward 210.0, frame 469115
2019-02-15 13:53:42,027 [INFO] fit-count : 117200, epsilon = 0.578080900013891, memory = 100000, loss = 0.00024340426898561418
2019-02-15 13:54:11,307 [INFO] episode 668, step 561, reward 135.0, frame 469677
2019-02-15 13:54:37,665 [INFO] fit-count : 117300, epsilon = 0.5777209000139029, memory = 100000, loss = 9.300171950599179e-05
2019-02-15 13:55:32,023 [INFO] fit-count : 117400, epsilon = 0.5773609000139147, memory = 100000, loss = 0.00922311469912529
2019-02-15 13:55:59,093 [INFO] episode 669, step 785, reward 210.0, frame 470463
2019-02-15 13:56:27,463 [INFO] fit-count : 117500, epsilon = 0.5770009000139266, memory = 100000, loss = 0.0061037917621433735
2019-02-15 13:57:23,430 [INFO] fit-count : 117600, epsilon = 0.5766409000139384, memory = 100000, loss = 0.00011008799629053101
2019-02-15 13:58:18,245 [INFO] fit-count : 117700, epsilon = 0.5762809000139503, memory = 100000, loss = 6.324343121377751e

2019-02-15 14:41:21,780 [INFO] [test] episode 40: step 647, reward 205.0, total_step 32221
2019-02-15 14:41:49,870 [INFO] [test] episode 41: step 649, reward 65.0, total_step 32870
2019-02-15 14:42:19,969 [INFO] [test] episode 42: step 677, reward 160.0, total_step 33547
2019-02-15 14:42:39,288 [INFO] [test] episode 43: step 436, reward 55.0, total_step 33983
2019-02-15 14:43:21,436 [INFO] [test] episode 44: step 945, reward 250.0, total_step 34928
2019-02-15 14:44:18,282 [INFO] [test] episode 45: step 1281, reward 435.0, total_step 36209
2019-02-15 14:44:46,242 [INFO] [test] episode 46: step 646, reward 105.0, total_step 36855
2019-02-15 14:45:26,285 [INFO] [test] episode 47: step 929, reward 245.0, total_step 37784
2019-02-15 14:46:08,721 [INFO] [test] episode 48: step 963, reward 275.0, total_step 38747
2019-02-15 14:46:32,968 [INFO] [test] episode 49: step 547, reward 125.0, total_step 39294
2019-02-15 14:47:10,269 [INFO] [test] episode 50: step 846, reward 160.0, total_step 40140


2019-02-15 15:28:50,442 [INFO] episode 691, step 773, reward 210.0, frame 486194
2019-02-15 15:29:03,892 [INFO] fit-count : 121400, epsilon = 0.5629609000143888, memory = 100000, loss = 0.00016114820027723908
2019-02-15 15:29:59,594 [INFO] fit-count : 121500, epsilon = 0.5626009000144007, memory = 100000, loss = 0.00020984161528758705
2019-02-15 15:30:54,374 [INFO] fit-count : 121600, epsilon = 0.5622409000144125, memory = 100000, loss = 0.00022743632143829018
2019-02-15 15:31:47,493 [INFO] fit-count : 121700, epsilon = 0.5618809000144244, memory = 100000, loss = 0.01020622719079256
2019-02-15 15:31:54,350 [INFO] episode 692, step 1351, reward 425.0, frame 487546
2019-02-15 15:32:41,605 [INFO] fit-count : 121800, epsilon = 0.5615209000144362, memory = 100000, loss = 0.00023050178424455225
2019-02-15 15:33:19,187 [INFO] episode 693, step 621, reward 35.0, frame 488168
2019-02-15 15:33:36,211 [INFO] fit-count : 121900, epsilon = 0.5611609000144481, memory = 100000, loss = 0.0001678654516

2019-02-15 16:13:09,732 [INFO] fit-count : 126200, epsilon = 0.5456809000149577, memory = 100000, loss = 0.00019632140174508095
2019-02-15 16:14:05,344 [INFO] fit-count : 126300, epsilon = 0.5453209000149696, memory = 100000, loss = 0.005317814648151398
2019-02-15 16:14:15,515 [INFO] episode 717, step 951, reward 245.0, frame 505991
2019-02-15 16:15:00,591 [INFO] fit-count : 126400, epsilon = 0.5449609000149814, memory = 100000, loss = 0.0002587635244708508
2019-02-15 16:15:55,714 [INFO] fit-count : 126500, epsilon = 0.5446009000149933, memory = 100000, loss = 0.004827425815165043
2019-02-15 16:16:21,115 [INFO] episode 718, step 911, reward 155.0, frame 506903
2019-02-15 16:16:50,291 [INFO] fit-count : 126600, epsilon = 0.5442409000150051, memory = 100000, loss = 0.00013073577429167926
2019-02-15 16:17:28,234 [INFO] episode 719, step 490, reward 90.0, frame 507394
2019-02-15 16:17:45,183 [INFO] fit-count : 126700, epsilon = 0.543880900015017, memory = 100000, loss = 0.00036496980465017

2019-02-15 17:01:27,089 [INFO] [test] episode 22: step 789, reward 320.0, total_step 19766
2019-02-15 17:01:56,450 [INFO] [test] episode 23: step 619, reward 110.0, total_step 20385
2019-02-15 17:02:34,396 [INFO] [test] episode 24: step 782, reward 180.0, total_step 21167
2019-02-15 17:03:04,004 [INFO] [test] episode 25: step 619, reward 110.0, total_step 21786
2019-02-15 17:03:42,237 [INFO] [test] episode 26: step 818, reward 210.0, total_step 22604
2019-02-15 17:04:20,822 [INFO] [test] episode 27: step 814, reward 180.0, total_step 23418
2019-02-15 17:04:50,153 [INFO] [test] episode 28: step 617, reward 105.0, total_step 24035
2019-02-15 17:05:27,840 [INFO] [test] episode 29: step 797, reward 320.0, total_step 24832
2019-02-15 17:06:03,838 [INFO] [test] episode 30: step 759, reward 335.0, total_step 25591
2019-02-15 17:06:33,269 [INFO] [test] episode 31: step 619, reward 105.0, total_step 26210
2019-02-15 17:07:02,934 [INFO] [test] episode 32: step 619, reward 110.0, total_step 26829

2019-02-15 17:53:50,956 [INFO] fit-count : 130200, epsilon = 0.5312809000154318, memory = 100000, loss = 0.005018100142478943
2019-02-15 17:54:45,759 [INFO] fit-count : 130300, epsilon = 0.5309209000154437, memory = 100000, loss = 0.0003421325236558914
2019-02-15 17:54:50,487 [INFO] episode 740, step 647, reward 125.0, frame 521975
2019-02-15 17:55:41,442 [INFO] fit-count : 130400, epsilon = 0.5305609000154555, memory = 100000, loss = 0.005018744617700577
2019-02-15 17:56:14,722 [INFO] episode 741, step 607, reward 50.0, frame 522583
2019-02-15 17:56:35,962 [INFO] fit-count : 130500, epsilon = 0.5302009000154674, memory = 100000, loss = 0.00010240529081784189
2019-02-15 17:57:25,138 [INFO] episode 742, step 521, reward 45.0, frame 523105
2019-02-15 17:57:30,197 [INFO] fit-count : 130600, epsilon = 0.5298409000154792, memory = 100000, loss = 0.005194671452045441
2019-02-15 17:58:24,539 [INFO] fit-count : 130700, epsilon = 0.5294809000154911, memory = 100000, loss = 5.4049905884312466e-0

2019-02-15 18:36:50,461 [INFO] episode 767, step 478, reward 100.0, frame 540452
2019-02-15 18:37:33,601 [INFO] fit-count : 135000, epsilon = 0.5140009000160007, memory = 100000, loss = 0.005511165130883455
2019-02-15 18:37:33,609 [INFO] target network updated.
2019-02-15 18:38:20,492 [INFO] episode 768, step 671, reward 105.0, frame 541124
2019-02-15 18:38:26,462 [INFO] fit-count : 135100, epsilon = 0.5136409000160126, memory = 100000, loss = 0.00046290759928524494
2019-02-15 18:39:21,648 [INFO] fit-count : 135200, epsilon = 0.5132809000160244, memory = 100000, loss = 0.00022367443307302892
2019-02-15 18:40:16,225 [INFO] episode 769, step 835, reward 180.0, frame 541960
2019-02-15 18:40:17,804 [INFO] fit-count : 135300, epsilon = 0.5129209000160363, memory = 100000, loss = 0.00013780477456748486
2019-02-15 18:41:11,854 [INFO] fit-count : 135400, epsilon = 0.5125609000160481, memory = 100000, loss = 4.347354843048379e-05
2019-02-15 18:41:26,621 [INFO] episode 770, step 523, reward 120.

2019-02-15 19:19:53,469 [INFO] fit-count : 139700, epsilon = 0.4970809000163777, memory = 100000, loss = 0.004689256194978952
2019-02-15 19:19:54,060 [INFO] episode 794, step 724, reward 215.0, frame 559600
2019-02-15 19:20:46,503 [INFO] fit-count : 139800, epsilon = 0.4967209000163674, memory = 100000, loss = 0.000101904159237165
2019-02-15 19:21:08,394 [INFO] [test] episode 0: step 521, reward 25.0, total_step 521
2019-02-15 19:21:23,639 [INFO] [test] episode 1: step 384, reward 45.0, total_step 905
2019-02-15 19:22:07,640 [INFO] [test] episode 2: step 1063, reward 345.0, total_step 1968
2019-02-15 19:22:30,764 [INFO] [test] episode 3: step 561, reward 95.0, total_step 2529
2019-02-15 19:23:09,242 [INFO] [test] episode 4: step 917, reward 150.0, total_step 3446
2019-02-15 19:23:47,569 [INFO] [test] episode 5: step 920, reward 110.0, total_step 4366
2019-02-15 19:24:15,152 [INFO] [test] episode 6: step 658, reward 105.0, total_step 5024
2019-02-15 19:25:02,215 [INFO] [test] episode 7:

2019-02-15 20:12:13,811 [INFO] [test] episode 87: step 683, reward 55.0, total_step 74726
2019-02-15 20:12:44,578 [INFO] [test] episode 88: step 687, reward 75.0, total_step 75413
2019-02-15 20:13:35,615 [INFO] [test] episode 89: step 1135, reward 410.0, total_step 76548
2019-02-15 20:14:15,113 [INFO] [test] episode 90: step 937, reward 165.0, total_step 77485
2019-02-15 20:15:02,705 [INFO] [test] episode 91: step 1117, reward 190.0, total_step 78602
2019-02-15 20:15:49,017 [INFO] [test] episode 92: step 1105, reward 185.0, total_step 79707
2019-02-15 20:16:26,136 [INFO] [test] episode 93: step 814, reward 195.0, total_step 80521
2019-02-15 20:17:12,187 [INFO] [test] episode 94: step 1000, reward 135.0, total_step 81521
2019-02-15 20:18:02,612 [INFO] [test] episode 95: step 1063, reward 200.0, total_step 82584
2019-02-15 20:18:41,181 [INFO] [test] episode 96: step 860, reward 60.0, total_step 83444
2019-02-15 20:19:22,067 [INFO] [test] episode 97: step 899, reward 235.0, total_step 843

2019-02-15 20:55:52,225 [INFO] episode 819, step 348, reward 45.0, frame 575820
2019-02-15 20:56:20,074 [INFO] fit-count : 143800, epsilon = 0.4823209000159533, memory = 100000, loss = 0.00018313042528461665
2019-02-15 20:57:16,516 [INFO] fit-count : 143900, epsilon = 0.48196090001594294, memory = 100000, loss = 0.007768768351525068
2019-02-15 20:58:13,144 [INFO] fit-count : 144000, epsilon = 0.4816009000159326, memory = 100000, loss = 0.00494477478787303
2019-02-15 20:58:39,714 [INFO] episode 820, step 1191, reward 350.0, frame 577012
2019-02-15 20:59:09,274 [INFO] fit-count : 144100, epsilon = 0.48124090001592223, memory = 100000, loss = 0.009715026244521141
2019-02-15 20:59:50,968 [INFO] episode 821, step 505, reward 100.0, frame 577518
2019-02-15 21:00:05,212 [INFO] fit-count : 144200, epsilon = 0.4808809000159119, memory = 100000, loss = 0.005248533561825752
2019-02-15 21:00:48,877 [INFO] episode 822, step 414, reward 80.0, frame 577933
2019-02-15 21:01:01,290 [INFO] fit-count : 1

2019-02-15 21:43:54,675 [INFO] fit-count : 148600, epsilon = 0.4650409000154564, memory = 100000, loss = 0.010097788646817207
2019-02-15 21:44:51,029 [INFO] fit-count : 148700, epsilon = 0.46468090001544604, memory = 100000, loss = 0.00022717643878422678
2019-02-15 21:45:19,114 [INFO] episode 845, step 727, reward 95.0, frame 595844
2019-02-15 21:45:47,646 [INFO] fit-count : 148800, epsilon = 0.4643209000154357, memory = 100000, loss = 0.0002257719897897914
2019-02-15 21:46:40,238 [INFO] episode 846, step 571, reward 95.0, frame 596416
2019-02-15 21:46:44,415 [INFO] fit-count : 148900, epsilon = 0.46396090001542534, memory = 100000, loss = 0.005415762774646282
2019-02-15 21:47:41,342 [INFO] fit-count : 149000, epsilon = 0.463600900015415, memory = 100000, loss = 7.807546353433281e-05
2019-02-15 21:47:50,871 [INFO] episode 847, step 502, reward 60.0, frame 596919
2019-02-15 21:48:37,643 [INFO] fit-count : 149100, epsilon = 0.46324090001540463, memory = 100000, loss = 0.00511038489639759

2019-02-15 22:43:54,712 [INFO] [test] episode 68: step 779, reward 165.0, total_step 59739
2019-02-15 22:44:47,043 [INFO] [test] episode 69: step 1226, reward 190.0, total_step 60965
2019-02-15 22:45:56,616 [INFO] [test] episode 70: step 1407, reward 305.0, total_step 62372
2019-02-15 22:46:33,266 [INFO] [test] episode 71: step 667, reward 80.0, total_step 63039
2019-02-15 22:47:14,747 [INFO] [test] episode 72: step 611, reward 100.0, total_step 63650
2019-02-15 22:47:39,416 [INFO] [test] episode 73: step 385, reward 55.0, total_step 64035
2019-02-15 22:48:09,944 [INFO] [test] episode 74: step 369, reward 50.0, total_step 64404
2019-02-15 22:49:29,670 [INFO] [test] episode 75: step 1131, reward 270.0, total_step 65535
2019-02-15 22:49:58,717 [INFO] [test] episode 76: step 690, reward 115.0, total_step 66225
2019-02-15 22:50:32,090 [INFO] [test] episode 77: step 789, reward 150.0, total_step 67014
2019-02-15 22:51:11,410 [INFO] [test] episode 78: step 945, reward 220.0, total_step 67959

2019-02-15 23:33:08,123 [INFO] fit-count : 152800, epsilon = 0.4499209000150216, memory = 100000, loss = 0.00011827289563370869
2019-02-15 23:33:26,011 [INFO] episode 869, step 687, reward 135.0, frame 612208
2019-02-15 23:34:00,306 [INFO] fit-count : 152900, epsilon = 0.44956090001501126, memory = 100000, loss = 0.013525966554880142
2019-02-15 23:34:53,573 [INFO] fit-count : 153000, epsilon = 0.4492009000150009, memory = 100000, loss = 0.0001125983108067885
2019-02-15 23:35:48,269 [INFO] fit-count : 153100, epsilon = 0.44884090001499055, memory = 100000, loss = 0.00024453975493088365
2019-02-15 23:36:04,128 [INFO] episode 870, step 1181, reward 540.0, frame 613390
2019-02-15 23:36:43,059 [INFO] fit-count : 153200, epsilon = 0.4484809000149802, memory = 100000, loss = 0.005079846829175949
2019-02-15 23:37:40,803 [INFO] fit-count : 153300, epsilon = 0.44812090001496985, memory = 100000, loss = 0.010506223887205124
2019-02-15 23:38:13,688 [INFO] episode 871, step 913, reward 105.0, frame

2019-02-16 09:14:51,716 [INFO] fit-count : 157700, epsilon = 0.43228090001451436, memory = 100000, loss = 0.00012541429896373302
2019-02-16 09:15:32,093 [INFO] episode 894, step 880, reward 330.0, frame 631997
2019-02-16 09:15:44,917 [INFO] fit-count : 157800, epsilon = 0.431920900014504, memory = 100000, loss = 0.00972350686788559
2019-02-16 09:16:39,467 [INFO] fit-count : 157900, epsilon = 0.43156090001449365, memory = 100000, loss = 0.00029664812609553337
2019-02-16 09:16:59,496 [INFO] episode 895, step 647, reward 135.0, frame 632645
2019-02-16 09:17:33,031 [INFO] fit-count : 158000, epsilon = 0.4312009000144833, memory = 100000, loss = 0.00012517735012806952
2019-02-16 09:18:26,663 [INFO] fit-count : 158100, epsilon = 0.43084090001447295, memory = 100000, loss = 0.0001563244150020182
2019-02-16 09:19:20,081 [INFO] episode 896, step 1047, reward 385.0, frame 633693
2019-02-16 09:19:20,578 [INFO] fit-count : 158200, epsilon = 0.4304809000144626, memory = 100000, loss = 0.00034758914

2019-02-16 09:58:49,175 [INFO] [test] episode 52: step 929, reward 200.0, total_step 38566
2019-02-16 09:59:12,162 [INFO] [test] episode 53: step 597, reward 155.0, total_step 39163
2019-02-16 09:59:33,901 [INFO] [test] episode 54: step 576, reward 120.0, total_step 39739
2019-02-16 09:59:56,165 [INFO] [test] episode 55: step 579, reward 120.0, total_step 40318
2019-02-16 10:00:17,967 [INFO] [test] episode 56: step 562, reward 105.0, total_step 40880
2019-02-16 10:00:40,742 [INFO] [test] episode 57: step 597, reward 155.0, total_step 41477
2019-02-16 10:00:55,601 [INFO] [test] episode 58: step 396, reward 105.0, total_step 41873
2019-02-16 10:01:26,970 [INFO] [test] episode 59: step 805, reward 155.0, total_step 42678
2019-02-16 10:01:48,409 [INFO] [test] episode 60: step 563, reward 105.0, total_step 43241
2019-02-16 10:02:10,610 [INFO] [test] episode 61: step 578, reward 105.0, total_step 43819
2019-02-16 10:02:32,642 [INFO] [test] episode 62: step 570, reward 110.0, total_step 44389

2019-02-16 10:38:10,492 [INFO] fit-count : 162000, epsilon = 0.4168009000140692, memory = 100000, loss = 0.00010675614612409845
2019-02-16 10:38:21,914 [INFO] episode 915, step 639, reward 165.0, frame 648999
2019-02-16 10:39:03,856 [INFO] fit-count : 162100, epsilon = 0.41644090001405887, memory = 100000, loss = 0.012119708582758904
2019-02-16 10:39:49,599 [INFO] episode 916, step 658, reward 75.0, frame 649658
2019-02-16 10:39:57,261 [INFO] fit-count : 162200, epsilon = 0.4160809000140485, memory = 100000, loss = 0.004518907051533461
2019-02-16 10:40:50,989 [INFO] fit-count : 162300, epsilon = 0.41572090001403816, memory = 100000, loss = 0.00013499660417437553
2019-02-16 10:41:11,276 [INFO] episode 917, step 612, reward 180.0, frame 650271
2019-02-16 10:41:43,737 [INFO] fit-count : 162400, epsilon = 0.4153609000140278, memory = 100000, loss = 0.00015884553431533277
2019-02-16 10:42:36,835 [INFO] fit-count : 162500, epsilon = 0.41500090001401746, memory = 100000, loss = 0.000734399771

2019-02-16 11:21:48,038 [INFO] episode 939, step 663, reward 85.0, frame 668534
2019-02-16 11:21:48,907 [INFO] fit-count : 166900, epsilon = 0.39916090001356197, memory = 100000, loss = 0.0046827904880046844
2019-02-16 11:22:36,304 [INFO] episode 940, step 371, reward 60.0, frame 668906
2019-02-16 11:22:40,648 [INFO] fit-count : 167000, epsilon = 0.3988009000135516, memory = 100000, loss = 0.010398086160421371
2019-02-16 11:23:34,151 [INFO] fit-count : 167100, epsilon = 0.39844090001354127, memory = 100000, loss = 0.0005289798718877137
2019-02-16 11:24:25,413 [INFO] fit-count : 167200, epsilon = 0.3980809000135309, memory = 100000, loss = 0.005316052120178938
2019-02-16 11:25:18,532 [INFO] fit-count : 167300, epsilon = 0.39772090001352056, memory = 100000, loss = 0.0005429456359706819
2019-02-16 11:25:40,862 [INFO] episode 941, step 1404, reward 580.0, frame 670311
2019-02-16 11:26:11,288 [INFO] fit-count : 167400, epsilon = 0.3973609000135102, memory = 100000, loss = 0.008803485892713

2019-02-16 16:39:25,457 [INFO] [test] episode 39: step 733, reward 90.0, total_step 31199
2019-02-16 16:39:44,212 [INFO] [test] episode 40: step 484, reward 35.0, total_step 31683
2019-02-16 16:40:08,063 [INFO] [test] episode 41: step 647, reward 50.0, total_step 32330
2019-02-16 16:40:30,649 [INFO] [test] episode 42: step 566, reward 110.0, total_step 32896
2019-02-16 16:41:00,908 [INFO] [test] episode 43: step 691, reward 130.0, total_step 33587
2019-02-16 16:41:35,379 [INFO] [test] episode 44: step 803, reward 95.0, total_step 34390
2019-02-16 16:42:05,371 [INFO] [test] episode 45: step 837, reward 160.0, total_step 35227
2019-02-16 16:42:26,963 [INFO] [test] episode 46: step 589, reward 45.0, total_step 35816
2019-02-16 16:42:49,270 [INFO] [test] episode 47: step 625, reward 75.0, total_step 36441
2019-02-16 16:43:24,496 [INFO] [test] episode 48: step 977, reward 225.0, total_step 37418
2019-02-16 16:43:57,105 [INFO] [test] episode 49: step 917, reward 145.0, total_step 38335
2019-

2019-02-16 17:40:05,271 [INFO] episode 959, step 840, reward 210.0, frame 685789
2019-02-16 17:41:14,568 [INFO] fit-count : 171300, epsilon = 0.3833209000131065, memory = 100000, loss = 0.005532814189791679
2019-02-16 17:42:31,002 [INFO] fit-count : 171400, epsilon = 0.38296090001309613, memory = 100000, loss = 0.0003589978441596031
2019-02-16 17:43:06,490 [INFO] episode 960, step 954, reward 295.0, frame 686744
2019-02-16 17:43:46,710 [INFO] fit-count : 171500, epsilon = 0.3826009000130858, memory = 100000, loss = 0.01438647136092186
2019-02-16 17:44:55,278 [INFO] fit-count : 171600, epsilon = 0.3822409000130754, memory = 100000, loss = 0.005629939027130604
2019-02-16 17:45:50,434 [INFO] episode 961, step 916, reward 280.0, frame 687661
2019-02-16 17:46:11,023 [INFO] fit-count : 171700, epsilon = 0.38188090001306507, memory = 100000, loss = 0.0002730439300648868
2019-02-16 17:47:29,188 [INFO] fit-count : 171800, epsilon = 0.3815209000130547, memory = 100000, loss = 0.00525222718715667

2019-02-16 21:21:32,058 [INFO] fit-count : 176000, epsilon = 0.36640090001261993, memory = 100000, loss = 0.00045347341801971197
2019-02-16 21:22:28,297 [INFO] fit-count : 176100, epsilon = 0.3660409000126096, memory = 100000, loss = 0.00018350683967582881
2019-02-16 21:22:30,635 [INFO] episode 986, step 659, reward 120.0, frame 705403
2019-02-16 21:23:23,357 [INFO] fit-count : 176200, epsilon = 0.36568090001259923, memory = 100000, loss = 0.00012625801900867373
2019-02-16 21:23:55,381 [INFO] episode 987, step 614, reward 130.0, frame 706018
2019-02-16 21:24:19,589 [INFO] fit-count : 176300, epsilon = 0.3653209000125889, memory = 100000, loss = 0.00026860658545047045
2019-02-16 21:25:14,760 [INFO] fit-count : 176400, epsilon = 0.3649609000125785, memory = 100000, loss = 0.00929733645170927
2019-02-16 21:25:28,349 [INFO] episode 988, step 663, reward 165.0, frame 706682
2019-02-16 21:26:11,537 [INFO] fit-count : 176500, epsilon = 0.3646009000125682, memory = 100000, loss = 0.00015855606

2019-02-16 22:05:31,399 [INFO] [test] episode 18: step 872, reward 160.0, total_step 13367
2019-02-16 22:06:11,340 [INFO] [test] episode 19: step 923, reward 145.0, total_step 14290
2019-02-16 22:06:46,403 [INFO] [test] episode 20: step 672, reward 115.0, total_step 14962
2019-02-16 22:07:20,530 [INFO] [test] episode 21: step 819, reward 175.0, total_step 15781
2019-02-16 22:07:36,407 [INFO] [test] episode 22: step 371, reward 50.0, total_step 16152
2019-02-16 22:07:58,951 [INFO] [test] episode 23: step 501, reward 20.0, total_step 16653
2019-02-16 22:08:56,774 [INFO] [test] episode 24: step 1313, reward 300.0, total_step 17966
2019-02-16 22:09:26,909 [INFO] [test] episode 25: step 743, reward 65.0, total_step 18709
2019-02-16 22:09:42,744 [INFO] [test] episode 26: step 369, reward 25.0, total_step 19078
2019-02-16 22:10:25,357 [INFO] [test] episode 27: step 924, reward 125.0, total_step 20002
2019-02-16 22:10:51,563 [INFO] [test] episode 28: step 639, reward 75.0, total_step 20641
201

2019-02-16 22:54:43,755 [INFO] episode 1010, step 759, reward 105.0, frame 721460
2019-02-16 22:55:36,071 [INFO] fit-count : 180200, epsilon = 0.35128090001218515, memory = 100000, loss = 0.005229226779192686
2019-02-16 22:56:06,366 [INFO] episode 1011, step 547, reward 105.0, frame 722008
2019-02-16 22:56:36,935 [INFO] fit-count : 180300, epsilon = 0.3509209000121748, memory = 100000, loss = 0.005315201357007027
2019-02-16 22:57:39,384 [INFO] fit-count : 180400, epsilon = 0.35056090001216444, memory = 100000, loss = 0.00023602833971381187
2019-02-16 22:58:23,689 [INFO] episode 1012, step 894, reward 180.0, frame 722903
2019-02-16 22:58:39,354 [INFO] fit-count : 180500, epsilon = 0.3502009000121541, memory = 100000, loss = 0.010597197338938713
2019-02-16 22:59:37,835 [INFO] fit-count : 180600, epsilon = 0.34984090001214374, memory = 100000, loss = 0.00020903200493194163
2019-02-16 23:00:41,369 [INFO] fit-count : 180700, epsilon = 0.3494809000121334, memory = 100000, loss = 0.0002336626

2019-02-16 23:45:07,077 [INFO] target network updated.
2019-02-16 23:45:08,632 [INFO] episode 1035, step 1082, reward 330.0, frame 741045
2019-02-16 23:46:08,649 [INFO] fit-count : 185100, epsilon = 0.3336409000116779, memory = 100000, loss = 0.004788387566804886
2019-02-16 23:46:12,177 [INFO] episode 1036, step 413, reward 85.0, frame 741459
2019-02-16 23:47:11,637 [INFO] fit-count : 185200, epsilon = 0.33328090001166755, memory = 100000, loss = 0.004964132327586412
2019-02-16 23:47:48,199 [INFO] episode 1037, step 619, reward 205.0, frame 742079
2019-02-16 23:48:14,006 [INFO] fit-count : 185300, epsilon = 0.3329209000116572, memory = 100000, loss = 0.00020134901569690555
2019-02-16 23:49:21,726 [INFO] fit-count : 185400, epsilon = 0.33256090001164684, memory = 100000, loss = 0.0002792041632346809
2019-02-16 23:49:47,845 [INFO] episode 1038, step 728, reward 125.0, frame 742808
2019-02-16 23:50:20,076 [INFO] fit-count : 185500, epsilon = 0.3322009000116365, memory = 100000, loss = 0.0

2019-02-17 00:27:10,471 [INFO] fit-count : 189700, epsilon = 0.3170809000112017, memory = 100000, loss = 0.00014932191697880626
2019-02-17 00:28:05,929 [INFO] [test] episode 0: step 1105, reward 255.0, total_step 1105
2019-02-17 00:28:42,429 [INFO] [test] episode 1: step 1071, reward 265.0, total_step 2176
2019-02-17 00:29:05,095 [INFO] [test] episode 2: step 640, reward 165.0, total_step 2816
2019-02-17 00:29:45,055 [INFO] [test] episode 3: step 1145, reward 305.0, total_step 3961
2019-02-17 00:30:23,902 [INFO] [test] episode 4: step 1100, reward 360.0, total_step 5061
2019-02-17 00:30:52,237 [INFO] [test] episode 5: step 821, reward 185.0, total_step 5882
2019-02-17 00:31:21,688 [INFO] [test] episode 6: step 846, reward 340.0, total_step 6728
2019-02-17 00:32:03,464 [INFO] [test] episode 7: step 1194, reward 385.0, total_step 7922
2019-02-17 00:32:26,904 [INFO] [test] episode 8: step 656, reward 125.0, total_step 8578
2019-02-17 00:32:49,063 [INFO] [test] episode 9: step 638, reward 

2019-02-17 09:45:06,916 [INFO] [test] episode 89: step 644, reward 140.0, total_step 75885
2019-02-17 09:46:11,379 [INFO] [test] episode 90: step 947, reward 435.0, total_step 76832
2019-02-17 09:46:31,900 [INFO] [test] episode 91: step 439, reward 50.0, total_step 77271
2019-02-17 09:47:11,110 [INFO] [test] episode 92: step 681, reward 165.0, total_step 77952
2019-02-17 09:48:23,447 [INFO] [test] episode 93: step 1177, reward 225.0, total_step 79129
2019-02-17 09:49:04,084 [INFO] [test] episode 94: step 646, reward 145.0, total_step 79775
2019-02-17 09:50:19,310 [INFO] [test] episode 95: step 1107, reward 215.0, total_step 80882
2019-02-17 09:50:56,213 [INFO] [test] episode 96: step 620, reward 85.0, total_step 81502
2019-02-17 09:52:24,024 [INFO] [test] episode 97: step 1701, reward 310.0, total_step 83203
2019-02-17 09:53:03,350 [INFO] [test] episode 98: step 773, reward 140.0, total_step 83976
2019-02-17 09:53:51,394 [INFO] [test] episode 99: step 783, reward 185.0, total_step 8475

2019-02-17 10:33:26,239 [INFO] fit-count : 194000, epsilon = 0.30160090001075657, memory = 100000, loss = 0.00026029179571196437
2019-02-17 10:34:17,086 [INFO] fit-count : 194100, epsilon = 0.3012409000107462, memory = 100000, loss = 0.005393995903432369
2019-02-17 10:35:08,632 [INFO] fit-count : 194200, epsilon = 0.30088090001073586, memory = 100000, loss = 0.00015086359053384513
2019-02-17 10:36:00,189 [INFO] fit-count : 194300, epsilon = 0.3005209000107255, memory = 100000, loss = 0.00011199055734323338
2019-02-17 10:36:17,211 [INFO] episode 1084, step 1723, reward 755.0, frame 778421
2019-02-17 10:36:50,138 [INFO] fit-count : 194400, epsilon = 0.30016090001071516, memory = 100000, loss = 0.0002704352082218975
2019-02-17 10:37:41,154 [INFO] fit-count : 194500, epsilon = 0.2998009000107048, memory = 100000, loss = 0.00022735761012881994
2019-02-17 10:38:15,993 [INFO] episode 1085, step 941, reward 290.0, frame 779363
2019-02-17 10:38:31,548 [INFO] fit-count : 194600, epsilon = 0.2994

2019-02-17 11:18:05,416 [INFO] episode 1109, step 379, reward 30.0, frame 796124
2019-02-17 11:18:32,317 [INFO] fit-count : 198800, epsilon = 0.28432090001025967, memory = 100000, loss = 0.00039319691131822765
2019-02-17 11:19:27,876 [INFO] fit-count : 198900, epsilon = 0.2839609000102493, memory = 100000, loss = 0.014564699493348598
2019-02-17 11:19:43,171 [INFO] episode 1110, step 691, reward 170.0, frame 796816
2019-02-17 11:20:27,678 [INFO] fit-count : 199000, epsilon = 0.28360090001023897, memory = 100000, loss = 0.004950305446982384
2019-02-17 11:21:24,073 [INFO] episode 1111, step 684, reward 90.0, frame 797501
2019-02-17 11:21:25,518 [INFO] fit-count : 199100, epsilon = 0.2832409000102286, memory = 100000, loss = 0.0002999989956151694
2019-02-17 11:22:22,810 [INFO] fit-count : 199200, epsilon = 0.28288090001021826, memory = 100000, loss = 0.004507761914283037
2019-02-17 11:23:23,296 [INFO] fit-count : 199300, epsilon = 0.2825209000102079, memory = 100000, loss = 0.0201235599815

2019-02-17 12:04:22,430 [INFO] [test] episode 72: step 806, reward 155.0, total_step 62367
2019-02-17 12:04:56,484 [INFO] [test] episode 73: step 986, reward 240.0, total_step 63353
2019-02-17 12:05:24,534 [INFO] [test] episode 74: step 816, reward 180.0, total_step 64169
2019-02-17 12:05:50,879 [INFO] [test] episode 75: step 795, reward 135.0, total_step 64964
2019-02-17 12:06:17,899 [INFO] [test] episode 76: step 806, reward 180.0, total_step 65770
2019-02-17 12:06:51,199 [INFO] [test] episode 77: step 986, reward 240.0, total_step 66756
2019-02-17 12:07:18,122 [INFO] [test] episode 78: step 811, reward 185.0, total_step 67567
2019-02-17 12:08:07,250 [INFO] [test] episode 79: step 1442, reward 320.0, total_step 69009
2019-02-17 12:08:34,663 [INFO] [test] episode 80: step 815, reward 210.0, total_step 69824
2019-02-17 12:09:03,546 [INFO] [test] episode 81: step 839, reward 210.0, total_step 70663
2019-02-17 12:09:37,180 [INFO] [test] episode 82: step 986, reward 240.0, total_step 7164

2019-02-17 12:46:08,996 [INFO] episode 1131, step 386, reward 110.0, frame 812963
2019-02-17 12:46:30,183 [INFO] fit-count : 203000, epsilon = 0.2692009000098249, memory = 100000, loss = 0.012504416517913342
2019-02-17 12:47:21,272 [INFO] fit-count : 203100, epsilon = 0.26884090000981453, memory = 100000, loss = 0.010142376646399498
2019-02-17 12:47:44,474 [INFO] episode 1132, step 756, reward 220.0, frame 813720
2019-02-17 12:48:10,950 [INFO] fit-count : 203200, epsilon = 0.2684809000098042, memory = 100000, loss = 0.005374473053961992
2019-02-17 12:49:01,471 [INFO] fit-count : 203300, epsilon = 0.26812090000979383, memory = 100000, loss = 0.00018777215154841542
2019-02-17 12:49:46,325 [INFO] episode 1133, step 966, reward 145.0, frame 814687
2019-02-17 12:49:51,936 [INFO] fit-count : 203400, epsilon = 0.2677609000097835, memory = 100000, loss = 0.00011501614062581211
2019-02-17 12:50:42,808 [INFO] fit-count : 203500, epsilon = 0.2674009000097731, memory = 100000, loss = 0.00029042668

2019-02-17 13:26:16,777 [INFO] fit-count : 207700, epsilon = 0.25228090000933834, memory = 100000, loss = 0.0002382822276558727
2019-02-17 13:26:29,432 [INFO] episode 1158, step 625, reward 180.0, frame 832060
2019-02-17 13:27:07,093 [INFO] fit-count : 207800, epsilon = 0.251920900009328, memory = 100000, loss = 0.00502084381878376
2019-02-17 13:27:52,252 [INFO] episode 1159, step 656, reward 75.0, frame 832717
2019-02-17 13:27:57,107 [INFO] fit-count : 207900, epsilon = 0.25156090000931763, memory = 100000, loss = 0.004846454132348299
2019-02-17 13:28:46,285 [INFO] fit-count : 208000, epsilon = 0.2512009000093073, memory = 100000, loss = 0.015560108236968517
2019-02-17 13:29:07,381 [INFO] episode 1160, step 612, reward 135.0, frame 833330
2019-02-17 13:29:36,323 [INFO] fit-count : 208100, epsilon = 0.25084090000929693, memory = 100000, loss = 0.00021887969342060387
2019-02-17 13:30:13,698 [INFO] episode 1161, step 527, reward 70.0, frame 833858
2019-02-17 13:30:26,468 [INFO] fit-count

2019-02-17 14:08:17,291 [INFO] [test] episode 50: step 633, reward 90.0, total_step 43058
2019-02-17 14:08:42,639 [INFO] [test] episode 51: step 722, reward 130.0, total_step 43780
2019-02-17 14:09:41,479 [INFO] [test] episode 52: step 1393, reward 635.0, total_step 45173
2019-02-17 14:10:28,056 [INFO] [test] episode 53: step 911, reward 155.0, total_step 46084
2019-02-17 14:10:59,684 [INFO] [test] episode 54: step 814, reward 290.0, total_step 46898
2019-02-17 14:11:17,634 [INFO] [test] episode 55: step 541, reward 90.0, total_step 47439
2019-02-17 14:11:58,434 [INFO] [test] episode 56: step 1247, reward 320.0, total_step 48686
2019-02-17 14:12:10,482 [INFO] [test] episode 57: step 385, reward 65.0, total_step 49071
2019-02-17 14:12:30,887 [INFO] [test] episode 58: step 641, reward 90.0, total_step 49712
2019-02-17 14:13:17,506 [INFO] [test] episode 59: step 1275, reward 200.0, total_step 50987
2019-02-17 14:13:54,237 [INFO] [test] episode 60: step 1120, reward 145.0, total_step 52107

2019-02-17 14:56:51,518 [INFO] fit-count : 211800, epsilon = 0.23752090000929876, memory = 100000, loss = 0.00019650146714411676
2019-02-17 14:57:59,184 [INFO] fit-count : 211900, epsilon = 0.2371609000092995, memory = 100000, loss = 0.0052046277560293674
2019-02-17 14:59:03,153 [INFO] fit-count : 212000, epsilon = 0.23680090000930026, memory = 100000, loss = 0.0005301081691868603
2019-02-17 14:59:10,049 [INFO] episode 1183, step 935, reward 250.0, frame 849234
2019-02-17 15:00:02,780 [INFO] fit-count : 212100, epsilon = 0.236440900009301, memory = 100000, loss = 0.00010205936268903315
2019-02-17 15:01:20,381 [INFO] fit-count : 212200, epsilon = 0.23608090000930176, memory = 100000, loss = 0.00490886764600873
2019-02-17 15:01:59,008 [INFO] episode 1184, step 980, reward 355.0, frame 850215
2019-02-17 15:02:34,936 [INFO] fit-count : 212300, epsilon = 0.23572090000930251, memory = 100000, loss = 0.0002420795790385455
2019-02-17 15:03:37,655 [INFO] episode 1185, step 563, reward 120.0, fr

2019-02-17 15:41:21,097 [INFO] fit-count : 216600, epsilon = 0.22024090000933477, memory = 100000, loss = 0.0046622660011053085
2019-02-17 15:42:10,909 [INFO] fit-count : 216700, epsilon = 0.21988090000933552, memory = 100000, loss = 0.009748965501785278
2019-02-17 15:43:01,405 [INFO] episode 1208, step 1019, reward 210.0, frame 868400
2019-02-17 15:43:02,847 [INFO] fit-count : 216800, epsilon = 0.21952090000933627, memory = 100000, loss = 0.005359025206416845
2019-02-17 15:44:00,774 [INFO] fit-count : 216900, epsilon = 0.21916090000933702, memory = 100000, loss = 0.00013525273243431002
2019-02-17 15:44:01,333 [INFO] episode 1209, step 413, reward 50.0, frame 868814
2019-02-17 15:44:52,562 [INFO] fit-count : 217000, epsilon = 0.21880090000933777, memory = 100000, loss = 0.005580397322773933
2019-02-17 15:45:06,144 [INFO] episode 1210, step 497, reward 80.0, frame 869312
2019-02-17 15:45:45,757 [INFO] fit-count : 217100, epsilon = 0.21844090000933852, memory = 100000, loss = 0.000410085

2019-02-17 16:27:35,929 [INFO] [test] episode 33: step 748, reward 190.0, total_step 26435
2019-02-17 16:28:24,607 [INFO] [test] episode 34: step 1228, reward 515.0, total_step 27663
2019-02-17 16:28:53,373 [INFO] [test] episode 35: step 726, reward 225.0, total_step 28389
2019-02-17 16:29:23,538 [INFO] [test] episode 36: step 770, reward 320.0, total_step 29159
2019-02-17 16:29:41,807 [INFO] [test] episode 37: step 487, reward 30.0, total_step 29646
2019-02-17 16:30:05,241 [INFO] [test] episode 38: step 650, reward 175.0, total_step 30296
2019-02-17 16:30:21,564 [INFO] [test] episode 39: step 489, reward 45.0, total_step 30785
2019-02-17 16:30:42,029 [INFO] [test] episode 40: step 618, reward 105.0, total_step 31403
2019-02-17 16:31:02,383 [INFO] [test] episode 41: step 625, reward 130.0, total_step 32028
2019-02-17 16:31:26,711 [INFO] [test] episode 42: step 717, reward 185.0, total_step 32745
2019-02-17 16:31:54,884 [INFO] [test] episode 43: step 847, reward 105.0, total_step 33592


2019-02-17 17:11:36,172 [INFO] episode 1230, step 849, reward 130.0, frame 884641
2019-02-17 17:12:04,414 [INFO] fit-count : 220900, epsilon = 0.20476090000936703, memory = 100000, loss = 0.00015100670862011611
2019-02-17 17:13:05,046 [INFO] fit-count : 221000, epsilon = 0.20440090000936778, memory = 100000, loss = 0.006407805252820253
2019-02-17 17:13:15,287 [INFO] episode 1231, step 667, reward 115.0, frame 885309
2019-02-17 17:13:58,536 [INFO] fit-count : 221100, epsilon = 0.20404090000936853, memory = 100000, loss = 0.0046659205108881
2019-02-17 17:14:51,228 [INFO] episode 1232, step 676, reward 250.0, frame 885986
2019-02-17 17:14:57,082 [INFO] fit-count : 221200, epsilon = 0.20368090000936928, memory = 100000, loss = 0.0003241105587221682
2019-02-17 17:15:55,225 [INFO] fit-count : 221300, epsilon = 0.20332090000937003, memory = 100000, loss = 0.005595575086772442
2019-02-17 17:16:45,154 [INFO] episode 1233, step 781, reward 390.0, frame 886768
2019-02-17 17:16:54,490 [INFO] fit-c

2019-02-17 17:54:56,345 [INFO] episode 1257, step 516, reward 70.0, frame 903602
2019-02-17 17:55:03,105 [INFO] fit-count : 225600, epsilon = 0.1878409000094023, memory = 100000, loss = 0.00027121586026623845
2019-02-17 17:55:56,231 [INFO] fit-count : 225700, epsilon = 0.18748090000940304, memory = 100000, loss = 0.005484242923557758
2019-02-17 17:55:58,987 [INFO] episode 1258, step 479, reward 75.0, frame 904082
2019-02-17 17:56:49,140 [INFO] fit-count : 225800, epsilon = 0.1871209000094038, memory = 100000, loss = 0.004717764910310507
2019-02-17 17:57:27,265 [INFO] episode 1259, step 629, reward 215.0, frame 904712
2019-02-17 17:57:46,655 [INFO] fit-count : 225900, epsilon = 0.18676090000940454, memory = 100000, loss = 0.00022927657118998468
2019-02-17 17:58:41,492 [INFO] fit-count : 226000, epsilon = 0.1864009000094053, memory = 100000, loss = 0.000463955570012331
2019-02-17 17:59:34,930 [INFO] fit-count : 226100, epsilon = 0.18604090000940604, memory = 100000, loss = 0.000396694580

2019-02-17 18:38:25,975 [INFO] [test] episode 14: step 975, reward 355.0, total_step 13531
2019-02-17 18:38:42,353 [INFO] [test] episode 15: step 497, reward 80.0, total_step 14028
2019-02-17 18:39:19,720 [INFO] [test] episode 16: step 1109, reward 310.0, total_step 15137
2019-02-17 18:39:48,511 [INFO] [test] episode 17: step 861, reward 215.0, total_step 15998
2019-02-17 18:40:01,455 [INFO] [test] episode 18: step 399, reward 80.0, total_step 16397
2019-02-17 18:40:24,185 [INFO] [test] episode 19: step 647, reward 155.0, total_step 17044


In [None]:
test_agent = DQNAgent(env, input_shape=input_shape, load_path=save_path)
test_episode_rewards, _ = test(env, test_agent, episodes=test_episodes)
print('mean(test_episode_rewards) = {}'.format(
        np.mean(test_episode_rewards)))