# DQNの解説

![image.png](attachment:image.png)

画像引用:  
https://qiita.com/sugulu/items/3c7d6cbe600d455e853b

### DQNの特徴
- Q学習における状態行動価値テーブル(Qテーブル)を, ニューラルネットワークによる関数で近似したもの.
- 離散的な行動を扱うことができる.

参考:  
http://blog.syundo.org/post/20171208-reinforcement-learning-dqn-and-impl/

### OpenAI gymのインストール

GitHubのリポジトリを参考に, gymモジュールをインストールしてください. 本ノートブックではAtari環境(Breakout)を使用するため, `pip install gym[atari]` のようにAtari用の依存関係も併せてインストールしてください.  
https://github.com/openai/gym

In [1]:
import gym
import numpy as np
import renom as rm
import matplotlib.pyplot as plt
from renom.utility.initializer import Gaussian
from renom.cuda import set_cuda_active
from renom_rl.dqn import DQN
from renom_rl.env import BaseEnv
from gym.core import Env
from PIL import Image

# Run ReNom computations on the GPU.
set_cuda_active(True)

# Atari Breakout; frame skipping is done by CustomEnv.step instead of by gym.
ENV_ID = 'BreakoutNoFrameskip-v4'
env = gym.make(ENV_ID)

class CustomEnv(BaseEnv):
    """Adapter exposing a gym environment through the renom_rl ``BaseEnv`` API.

    Each call to ``step`` repeats the chosen action for up to 4 emulator
    frames, keeps only the last frame (resized to 84x84 grayscale and scaled
    to [0, 1]) and returns the most recent processed frames stacked along
    axis 0.
    """

    def __init__(self, env):
        """Store the wrapped gym environment and declare the space shapes.

        Args:
            env: gym environment whose observations are image arrays.
        """
        self.env = env
        self.action_shape = 4
        self.state_shape = (4, 84, 84)
        # Most recent preprocessed frames, oldest first (at most 4 entries).
        self.previous_frames = []
        super(CustomEnv, self).__init__()

    def reset(self):
        """Reset the environment and warm it up with random actions.

        Returns:
            The stacked state produced by the last warm-up step.
        """
        self.env.reset()
        # Drop frames of the previous episode so the returned stack never
        # depends on the warm-up length below.
        self.previous_frames = []
        # At least 4 warm-up steps, so the frame stack is full on return.
        n_step = np.random.randint(4, 32 + 1)
        for _ in range(n_step):
            state, _reward, _terminal = self.step(self.env.action_space.sample())
        return state

    def sample(self):
        """Return a random action drawn from the wrapped action space."""
        return self.env.action_space.sample()

    def render(self):
        """Render the wrapped environment."""
        self.env.render()

    def _preprocess(self, state):
        """Convert a raw frame to an 84x84 grayscale array scaled to [0, 1].

        Args:
            state: image array accepted by ``PIL.Image.fromarray``.

        Returns:
            Float array of shape (84, 84).
        """
        gray_image = Image.fromarray(state).resize((84, 84)).convert('L')
        # Direct array conversion yields the same pixel values as
        # ``getdata()`` + ``reshape`` without building a Python sequence.
        return np.asarray(gray_image) / 255.

    def step(self, action):
        """Repeat ``action`` for up to 4 frames and return the stacked state.

        Args:
            action: action forwarded unchanged to the wrapped environment.

        Returns:
            Tuple ``(state, reward, terminal)``: the stacked recent frames,
            the sum of the rewards collected over the repeated frames, and
            whether the episode ended during them.
        """
        reward_list = []
        terminal = False
        for _ in range(4):
            # Use last frame. Other frames will be skipped.
            s, r, t, _info = self.env.step(action)
            state = self._preprocess(s)
            reward_list.append(r)
            if t:
                terminal = True
                # Do not keep stepping an environment that already reported
                # the end of the episode.
                break

        # Append the newest frame and keep only the 4 most recent ones.
        self.previous_frames = (self.previous_frames + [state])[-4:]
        state = np.stack(self.previous_frames)
        return state, np.sum(reward_list), terminal
    
custom_env = CustomEnv(env)

# Convolutional trunk described as (output channels, filter size, stride).
conv_specs = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]

layers = []
for channels, kernel, stride in conv_specs:
    layers.append(rm.Conv2d(channels, filter=kernel, stride=stride, ignore_bias=True))
    layers.append(rm.Relu())

# Fully connected head with one output per action.
layers.extend([
    rm.Flatten(),
    rm.Dense(512, ignore_bias=True),
    rm.Relu(),
    rm.Dense(custom_env.action_shape, ignore_bias=True),
])

q_network = rm.Sequential(layers)

In [2]:
# Build the DQN agent from the wrapped environment and the Q-network.
model = DQN(custom_env, q_network)

In [3]:
# Train the agent with rendering enabled.
# NOTE(review): greedy_step presumably sets the number of steps over which the
# e-greedy value is annealed (the log below shows it rising to 0.900) —
# confirm against the renom_rl documentation.
model.fit(render=True, greedy_step=1000000)

Run random 5000 step for storing experiences


episode 001 avg_loss: 0.005 total_reward [train:1.000 test:-] e-greedy:0.000: : 161it [00:04, 43.29it/s]
episode 002 avg_loss: 0.010 total_reward [train:0.000 test:-] e-greedy:0.000: : 115it [00:02, 44.38it/s]
episode 003 avg_loss: 0.007 total_reward [train:2.000 test:-] e-greedy:0.000: : 208it [00:04, 42.52it/s]
episode 004 avg_loss: 0.006 total_reward [train:0.000 test:-] e-greedy:0.001: : 133it [00:03, 42.42it/s]
episode 005 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.001: : 216it [00:04, 43.73it/s]
episode 006 avg_loss: 0.005 total_reward [train:0.000 test:-] e-greedy:0.001: : 124it [00:02, 42.76it/s]
episode 007 avg_loss: 0.007 total_reward [train:1.000 test:-] e-greedy:0.001: : 154it [00:03, 42.29it/s]
episode 008 avg_loss: 0.005 total_reward [train:2.000 test:-] e-greedy:0.001: : 170it [00:03, 43.19it/s]
episode 009 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.001: : 143it [00:03, 43.61it/s]
episode 010 avg_loss: 0.004 total_reward [train:0.000 t

episode 155 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.022: : 104it [00:02, 41.09it/s]
episode 156 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.022: : 124it [00:02, 43.20it/s]
episode 157 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.022: : 219it [00:05, 43.13it/s]
episode 158 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.022: : 133it [00:03, 42.40it/s]
episode 159 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.022: : 118it [00:02, 41.52it/s]
episode 160 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.023: : 153it [00:03, 43.03it/s]
episode 161 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.023: : 241it [00:05, 43.03it/s]
episode 162 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.023: : 147it [00:03, 43.34it/s]
episode 163 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.023: : 121it [00:02, 44.01it/s]
episode 164 avg_loss: 0.002 total_reward [train:3.000 t

episode 233 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.033: : 237it [00:05, 43.95it/s]
episode 234 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.033: : 131it [00:03, 42.93it/s]
episode 235 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.033: : 232it [00:05, 43.76it/s]
episode 236 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.034: : 214it [00:04, 43.68it/s]
episode 237 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.034: : 180it [00:04, 44.22it/s]
episode 238 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.034: : 121it [00:02, 42.37it/s]
episode 239 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.034: : 130it [00:02, 44.51it/s]
episode 240 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.034: : 148it [00:03, 43.78it/s]
episode 241 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.034: : 226it [00:05, 43.99it/s]
episode 242 avg_loss: 0.001 total_reward [train:0.000 t

episode 388 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.055: : 168it [00:03, 43.80it/s]
episode 389 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.055: : 162it [00:03, 43.23it/s]
episode 390 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.055: : 125it [00:02, 44.59it/s]
episode 391 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.055: : 103it [00:02, 43.86it/s]
episode 392 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.055: : 177it [00:04, 43.36it/s]
episode 393 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.056: : 201it [00:04, 44.08it/s]
episode 394 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.056: : 132it [00:03, 43.66it/s]
episode 395 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.056: : 172it [00:03, 43.51it/s]
episode 396 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.056: : 161it [00:03, 43.62it/s]
episode 397 avg_loss: 0.002 total_reward [train:2.000 t

episode 543 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.077: : 149it [00:03, 42.51it/s]
episode 544 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.077: : 126it [00:02, 42.47it/s]
episode 545 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.077: : 118it [00:02, 43.22it/s]
episode 546 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.077: : 216it [00:05, 43.17it/s]
episode 547 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.077: : 248it [00:05, 43.52it/s]
episode 548 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.077: : 111it [00:02, 42.25it/s]
episode 549 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.077: : 112it [00:02, 43.03it/s]
episode 550 avg_loss: 0.001 total_reward [train:1.000 test:0.000] e-greedy:0.078: : 147it [00:04, 32.71it/s]
episode 551 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.078: : 119it [00:02, 42.95it/s]
episode 552 avg_loss: 0.001 total_reward [train:1.0

episode 698 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.099: : 222it [00:05, 43.20it/s]
episode 699 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.099: : 107it [00:02, 42.91it/s]
episode 700 avg_loss: 0.002 total_reward [train:3.000 test:7.000] e-greedy:0.099: : 205it [00:07, 44.34it/s]
episode 701 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.100: : 192it [00:04, 42.78it/s]
episode 702 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.100: : 184it [00:04, 43.00it/s]
episode 703 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.100: : 224it [00:05, 43.48it/s]
episode 704 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.100: : 142it [00:03, 42.89it/s]
episode 705 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.100: : 136it [00:03, 42.71it/s]
episode 706 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.101: : 242it [00:05, 43.49it/s]
episode 707 avg_loss: 0.002 total_reward [train:1.0

episode 852 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.122: : 107it [00:02, 41.74it/s]
episode 853 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.122: : 114it [00:02, 42.54it/s]
episode 854 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.122: : 159it [00:03, 43.62it/s]
episode 855 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.122: : 191it [00:04, 43.77it/s]
episode 856 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.122: : 159it [00:03, 43.00it/s]
episode 857 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.123: : 144it [00:03, 42.38it/s]
episode 858 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.123: : 179it [00:04, 43.44it/s]
episode 859 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.123: : 129it [00:02, 43.39it/s]
episode 860 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.123: : 258it [00:05, 43.42it/s]
episode 861 avg_loss: 0.001 total_reward [train:1.000 t

episode 1006 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.145: : 132it [00:03, 43.57it/s]
episode 1007 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.145: : 168it [00:03, 44.24it/s]
episode 1008 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.145: : 108it [00:02, 43.45it/s]
episode 1009 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.145: : 130it [00:02, 45.83it/s]
episode 1010 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.145: : 124it [00:02, 43.87it/s]
episode 1011 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.145: : 119it [00:02, 43.56it/s]
episode 1012 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.145: : 236it [00:05, 44.73it/s]
episode 1013 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.146: : 167it [00:03, 44.17it/s]
episode 1014 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.146: : 158it [00:03, 44.80it/s]
episode 1015 avg_loss: 0.001 total_reward [tra

episode 1160 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.166: : 107it [00:02, 43.92it/s]
episode 1161 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.167: : 253it [00:05, 43.37it/s]
episode 1162 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.167: : 231it [00:05, 43.48it/s]
episode 1163 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.167: : 109it [00:02, 43.27it/s]
episode 1164 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.167: : 150it [00:03, 44.64it/s]
episode 1165 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.167: : 163it [00:03, 43.38it/s]
episode 1166 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.167: : 153it [00:03, 42.31it/s]
episode 1167 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.167: : 202it [00:04, 43.39it/s]
episode 1168 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.168: : 128it [00:02, 43.70it/s]
episode 1169 avg_loss: 0.001 total_reward [tra

episode 1314 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.188: : 216it [00:04, 43.45it/s]
episode 1315 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.189: : 111it [00:02, 43.70it/s]
episode 1316 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.189: : 232it [00:05, 43.49it/s]
episode 1317 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.189: : 147it [00:03, 43.62it/s]
episode 1318 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.189: : 141it [00:03, 43.97it/s]
episode 1319 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.189: : 188it [00:04, 43.55it/s]
episode 1320 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.189: : 222it [00:05, 44.34it/s]
episode 1321 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.190: : 174it [00:04, 43.48it/s]
episode 1322 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.190: : 117it [00:02, 42.37it/s]
episode 1323 avg_loss: 0.001 total_reward [tra

episode 1468 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.211: : 187it [00:04, 43.76it/s]
episode 1469 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.212: : 144it [00:03, 44.09it/s]
episode 1470 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.212: : 114it [00:02, 42.82it/s]
episode 1471 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.212: : 244it [00:05, 44.46it/s]
episode 1472 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.212: : 208it [00:04, 43.71it/s]
episode 1473 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.212: : 204it [00:04, 43.56it/s]
episode 1474 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.212: : 103it [00:02, 42.49it/s]
episode 1475 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.213: : 244it [00:05, 43.63it/s]
episode 1476 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.213: : 130it [00:02, 44.28it/s]
episode 1477 avg_loss: 0.001 total_reward [tra

episode 1622 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.236: : 166it [00:03, 44.07it/s]
episode 1623 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.236: : 308it [00:06, 44.25it/s]
episode 1624 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.236: : 197it [00:04, 44.19it/s]
episode 1625 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.237: : 224it [00:05, 43.69it/s]
episode 1626 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.237: : 260it [00:06, 44.55it/s]
episode 1627 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.237: : 268it [00:06, 43.52it/s]
episode 1628 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.237: : 258it [00:05, 43.52it/s]
episode 1629 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.238: : 207it [00:04, 43.50it/s]
episode 1630 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.238: : 246it [00:05, 43.66it/s]
episode 1631 avg_loss: 0.001 total_reward [tra

episode 1776 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.264: : 157it [00:03, 44.52it/s]
episode 1777 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.264: : 183it [00:04, 44.47it/s]
episode 1778 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.264: : 144it [00:03, 44.16it/s]
episode 1779 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.264: : 96it [00:02, 43.50it/s]
episode 1780 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.265: : 203it [00:04, 45.29it/s]
episode 1781 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.265: : 226it [00:04, 45.67it/s]
episode 1782 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.265: : 225it [00:04, 46.17it/s]
episode 1783 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.265: : 322it [00:07, 45.02it/s]
episode 1784 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.266: : 316it [00:06, 45.18it/s]
episode 1785 avg_loss: 0.001 total_reward [trai

episode 1930 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.294: : 142it [00:03, 44.65it/s]
episode 1931 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.295: : 314it [00:07, 44.79it/s]
episode 1932 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.295: : 195it [00:04, 46.31it/s]
episode 1933 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.295: : 187it [00:04, 45.06it/s]
episode 1934 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.295: : 200it [00:04, 45.98it/s]
episode 1935 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.295: : 229it [00:05, 44.38it/s]
episode 1936 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.296: : 320it [00:07, 46.24it/s]
episode 1937 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.296: : 188it [00:04, 44.44it/s]
episode 1938 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.296: : 150it [00:03, 46.36it/s]
episode 1939 avg_loss: 0.001 total_reward [tra

episode 2084 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.326: : 297it [00:06, 43.14it/s]
episode 2085 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.326: : 219it [00:05, 43.11it/s]
episode 2086 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.327: : 307it [00:07, 43.14it/s]
episode 2087 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.327: : 193it [00:04, 43.69it/s]
episode 2088 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.327: : 253it [00:05, 44.56it/s]
episode 2089 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.327: : 223it [00:05, 44.33it/s]
episode 2090 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.328: : 343it [00:07, 44.54it/s]
episode 2091 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.328: : 304it [00:07, 43.36it/s]
episode 2092 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.328: : 331it [00:07, 42.77it/s]
episode 2093 avg_loss: 0.001 total_reward [tra

episode 2238 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.362: : 409it [00:09, 44.09it/s]
episode 2239 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.362: : 204it [00:04, 44.59it/s]
episode 2240 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.362: : 269it [00:06, 44.57it/s]
episode 2241 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.362: : 164it [00:03, 44.46it/s]
episode 2242 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.363: : 278it [00:06, 45.35it/s]
episode 2243 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.363: : 388it [00:08, 45.41it/s]
episode 2244 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.363: : 175it [00:03, 45.17it/s]
episode 2245 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.363: : 159it [00:03, 45.30it/s]
episode 2246 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.364: : 350it [00:07, 45.32it/s]
episode 2247 avg_loss: 0.002 total_reward [tra

episode 2392 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.399: : 310it [00:07, 45.15it/s]
episode 2393 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.399: : 233it [00:05, 44.69it/s]
episode 2394 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.399: : 251it [00:05, 44.42it/s]
episode 2395 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.400: : 190it [00:04, 43.72it/s]
episode 2396 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.400: : 198it [00:04, 43.83it/s]
episode 2397 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.400: : 421it [00:09, 43.99it/s]
episode 2398 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.400: : 219it [00:05, 43.56it/s]
episode 2399 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.401: : 381it [00:08, 44.91it/s]
episode 2400 avg_loss: 0.001 total_reward [train:4.000 test:11.000] e-greedy:0.401: : 305it [00:10, 46.20it/s]
episode 2401 avg_loss: 0.002 total_reward

episode 2546 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.440: : 301it [00:06, 43.73it/s]
episode 2547 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.440: : 239it [00:05, 42.92it/s]
episode 2548 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.441: : 336it [00:07, 43.38it/s]
episode 2549 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.441: : 288it [00:06, 44.08it/s]
episode 2550 avg_loss: 0.002 total_reward [train:7.000 test:12.000] e-greedy:0.441: : 415it [00:15, 42.06it/s]
episode 2551 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.442: : 157it [00:04, 38.83it/s]
episode 2552 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.442: : 314it [00:07, 41.20it/s]
episode 2553 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.442: : 527it [00:12, 42.67it/s]
episode 2554 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.443: : 443it [00:10, 41.46it/s]
episode 2555 avg_loss: 0.002 total_reward

episode 2700 avg_loss: 0.001 total_reward [train:5.000 test:7.000] e-greedy:0.485: : 298it [00:08, 33.91it/s]
episode 2701 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.485: : 259it [00:05, 43.51it/s]
episode 2702 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.485: : 463it [00:10, 43.27it/s]
episode 2703 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.486: : 340it [00:08, 45.27it/s]
episode 2704 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.486: : 419it [00:09, 45.66it/s]
episode 2705 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.486: : 350it [00:07, 46.00it/s]
episode 2706 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.487: : 307it [00:06, 44.94it/s]
episode 2707 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.487: : 325it [00:07, 46.33it/s]
episode 2708 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.487: : 367it [00:08, 45.75it/s]
episode 2709 avg_loss: 0.001 total_reward 

episode 2854 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.533: : 390it [00:09, 41.19it/s]
episode 2855 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.533: : 406it [00:09, 43.38it/s]
episode 2856 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.534: : 427it [00:09, 43.13it/s]
episode 2857 avg_loss: 0.001 total_reward [train:14.000 test:-] e-greedy:0.534: : 429it [00:09, 43.33it/s]
episode 2858 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.534: : 365it [00:08, 43.24it/s]
episode 2859 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.535: : 439it [00:09, 44.00it/s]
episode 2860 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.535: : 422it [00:09, 43.69it/s]
episode 2861 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.536: : 410it [00:09, 43.80it/s]
episode 2862 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.536: : 352it [00:08, 43.85it/s]
episode 2863 avg_loss: 0.002 total_reward 

episode 2931 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.560: : 344it [00:07, 44.00it/s]
episode 2932 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.560: : 321it [00:07, 43.41it/s]
episode 2933 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.560: : 404it [00:09, 43.30it/s]
episode 2934 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.561: : 489it [00:11, 43.60it/s]
episode 2935 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.561: : 318it [00:07, 43.38it/s]
episode 2936 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.561: : 458it [00:10, 44.11it/s]
episode 2937 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.562: : 309it [00:07, 43.48it/s]
episode 2938 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.562: : 343it [00:07, 43.35it/s]
episode 2939 avg_loss: 0.002 total_reward [train:17.000 test:-] e-greedy:0.562: : 531it [00:12, 35.07it/s]
episode 2940 avg_loss: 0.002 total_reward 

episode 3085 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.613: : 567it [00:13, 42.88it/s]
episode 3086 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.614: : 348it [00:08, 37.76it/s]
episode 3087 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.614: : 482it [00:11, 42.20it/s]
episode 3088 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.614: : 530it [00:11, 43.40it/s]
episode 3089 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.615: : 336it [00:07, 44.51it/s]
episode 3090 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.615: : 279it [00:06, 44.40it/s]
episode 3091 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.615: : 400it [00:09, 42.53it/s]
episode 3092 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.616: : 311it [00:07, 42.62it/s]
episode 3093 avg_loss: 0.002 total_reward [train:15.000 test:-] e-greedy:0.616: : 569it [00:13, 42.03it/s]
episode 3094 avg_loss: 0.002 total_reward

episode 3237 avg_loss: 0.002 total_reward [train:17.000 test:-] e-greedy:0.671: : 700it [00:16, 42.90it/s]
episode 3238 avg_loss: 0.002 total_reward [train:15.000 test:-] e-greedy:0.671: : 432it [00:10, 42.92it/s]
episode 3239 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.671: : 441it [00:10, 43.31it/s]
episode 3240 avg_loss: 0.002 total_reward [train:15.000 test:-] e-greedy:0.672: : 398it [00:09, 42.54it/s]
episode 3241 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.672: : 451it [00:10, 43.33it/s]
episode 3242 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.673: : 365it [00:08, 43.61it/s]
episode 3243 avg_loss: 0.002 total_reward [train:16.000 test:-] e-greedy:0.673: : 432it [00:10, 42.99it/s]
episode 3244 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.673: : 605it [00:13, 43.74it/s]
episode 3245 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.674: : 452it [00:10, 42.89it/s]
episode 3246 avg_loss: 0.002 total_rewar

episode 3389 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.737: : 308it [00:07, 40.86it/s]
episode 3390 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.737: : 469it [00:11, 41.68it/s]
episode 3391 avg_loss: 0.002 total_reward [train:16.000 test:-] e-greedy:0.738: : 569it [00:13, 41.52it/s]
episode 3392 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.738: : 457it [00:10, 43.48it/s]
episode 3393 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.739: : 704it [00:16, 43.31it/s]
episode 3394 avg_loss: 0.003 total_reward [train:13.000 test:-] e-greedy:0.739: : 468it [00:10, 42.86it/s]
episode 3395 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.739: : 313it [00:07, 42.97it/s]
episode 3396 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.740: : 371it [00:08, 42.27it/s]
episode 3397 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.740: : 370it [00:08, 42.70it/s]
episode 3398 avg_loss: 0.003 total_reward

episode 3541 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.799: : 251it [00:05, 42.32it/s]
episode 3542 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.799: : 370it [00:08, 43.37it/s]
episode 3543 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.800: : 540it [00:12, 43.71it/s]
episode 3544 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.800: : 493it [00:11, 43.35it/s]
episode 3545 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.800: : 335it [00:07, 41.98it/s]
episode 3546 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.801: : 452it [00:10, 42.66it/s]
episode 3547 avg_loss: 0.003 total_reward [train:20.000 test:-] e-greedy:0.801: : 572it [00:13, 42.90it/s]
episode 3548 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.802: : 445it [00:10, 42.87it/s]
episode 3549 avg_loss: 0.003 total_reward [train:11.000 test:-] e-greedy:0.802: : 470it [00:10, 42.14it/s]
episode 3550 avg_loss: 0.003 total_reward

episode 3693 avg_loss: 0.004 total_reward [train:16.000 test:-] e-greedy:0.862: : 597it [00:13, 44.59it/s]
episode 3694 avg_loss: 0.003 total_reward [train:11.000 test:-] e-greedy:0.863: : 525it [00:11, 44.82it/s]
episode 3695 avg_loss: 0.003 total_reward [train:24.000 test:-] e-greedy:0.864: : 804it [00:18, 44.66it/s]
episode 3696 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.864: : 511it [00:11, 44.58it/s]
episode 3697 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.864: : 404it [00:09, 44.36it/s]
episode 3698 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.865: : 569it [00:12, 44.54it/s]
episode 3699 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.865: : 458it [00:10, 44.61it/s]
episode 3700 avg_loss: 0.004 total_reward [train:10.000 test:16.000] e-greedy:0.866: : 510it [00:14, 45.19it/s]
episode 3701 avg_loss: 0.004 total_reward [train:16.000 test:-] e-greedy:0.866: : 678it [00:15, 43.91it/s]
episode 3702 avg_loss: 0.003 tota

episode 3845 avg_loss: 0.004 total_reward [train:18.000 test:-] e-greedy:0.900: : 475it [00:10, 44.11it/s]
episode 3846 avg_loss: 0.004 total_reward [train:4.000 test:-] e-greedy:0.900: : 233it [00:05, 43.41it/s]
episode 3847 avg_loss: 0.003 total_reward [train:15.000 test:-] e-greedy:0.900: : 397it [00:09, 43.57it/s]
episode 3848 avg_loss: 0.004 total_reward [train:17.000 test:-] e-greedy:0.900: : 475it [00:10, 44.46it/s]
episode 3849 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 384it [00:08, 44.10it/s]
episode 3850 avg_loss: 0.003 total_reward [train:10.000 test:9.000] e-greedy:0.900: : 492it [00:13, 36.14it/s]
episode 3851 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 471it [00:10, 43.34it/s]
episode 3852 avg_loss: 0.004 total_reward [train:23.000 test:-] e-greedy:0.900: : 601it [00:13, 43.43it/s]
episode 3853 avg_loss: 0.004 total_reward [train:14.000 test:-] e-greedy:0.900: : 457it [00:10, 43.49it/s]
episode 3854 avg_loss: 0.003 total_

episode 3997 avg_loss: 0.003 total_reward [train:20.000 test:-] e-greedy:0.900: : 637it [00:14, 43.96it/s]
episode 3998 avg_loss: 0.003 total_reward [train:16.000 test:-] e-greedy:0.900: : 512it [00:11, 44.00it/s]
episode 3999 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 421it [00:09, 43.77it/s]
episode 4000 avg_loss: 0.003 total_reward [train:10.000 test:18.000] e-greedy:0.900: : 492it [00:13, 35.26it/s]
episode 4001 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 711it [00:17, 41.48it/s]
episode 4002 avg_loss: 0.003 total_reward [train:13.000 test:-] e-greedy:0.900: : 467it [00:11, 41.38it/s]
episode 4003 avg_loss: 0.003 total_reward [train:17.000 test:-] e-greedy:0.900: : 649it [00:15, 42.04it/s]
episode 4004 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.900: : 447it [00:10, 41.55it/s]
episode 4005 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 379it [00:09, 41.67it/s]
episode 4006 avg_loss: 0.004 total

episode 4149 avg_loss: 0.003 total_reward [train:24.000 test:-] e-greedy:0.900: : 461it [00:11, 41.65it/s]
episode 4150 avg_loss: 0.003 total_reward [train:13.000 test:6.000] e-greedy:0.900: : 702it [00:18, 38.35it/s]
episode 4151 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 395it [00:09, 42.97it/s]
episode 4152 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.900: : 606it [00:14, 42.04it/s]
episode 4153 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 381it [00:09, 41.85it/s]
episode 4154 avg_loss: 0.003 total_reward [train:17.000 test:-] e-greedy:0.900: : 577it [00:13, 42.17it/s]
episode 4155 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 426it [00:10, 42.48it/s]
episode 4156 avg_loss: 0.004 total_reward [train:16.000 test:-] e-greedy:0.900: : 578it [00:13, 42.11it/s]
episode 4157 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 553it [00:13, 42.06it/s]
episode 4158 avg_loss: 0.003 total

episode 4301 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 456it [00:11, 40.75it/s]
episode 4302 avg_loss: 0.004 total_reward [train:22.000 test:-] e-greedy:0.900: : 875it [00:20, 41.51it/s]
episode 4303 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 475it [00:11, 41.58it/s]
episode 4304 avg_loss: 0.003 total_reward [train:20.000 test:-] e-greedy:0.900: : 642it [00:15, 41.53it/s]
episode 4305 avg_loss: 0.004 total_reward [train:20.000 test:-] e-greedy:0.900: : 571it [00:13, 41.65it/s]
episode 4306 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 429it [00:10, 41.61it/s]
episode 4307 avg_loss: 0.004 total_reward [train:16.000 test:-] e-greedy:0.900: : 528it [00:12, 41.75it/s]
episode 4308 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.900: : 593it [00:14, 41.58it/s]
episode 4309 avg_loss: 0.003 total_reward [train:19.000 test:-] e-greedy:0.900: : 557it [00:13, 41.50it/s]
episode 4310 avg_loss: 0.003 total_rewa

episode 4453 avg_loss: 0.003 total_reward [train:15.000 test:-] e-greedy:0.900: : 516it [00:12, 41.07it/s]
episode 4454 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 403it [00:09, 41.49it/s]
episode 4455 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 336it [00:08, 41.37it/s]
episode 4456 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 412it [00:09, 41.70it/s]
episode 4457 avg_loss: 0.004 total_reward [train:15.000 test:-] e-greedy:0.900: : 435it [00:10, 42.45it/s]
episode 4458 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 426it [00:10, 41.68it/s]
episode 4459 avg_loss: 0.003 total_reward [train:13.000 test:-] e-greedy:0.900: : 481it [00:11, 41.59it/s]
episode 4460 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 492it [00:11, 41.81it/s]
episode 4461 avg_loss: 0.003 total_reward [train:17.000 test:-] e-greedy:0.900: : 483it [00:11, 41.72it/s]
episode 4462 avg_loss: 0.003 total_reward 

episode 4605 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 327it [00:16, 20.36it/s]
episode 4606 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 407it [00:13, 30.00it/s]
episode 4607 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 451it [00:15, 29.79it/s]
episode 4608 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 430it [00:12, 42.36it/s]
episode 4609 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 461it [00:15, 20.10it/s]
episode 4610 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 233it [00:07, 30.63it/s]
episode 4611 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 548it [00:18, 29.93it/s]
episode 4612 avg_loss: 0.004 total_reward [train:16.000 test:-] e-greedy:0.900: : 669it [00:22, 27.68it/s]
episode 4613 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 590it [00:16, 35.88it/s]
episode 4614 avg_loss: 0.004 total_reward [t

episode 4758 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 445it [00:10, 41.99it/s]
episode 4759 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 370it [00:08, 42.32it/s]
episode 4760 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 505it [00:12, 41.57it/s]
episode 4761 avg_loss: 0.004 total_reward [train:19.000 test:-] e-greedy:0.900: : 697it [00:16, 41.92it/s]
episode 4762 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 525it [00:12, 42.10it/s]
episode 4763 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 405it [00:09, 41.92it/s]
episode 4764 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 486it [00:11, 41.91it/s]
episode 4765 avg_loss: 0.004 total_reward [train:4.000 test:-] e-greedy:0.900: : 262it [00:06, 41.66it/s]
episode 4766 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 409it [00:09, 41.73it/s]
episode 4767 avg_loss: 0.004 total_reward [t

episode 4910 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 437it [00:10, 41.59it/s]
episode 4911 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 360it [00:08, 41.49it/s]
episode 4912 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 494it [00:11, 41.57it/s]
episode 4913 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 475it [00:11, 41.00it/s]
episode 4914 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 473it [00:11, 41.88it/s]
episode 4915 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 502it [00:11, 41.88it/s]
episode 4916 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 454it [00:10, 41.92it/s]
episode 4917 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 437it [00:10, 41.47it/s]
episode 4918 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 446it [00:10, 41.68it/s]
episode 4919 avg_loss: 0.005 total_reward

episode 5062 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 276it [00:06, 42.61it/s]
episode 5063 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.900: : 586it [00:13, 42.48it/s]
episode 5064 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:08, 42.58it/s]
episode 5065 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 285it [00:06, 42.89it/s]
episode 5066 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 586it [00:13, 42.89it/s]
episode 5067 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.900: : 584it [00:13, 42.77it/s]
episode 5068 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 470it [00:11, 42.73it/s]
episode 5069 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 504it [00:11, 43.09it/s]
episode 5070 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 453it [00:10, 42.69it/s]
episode 5071 avg_loss: 0.004 total_rewar

episode 5214 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 360it [00:08, 43.38it/s]
episode 5215 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.900: : 531it [00:12, 42.28it/s]
episode 5216 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 387it [00:09, 42.23it/s]
episode 5217 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 479it [00:11, 42.41it/s]
episode 5218 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 418it [00:09, 42.62it/s]
episode 5219 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.900: : 737it [00:17, 42.74it/s]
episode 5220 avg_loss: 0.003 total_reward [train:11.000 test:-] e-greedy:0.900: : 400it [00:09, 41.63it/s]
episode 5221 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 390it [00:09, 43.77it/s]
episode 5222 avg_loss: 0.003 total_reward [train:11.000 test:-] e-greedy:0.900: : 494it [00:11, 42.73it/s]
episode 5223 avg_loss: 0.003 total_reward

episode 5366 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 482it [00:11, 42.56it/s]
episode 5367 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 403it [00:09, 42.84it/s]
episode 5368 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 392it [00:09, 42.56it/s]
episode 5369 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.900: : 516it [00:12, 42.50it/s]
episode 5370 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 384it [00:09, 42.27it/s]
episode 5371 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 455it [00:10, 42.98it/s]
episode 5372 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 522it [00:12, 42.75it/s]
episode 5373 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 467it [00:11, 42.39it/s]
episode 5374 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 433it [00:10, 42.34it/s]
episode 5375 avg_loss: 0.003 total_reward 

episode 5518 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 462it [00:10, 42.36it/s]
episode 5519 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.900: : 678it [00:15, 42.45it/s]
episode 5520 avg_loss: 0.003 total_reward [train:14.000 test:-] e-greedy:0.900: : 502it [00:11, 42.28it/s]
episode 5521 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 430it [00:10, 42.58it/s]
episode 5522 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 401it [00:09, 42.35it/s]
episode 5523 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 391it [00:09, 42.42it/s]
episode 5524 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 414it [00:09, 42.23it/s]
episode 5525 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 477it [00:11, 42.38it/s]
episode 5526 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 316it [00:07, 42.22it/s]
episode 5527 avg_loss: 0.003 total_reward

episode 5670 avg_loss: 0.003 total_reward [train:11.000 test:-] e-greedy:0.900: : 562it [00:13, 42.87it/s]
episode 5671 avg_loss: 0.004 total_reward [train:15.000 test:-] e-greedy:0.900: : 565it [00:13, 42.85it/s]
episode 5672 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 436it [00:10, 42.66it/s]
episode 5673 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 423it [00:09, 42.48it/s]
episode 5674 avg_loss: 0.003 total_reward [train:12.000 test:-] e-greedy:0.900: : 515it [00:12, 42.68it/s]
episode 5675 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 418it [00:09, 42.61it/s]
episode 5676 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 476it [00:11, 42.97it/s]
episode 5677 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 402it [00:09, 42.72it/s]
episode 5678 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 382it [00:08, 42.61it/s]
episode 5679 avg_loss: 0.004 total_rewar

episode 5822 avg_loss: 0.004 total_reward [train:14.000 test:-] e-greedy:0.900: : 594it [00:13, 42.85it/s]
episode 5823 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 463it [00:10, 42.78it/s]
episode 5824 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 434it [00:10, 42.49it/s]
episode 5825 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 501it [00:11, 42.42it/s]
episode 5826 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 476it [00:11, 42.45it/s]
episode 5827 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 395it [00:09, 43.04it/s]
episode 5828 avg_loss: 0.004 total_reward [train:14.000 test:-] e-greedy:0.900: : 486it [00:11, 42.76it/s]
episode 5829 avg_loss: 0.004 total_reward [train:14.000 test:-] e-greedy:0.900: : 400it [00:09, 43.08it/s]
episode 5830 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 350it [00:08, 42.89it/s]
episode 5831 avg_loss: 0.004 total_rewar

episode 5974 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 521it [00:12, 42.44it/s]
episode 5975 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 345it [00:08, 42.99it/s]
episode 5976 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 376it [00:08, 42.37it/s]
episode 5977 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 561it [00:13, 42.69it/s]
episode 5978 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 421it [00:09, 42.80it/s]
episode 5979 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 445it [00:10, 43.22it/s]
episode 5980 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 549it [00:12, 42.33it/s]
episode 5981 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 502it [00:11, 42.45it/s]
episode 5982 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 481it [00:11, 42.32it/s]
episode 5983 avg_loss: 0.004 total_reward

episode 6126 avg_loss: 0.005 total_reward [train:12.000 test:-] e-greedy:0.900: : 636it [00:14, 42.56it/s]
episode 6127 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 408it [00:09, 42.41it/s]
episode 6128 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 368it [00:08, 42.46it/s]
episode 6129 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 386it [00:09, 42.41it/s]
episode 6130 avg_loss: 0.005 total_reward [train:14.000 test:-] e-greedy:0.900: : 496it [00:11, 42.60it/s]
episode 6131 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 524it [00:12, 42.74it/s]
episode 6132 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 451it [00:10, 42.88it/s]
episode 6133 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 444it [00:10, 42.47it/s]
episode 6134 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 594it [00:13, 42.48it/s]
episode 6135 avg_loss: 0.004 total_reward

episode 6278 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 332it [00:08, 41.28it/s]
episode 6279 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 559it [00:13, 42.68it/s]
episode 6280 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 505it [00:11, 42.93it/s]
episode 6281 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 435it [00:10, 43.22it/s]
episode 6282 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 436it [00:10, 42.24it/s]
episode 6283 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 475it [00:11, 42.45it/s]
episode 6284 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 411it [00:09, 42.55it/s]
episode 6285 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 381it [00:08, 42.39it/s]
episode 6286 avg_loss: 0.005 total_reward [train:16.000 test:-] e-greedy:0.900: : 680it [00:15, 43.21it/s]
episode 6287 avg_loss: 0.005 total_rewa

episode 6430 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 359it [00:08, 42.39it/s]
episode 6431 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 344it [00:08, 42.78it/s]
episode 6432 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 386it [00:09, 42.82it/s]
episode 6433 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 477it [00:11, 42.65it/s]
episode 6434 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 538it [00:12, 42.55it/s]
episode 6435 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 553it [00:12, 42.78it/s]
episode 6436 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 442it [00:10, 42.58it/s]
episode 6437 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 439it [00:10, 42.71it/s]
episode 6438 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 426it [00:10, 42.50it/s]
episode 6439 avg_loss: 0.004 total_reward 

episode 6582 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 460it [00:10, 42.90it/s]
episode 6583 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 427it [00:10, 42.38it/s]
episode 6584 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 357it [00:08, 42.80it/s]
episode 6585 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 405it [00:09, 43.87it/s]
episode 6586 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 610it [00:14, 43.26it/s]
episode 6587 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 491it [00:11, 42.59it/s]
episode 6588 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 361it [00:08, 42.58it/s]
episode 6589 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 327it [00:07, 42.55it/s]
episode 6590 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 472it [00:11, 42.90it/s]
episode 6591 avg_loss: 0.004 total_rewar

episode 6734 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 445it [00:10, 42.20it/s]
episode 6735 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 438it [00:10, 42.42it/s]
episode 6736 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 346it [00:08, 42.33it/s]
episode 6737 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 572it [00:13, 42.54it/s]
episode 6738 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 311it [00:07, 41.99it/s]
episode 6739 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 287it [00:06, 42.31it/s]
episode 6740 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 369it [00:08, 42.68it/s]
episode 6741 avg_loss: 0.004 total_reward [train:15.000 test:-] e-greedy:0.900: : 546it [00:12, 42.41it/s]
episode 6742 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 434it [00:10, 42.62it/s]
episode 6743 avg_loss: 0.004 total_reward [

episode 6886 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 421it [00:09, 42.45it/s]
episode 6887 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 332it [00:07, 42.39it/s]
episode 6888 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 399it [00:09, 42.71it/s]
episode 6889 avg_loss: 0.004 total_reward [train:15.000 test:-] e-greedy:0.900: : 542it [00:12, 42.56it/s]
episode 6890 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 298it [00:07, 42.22it/s]
episode 6891 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 370it [00:08, 41.93it/s]
episode 6892 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 311it [00:07, 42.64it/s]
episode 6893 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 485it [00:11, 43.54it/s]
episode 6894 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 452it [00:10, 42.60it/s]
episode 6895 avg_loss: 0.004 total_reward [

episode 7038 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 379it [00:08, 42.48it/s]
episode 7039 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 406it [00:09, 42.90it/s]
episode 7040 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 420it [00:09, 43.56it/s]
episode 7041 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 368it [00:08, 42.87it/s]
episode 7042 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 384it [00:08, 43.06it/s]
episode 7043 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 505it [00:11, 43.66it/s]
episode 7044 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 423it [00:09, 42.75it/s]
episode 7045 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 387it [00:09, 42.65it/s]
episode 7046 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 410it [00:09, 43.28it/s]
episode 7047 avg_loss: 0.004 total_reward [tra

episode 7191 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 421it [00:09, 42.68it/s]
episode 7192 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 369it [00:08, 42.14it/s]
episode 7193 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 263it [00:06, 42.15it/s]
episode 7194 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 437it [00:10, 42.40it/s]
episode 7195 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 443it [00:10, 42.22it/s]
episode 7196 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 442it [00:10, 42.45it/s]
episode 7197 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 590it [00:13, 42.73it/s]
episode 7198 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 481it [00:11, 42.32it/s]
episode 7199 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 470it [00:11, 43.03it/s]
episode 7200 avg_loss: 0.005 total_reward 

episode 7343 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 413it [00:09, 42.42it/s]
episode 7344 avg_loss: 0.005 total_reward [train:14.000 test:-] e-greedy:0.900: : 549it [00:12, 38.13it/s]
episode 7345 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 392it [00:09, 42.17it/s]
episode 7346 avg_loss: 0.005 total_reward [train:12.000 test:-] e-greedy:0.900: : 471it [00:11, 42.62it/s]
episode 7347 avg_loss: 0.005 total_reward [train:14.000 test:-] e-greedy:0.900: : 524it [00:12, 42.53it/s]
episode 7348 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 543it [00:12, 42.70it/s]
episode 7349 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 361it [00:08, 42.31it/s]
episode 7350 avg_loss: 0.005 total_reward [train:10.000 test:6.000] e-greedy:0.900: : 481it [00:13, 36.75it/s]
episode 7351 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 380it [00:08, 43.26it/s]
episode 7352 avg_loss: 0.005 total_re

episode 7497 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 310it [00:07, 40.44it/s]
episode 7498 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 372it [00:08, 42.48it/s]
episode 7499 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 489it [00:11, 42.78it/s]
episode 7500 avg_loss: 0.004 total_reward [train:6.000 test:5.000] e-greedy:0.900: : 346it [00:09, 35.65it/s]
episode 7501 avg_loss: 0.005 total_reward [train:13.000 test:-] e-greedy:0.900: : 517it [00:12, 42.51it/s]
episode 7502 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 384it [00:09, 42.27it/s]
episode 7503 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 516it [00:12, 42.63it/s]
episode 7504 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 360it [00:08, 43.32it/s]
episode 7505 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 377it [00:08, 42.02it/s]
episode 7506 avg_loss: 0.005 total_rewar

episode 7650 avg_loss: 0.005 total_reward [train:6.000 test:8.000] e-greedy:0.900: : 357it [00:10, 33.37it/s]
episode 7651 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 377it [00:08, 42.21it/s]
episode 7652 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 357it [00:08, 42.56it/s]
episode 7653 avg_loss: 0.005 total_reward [train:17.000 test:-] e-greedy:0.900: : 500it [00:11, 43.12it/s]
episode 7654 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 417it [00:09, 42.14it/s]
episode 7655 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 394it [00:09, 42.60it/s]
episode 7656 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 487it [00:11, 42.51it/s]
episode 7657 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 367it [00:08, 42.47it/s]
episode 7658 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 470it [00:11, 43.03it/s]
episode 7659 avg_loss: 0.005 total_rewar

episode 7802 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 464it [00:11, 42.06it/s]
episode 7803 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 358it [00:08, 41.92it/s]
episode 7804 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 357it [00:08, 42.56it/s]
episode 7805 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 405it [00:09, 42.51it/s]
episode 7806 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 326it [00:07, 42.39it/s]
episode 7807 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 444it [00:10, 42.49it/s]
episode 7808 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 484it [00:11, 42.69it/s]
episode 7809 avg_loss: 0.005 total_reward [train:14.000 test:-] e-greedy:0.900: : 545it [00:12, 42.79it/s]
episode 7810 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 354it [00:08, 42.63it/s]
episode 7811 avg_loss: 0.005 total_reward [

episode 7956 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 378it [00:08, 42.70it/s]
episode 7957 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 437it [00:10, 42.61it/s]
episode 7958 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 341it [00:08, 42.39it/s]
episode 7959 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 404it [00:09, 42.56it/s]
episode 7960 avg_loss: 0.004 total_reward [train:4.000 test:-] e-greedy:0.900: : 245it [00:05, 42.53it/s]
episode 7961 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 487it [00:11, 42.47it/s]
episode 7962 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:08, 42.74it/s]
episode 7963 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 639it [00:14, 42.70it/s]
episode 7964 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 323it [00:07, 42.51it/s]
episode 7965 avg_loss: 0.004 total_reward [t

episode 8110 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 463it [00:10, 42.48it/s]
episode 8111 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 348it [00:08, 42.26it/s]
episode 8112 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 326it [00:07, 42.24it/s]
episode 8113 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 357it [00:08, 42.21it/s]
episode 8114 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 293it [00:06, 42.03it/s]
episode 8115 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 334it [00:07, 42.15it/s]
episode 8116 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 336it [00:07, 42.38it/s]
episode 8117 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 423it [00:09, 42.36it/s]
episode 8118 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 371it [00:08, 42.56it/s]
episode 8119 avg_loss: 0.005 total_reward [tr

episode 8264 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:07, 42.59it/s]
episode 8265 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 472it [00:11, 42.43it/s]
episode 8266 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 433it [00:10, 42.55it/s]
episode 8267 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 520it [00:12, 43.04it/s]
episode 8268 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 333it [00:07, 42.12it/s]
episode 8269 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 288it [00:06, 42.17it/s]
episode 8270 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 378it [00:08, 42.48it/s]
episode 8271 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 481it [00:11, 42.09it/s]
episode 8272 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 328it [00:07, 42.26it/s]
episode 8273 avg_loss: 0.004 total_reward 

episode 8418 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 441it [00:10, 42.71it/s]
episode 8419 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 403it [00:09, 42.63it/s]
episode 8420 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 387it [00:09, 42.27it/s]
episode 8421 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 317it [00:07, 42.33it/s]
episode 8422 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 411it [00:09, 42.28it/s]
episode 8423 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 360it [00:08, 42.76it/s]
episode 8424 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 504it [00:11, 42.58it/s]
episode 8425 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 359it [00:08, 42.44it/s]
episode 8426 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 392it [00:09, 42.38it/s]
episode 8427 avg_loss: 0.004 total_reward [tr

episode 8572 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 455it [00:11, 42.50it/s]
episode 8573 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 395it [00:09, 43.68it/s]
episode 8574 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 250it [00:05, 43.12it/s]
episode 8575 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 327it [00:07, 42.34it/s]
episode 8576 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 450it [00:10, 42.22it/s]
episode 8577 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:08, 40.68it/s]
episode 8578 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 246it [00:05, 42.18it/s]
episode 8579 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 377it [00:08, 43.13it/s]
episode 8580 avg_loss: 0.004 total_reward [train:4.000 test:-] e-greedy:0.900: : 280it [00:06, 42.88it/s]
episode 8581 avg_loss: 0.004 total_reward [tra

episode 8726 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 412it [00:09, 42.44it/s]
episode 8727 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 480it [00:11, 42.51it/s]
episode 8728 avg_loss: 0.004 total_reward [train:12.000 test:-] e-greedy:0.900: : 522it [00:12, 42.86it/s]
episode 8729 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 356it [00:08, 42.53it/s]
episode 8730 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:07, 42.62it/s]
episode 8731 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 426it [00:10, 42.56it/s]
episode 8732 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 541it [00:12, 42.20it/s]
episode 8733 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 379it [00:09, 42.03it/s]
episode 8734 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 392it [00:09, 42.54it/s]
episode 8735 avg_loss: 0.004 total_reward [

episode 8880 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 452it [00:10, 41.88it/s]
episode 8881 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:08, 39.87it/s]
episode 8882 avg_loss: 0.004 total_reward [train:5.000 test:-] e-greedy:0.900: : 286it [00:06, 41.70it/s]
episode 8883 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 406it [00:09, 41.41it/s]
episode 8884 avg_loss: 0.004 total_reward [train:9.000 test:-] e-greedy:0.900: : 458it [00:10, 41.84it/s]
episode 8885 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 182it [00:04, 41.01it/s]
episode 8886 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 455it [00:10, 42.01it/s]
episode 8887 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 472it [00:11, 41.53it/s]
episode 8888 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 386it [00:09, 42.17it/s]
episode 8889 avg_loss: 0.004 total_reward [tra

episode 8957 avg_loss: 0.004 total_reward [train:15.000 test:-] e-greedy:0.900: : 509it [00:12, 42.15it/s]
episode 8958 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 532it [00:12, 42.36it/s]
episode 8959 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.900: : 388it [00:09, 42.06it/s]
episode 8960 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 330it [00:07, 43.07it/s]
episode 8961 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 304it [00:07, 40.45it/s]
episode 8962 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 360it [00:08, 42.60it/s]
episode 8963 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 302it [00:07, 42.30it/s]
episode 8964 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 376it [00:08, 41.84it/s]
episode 8965 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 388it [00:09, 41.91it/s]
episode 8966 avg_loss: 0.005 total_reward [t

episode 9111 avg_loss: 0.004 total_reward [train:10.000 test:-] e-greedy:0.900: : 463it [00:11, 42.23it/s]
episode 9112 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 546it [00:12, 42.07it/s]
episode 9113 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 363it [00:08, 42.09it/s]
episode 9114 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 398it [00:09, 41.69it/s]
episode 9115 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 302it [00:07, 41.86it/s]
episode 9116 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 348it [00:08, 41.94it/s]
episode 9117 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 454it [00:10, 41.97it/s]
episode 9118 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 474it [00:11, 41.74it/s]
episode 9119 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 229it [00:05, 41.88it/s]
episode 9120 avg_loss: 0.005 total_reward 

episode 9265 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 361it [00:08, 42.37it/s]
episode 9266 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 360it [00:08, 42.95it/s]
episode 9267 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 454it [00:10, 42.24it/s]
episode 9268 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 291it [00:06, 42.22it/s]
episode 9269 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 305it [00:07, 42.39it/s]
episode 9270 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 332it [00:08, 36.09it/s]
episode 9271 avg_loss: 0.005 total_reward [train:11.000 test:-] e-greedy:0.900: : 481it [00:11, 41.95it/s]
episode 9272 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 308it [00:07, 42.14it/s]
episode 9273 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 271it [00:06, 42.12it/s]
episode 9274 avg_loss: 0.005 total_reward [tr

episode 9419 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 419it [00:10, 41.47it/s]
episode 9420 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 307it [00:07, 41.44it/s]
episode 9421 avg_loss: 0.004 total_reward [train:6.000 test:-] e-greedy:0.900: : 367it [00:08, 41.61it/s]
episode 9422 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 349it [00:08, 41.74it/s]
episode 9423 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 457it [00:11, 41.47it/s]
episode 9424 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 472it [00:11, 41.67it/s]
episode 9425 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 388it [00:09, 41.63it/s]
episode 9426 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:09, 42.15it/s]
episode 9427 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 462it [00:11, 41.72it/s]
episode 9428 avg_loss: 0.005 total_reward [t

episode 9573 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 319it [00:07, 41.25it/s]
episode 9574 avg_loss: 0.004 total_reward [train:11.000 test:-] e-greedy:0.900: : 457it [00:10, 41.84it/s]
episode 9575 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 333it [00:07, 41.70it/s]
episode 9576 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 339it [00:08, 41.95it/s]
episode 9577 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 534it [00:12, 41.49it/s]
episode 9578 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 295it [00:07, 41.65it/s]
episode 9579 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:05, 41.12it/s]
episode 9580 avg_loss: 0.004 total_reward [train:13.000 test:-] e-greedy:0.900: : 448it [00:10, 40.50it/s]
episode 9581 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 396it [00:09, 41.98it/s]
episode 9582 avg_loss: 0.005 total_reward [t

episode 9727 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 356it [00:08, 41.36it/s]
episode 9728 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 394it [00:09, 41.16it/s]
episode 9729 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 501it [00:12, 41.54it/s]
episode 9730 avg_loss: 0.004 total_reward [train:8.000 test:-] e-greedy:0.900: : 379it [00:09, 41.57it/s]
episode 9731 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 396it [00:09, 41.76it/s]
episode 9732 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 264it [00:06, 41.46it/s]
episode 9733 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 457it [00:10, 41.69it/s]
episode 9734 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 438it [00:10, 41.67it/s]
episode 9735 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 277it [00:06, 41.65it/s]
episode 9736 avg_loss: 0.005 total_reward [t

episode 9881 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 454it [00:10, 42.81it/s]
episode 9882 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 465it [00:10, 43.10it/s]
episode 9883 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 400it [00:09, 43.61it/s]
episode 9884 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 472it [00:11, 42.42it/s]
episode 9885 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 347it [00:08, 41.96it/s]
episode 9886 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 367it [00:08, 42.32it/s]
episode 9887 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 527it [00:12, 42.50it/s]
episode 9888 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 410it [00:09, 43.33it/s]
episode 9889 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:09, 42.40it/s]
episode 9890 avg_loss: 0.006 total_reward [t

episode 10034 avg_loss: 0.005 total_reward [train:2.000 test:-] e-greedy:0.900: : 156it [00:03, 41.88it/s]
episode 10035 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 479it [00:11, 42.13it/s]
episode 10036 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 327it [00:07, 41.43it/s]
episode 10037 avg_loss: 0.005 total_reward [train:12.000 test:-] e-greedy:0.900: : 403it [00:09, 41.82it/s]
episode 10038 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 375it [00:08, 43.04it/s]
episode 10039 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 291it [00:06, 42.08it/s]
episode 10040 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 309it [00:07, 42.36it/s]
episode 10041 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 479it [00:11, 42.60it/s]
episode 10042 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 304it [00:07, 41.83it/s]
episode 10043 avg_loss: 0.005 total

episode 10186 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 309it [00:07, 41.19it/s]
episode 10187 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 209it [00:05, 42.07it/s]
episode 10188 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 212it [00:05, 41.02it/s]
episode 10189 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:06, 40.60it/s]
episode 10190 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 250it [00:06, 41.22it/s]
episode 10191 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 310it [00:07, 40.70it/s]
episode 10192 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 286it [00:06, 41.04it/s]
episode 10193 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 367it [00:08, 41.11it/s]
episode 10194 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 189it [00:04, 40.55it/s]
episode 10195 avg_loss: 0.005 total_r

episode 10338 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 364it [00:08, 42.09it/s]
episode 10339 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 398it [00:09, 41.83it/s]
episode 10340 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 398it [00:09, 42.21it/s]
episode 10341 avg_loss: 0.006 total_reward [train:16.000 test:-] e-greedy:0.900: : 550it [00:13, 42.97it/s]
episode 10342 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 260it [00:06, 42.30it/s]
episode 10343 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 232it [00:05, 41.67it/s]
episode 10344 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 300it [00:07, 42.50it/s]
episode 10345 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:05, 41.56it/s]
episode 10346 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 314it [00:07, 42.76it/s]
episode 10347 avg_loss: 0.005 total_

episode 10490 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 259it [00:06, 41.73it/s]
episode 10491 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 336it [00:08, 41.68it/s]
episode 10492 avg_loss: 0.005 total_reward [train:13.000 test:-] e-greedy:0.900: : 439it [00:10, 42.00it/s]
episode 10493 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 206it [00:04, 41.32it/s]
episode 10494 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 298it [00:07, 41.67it/s]
episode 10495 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 299it [00:07, 42.10it/s]
episode 10496 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 419it [00:10, 41.01it/s]
episode 10497 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 305it [00:09, 32.77it/s]
episode 10498 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:04, 41.04it/s]
episode 10499 avg_loss: 0.005 total_

episode 10642 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 340it [00:08, 42.62it/s]
episode 10643 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 260it [00:06, 42.92it/s]
episode 10644 avg_loss: 0.005 total_reward [train:2.000 test:-] e-greedy:0.900: : 156it [00:03, 41.67it/s]
episode 10645 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 263it [00:06, 41.63it/s]
episode 10646 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 339it [00:08, 42.30it/s]
episode 10647 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:08, 42.29it/s]
episode 10648 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 344it [00:08, 41.87it/s]
episode 10649 avg_loss: 0.005 total_reward [train:12.000 test:-] e-greedy:0.900: : 428it [00:12, 33.26it/s]
episode 10650 avg_loss: 0.004 total_reward [train:3.000 test:9.000] e-greedy:0.900: : 193it [00:07, 42.00it/s]
episode 10651 avg_loss: 0.005 to

episode 10794 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 309it [00:07, 40.86it/s]
episode 10795 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 346it [00:08, 40.34it/s]
episode 10796 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:08, 40.48it/s]
episode 10797 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 235it [00:06, 38.28it/s]
episode 10798 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 397it [00:10, 37.43it/s]
episode 10799 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 461it [00:10, 42.07it/s]
episode 10800 avg_loss: 0.005 total_reward [train:7.000 test:5.000] e-greedy:0.900: : 329it [00:09, 35.51it/s]
episode 10801 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 273it [00:06, 42.31it/s]
episode 10802 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 442it [00:10, 42.40it/s]
episode 10803 avg_loss: 0.005 to

episode 10946 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.900: : 320it [00:07, 43.01it/s]
episode 10947 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 368it [00:08, 41.79it/s]
episode 10948 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 250it [00:05, 42.96it/s]
episode 10949 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 469it [00:11, 42.48it/s]
episode 10950 avg_loss: 0.006 total_reward [train:7.000 test:8.000] e-greedy:0.900: : 348it [00:10, 33.33it/s]
episode 10951 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 259it [00:06, 41.99it/s]
episode 10952 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 353it [00:08, 42.06it/s]
episode 10953 avg_loss: 0.006 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 41.04it/s]
episode 10954 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 206it [00:04, 41.43it/s]
episode 10955 avg_loss: 0.006 tot

episode 11098 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 306it [00:07, 42.14it/s]
episode 11099 avg_loss: 0.005 total_reward [train:12.000 test:-] e-greedy:0.900: : 406it [00:09, 42.33it/s]
episode 11100 avg_loss: 0.006 total_reward [train:8.000 test:8.000] e-greedy:0.900: : 438it [00:12, 34.20it/s]
episode 11101 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 445it [00:10, 42.54it/s]
episode 11102 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 321it [00:07, 41.87it/s]
episode 11103 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:07, 41.55it/s]
episode 11104 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 421it [00:10, 41.85it/s]
episode 11105 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:07, 41.81it/s]
episode 11106 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 461it [00:10, 42.18it/s]
episode 11107 avg_loss: 0.005 t

episode 11250 avg_loss: 0.006 total_reward [train:12.000 test:3.000] e-greedy:0.900: : 498it [00:13, 37.95it/s]
episode 11251 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 337it [00:08, 41.54it/s]
episode 11252 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 407it [00:09, 41.53it/s]
episode 11253 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 299it [00:07, 41.33it/s]
episode 11254 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:09, 41.78it/s]
episode 11255 avg_loss: 0.006 total_reward [train:3.000 test:-] e-greedy:0.900: : 207it [00:04, 41.49it/s]
episode 11256 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 487it [00:11, 42.03it/s]
episode 11257 avg_loss: 0.006 total_reward [train:3.000 test:-] e-greedy:0.900: : 187it [00:04, 41.92it/s]
episode 11258 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 408it [00:09, 42.02it/s]
episode 11259 avg_loss: 0.006 to

episode 11402 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:07, 41.49it/s]
episode 11403 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:09, 41.92it/s]
episode 11404 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 373it [00:08, 41.85it/s]
episode 11405 avg_loss: 0.006 total_reward [train:4.000 test:-] e-greedy:0.900: : 270it [00:06, 42.50it/s]
episode 11406 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 442it [00:10, 42.04it/s]
episode 11407 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 427it [00:10, 41.67it/s]
episode 11408 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 358it [00:08, 41.86it/s]
episode 11409 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 422it [00:10, 42.11it/s]
episode 11410 avg_loss: 0.006 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:05, 41.43it/s]
episode 11411 avg_loss: 0.006 total_r

episode 11554 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 226it [00:05, 39.75it/s]
episode 11555 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 502it [00:12, 41.08it/s]
episode 11556 avg_loss: 0.006 total_reward [train:11.000 test:-] e-greedy:0.900: : 504it [00:12, 41.31it/s]
episode 11557 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 516it [00:12, 41.61it/s]
episode 11558 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 427it [00:10, 41.88it/s]
episode 11559 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 407it [00:09, 41.65it/s]
episode 11560 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 373it [00:08, 41.76it/s]
episode 11561 avg_loss: 0.006 total_reward [train:4.000 test:-] e-greedy:0.900: : 227it [00:05, 41.47it/s]
episode 11562 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 392it [00:09, 42.08it/s]
episode 11563 avg_loss: 0.006 total

episode 11706 avg_loss: 0.006 total_reward [train:4.000 test:-] e-greedy:0.900: : 235it [00:06, 40.34it/s]
episode 11707 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 352it [00:09, 40.18it/s]
episode 11708 avg_loss: 0.006 total_reward [train:11.000 test:-] e-greedy:0.900: : 396it [00:09, 40.45it/s]
episode 11709 avg_loss: 0.006 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:05, 40.58it/s]
episode 11710 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 445it [00:10, 41.29it/s]
episode 11711 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 379it [00:09, 41.03it/s]
episode 11712 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 273it [00:06, 41.25it/s]
episode 11713 avg_loss: 0.005 total_reward [train:9.000 test:-] e-greedy:0.900: : 430it [00:10, 41.62it/s]
episode 11714 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 314it [00:07, 40.92it/s]
episode 11715 avg_loss: 0.006 total_

episode 11858 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 328it [00:08, 36.20it/s]
episode 11859 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 351it [00:09, 37.07it/s]
episode 11860 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 303it [00:08, 36.76it/s]
episode 11861 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 455it [00:12, 36.70it/s]
episode 11862 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 312it [00:08, 38.22it/s]
episode 11863 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 431it [00:10, 39.46it/s]
episode 11864 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 467it [00:11, 39.93it/s]
episode 11865 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 351it [00:08, 40.50it/s]
episode 11866 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 418it [00:10, 41.10it/s]
episode 11867 avg_loss: 0.006 total_

episode 12010 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 383it [00:09, 39.15it/s]
episode 12011 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 369it [00:09, 39.16it/s]
episode 12012 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:07, 39.13it/s]
episode 12013 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 292it [00:07, 39.20it/s]
episode 12014 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 374it [00:09, 30.68it/s]
episode 12015 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 356it [00:09, 39.92it/s]
episode 12016 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 298it [00:07, 38.78it/s]
episode 12017 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 296it [00:07, 38.37it/s]
episode 12018 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 374it [00:09, 38.55it/s]
episode 12019 avg_loss: 0.006 total_

episode 12162 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 39.04it/s]
episode 12163 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 446it [00:11, 39.67it/s]
episode 12164 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 338it [00:08, 39.08it/s]
episode 12165 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 321it [00:08, 36.69it/s]
episode 12166 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 373it [00:11, 33.38it/s]
episode 12167 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 345it [00:10, 25.48it/s]
episode 12168 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 321it [00:10, 30.14it/s]
episode 12169 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 318it [00:08, 37.18it/s]
episode 12170 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 316it [00:08, 38.16it/s]
episode 12171 avg_loss: 0.006 total_r

episode 12314 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 348it [00:10, 34.46it/s]
episode 12315 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 470it [00:13, 33.97it/s]
episode 12316 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 287it [00:08, 33.59it/s]
episode 12317 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.900: : 216it [00:06, 34.55it/s]
episode 12318 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 392it [00:11, 33.10it/s]
episode 12319 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 309it [00:09, 33.15it/s]
episode 12320 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 248it [00:07, 33.33it/s]
episode 12321 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 348it [00:10, 33.58it/s]
episode 12322 avg_loss: 0.006 total_reward [train:14.000 test:-] e-greedy:0.900: : 507it [00:15, 33.19it/s]
episode 12323 avg_loss: 0.005 total_

episode 12466 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 325it [00:09, 32.51it/s]
episode 12467 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 423it [00:12, 32.83it/s]
episode 12468 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 536it [00:16, 32.70it/s]
episode 12469 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 356it [00:11, 32.72it/s]
episode 12470 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 458it [00:14, 31.78it/s]
episode 12471 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 322it [00:10, 31.59it/s]
episode 12472 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 371it [00:11, 31.63it/s]
episode 12473 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 339it [00:10, 31.65it/s]
episode 12474 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 597it [00:18, 32.06it/s]
episode 12475 avg_loss: 0.006 total

episode 12618 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 288it [00:09, 30.27it/s]
episode 12619 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:10, 29.06it/s]
episode 12620 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 302it [00:10, 28.84it/s]
episode 12621 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 325it [00:11, 29.08it/s]
episode 12622 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 375it [00:12, 29.04it/s]
episode 12623 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 382it [00:13, 28.66it/s]
episode 12624 avg_loss: 0.005 total_reward [train:8.000 test:-] e-greedy:0.900: : 356it [00:12, 28.60it/s]
episode 12625 avg_loss: 0.005 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:08, 28.58it/s]
episode 12626 avg_loss: 0.005 total_reward [train:10.000 test:-] e-greedy:0.900: : 448it [00:15, 28.48it/s]
episode 12627 avg_loss: 0.005 total_

episode 12770 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 342it [00:13, 24.80it/s]
episode 12771 avg_loss: 0.005 total_reward [train:5.000 test:-] e-greedy:0.900: : 322it [00:12, 25.02it/s]
episode 12772 avg_loss: 0.006 total_reward [train:11.000 test:-] e-greedy:0.900: : 410it [00:16, 24.93it/s]
episode 12773 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 427it [00:21, 19.64it/s]
episode 12774 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 402it [00:16, 24.13it/s]
episode 12775 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 327it [00:13, 24.72it/s]
episode 12776 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 350it [01:15,  4.65it/s]
episode 12777 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 327it [00:15, 21.18it/s]
episode 12778 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 393it [00:17, 21.94it/s]
episode 12779 avg_loss: 0.006 total_

episode 12922 avg_loss: 0.006 total_reward [train:9.000 test:-] e-greedy:0.900: : 450it [00:18, 25.00it/s]
episode 12923 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 302it [00:11, 26.70it/s]
episode 12924 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 384it [00:15, 24.85it/s]
episode 12925 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 415it [00:15, 27.63it/s]
episode 12926 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 466it [00:22, 23.28it/s]
episode 12927 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 355it [00:17, 27.47it/s]
episode 12928 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 410it [00:16, 25.07it/s]
episode 12929 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 330it [00:15, 26.41it/s]
episode 12930 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 373it [00:19, 18.86it/s]
episode 12931 avg_loss: 0.007 total_r

episode 13074 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 355it [00:14, 25.30it/s]
episode 13075 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 328it [00:12, 26.97it/s]
episode 13076 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 293it [00:10, 27.83it/s]
episode 13077 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 313it [00:09, 34.91it/s]
episode 13078 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 340it [00:09, 34.01it/s]
episode 13079 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 375it [00:15, 24.92it/s]
episode 13080 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 369it [00:10, 34.79it/s]
episode 13081 avg_loss: 0.005 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:09, 34.73it/s]
episode 13082 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 317it [00:09, 33.38it/s]
episode 13083 avg_loss: 0.006 total_r

episode 13226 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 292it [00:17, 16.84it/s]
episode 13227 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 370it [00:16, 22.97it/s]
episode 13228 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 266it [00:11, 22.58it/s]
episode 13229 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 377it [00:19, 19.81it/s]
episode 13230 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 383it [00:19, 19.44it/s]
episode 13231 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 304it [00:15, 20.23it/s]
episode 13232 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 344it [00:18, 22.65it/s]
episode 13233 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 391it [00:17, 22.23it/s]
episode 13234 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 332it [00:14, 22.94it/s]
episode 13235 avg_loss: 0.006 total_r

episode 13378 avg_loss: 0.006 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:08, 23.82it/s]
episode 13379 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 332it [00:14, 23.46it/s]
episode 13380 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 432it [00:18, 23.86it/s]
episode 13381 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 277it [00:12, 22.94it/s]
episode 13382 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 311it [00:13, 22.72it/s]
episode 13383 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 337it [00:14, 22.71it/s]
episode 13384 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 401it [00:20, 19.25it/s]
episode 13385 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 407it [00:18, 23.21it/s]
episode 13386 avg_loss: 0.006 total_reward [train:6.000 test:-] e-greedy:0.900: : 358it [00:15, 22.98it/s]
episode 13387 avg_loss: 0.006 total_r

episode 13530 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 392it [00:20, 19.07it/s]
episode 13531 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 359it [00:15, 22.78it/s]
episode 13532 avg_loss: 0.006 total_reward [train:2.000 test:-] e-greedy:0.900: : 159it [00:07, 22.77it/s]
episode 13533 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 429it [00:19, 22.09it/s]
episode 13534 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 300it [00:19, 23.13it/s]
episode 13535 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 449it [00:25, 23.00it/s]
episode 13536 avg_loss: 0.006 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:16, 19.05it/s]
episode 13537 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:19, 15.49it/s]
episode 13538 avg_loss: 0.006 total_reward [train:11.000 test:-] e-greedy:0.900: : 403it [00:23, 17.28it/s]
episode 13539 avg_loss: 0.006 total_

episode 13682 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 319it [00:15, 23.50it/s]
episode 13683 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 419it [00:18, 22.55it/s]
episode 13684 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 457it [00:20, 22.63it/s]
episode 13685 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 428it [00:19, 22.35it/s]
episode 13686 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 367it [00:16, 22.17it/s]
episode 13687 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 380it [00:16, 22.66it/s]
episode 13688 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 409it [00:22, 17.85it/s]
episode 13689 avg_loss: 0.007 total_reward [train:10.000 test:-] e-greedy:0.900: : 389it [00:19, 23.36it/s]
episode 13690 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 370it [00:23, 15.69it/s]
episode 13691 avg_loss: 0.007 total_

episode 13834 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 310it [00:16, 18.74it/s]
episode 13835 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:17, 19.83it/s]
episode 13836 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 368it [00:16, 23.45it/s]
episode 13837 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 394it [00:17, 23.10it/s]
episode 13838 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 333it [00:14, 23.61it/s]
episode 13839 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 415it [00:17, 25.11it/s]
episode 13840 avg_loss: 0.006 total_reward [train:10.000 test:-] e-greedy:0.900: : 438it [00:18, 24.18it/s]
episode 13841 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 289it [00:12, 23.60it/s]
episode 13842 avg_loss: 0.006 total_reward [train:4.000 test:-] e-greedy:0.900: : 249it [00:10, 23.82it/s]
episode 13843 avg_loss: 0.008 total_

episode 13986 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 333it [00:19, 18.59it/s]
episode 13987 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 357it [00:15, 22.45it/s]
episode 13988 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 287it [00:13, 14.43it/s]
episode 13989 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:21, 21.88it/s]
episode 13990 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 315it [00:16, 19.40it/s]
episode 13991 avg_loss: 0.007 total_reward [train:10.000 test:-] e-greedy:0.900: : 455it [00:24, 18.29it/s]
episode 13992 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:11, 23.78it/s]
episode 13993 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 442it [00:21, 20.22it/s]
episode 13994 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 402it [00:18, 22.45it/s]
episode 13995 avg_loss: 0.006 total_

episode 14138 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 386it [00:22, 17.02it/s]
episode 14139 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 340it [00:19, 17.06it/s]
episode 14140 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 410it [00:18, 21.67it/s]
episode 14141 avg_loss: 0.008 total_reward [train:9.000 test:-] e-greedy:0.900: : 418it [00:26, 15.55it/s]
episode 14142 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 332it [00:18, 17.50it/s]
episode 14143 avg_loss: 0.008 total_reward [train:7.000 test:-] e-greedy:0.900: : 361it [00:17, 21.08it/s]
episode 14144 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 329it [00:17, 18.35it/s]
episode 14145 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 370it [00:19, 22.97it/s]
episode 14146 avg_loss: 0.008 total_reward [train:6.000 test:-] e-greedy:0.900: : 345it [00:16, 20.58it/s]
episode 14147 avg_loss: 0.007 total_r

episode 14290 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 386it [00:25,  3.15it/s]
episode 14291 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 309it [00:16, 16.22it/s]
episode 14292 avg_loss: 0.008 total_reward [train:3.000 test:-] e-greedy:0.900: : 225it [00:12, 19.14it/s]
episode 14293 avg_loss: 0.006 total_reward [train:7.000 test:-] e-greedy:0.900: : 402it [00:25, 16.02it/s]
episode 14294 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 329it [00:16, 22.11it/s]
episode 14295 avg_loss: 0.008 total_reward [train:5.000 test:-] e-greedy:0.900: : 329it [00:21, 15.51it/s]
episode 14296 avg_loss: 0.006 total_reward [train:3.000 test:-] e-greedy:0.900: : 230it [00:15, 14.47it/s]
episode 14297 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 271it [00:18, 14.90it/s]
episode 14298 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 330it [00:20, 21.61it/s]
episode 14299 avg_loss: 0.007 total_r

episode 14442 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:20, 19.83it/s]
episode 14443 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:18, 21.05it/s]
episode 14444 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 280it [00:13, 21.39it/s]
episode 14445 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 297it [00:16, 18.11it/s]
episode 14446 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 277it [00:15, 18.29it/s]
episode 14447 avg_loss: 0.006 total_reward [train:8.000 test:-] e-greedy:0.900: : 385it [00:23, 21.24it/s]
episode 14448 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 262it [00:11, 22.07it/s]
episode 14449 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 340it [00:15, 19.57it/s]
episode 14450 avg_loss: 0.007 total_reward [train:6.000 test:7.000] e-greedy:0.900: : 358it [00:30,  1.83it/s]
episode 14451 avg_loss: 0.006 tot

episode 14594 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 395it [00:26, 15.14it/s]
episode 14595 avg_loss: 0.008 total_reward [train:6.000 test:-] e-greedy:0.900: : 334it [00:18, 18.52it/s]
episode 14596 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 360it [00:24, 18.07it/s]
episode 14597 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 278it [00:12, 21.46it/s]
episode 14598 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 276it [00:14, 20.53it/s]
episode 14599 avg_loss: 0.008 total_reward [train:5.000 test:-] e-greedy:0.900: : 307it [00:15, 23.34it/s]
episode 14600 avg_loss: 0.007 total_reward [train:3.000 test:5.000] e-greedy:0.900: : 228it [00:13, 23.25it/s]
episode 14601 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 425it [00:18, 22.45it/s]
episode 14602 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 405it [00:17, 23.85it/s]
episode 14603 avg_loss: 0.007 tot

episode 14746 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 285it [00:19, 14.37it/s]
episode 14747 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 548it [00:36, 14.93it/s]
episode 14748 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 366it [00:17, 19.14it/s]
episode 14749 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:26, 12.86it/s]
episode 14750 avg_loss: 0.007 total_reward [train:3.000 test:7.000] e-greedy:0.900: : 227it [00:19,  1.01it/s]
episode 14751 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 269it [00:15, 17.08it/s]
episode 14752 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 317it [00:21, 20.94it/s]
episode 14753 avg_loss: 0.008 total_reward [train:7.000 test:-] e-greedy:0.900: : 331it [00:19, 17.23it/s]
episode 14754 avg_loss: 0.008 total_reward [train:7.000 test:-] e-greedy:0.900: : 394it [00:20, 19.21it/s]
episode 14755 avg_loss: 0.008 tot

episode 14898 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 326it [00:16, 19.76it/s]
episode 14899 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:16, 21.16it/s]
episode 14900 avg_loss: 0.007 total_reward [train:7.000 test:5.000] e-greedy:0.900: : 398it [00:20, 21.63it/s]
episode 14901 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 270it [00:11, 22.93it/s]
episode 14902 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:14, 23.39it/s]
episode 14903 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 226it [00:09, 22.81it/s]
episode 14904 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 333it [00:14, 20.48it/s]
episode 14905 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:14, 20.74it/s]
episode 14906 avg_loss: 0.007 total_reward [train:10.000 test:-] e-greedy:0.900: : 350it [00:18, 19.02it/s]
episode 14907 avg_loss: 0.007 to

episode 15050 avg_loss: 0.007 total_reward [train:5.000 test:5.000] e-greedy:0.900: : 275it [00:24, 21.64it/s]
episode 15051 avg_loss: 0.008 total_reward [train:3.000 test:-] e-greedy:0.900: : 202it [00:14, 13.74it/s]
episode 15052 avg_loss: 0.007 total_reward [train:3.000 test:-] e-greedy:0.900: : 232it [00:16, 19.69it/s]
episode 15053 avg_loss: 0.008 total_reward [train:7.000 test:-] e-greedy:0.900: : 333it [00:15, 21.19it/s]
episode 15054 avg_loss: 0.008 total_reward [train:6.000 test:-] e-greedy:0.900: : 307it [00:19, 15.35it/s]
episode 15055 avg_loss: 0.008 total_reward [train:5.000 test:-] e-greedy:0.900: : 313it [00:18, 17.35it/s]
episode 15056 avg_loss: 0.008 total_reward [train:10.000 test:-] e-greedy:0.900: : 491it [00:35, 13.72it/s]
episode 15057 avg_loss: 0.007 total_reward [train:8.000 test:-] e-greedy:0.900: : 408it [00:24, 16.66it/s]
episode 15058 avg_loss: 0.008 total_reward [train:7.000 test:-] e-greedy:0.900: : 364it [00:17, 20.64it/s]
episode 15059 avg_loss: 0.008 to

episode 15202 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 297it [00:17, 16.56it/s]
episode 15203 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 372it [00:18, 22.00it/s]
episode 15204 avg_loss: 0.007 total_reward [train:3.000 test:-] e-greedy:0.900: : 185it [00:08, 21.27it/s]
episode 15205 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:16, 20.72it/s]
episode 15206 avg_loss: 0.008 total_reward [train:10.000 test:-] e-greedy:0.900: : 470it [00:27, 19.32it/s]
episode 15207 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 292it [00:13, 21.26it/s]
episode 15208 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 363it [00:22, 16.40it/s]
episode 15209 avg_loss: 0.007 total_reward [train:3.000 test:-] e-greedy:0.900: : 222it [00:10, 22.05it/s]
episode 15210 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 284it [00:23, 17.95it/s]
episode 15211 avg_loss: 0.007 total_

episode 15354 avg_loss: 0.007 total_reward [train:11.000 test:-] e-greedy:0.900: : 514it [00:31, 16.57it/s]
episode 15355 avg_loss: 0.007 total_reward [train:4.000 test:-] e-greedy:0.900: : 282it [00:13, 21.10it/s]
episode 15356 avg_loss: 0.007 total_reward [train:6.000 test:-] e-greedy:0.900: : 309it [00:18, 12.51it/s]
episode 15357 avg_loss: 0.007 total_reward [train:7.000 test:-] e-greedy:0.900: : 372it [00:20, 18.59it/s]
episode 15358 avg_loss: 0.008 total_reward [train:6.000 test:-] e-greedy:0.900: : 332it [00:15, 21.77it/s]
episode 15359 avg_loss: 0.008 total_reward [train:8.000 test:-] e-greedy:0.900: : 370it [00:17, 20.98it/s]
episode 15360 avg_loss: 0.007 total_reward [train:9.000 test:-] e-greedy:0.900: : 475it [00:25, 18.44it/s]
episode 15361 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 303it [00:17, 17.52it/s]
episode 15362 avg_loss: 0.007 total_reward [train:5.000 test:-] e-greedy:0.900: : 269it [00:12, 13.80it/s]
episode 15363 avg_loss: 0.008 total_

KeyboardInterrupt: 

In [4]:
# Save the Q-network to "dqn_exp5.h5" after the interrupted training run.
# NOTE(review): presumably this writes the learned weights in HDF5 format
# (going by the .h5 extension) -- confirm against the ReNom Model.save docs.
q_network.save("dqn_exp5.h5")
# Disabled: would rebuild the DQN agent around the existing environment and
# network; left commented out so the `model` already in the session is reused.
# model = DQN(custom_env, q_network)

In [None]:
# Run the agent's test routine with rendering enabled.
# NOTE(review): assumes `model` is the DQN instance created in an earlier
# cell and that render=True displays the environment -- confirm in renom_rl.
model.test(render=True)

In [None]:
import time

# Micro-benchmark: time how long NumPy takes to build a random permutation.
# perf_counter() replaces time.time() because it is the monotonic,
# highest-resolution clock intended for measuring short intervals; the
# wall-clock time.time() can be too coarse for a microsecond-scale operation
# and may jump if the system clock is adjusted.
start_t = time.perf_counter()
a = np.random.permutation(int(1e1))  # random ordering of the integers 0..9
print(time.perf_counter() - start_t)  # elapsed time in seconds

# 