In [1]:
import gym
import numpy as np
import renom as rm
import matplotlib.pyplot as plt
from renom.utility.initializer import Gaussian
from renom.cuda import set_cuda_active
from renom_rl.dqn import DQN
from renom_rl.env import BaseEnv
from gym.core import Env
from PIL import Image

# Turn on ReNom's CUDA backend. NOTE(review): presumably requires a CUDA-enabled
# ReNom install and a visible GPU — confirm before running on a CPU-only machine.
set_cuda_active(True)
# Raw Gym environment; it is wrapped by CustomEnv below, which implements its own
# 4-step action repeat in step().
env = gym.make('BreakoutNoFrameskip-v4')

class CustomEnv(BaseEnv):
    """Adapter that exposes a Gym Atari environment through renom_rl's BaseEnv API.

    Each call to ``step`` repeats the chosen action for up to ``_FRAME_SKIP``
    emulator frames, converts the last frame to an 84x84 grayscale image scaled
    to [0, 1] and returns the most recent ``_STACK_SIZE`` such images stacked
    along the first axis.

    Attributes:
        env: The wrapped Gym environment.
        action_shape: Number of discrete actions (hard-coded to 4).
        state_shape: Shape of a full observation, ``(4, 84, 84)``.
        previous_frames: List of the most recent preprocessed frames
            (at most ``_STACK_SIZE`` entries).
    """

    # Number of emulator frames each agent action is repeated for.
    _FRAME_SKIP = 4
    # Number of preprocessed frames stacked into one observation.
    _STACK_SIZE = 4

    def __init__(self, env):
        """Store the wrapped env and declare the shapes before BaseEnv init runs."""
        self.env = env
        self.action_shape = 4
        self.state_shape = (4, 84, 84)
        self.previous_frames = []
        super(CustomEnv, self).__init__()

    def reset(self):
        """Reset the wrapped env and return the first stacked observation.

        After resetting, between 4 and 32 (inclusive) random actions are played
        so that episodes start from varied positions. Because at least 4 steps
        are taken, the frame stack is completely refilled with frames from the
        new episode.

        Returns:
            np.ndarray: Stacked observation produced by the last warm-up step.
        """
        self.env.reset()
        # Drop frames left over from the previous episode so they cannot leak
        # into the first observations of the new one.
        self.previous_frames = []
        n_step = np.random.randint(4, 32 + 1)
        for _ in range(n_step):
            state, _, _ = self.step(self.env.action_space.sample())
        return state

    def sample(self):
        """Return a random action drawn from the wrapped env's action space."""
        return self.env.action_space.sample()

    def render(self):
        """Render the wrapped environment."""
        self.env.render()

    def _preprocess(self, state):
        """Convert a raw frame into an 84x84 float image with values in [0, 1].

        The frame is resized to 84 wide by 110 high, converted to 8-bit
        grayscale, scaled by 1/255 and cropped to rows 26..109.

        Args:
            state: Raw frame as an array accepted by ``PIL.Image.fromarray``.

        Returns:
            np.ndarray: Array of shape ``(84, 84)`` and dtype float64.
        """
        resized_image = Image.fromarray(state).resize((84, 110)).convert('L')
        # Converting the image directly yields a (110, 84) array and avoids the
        # slow per-pixel sequence produced by getdata().
        image_array = np.asarray(resized_image, dtype=np.float64) / 255.
        return image_array[26:110, :]

    def step(self, action):
        """Repeat ``action`` for up to ``_FRAME_SKIP`` frames and return the result.

        Args:
            action: Action forwarded unchanged to the wrapped env's ``step``.

        Returns:
            tuple: ``(state, reward, terminal)`` where ``state`` is the stack of
            the most recent preprocessed frames, ``reward`` is 1 if the summed
            raw reward over the repeated frames is positive and 0 otherwise,
            and ``terminal`` is True if the wrapped env reported the episode
            end during the repeat.
        """
        reward_list = []
        terminal = False
        for _ in range(self._FRAME_SKIP):
            s, r, t, _ = self.env.step(action)
            reward_list.append(r)
            if t:
                # Do not keep stepping an environment that has already ended.
                terminal = True
                break

        # Only the last frame is used; the earlier frames are skipped, so there
        # is no need to preprocess them.
        frame = self._preprocess(s)

        # Append the new frame and keep only the most recent _STACK_SIZE frames.
        self.previous_frames = (self.previous_frames + [frame])[-self._STACK_SIZE:]
        state = np.stack(self.previous_frames)

        # `np.int` was only an alias of the builtin `int` and has been removed
        # from NumPy; `int` gives the same dtype without the AttributeError.
        reward = (np.sum(reward_list) > 0).astype(int)
        return state, reward, terminal
    
def _build_q_network(n_actions):
    """Assemble the convolutional Q-network with one output unit per action.

    Three Conv2d+Relu stages are followed by Flatten, a 512-unit Dense layer
    with Relu, and a final Dense layer of ``n_actions`` units.
    """
    # (output channels, filter size, stride) for each convolutional stage.
    conv_specs = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    layers = []
    for channels, kernel, stride in conv_specs:
        layers.append(rm.Conv2d(channels, filter=kernel, stride=stride))
        layers.append(rm.Relu())

    layers.extend([rm.Flatten(),
                   rm.Dense(512),
                   rm.Relu(),
                   rm.Dense(n_actions)])
    return rm.Sequential(layers)


# Wrap the raw Gym env, then size the network's output from the wrapper.
custom_env = CustomEnv(env)
q_network = _build_q_network(custom_env.action_shape)


In [None]:
# Create the DQN agent from the wrapped environment and the Q-network built in the first cell.
model = DQN(custom_env, q_network)

In [None]:
# Start training; this is a long-running cell.
# NOTE(review): render=True presumably draws the game window while training, and
# greedy_step presumably sets the number of steps over which the e-greedy value
# is annealed — confirm both against the renom_rl DQN.train documentation.
model.train(render=True, greedy_step=100000)

Run random 5000 step for storing experiences


episode 001 avg_loss: 0.004 total_reward [train:2.000 test:-] e-greedy:0.002: : 185it [00:05, 38.09it/s]
episode 002 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.004: : 261it [00:06, 37.39it/s]
episode 003 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.005: : 117it [00:03, 36.57it/s]
episode 004 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.007: : 226it [00:06, 37.56it/s]
episode 005 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.009: : 166it [00:04, 37.87it/s]
episode 006 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.010: : 169it [00:04, 38.05it/s]
episode 007 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.011: : 102it [00:02, 37.33it/s]
episode 008 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.013: : 185it [00:05, 36.65it/s]
episode 009 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.014: : 141it [00:03, 36.83it/s]
episode 010 avg_loss: 0.003 total_reward [train:0.000 t

episode 079 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.120: : 149it [00:04, 31.59it/s]
episode 080 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.122: : 202it [00:06, 32.00it/s]
episode 081 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.123: : 173it [00:05, 31.64it/s]
episode 082 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.125: : 181it [00:05, 31.88it/s]
episode 083 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.126: : 135it [00:04, 30.90it/s]
episode 084 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.128: : 215it [00:05, 35.90it/s]
episode 085 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.129: : 133it [00:03, 36.66it/s]
episode 086 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.131: : 201it [00:05, 37.42it/s]
episode 087 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.133: : 210it [00:05, 36.48it/s]
episode 088 avg_loss: 0.002 total_reward [train:1.000 t

episode 156 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.259: : 224it [00:06, 36.00it/s]
episode 157 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.261: : 194it [00:05, 36.93it/s]
episode 158 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.263: : 173it [00:04, 37.83it/s]
episode 159 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.265: : 247it [00:06, 37.24it/s]
episode 160 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.267: : 297it [00:07, 37.35it/s]
episode 161 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.269: : 179it [00:04, 37.86it/s]
episode 162 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.271: : 178it [00:04, 36.86it/s]
episode 163 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.273: : 213it [00:05, 37.30it/s]
episode 164 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.275: : 301it [00:08, 37.25it/s]
episode 165 avg_loss: 0.002 total_reward [train:2.000 t

episode 233 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.426: : 226it [00:06, 36.39it/s]
episode 234 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.428: : 183it [00:04, 36.87it/s]
episode 235 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.430: : 234it [00:06, 37.81it/s]
episode 236 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.432: : 201it [00:05, 36.90it/s]
episode 237 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.433: : 197it [00:05, 37.32it/s]
episode 238 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.436: : 311it [00:08, 37.34it/s]
episode 239 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.439: : 270it [00:07, 37.62it/s]
episode 240 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.441: : 299it [00:08, 36.68it/s]
episode 241 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.443: : 202it [00:05, 36.94it/s]
episode 242 avg_loss: 0.001 total_reward [train:3.000 t

episode 310 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.619: : 295it [00:08, 36.26it/s]
episode 311 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.620: : 160it [00:04, 37.00it/s]
episode 312 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.623: : 321it [00:08, 37.07it/s]
episode 313 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.625: : 277it [00:07, 37.21it/s]
episode 314 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.628: : 309it [00:08, 36.45it/s]
episode 315 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.631: : 272it [00:07, 37.14it/s]
episode 316 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.632: : 206it [00:05, 36.87it/s]
episode 317 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.635: : 308it [00:08, 37.47it/s]
episode 318 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.637: : 249it [00:06, 36.96it/s]
episode 319 avg_loss: 0.002 total_reward [train:3.000 t

episode 387 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.825: : 278it [00:07, 36.16it/s]
episode 388 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.828: : 299it [00:08, 36.53it/s]
episode 389 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.832: : 389it [00:10, 36.43it/s]
episode 390 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.835: : 346it [00:09, 36.67it/s]
episode 391 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.838: : 356it [00:09, 36.91it/s]
episode 392 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.841: : 329it [00:09, 36.16it/s]
episode 393 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.845: : 411it [00:11, 36.79it/s]
episode 394 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.849: : 500it [00:13, 37.55it/s]
episode 395 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.852: : 345it [00:09, 36.23it/s]
episode 396 avg_loss: 0.002 total_reward [train:4.000 t

episode 464 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 325it [00:08, 36.73it/s]
episode 465 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 318it [00:08, 36.33it/s]
episode 466 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 409it [00:11, 36.39it/s]
episode 467 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 361it [00:09, 36.60it/s]
episode 468 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 422it [00:11, 36.40it/s]
episode 469 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 338it [00:09, 36.53it/s]
episode 470 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 401it [00:10, 36.53it/s]
episode 471 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 473it [00:12, 36.55it/s]
episode 472 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 410it [00:11, 36.50it/s]
episode 473 avg_loss: 0.001 total_reward [train:3.000 t

episode 541 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 196it [00:05, 36.52it/s]
episode 542 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 454it [00:12, 36.45it/s]
episode 543 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 620it [00:17, 36.76it/s]
episode 544 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 443it [00:12, 36.25it/s]
episode 545 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 610it [00:16, 36.42it/s]
episode 546 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 334it [00:09, 36.47it/s]
episode 547 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 429it [00:11, 36.39it/s]
episode 548 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 611it [00:16, 36.33it/s]
episode 549 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 331it [00:09, 36.02it/s]
episode 550 avg_loss: 0.001 total_reward [train:5.000 t

episode 618 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 397it [00:10, 36.54it/s]
episode 619 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 528it [00:14, 37.15it/s]
episode 620 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 414it [00:11, 36.66it/s]
episode 621 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 381it [00:10, 36.54it/s]
episode 622 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 342it [00:09, 36.68it/s]
episode 623 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 358it [00:09, 36.63it/s]
episode 624 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 369it [00:10, 36.68it/s]
episode 625 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.900: : 226it [00:06, 36.35it/s]
episode 626 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 366it [00:10, 36.47it/s]
episode 627 avg_loss: 0.001 total_reward [train:4.000 t

episode 695 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 456it [00:12, 37.45it/s]
episode 696 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 400it [00:10, 37.18it/s]
episode 697 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 465it [00:12, 36.66it/s]
episode 698 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 442it [00:12, 36.81it/s]
episode 699 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 346it [00:09, 36.79it/s]
episode 700 avg_loss: 0.001 total_reward [train:4.000 test:5.000] e-greedy:0.900: : 391it [00:13, 28.31it/s]
episode 701 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 417it [00:11, 37.13it/s]
episode 702 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 458it [00:12, 36.95it/s]
episode 703 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 256it [00:06, 37.07it/s]
episode 704 avg_loss: 0.001 total_reward [train:3.0

episode 772 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 471it [00:12, 36.63it/s]
episode 773 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 458it [00:12, 36.71it/s]
episode 774 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 332it [00:09, 37.01it/s]
episode 775 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 310it [00:08, 36.14it/s]
episode 776 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 607it [00:16, 36.74it/s]
episode 777 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 255it [00:07, 36.25it/s]
episode 778 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 385it [00:10, 36.54it/s]
episode 779 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 276it [00:07, 37.07it/s]
episode 780 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 438it [00:11, 36.98it/s]
episode 781 avg_loss: 0.001 total_reward [train:3.000 t

episode 849 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 362it [00:09, 36.75it/s]
episode 850 avg_loss: 0.001 total_reward [train:4.000 test:4.000] e-greedy:0.900: : 325it [00:12, 25.53it/s]
episode 851 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 505it [00:13, 36.78it/s]
episode 852 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 518it [00:14, 36.99it/s]
episode 853 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 505it [00:13, 36.88it/s]
episode 854 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 412it [00:11, 36.94it/s]
episode 855 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 480it [00:13, 37.11it/s]
episode 856 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 440it [00:12, 37.35it/s]
episode 857 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 450it [00:12, 36.65it/s]
episode 858 avg_loss: 0.001 total_reward [train:5.0

episode 926 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 482it [00:13, 36.74it/s]
episode 927 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 273it [00:07, 36.84it/s]
episode 928 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 384it [00:10, 37.40it/s]
episode 929 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 362it [00:09, 37.25it/s]
episode 930 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 369it [00:09, 37.25it/s]
episode 931 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 271it [00:07, 36.51it/s]
episode 932 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 523it [00:14, 36.90it/s]
episode 933 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 817it [00:22, 37.07it/s]
episode 934 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 474it [00:12, 37.20it/s]
episode 935 avg_loss: 0.001 total_reward [train:8.000 t

episode 1003 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 609it [00:16, 36.65it/s]
episode 1004 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 355it [00:09, 36.83it/s]
episode 1005 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 401it [00:10, 36.61it/s]
episode 1006 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 251it [00:06, 36.43it/s]
episode 1007 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 464it [00:12, 37.58it/s]
episode 1008 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 556it [00:15, 37.35it/s]
episode 1009 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 270it [00:07, 36.73it/s]
episode 1010 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 271it [00:07, 36.48it/s]
episode 1011 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 489it [00:13, 37.08it/s]
episode 1012 avg_loss: 0.001 total_reward [tra

episode 1080 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 527it [00:14, 36.99it/s]
episode 1081 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 400it [00:10, 36.93it/s]
episode 1082 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 551it [00:14, 36.96it/s]
episode 1083 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 239it [00:06, 36.60it/s]
episode 1084 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 207it [00:05, 36.73it/s]
episode 1085 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 187it [00:05, 36.20it/s]
episode 1086 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 315it [00:08, 37.11it/s]
episode 1087 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 198it [00:05, 36.77it/s]
episode 1088 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 215it [00:05, 37.02it/s]
episode 1089 avg_loss: 0.001 total_reward [tra

episode 1157 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 275it [00:07, 36.61it/s]
episode 1158 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 503it [00:13, 37.00it/s]
episode 1159 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 235it [00:06, 36.86it/s]
episode 1160 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 234it [00:06, 36.71it/s]
episode 1161 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 291it [00:08, 36.37it/s]
episode 1162 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 260it [00:07, 37.69it/s]
episode 1163 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 278it [00:07, 36.51it/s]
episode 1164 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 360it [00:09, 37.30it/s]
episode 1165 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 397it [00:10, 36.82it/s]
episode 1166 avg_loss: 0.001 total_reward [tra

episode 1234 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 370it [00:10, 36.90it/s]
episode 1235 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 324it [00:08, 37.46it/s]
episode 1236 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 517it [00:14, 36.81it/s]
episode 1237 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 484it [00:13, 36.80it/s]
episode 1238 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 296it [00:08, 37.65it/s]
episode 1239 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 436it [00:11, 37.45it/s]
episode 1240 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 363it [00:09, 36.70it/s]
episode 1241 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.55it/s]
episode 1242 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 249it [00:06, 36.81it/s]
episode 1243 avg_loss: 0.001 total_reward [tra

episode 1311 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 263it [00:07, 36.26it/s]
episode 1312 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 390it [00:10, 36.56it/s]
episode 1313 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 205it [00:05, 36.01it/s]
episode 1314 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 380it [00:10, 36.99it/s]
episode 1315 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 317it [00:08, 36.67it/s]
episode 1316 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 398it [00:10, 36.56it/s]
episode 1317 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 426it [00:11, 36.81it/s]
episode 1318 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 220it [00:06, 37.18it/s]
episode 1319 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 373it [00:10, 36.66it/s]
episode 1320 avg_loss: 0.001 total_reward [tra

episode 1388 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 386it [00:10, 36.54it/s]
episode 1389 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 315it [00:08, 36.41it/s]
episode 1390 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 444it [00:12, 37.33it/s]
episode 1391 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 190it [00:05, 36.24it/s]
episode 1392 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 316it [00:08, 37.02it/s]
episode 1393 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 233it [00:06, 36.02it/s]
episode 1394 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 187it [00:05, 36.28it/s]
episode 1395 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 37.52it/s]
episode 1396 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 223it [00:06, 36.53it/s]
episode 1397 avg_loss: 0.001 total_reward [tra

episode 1465 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 235it [00:06, 36.19it/s]
episode 1466 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 302it [00:08, 36.67it/s]
episode 1467 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 546it [00:14, 36.95it/s]
episode 1468 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 320it [00:08, 36.42it/s]
episode 1469 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 343it [00:09, 36.63it/s]
episode 1470 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 267it [00:07, 36.52it/s]
episode 1471 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 328it [00:09, 36.48it/s]
episode 1472 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.900: : 176it [00:04, 36.84it/s]
episode 1473 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 361it [00:09, 36.71it/s]
episode 1474 avg_loss: 0.001 total_reward [tra

episode 1542 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 303it [00:08, 36.54it/s]
episode 1543 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 476it [00:12, 37.18it/s]
episode 1544 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 426it [00:11, 36.54it/s]
episode 1545 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 309it [00:08, 36.42it/s]
episode 1546 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 533it [00:14, 36.56it/s]
episode 1547 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 437it [00:11, 36.73it/s]
episode 1548 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 362it [00:09, 36.46it/s]
episode 1549 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 272it [00:07, 36.89it/s]
episode 1550 avg_loss: 0.001 total_reward [train:7.000 test:6.000] e-greedy:0.900: : 347it [00:12, 28.20it/s]
episode 1551 avg_loss: 0.001 total_reward 

episode 1619 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 390it [00:10, 36.62it/s]
episode 1620 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 390it [00:10, 36.71it/s]
episode 1621 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 296it [00:08, 36.64it/s]
episode 1622 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 410it [00:11, 36.63it/s]
episode 1623 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 452it [00:12, 37.16it/s]
episode 1624 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 275it [00:07, 36.74it/s]
episode 1625 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 247it [00:06, 36.22it/s]
episode 1626 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:08, 36.50it/s]
episode 1627 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 357it [00:09, 36.60it/s]
episode 1628 avg_loss: 0.001 total_reward [tra

episode 1696 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 453it [00:12, 36.40it/s]
episode 1697 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 356it [00:09, 36.62it/s]
episode 1698 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 408it [00:11, 37.13it/s]
episode 1699 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 461it [00:12, 36.71it/s]
episode 1700 avg_loss: 0.001 total_reward [train:4.000 test:8.000] e-greedy:0.900: : 271it [00:10, 26.87it/s]
episode 1701 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 507it [00:13, 36.85it/s]
episode 1702 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 447it [00:12, 36.59it/s]
episode 1703 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 224it [00:06, 36.75it/s]
episode 1704 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 205it [00:05, 35.89it/s]
episode 1705 avg_loss: 0.001 total_reward 

episode 1773 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 501it [00:13, 36.48it/s]
episode 1774 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:09, 35.97it/s]
episode 1775 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 422it [00:11, 36.21it/s]
episode 1776 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 337it [00:09, 36.23it/s]
episode 1777 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 343it [00:09, 36.12it/s]
episode 1778 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 254it [00:07, 36.00it/s]
episode 1779 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 274it [00:07, 35.94it/s]
episode 1780 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 366it [00:10, 36.19it/s]
episode 1781 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 355it [00:09, 36.01it/s]
episode 1782 avg_loss: 0.001 total_reward [tra

episode 1850 avg_loss: 0.001 total_reward [train:6.000 test:7.000] e-greedy:0.900: : 424it [00:14, 36.07it/s]
episode 1851 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 580it [00:15, 36.62it/s]
episode 1852 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 430it [00:11, 36.07it/s]
episode 1853 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:08, 35.96it/s]
episode 1854 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 410it [00:11, 35.99it/s]
episode 1855 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 353it [00:09, 36.05it/s]
episode 1856 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 433it [00:11, 36.10it/s]
episode 1857 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:09, 36.06it/s]
episode 1858 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 349it [00:09, 36.02it/s]
episode 1859 avg_loss: 0.001 total_reward 

episode 1927 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 317it [00:08, 36.00it/s]
episode 1928 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 515it [00:14, 36.30it/s]
episode 1929 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:09, 35.75it/s]
episode 1930 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 303it [00:08, 36.02it/s]
episode 1931 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 456it [00:12, 36.33it/s]
episode 1932 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:10, 36.17it/s]
episode 1933 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 263it [00:07, 35.68it/s]
episode 1934 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 416it [00:11, 36.11it/s]
episode 1935 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 397it [00:11, 35.84it/s]
episode 1936 avg_loss: 0.001 total_reward [tr

episode 2004 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 399it [00:11, 36.11it/s]
episode 2005 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 400it [00:11, 36.24it/s]
episode 2006 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 395it [00:10, 36.11it/s]
episode 2007 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 364it [00:10, 36.33it/s]
episode 2008 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 438it [00:12, 35.77it/s]
episode 2009 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 435it [00:12, 35.82it/s]
episode 2010 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 485it [00:13, 36.09it/s]
episode 2011 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 369it [00:10, 35.67it/s]
episode 2012 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 389it [00:10, 35.59it/s]
episode 2013 avg_loss: 0.001 total_reward [tr

episode 2081 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 635it [00:17, 36.26it/s]
episode 2082 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 413it [00:11, 36.11it/s]
episode 2083 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 555it [00:15, 36.41it/s]
episode 2084 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 254it [00:07, 35.65it/s]
episode 2085 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 533it [00:14, 36.01it/s]
episode 2086 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 471it [00:13, 36.10it/s]
episode 2087 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 553it [00:15, 36.14it/s]
episode 2088 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 434it [00:12, 36.06it/s]
episode 2089 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 700it [00:19, 35.43it/s]
episode 2090 avg_loss: 0.001 total_rewar

episode 2157 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 386it [00:10, 35.29it/s]
episode 2158 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 482it [00:13, 36.12it/s]
episode 2159 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 399it [00:11, 36.19it/s]
episode 2160 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.900: : 184it [00:05, 36.63it/s]
episode 2161 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 307it [00:08, 36.21it/s]
episode 2162 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 259it [00:07, 35.87it/s]
episode 2163 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 435it [00:12, 36.23it/s]
episode 2164 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 463it [00:12, 36.31it/s]
episode 2165 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 406it [00:11, 35.93it/s]
episode 2166 avg_loss: 0.001 total_reward [tr

episode 2233 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 491it [00:13, 35.86it/s]
episode 2234 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 522it [00:14, 35.99it/s]
episode 2235 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 433it [00:11, 36.20it/s]
episode 2236 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 460it [00:12, 36.82it/s]
episode 2237 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 416it [00:11, 34.47it/s]
episode 2238 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 530it [00:14, 36.20it/s]
episode 2239 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 315it [00:08, 36.41it/s]
episode 2240 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 360it [00:10, 36.46it/s]
episode 2241 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 350it [00:09, 35.69it/s]
episode 2242 avg_loss: 0.002 total_reward [

episode 2309 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 447it [00:12, 35.61it/s]
episode 2310 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 492it [00:13, 36.47it/s]
episode 2311 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 414it [00:11, 36.21it/s]
episode 2312 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 570it [00:15, 36.16it/s]
episode 2313 avg_loss: 0.001 total_reward [train:15.000 test:-] e-greedy:0.900: : 642it [00:17, 36.19it/s]
episode 2314 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 289it [00:08, 36.05it/s]
episode 2315 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 446it [00:12, 35.94it/s]
episode 2316 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 471it [00:12, 36.31it/s]
episode 2317 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 663it [00:18, 36.39it/s]
episode 2318 avg_loss: 0.001 total_rewar

episode 2385 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 616it [00:17, 36.39it/s]
episode 2386 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 480it [00:13, 36.67it/s]
episode 2387 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 418it [00:11, 36.29it/s]
episode 2388 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 349it [00:09, 36.41it/s]
episode 2389 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 279it [00:07, 36.30it/s]
episode 2390 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 399it [00:11, 36.25it/s]
episode 2391 avg_loss: 0.001 total_reward [train:17.000 test:-] e-greedy:0.900: : 761it [00:20, 36.40it/s]
episode 2392 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 438it [00:12, 36.36it/s]
episode 2393 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 379it [00:10, 36.26it/s]
episode 2394 avg_loss: 0.001 total_reward 

episode 2461 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 521it [00:14, 36.23it/s]
episode 2462 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 377it [00:10, 36.23it/s]
episode 2463 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 404it [00:11, 36.28it/s]
episode 2464 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 528it [00:14, 36.52it/s]
episode 2465 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 504it [00:13, 36.57it/s]
episode 2466 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 464it [00:12, 37.29it/s]
episode 2467 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 430it [00:11, 36.14it/s]
episode 2468 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 386it [00:10, 36.27it/s]
episode 2469 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 322it [00:08, 36.45it/s]
episode 2470 avg_loss: 0.001 total_reward [

episode 2537 avg_loss: 0.002 total_reward [train:13.000 test:-] e-greedy:0.900: : 592it [00:16, 36.46it/s]
episode 2538 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 467it [00:12, 36.20it/s]
episode 2539 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 262it [00:07, 35.94it/s]
episode 2540 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:08, 36.15it/s]
episode 2541 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 592it [00:16, 36.96it/s]
episode 2542 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 385it [00:10, 36.03it/s]
episode 2543 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 548it [00:14, 36.73it/s]
episode 2544 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 560it [00:15, 37.17it/s]
episode 2545 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 491it [00:13, 36.67it/s]
episode 2546 avg_loss: 0.001 total_rewar

episode 2613 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 481it [00:13, 36.53it/s]
episode 2614 avg_loss: 0.001 total_reward [train:16.000 test:-] e-greedy:0.900: : 714it [00:19, 36.81it/s]
episode 2615 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 583it [00:15, 36.69it/s]
episode 2616 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 384it [00:10, 37.09it/s]
episode 2617 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 586it [00:16, 36.61it/s]
episode 2618 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 475it [00:12, 36.62it/s]
episode 2619 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 476it [00:12, 37.43it/s]
episode 2620 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 200it [00:05, 36.73it/s]
episode 2621 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 413it [00:11, 36.79it/s]
episode 2622 avg_loss: 0.001 total_reward 

episode 2690 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 387it [00:10, 36.73it/s]
episode 2691 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 473it [00:12, 36.88it/s]
episode 2692 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 474it [00:12, 36.71it/s]
episode 2693 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 236it [00:06, 36.72it/s]
episode 2694 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 591it [00:16, 36.41it/s]
episode 2695 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 347it [00:09, 36.29it/s]
episode 2696 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 205it [00:05, 36.41it/s]
episode 2697 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 215it [00:05, 35.95it/s]
episode 2698 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 394it [00:10, 36.61it/s]
episode 2699 avg_loss: 0.001 total_reward [t

episode 2767 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 372it [00:10, 36.99it/s]
episode 2768 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 498it [00:13, 36.78it/s]
episode 2769 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 521it [00:14, 36.52it/s]
episode 2770 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 420it [00:11, 36.81it/s]
episode 2771 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 506it [00:13, 36.37it/s]
episode 2772 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 443it [00:12, 36.67it/s]
episode 2773 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 452it [00:12, 37.06it/s]
episode 2774 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 657it [00:17, 36.90it/s]
episode 2775 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 583it [00:15, 36.73it/s]
episode 2776 avg_loss: 0.001 total_rewar

episode 2843 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 364it [00:09, 37.32it/s]
episode 2844 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.900: : 204it [00:05, 36.85it/s]
episode 2845 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 539it [00:14, 36.74it/s]
episode 2846 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 435it [00:11, 36.31it/s]
episode 2847 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 393it [00:10, 36.32it/s]
episode 2848 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 409it [00:11, 36.81it/s]
episode 2849 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 333it [00:09, 36.53it/s]
episode 2850 avg_loss: 0.001 total_reward [train:10.000 test:9.000] e-greedy:0.900: : 449it [00:15, 28.91it/s]
episode 2851 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 194it [00:05, 35.86it/s]
episode 2852 avg_loss: 0.001 total_rewar

episode 2920 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 428it [00:11, 36.94it/s]
episode 2921 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 351it [00:09, 36.57it/s]
episode 2922 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 382it [00:10, 36.39it/s]
episode 2923 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 484it [00:13, 35.78it/s]
episode 2924 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 368it [00:09, 36.90it/s]
episode 2925 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 425it [00:11, 36.83it/s]
episode 2926 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 461it [00:12, 36.84it/s]
episode 2927 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 420it [00:11, 36.81it/s]
episode 2928 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 401it [00:10, 36.90it/s]
episode 2929 avg_loss: 0.001 total_reward [t

episode 2997 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 633it [00:17, 36.98it/s]
episode 2998 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 352it [00:09, 37.01it/s]
episode 2999 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 500it [00:13, 36.91it/s]
episode 3000 avg_loss: 0.001 total_reward [train:8.000 test:7.000] e-greedy:0.900: : 373it [00:12, 30.03it/s]
episode 3001 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 492it [00:13, 37.29it/s]
episode 3002 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 666it [00:18, 36.90it/s]
episode 3003 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 324it [00:08, 36.92it/s]
episode 3004 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 36.70it/s]
episode 3005 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 504it [00:13, 37.08it/s]
episode 3006 avg_loss: 0.002 total_rew

episode 3074 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 399it [00:10, 36.42it/s]
episode 3075 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 355it [00:09, 36.82it/s]
episode 3076 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 521it [00:14, 36.60it/s]
episode 3077 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 379it [00:10, 36.57it/s]
episode 3078 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 417it [00:11, 36.42it/s]
episode 3079 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 420it [00:11, 37.14it/s]
episode 3080 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 450it [00:12, 36.45it/s]
episode 3081 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 562it [00:15, 36.72it/s]
episode 3082 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 434it [00:11, 36.68it/s]
episode 3083 avg_loss: 0.002 total_reward [t

episode 3150 avg_loss: 0.001 total_reward [train:10.000 test:6.000] e-greedy:0.900: : 498it [00:15, 32.07it/s]
episode 3151 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 559it [00:15, 36.53it/s]
episode 3152 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 302it [00:08, 36.49it/s]
episode 3153 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 497it [00:13, 36.54it/s]
episode 3154 avg_loss: 0.001 total_reward [train:15.000 test:-] e-greedy:0.900: : 722it [00:19, 36.81it/s]
episode 3155 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 329it [00:09, 36.46it/s]
episode 3156 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 550it [00:14, 36.75it/s]
episode 3157 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 412it [00:11, 37.18it/s]
episode 3158 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 338it [00:09, 36.65it/s]
episode 3159 avg_loss: 0.002 total_re

episode 3226 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 249it [00:06, 36.44it/s]
episode 3227 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 423it [00:11, 36.52it/s]
episode 3228 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 406it [00:11, 36.48it/s]
episode 3229 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 394it [00:10, 36.43it/s]
episode 3230 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 291it [00:07, 36.40it/s]
episode 3231 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 409it [00:11, 36.87it/s]
episode 3232 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 221it [00:06, 36.03it/s]
episode 3233 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 275it [00:07, 36.55it/s]
episode 3234 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 248it [00:06, 37.59it/s]
episode 3235 avg_loss: 0.002 total_reward [tra

episode 3303 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 360it [00:09, 37.24it/s]
episode 3304 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 489it [00:13, 36.77it/s]
episode 3305 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 538it [00:14, 37.04it/s]
episode 3306 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:08, 36.54it/s]
episode 3307 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 349it [00:09, 36.81it/s]
episode 3308 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 321it [00:08, 36.22it/s]
episode 3309 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 637it [00:17, 36.71it/s]
episode 3310 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 446it [00:12, 36.96it/s]
episode 3311 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 36.99it/s]
episode 3312 avg_loss: 0.001 total_reward [

episode 3380 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 322it [00:08, 36.33it/s]
episode 3381 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 591it [00:16, 36.67it/s]
episode 3382 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 334it [00:09, 36.76it/s]
episode 3383 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 375it [00:10, 36.49it/s]
episode 3384 avg_loss: 0.001 total_reward [train:17.000 test:-] e-greedy:0.900: : 708it [00:19, 36.77it/s]
episode 3385 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 479it [00:13, 36.74it/s]
episode 3386 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 250it [00:06, 36.67it/s]
episode 3387 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:05, 36.16it/s]
episode 3388 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 186it [00:05, 36.06it/s]
episode 3389 avg_loss: 0.001 total_reward [t

episode 3457 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 390it [00:10, 36.59it/s]
episode 3458 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 307it [00:08, 36.33it/s]
episode 3459 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 428it [00:11, 36.87it/s]
episode 3460 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 327it [00:08, 36.66it/s]
episode 3461 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 414it [00:11, 36.70it/s]
episode 3462 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 227it [00:06, 36.13it/s]
episode 3463 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 524it [00:14, 37.00it/s]
episode 3464 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 484it [00:13, 37.18it/s]
episode 3465 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 381it [00:10, 36.52it/s]
episode 3466 avg_loss: 0.001 total_reward [t

episode 3534 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.67it/s]
episode 3535 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 475it [00:12, 36.76it/s]
episode 3536 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 375it [00:10, 36.56it/s]
episode 3537 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 309it [00:08, 36.25it/s]
episode 3538 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 381it [00:10, 36.74it/s]
episode 3539 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 450it [00:12, 36.46it/s]
episode 3540 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 323it [00:08, 36.37it/s]
episode 3541 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 250it [00:06, 36.15it/s]
episode 3542 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 388it [00:10, 36.77it/s]
episode 3543 avg_loss: 0.001 total_reward [t

episode 3611 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 208it [00:05, 36.56it/s]
episode 3612 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 405it [00:11, 36.53it/s]
episode 3613 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:09, 36.28it/s]
episode 3614 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 535it [00:14, 36.59it/s]
episode 3615 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 465it [00:12, 36.47it/s]
episode 3616 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 363it [00:09, 36.40it/s]
episode 3617 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 494it [00:13, 36.61it/s]
episode 3618 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 428it [00:11, 37.01it/s]
episode 3619 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 235it [00:06, 36.25it/s]
episode 3620 avg_loss: 0.001 total_reward [t

episode 3687 avg_loss: 0.001 total_reward [train:13.000 test:-] e-greedy:0.900: : 559it [00:15, 36.83it/s]
episode 3688 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 300it [00:08, 37.30it/s]
episode 3689 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 431it [00:11, 36.77it/s]
episode 3690 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 393it [00:10, 36.58it/s]
episode 3691 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 473it [00:12, 36.66it/s]
episode 3692 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 542it [00:14, 36.67it/s]
episode 3693 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 444it [00:12, 36.94it/s]
episode 3694 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 420it [00:11, 36.69it/s]
episode 3695 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 363it [00:09, 36.61it/s]
episode 3696 avg_loss: 0.002 total_reward [

episode 3763 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 492it [00:13, 36.87it/s]
episode 3764 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 563it [00:15, 36.77it/s]
episode 3765 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 520it [00:14, 37.34it/s]
episode 3766 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 339it [00:09, 36.57it/s]
episode 3767 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 375it [00:10, 36.67it/s]
episode 3768 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 386it [00:10, 36.16it/s]
episode 3769 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 228it [00:06, 36.72it/s]
episode 3770 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 393it [00:10, 36.47it/s]
episode 3771 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 261it [00:07, 36.25it/s]
episode 3772 avg_loss: 0.002 total_reward [

episode 3840 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 210it [00:05, 36.14it/s]
episode 3841 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 422it [00:11, 36.62it/s]
episode 3842 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 389it [00:10, 36.40it/s]
episode 3843 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 374it [00:10, 36.41it/s]
episode 3844 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 450it [00:12, 36.70it/s]
episode 3845 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 329it [00:09, 36.20it/s]
episode 3846 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 37.15it/s]
episode 3847 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 512it [00:13, 36.60it/s]
episode 3848 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 377it [00:10, 36.26it/s]
episode 3849 avg_loss: 0.002 total_reward [t

episode 3917 avg_loss: 0.001 total_reward [train:11.000 test:-] e-greedy:0.900: : 490it [00:13, 36.56it/s]
episode 3918 avg_loss: 0.001 total_reward [train:4.000 test:-] e-greedy:0.900: : 246it [00:06, 36.89it/s]
episode 3919 avg_loss: 0.001 total_reward [train:12.000 test:-] e-greedy:0.900: : 476it [00:12, 37.96it/s]
episode 3920 avg_loss: 0.001 total_reward [train:9.000 test:-] e-greedy:0.900: : 420it [00:11, 37.09it/s]
episode 3921 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.13it/s]
episode 3922 avg_loss: 0.001 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:10, 36.30it/s]
episode 3923 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 398it [00:10, 36.56it/s]
episode 3924 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 494it [00:13, 36.81it/s]
episode 3925 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 410it [00:11, 36.73it/s]
episode 3926 avg_loss: 0.002 total_reward [

episode 3994 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 185it [00:05, 36.23it/s]
episode 3995 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 232it [00:06, 37.47it/s]
episode 3996 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 478it [00:13, 36.47it/s]
episode 3997 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 338it [00:09, 36.33it/s]
episode 3998 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 480it [00:13, 36.99it/s]
episode 3999 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 209it [00:05, 36.57it/s]
episode 4000 avg_loss: 0.001 total_reward [train:7.000 test:6.000] e-greedy:0.900: : 345it [00:11, 30.01it/s]
episode 4001 avg_loss: 0.001 total_reward [train:8.000 test:-] e-greedy:0.900: : 387it [00:10, 36.70it/s]
episode 4002 avg_loss: 0.001 total_reward [train:10.000 test:-] e-greedy:0.900: : 459it [00:12, 36.44it/s]
episode 4003 avg_loss: 0.001 total_rewa

episode 4071 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 201it [00:05, 36.40it/s]
episode 4072 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 531it [00:14, 36.75it/s]
episode 4073 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 261it [00:07, 36.34it/s]
episode 4074 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 260it [00:07, 36.92it/s]
episode 4075 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 231it [00:06, 35.93it/s]
episode 4076 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 258it [00:07, 36.45it/s]
episode 4077 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 334it [00:09, 36.51it/s]
episode 4078 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 195it [00:05, 36.16it/s]
episode 4079 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 400it [00:10, 36.79it/s]
episode 4080 avg_loss: 0.002 total_reward [tr

episode 4148 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 381it [00:10, 36.37it/s]
episode 4149 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 442it [00:12, 36.49it/s]
episode 4150 avg_loss: 0.001 total_reward [train:5.000 test:14.000] e-greedy:0.900: : 294it [00:11, 24.96it/s]
episode 4151 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 207it [00:05, 35.95it/s]
episode 4152 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 387it [00:10, 36.59it/s]
episode 4153 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 363it [00:10, 36.21it/s]
episode 4154 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 486it [00:13, 36.44it/s]
episode 4155 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 479it [00:13, 36.30it/s]
episode 4156 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 437it [00:11, 36.72it/s]
episode 4157 avg_loss: 0.002 total_rewa

episode 4225 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 373it [00:10, 36.34it/s]
episode 4226 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 491it [00:13, 36.57it/s]
episode 4227 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 210it [00:05, 36.23it/s]
episode 4228 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 261it [00:07, 36.61it/s]
episode 4229 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:08, 36.69it/s]
episode 4230 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 520it [00:14, 36.80it/s]
episode 4231 avg_loss: 0.002 total_reward [train:15.000 test:-] e-greedy:0.900: : 658it [00:17, 36.64it/s]
episode 4232 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 340it [00:09, 36.98it/s]
episode 4233 avg_loss: 0.002 total_reward [train:14.000 test:-] e-greedy:0.900: : 600it [00:16, 36.84it/s]
episode 4234 avg_loss: 0.002 total_reward 

episode 4302 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 274it [00:07, 36.64it/s]
episode 4303 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 316it [00:08, 36.86it/s]
episode 4304 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 380it [00:10, 36.80it/s]
episode 4305 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 309it [00:08, 36.44it/s]
episode 4306 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 419it [00:11, 36.58it/s]
episode 4307 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 349it [00:09, 36.30it/s]
episode 4308 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 328it [00:08, 36.72it/s]
episode 4309 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 404it [00:11, 37.00it/s]
episode 4310 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 473it [00:12, 36.64it/s]
episode 4311 avg_loss: 0.002 total_reward [tra

episode 4379 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 339it [00:09, 36.36it/s]
episode 4380 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 251it [00:06, 36.01it/s]
episode 4381 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 312it [00:08, 37.28it/s]
episode 4382 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 503it [00:13, 36.65it/s]
episode 4383 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 420it [00:11, 36.70it/s]
episode 4384 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 378it [00:10, 36.65it/s]
episode 4385 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 417it [00:11, 36.80it/s]
episode 4386 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 415it [00:11, 36.60it/s]
episode 4387 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 193it [00:05, 35.95it/s]
episode 4388 avg_loss: 0.002 total_reward [tr

episode 4456 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.65it/s]
episode 4457 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 431it [00:11, 36.50it/s]
episode 4458 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 343it [00:09, 36.79it/s]
episode 4459 avg_loss: 0.002 total_reward [train:13.000 test:-] e-greedy:0.900: : 593it [00:16, 36.74it/s]
episode 4460 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 501it [00:13, 36.59it/s]
episode 4461 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 286it [00:07, 36.57it/s]
episode 4462 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 213it [00:05, 35.70it/s]
episode 4463 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 447it [00:12, 36.67it/s]
episode 4464 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 293it [00:08, 36.36it/s]
episode 4465 avg_loss: 0.002 total_reward [t

episode 4533 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 206it [00:05, 36.24it/s]
episode 4534 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 428it [00:11, 37.31it/s]
episode 4535 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.45it/s]
episode 4536 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 414it [00:11, 36.40it/s]
episode 4537 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 341it [00:09, 36.51it/s]
episode 4538 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 492it [00:13, 37.21it/s]
episode 4539 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 394it [00:10, 36.36it/s]
episode 4540 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:07, 36.95it/s]
episode 4541 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 319it [00:08, 36.39it/s]
episode 4542 avg_loss: 0.002 total_reward [tr

episode 4610 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:07, 36.68it/s]
episode 4611 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 387it [00:10, 36.54it/s]
episode 4612 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:06, 37.12it/s]
episode 4613 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 432it [00:11, 36.45it/s]
episode 4614 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 199it [00:05, 36.05it/s]
episode 4615 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 330it [00:09, 36.27it/s]
episode 4616 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 285it [00:07, 36.70it/s]
episode 4617 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 246it [00:06, 36.24it/s]
episode 4618 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 361it [00:09, 36.62it/s]
episode 4619 avg_loss: 0.002 total_reward [tra

episode 4687 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 391it [00:10, 36.45it/s]
episode 4688 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 275it [00:07, 36.45it/s]
episode 4689 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 330it [00:09, 36.13it/s]
episode 4690 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 350it [00:09, 36.19it/s]
episode 4691 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 382it [00:10, 36.20it/s]
episode 4692 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 330it [00:09, 36.24it/s]
episode 4693 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 377it [00:10, 36.62it/s]
episode 4694 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 36.09it/s]
episode 4695 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 159it [00:04, 36.58it/s]
episode 4696 avg_loss: 0.002 total_reward [tra

episode 4764 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 354it [00:10, 33.66it/s]
episode 4765 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 438it [00:12, 34.84it/s]
episode 4766 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 290it [00:08, 35.08it/s]
episode 4767 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 509it [00:14, 34.12it/s]
episode 4768 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 304it [00:08, 37.00it/s]
episode 4769 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 304it [00:09, 18.50it/s]
episode 4770 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 333it [00:14, 23.22it/s]
episode 4771 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 277it [00:07, 35.82it/s]
episode 4772 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 290it [00:07, 36.77it/s]
episode 4773 avg_loss: 0.002 total_reward [tr

episode 4841 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 248it [00:06, 36.70it/s]
episode 4842 avg_loss: 0.001 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.44it/s]
episode 4843 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 569it [00:15, 36.66it/s]
episode 4844 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 495it [00:13, 36.50it/s]
episode 4845 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 36.43it/s]
episode 4846 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 227it [00:06, 36.16it/s]
episode 4847 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.900: : 200it [00:05, 36.62it/s]
episode 4848 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 421it [00:11, 36.53it/s]
episode 4849 avg_loss: 0.001 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:08, 37.10it/s]
episode 4850 avg_loss: 0.001 total_reward [t

episode 4918 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:08, 36.48it/s]
episode 4919 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 156it [00:04, 36.71it/s]
episode 4920 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 301it [00:08, 36.25it/s]
episode 4921 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 184it [00:05, 36.85it/s]
episode 4922 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 394it [00:10, 36.59it/s]
episode 4923 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 35.91it/s]
episode 4924 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 232it [00:06, 36.58it/s]
episode 4925 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.61it/s]
episode 4926 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 330it [00:09, 36.52it/s]
episode 4927 avg_loss: 0.002 total_reward [tra

episode 4995 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 154it [00:04, 35.63it/s]
episode 4996 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 36.48it/s]
episode 4997 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 268it [00:07, 37.51it/s]
episode 4998 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 447it [00:12, 36.38it/s]
episode 4999 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.33it/s]
episode 5000 avg_loss: 0.002 total_reward [train:3.000 test:4.000] e-greedy:0.900: : 220it [00:07, 36.99it/s]
episode 5001 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 264it [00:07, 37.26it/s]
episode 5002 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 292it [00:08, 37.15it/s]
episode 5003 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 325it [00:08, 36.31it/s]
episode 5004 avg_loss: 0.002 total_reward 

episode 5072 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 392it [00:10, 37.40it/s]
episode 5073 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 368it [00:10, 37.02it/s]
episode 5074 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.47it/s]
episode 5075 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 340it [00:09, 36.64it/s]
episode 5076 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 423it [00:11, 36.33it/s]
episode 5077 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 288it [00:07, 37.11it/s]
episode 5078 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 291it [00:08, 36.28it/s]
episode 5079 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 300it [00:08, 36.84it/s]
episode 5080 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 281it [00:07, 36.39it/s]
episode 5081 avg_loss: 0.002 total_reward [tra

episode 5149 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:07, 36.49it/s]
episode 5150 avg_loss: 0.002 total_reward [train:6.000 test:10.000] e-greedy:0.900: : 319it [00:11, 27.69it/s]
episode 5151 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.44it/s]
episode 5152 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 302it [00:08, 36.57it/s]
episode 5153 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 420it [00:11, 36.96it/s]
episode 5154 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 328it [00:09, 36.43it/s]
episode 5155 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 276it [00:07, 36.67it/s]
episode 5156 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:08, 36.18it/s]
episode 5157 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 271it [00:07, 36.19it/s]
episode 5158 avg_loss: 0.002 total_reward

episode 5226 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 385it [00:10, 36.82it/s]
episode 5227 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 36.44it/s]
episode 5228 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 308it [00:08, 36.98it/s]
episode 5229 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 246it [00:06, 36.31it/s]
episode 5230 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 274it [00:07, 36.55it/s]
episode 5231 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:07, 36.74it/s]
episode 5232 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 365it [00:09, 36.51it/s]
episode 5233 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 338it [00:09, 36.53it/s]
episode 5234 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 301it [00:08, 36.27it/s]
episode 5235 avg_loss: 0.002 total_reward [tra

episode 5303 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 243it [00:06, 35.96it/s]
episode 5304 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 296it [00:08, 36.53it/s]
episode 5305 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 205it [00:05, 36.30it/s]
episode 5306 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 36.40it/s]
episode 5307 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 189it [00:05, 36.15it/s]
episode 5308 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 199it [00:05, 35.87it/s]
episode 5309 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 303it [00:08, 36.66it/s]
episode 5310 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 245it [00:06, 36.57it/s]
episode 5311 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 36.03it/s]
episode 5312 avg_loss: 0.002 total_reward [tra

episode 5380 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 211it [00:05, 36.13it/s]
episode 5381 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 265it [00:07, 36.16it/s]
episode 5382 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 352it [00:09, 36.88it/s]
episode 5383 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 245it [00:06, 36.07it/s]
episode 5384 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 292it [00:08, 37.15it/s]
episode 5385 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 553it [00:15, 36.76it/s]
episode 5386 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 331it [00:09, 36.55it/s]
episode 5387 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 272it [00:07, 37.59it/s]
episode 5388 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 283it [00:07, 36.56it/s]
episode 5389 avg_loss: 0.002 total_reward [tr

episode 5457 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 261it [00:07, 36.12it/s]
episode 5458 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:05, 36.28it/s]
episode 5459 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:07, 37.46it/s]
episode 5460 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:08, 36.78it/s]
episode 5461 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 36.79it/s]
episode 5462 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 37.24it/s]
episode 5463 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 358it [00:09, 36.59it/s]
episode 5464 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 446it [00:12, 36.86it/s]
episode 5465 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 367it [00:10, 36.67it/s]
episode 5466 avg_loss: 0.002 total_reward [tra

episode 5534 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 333it [00:09, 36.58it/s]
episode 5535 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 460it [00:12, 36.83it/s]
episode 5536 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 37.12it/s]
episode 5537 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 233it [00:06, 36.80it/s]
episode 5538 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.18it/s]
episode 5539 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 212it [00:05, 36.84it/s]
episode 5540 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 296it [00:08, 37.06it/s]
episode 5541 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 36.90it/s]
episode 5542 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 238it [00:06, 36.10it/s]
episode 5543 avg_loss: 0.002 total_reward [tra

episode 5611 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 272it [00:07, 37.31it/s]
episode 5612 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 341it [00:09, 36.60it/s]
episode 5613 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 36.30it/s]
episode 5614 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 36.47it/s]
episode 5615 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 307it [00:08, 36.62it/s]
episode 5616 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 577it [00:15, 36.81it/s]
episode 5617 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:05, 36.60it/s]
episode 5618 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 268it [00:07, 36.76it/s]
episode 5619 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 307it [00:08, 36.39it/s]
episode 5620 avg_loss: 0.002 total_reward [tr

episode 5688 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 325it [00:08, 36.38it/s]
episode 5689 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 285it [00:07, 36.45it/s]
episode 5690 avg_loss: 0.002 total_reward [train:12.000 test:-] e-greedy:0.900: : 533it [00:14, 36.96it/s]
episode 5691 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 295it [00:08, 36.86it/s]
episode 5692 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 238it [00:06, 36.33it/s]
episode 5693 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 271it [00:07, 36.70it/s]
episode 5694 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 243it [00:06, 36.49it/s]
episode 5695 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 363it [00:09, 36.51it/s]
episode 5696 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 36.59it/s]
episode 5697 avg_loss: 0.002 total_reward [tr

episode 5765 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 281it [00:07, 36.09it/s]
episode 5766 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 307it [00:08, 36.74it/s]
episode 5767 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 305it [00:08, 36.40it/s]
episode 5768 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.56it/s]
episode 5769 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 353it [00:09, 36.28it/s]
episode 5770 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 276it [00:07, 36.86it/s]
episode 5771 avg_loss: 0.002 total_reward [train:13.000 test:-] e-greedy:0.900: : 613it [00:16, 36.65it/s]
episode 5772 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 328it [00:08, 36.63it/s]
episode 5773 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 272it [00:07, 36.44it/s]
episode 5774 avg_loss: 0.002 total_reward [tr

episode 5842 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 237it [00:06, 36.89it/s]
episode 5843 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 154it [00:04, 35.87it/s]
episode 5844 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 152it [00:04, 36.91it/s]
episode 5845 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 332it [00:09, 37.47it/s]
episode 5846 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 263it [00:07, 36.69it/s]
episode 5847 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 289it [00:07, 36.64it/s]
episode 5848 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 475it [00:12, 37.03it/s]
episode 5849 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 331it [00:09, 36.52it/s]
episode 5850 avg_loss: 0.002 total_reward [train:6.000 test:6.000] e-greedy:0.900: : 301it [00:10, 28.97it/s]
episode 5851 avg_loss: 0.002 total_reward

episode 5919 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 37.06it/s]
episode 5920 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 189it [00:05, 36.59it/s]
episode 5921 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.47it/s]
episode 5922 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 156it [00:04, 36.83it/s]
episode 5923 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 347it [00:09, 36.34it/s]
episode 5924 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:08, 36.10it/s]
episode 5925 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 188it [00:05, 36.70it/s]
episode 5926 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 310it [00:08, 36.29it/s]
episode 5927 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 331it [00:09, 36.16it/s]
episode 5928 avg_loss: 0.002 total_reward [tra

episode 5996 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 237it [00:06, 35.64it/s]
episode 5997 avg_loss: 0.002 total_reward [train:13.000 test:-] e-greedy:0.900: : 561it [00:15, 35.55it/s]
episode 5998 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 254it [00:07, 35.80it/s]
episode 5999 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 367it [00:10, 35.88it/s]
episode 6000 avg_loss: 0.002 total_reward [train:8.000 test:3.000] e-greedy:0.900: : 403it [00:12, 32.32it/s]
episode 6001 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 252it [00:06, 36.20it/s]
episode 6002 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 205it [00:05, 35.78it/s]
episode 6003 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 461it [00:12, 35.52it/s]
episode 6004 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 332it [00:09, 36.24it/s]
episode 6005 avg_loss: 0.002 total_reward

episode 6073 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 352it [00:09, 37.72it/s]
episode 6074 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 278it [00:07, 38.16it/s]
episode 6075 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:07, 37.89it/s]
episode 6076 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 339it [00:09, 37.66it/s]
episode 6077 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 236it [00:06, 35.02it/s]
episode 6078 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 209it [00:05, 36.69it/s]
episode 6079 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 277it [00:07, 37.90it/s]
episode 6080 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 292it [00:07, 36.29it/s]
episode 6081 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 234it [00:06, 37.60it/s]
episode 6082 avg_loss: 0.002 total_reward [tra

episode 6150 avg_loss: 0.002 total_reward [train:6.000 test:3.000] e-greedy:0.900: : 328it [00:10, 38.01it/s]
episode 6151 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 278it [00:07, 36.75it/s]
episode 6152 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 38.64it/s]
episode 6153 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 347it [00:09, 35.99it/s]
episode 6154 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 325it [00:08, 36.48it/s]
episode 6155 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 300it [00:08, 36.71it/s]
episode 6156 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 456it [00:12, 38.67it/s]
episode 6157 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 297it [00:08, 36.93it/s]
episode 6158 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 484it [00:12, 38.10it/s]
episode 6159 avg_loss: 0.002 total_rewar

episode 6227 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 281it [00:07, 37.44it/s]
episode 6228 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 254it [00:06, 37.32it/s]
episode 6229 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:08, 37.66it/s]
episode 6230 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 301it [00:08, 37.62it/s]
episode 6231 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 272it [00:07, 38.08it/s]
episode 6232 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:08, 37.17it/s]
episode 6233 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 444it [00:11, 37.68it/s]
episode 6234 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 270it [00:07, 37.57it/s]
episode 6235 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 341it [00:09, 37.63it/s]
episode 6236 avg_loss: 0.002 total_reward [tra

episode 6304 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 256it [00:07, 37.58it/s]
episode 6305 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 269it [00:07, 38.49it/s]
episode 6306 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 409it [00:10, 36.48it/s]
episode 6307 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 443it [00:11, 37.79it/s]
episode 6308 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 248it [00:06, 38.12it/s]
episode 6309 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 272it [00:07, 38.43it/s]
episode 6310 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 331it [00:08, 37.81it/s]
episode 6311 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 37.69it/s]
episode 6312 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 229it [00:06, 37.11it/s]
episode 6313 avg_loss: 0.002 total_reward [tra

episode 6381 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 325it [00:08, 36.65it/s]
episode 6382 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 466it [00:12, 37.08it/s]
episode 6383 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 315it [00:08, 36.22it/s]
episode 6384 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 217it [00:06, 35.75it/s]
episode 6385 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 375it [00:10, 36.52it/s]
episode 6386 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 310it [00:08, 36.74it/s]
episode 6387 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 246it [00:06, 36.24it/s]
episode 6388 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 334it [00:09, 36.96it/s]
episode 6389 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 401it [00:10, 36.60it/s]
episode 6390 avg_loss: 0.003 total_reward [tra

episode 6458 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 37.59it/s]
episode 6459 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 306it [00:08, 37.49it/s]
episode 6460 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 410it [00:10, 37.61it/s]
episode 6461 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 294it [00:07, 37.58it/s]
episode 6462 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 185it [00:04, 38.07it/s]
episode 6463 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 347it [00:09, 37.47it/s]
episode 6464 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 521it [00:13, 38.05it/s]
episode 6465 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 206it [00:05, 37.57it/s]
episode 6466 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 274it [00:07, 37.35it/s]
episode 6467 avg_loss: 0.002 total_reward [tra

episode 6535 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 379it [00:10, 37.36it/s]
episode 6536 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 375it [00:09, 37.74it/s]
episode 6537 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 37.31it/s]
episode 6538 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 276it [00:07, 37.68it/s]
episode 6539 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 154it [00:04, 36.78it/s]
episode 6540 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 382it [00:10, 37.35it/s]
episode 6541 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 156it [00:04, 37.87it/s]
episode 6542 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 326it [00:08, 37.64it/s]
episode 6543 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 274it [00:07, 37.44it/s]
episode 6544 avg_loss: 0.003 total_reward [tra

episode 6612 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 225it [00:06, 36.87it/s]
episode 6613 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 502it [00:13, 37.61it/s]
episode 6614 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:07, 37.22it/s]
episode 6615 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 426it [00:11, 37.81it/s]
episode 6616 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 452it [00:12, 37.64it/s]
episode 6617 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 212it [00:05, 38.73it/s]
episode 6618 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:08, 38.19it/s]
episode 6619 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 37.36it/s]
episode 6620 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 375it [00:09, 37.66it/s]
episode 6621 avg_loss: 0.002 total_reward [tr

episode 6689 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 304it [00:08, 38.22it/s]
episode 6690 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 278it [00:07, 37.13it/s]
episode 6691 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 277it [00:07, 37.38it/s]
episode 6692 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 342it [00:09, 37.47it/s]
episode 6693 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 647it [00:17, 37.68it/s]
episode 6694 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 343it [00:09, 37.87it/s]
episode 6695 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 282it [00:07, 37.68it/s]
episode 6696 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 311it [00:08, 37.46it/s]
episode 6697 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 490it [00:12, 37.87it/s]
episode 6698 avg_loss: 0.002 total_reward [tra

episode 6766 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 313it [00:08, 37.76it/s]
episode 6767 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:07, 37.77it/s]
episode 6768 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 393it [00:10, 37.33it/s]
episode 6769 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 255it [00:06, 37.49it/s]
episode 6770 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 327it [00:08, 37.59it/s]
episode 6771 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 400it [00:10, 37.73it/s]
episode 6772 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 378it [00:10, 37.78it/s]
episode 6773 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 399it [00:10, 37.61it/s]
episode 6774 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 157it [00:04, 38.14it/s]
episode 6775 avg_loss: 0.002 total_reward [tra

episode 6843 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 321it [00:08, 36.89it/s]
episode 6844 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 656it [00:17, 35.02it/s]
episode 6845 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 328it [00:08, 38.06it/s]
episode 6846 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 427it [00:11, 37.71it/s]
episode 6847 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 452it [00:12, 37.89it/s]
episode 6848 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 533it [00:14, 37.57it/s]
episode 6849 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 343it [00:09, 37.48it/s]
episode 6850 avg_loss: 0.002 total_reward [train:8.000 test:5.000] e-greedy:0.900: : 401it [00:12, 31.41it/s]
episode 6851 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:08, 37.78it/s]
episode 6852 avg_loss: 0.002 total_rewar

episode 6920 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 371it [00:09, 37.44it/s]
episode 6921 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 454it [00:12, 37.68it/s]
episode 6922 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 249it [00:06, 38.05it/s]
episode 6923 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 336it [00:08, 38.15it/s]
episode 6924 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 318it [00:08, 37.67it/s]
episode 6925 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:05, 37.48it/s]
episode 6926 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 305it [00:08, 37.46it/s]
episode 6927 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 382it [00:10, 37.40it/s]
episode 6928 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 395it [00:10, 37.88it/s]
episode 6929 avg_loss: 0.002 total_reward [tra

episode 6997 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 300it [00:08, 38.68it/s]
episode 6998 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 383it [00:10, 37.89it/s]
episode 6999 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 435it [00:11, 37.41it/s]
episode 7000 avg_loss: 0.002 total_reward [train:5.000 test:5.000] e-greedy:0.900: : 317it [00:10, 29.30it/s]
episode 7001 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:08, 37.29it/s]
episode 7002 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 209it [00:05, 37.02it/s]
episode 7003 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.900: : 127it [00:03, 36.41it/s]
episode 7004 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 38.15it/s]
episode 7005 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 374it [00:09, 37.55it/s]
episode 7006 avg_loss: 0.002 total_reward 

episode 7074 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 205it [00:05, 37.14it/s]
episode 7075 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 315it [00:08, 35.96it/s]
episode 7076 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 382it [00:10, 35.67it/s]
episode 7077 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 410it [00:10, 37.48it/s]
episode 7078 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 399it [00:10, 37.49it/s]
episode 7079 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 333it [00:09, 36.17it/s]
episode 7080 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 397it [00:10, 36.22it/s]
episode 7081 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 439it [00:12, 35.69it/s]
episode 7082 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 357it [00:09, 36.59it/s]
episode 7083 avg_loss: 0.002 total_reward [tra

episode 7151 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 354it [00:09, 36.59it/s]
episode 7152 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 263it [00:07, 36.25it/s]
episode 7153 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:08, 36.54it/s]
episode 7154 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 188it [00:05, 36.73it/s]
episode 7155 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 436it [00:11, 38.11it/s]
episode 7156 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 425it [00:11, 37.75it/s]
episode 7157 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 294it [00:07, 37.29it/s]
episode 7158 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 409it [00:10, 37.23it/s]
episode 7159 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 337it [00:09, 37.12it/s]
episode 7160 avg_loss: 0.002 total_reward [tra

episode 7228 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 239it [00:06, 37.25it/s]
episode 7229 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 343it [00:09, 37.01it/s]
episode 7230 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 219it [00:05, 36.83it/s]
episode 7231 avg_loss: 0.002 total_reward [train:11.000 test:-] e-greedy:0.900: : 538it [00:14, 37.33it/s]
episode 7232 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 265it [00:07, 37.65it/s]
episode 7233 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 339it [00:09, 36.98it/s]
episode 7234 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 36.89it/s]
episode 7235 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 410it [00:11, 37.20it/s]
episode 7236 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 249it [00:06, 37.28it/s]
episode 7237 avg_loss: 0.002 total_reward [tr

episode 7305 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 349it [00:09, 37.12it/s]
episode 7306 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 246it [00:06, 36.95it/s]
episode 7307 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 396it [00:10, 37.47it/s]
episode 7308 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 211it [00:05, 37.25it/s]
episode 7309 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 403it [00:10, 37.53it/s]
episode 7310 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 250it [00:06, 37.67it/s]
episode 7311 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 280it [00:07, 38.16it/s]
episode 7312 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 422it [00:11, 37.67it/s]
episode 7313 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 364it [00:09, 37.60it/s]
episode 7314 avg_loss: 0.002 total_reward [tra

episode 7382 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 37.34it/s]
episode 7383 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 295it [00:07, 37.22it/s]
episode 7384 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 331it [00:08, 37.44it/s]
episode 7385 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 338it [00:09, 37.05it/s]
episode 7386 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 207it [00:05, 36.19it/s]
episode 7387 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 472it [00:12, 37.81it/s]
episode 7388 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 417it [00:11, 37.41it/s]
episode 7389 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 283it [00:07, 37.08it/s]
episode 7390 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 256it [00:06, 37.69it/s]
episode 7391 avg_loss: 0.002 total_reward [tra

episode 7459 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 458it [00:12, 37.58it/s]
episode 7460 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 464it [00:12, 37.24it/s]
episode 7461 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 318it [00:08, 37.61it/s]
episode 7462 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 472it [00:12, 38.26it/s]
episode 7463 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 294it [00:07, 37.47it/s]
episode 7464 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 395it [00:10, 37.38it/s]
episode 7465 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:08, 37.72it/s]
episode 7466 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 253it [00:06, 37.62it/s]
episode 7467 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 265it [00:07, 38.00it/s]
episode 7468 avg_loss: 0.002 total_reward [tr

episode 7536 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 352it [00:09, 36.48it/s]
episode 7537 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 347it [00:09, 36.18it/s]
episode 7538 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 271it [00:07, 36.27it/s]
episode 7539 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 343it [00:09, 36.13it/s]
episode 7540 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 353it [00:09, 36.22it/s]
episode 7541 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 346it [00:09, 35.68it/s]
episode 7542 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 334it [00:09, 35.74it/s]
episode 7543 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 210it [00:05, 35.42it/s]
episode 7544 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 423it [00:11, 36.18it/s]
episode 7545 avg_loss: 0.002 total_reward [tra

episode 7613 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 381it [00:10, 36.48it/s]
episode 7614 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 270it [00:07, 36.47it/s]
episode 7615 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 384it [00:10, 37.58it/s]
episode 7616 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 364it [00:10, 36.25it/s]
episode 7617 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 278it [00:08, 35.70it/s]
episode 7618 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 443it [00:12, 35.30it/s]
episode 7619 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 283it [00:08, 35.32it/s]
episode 7620 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 292it [00:08, 36.98it/s]
episode 7621 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 266it [00:07, 36.52it/s]
episode 7622 avg_loss: 0.003 total_reward [tra

episode 7690 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 342it [00:09, 36.66it/s]
episode 7691 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 327it [00:08, 36.52it/s]
episode 7692 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 277it [00:07, 36.46it/s]
episode 7693 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 317it [00:08, 36.30it/s]
episode 7694 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 36.73it/s]
episode 7695 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 251it [00:06, 36.32it/s]
episode 7696 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 319it [00:08, 36.70it/s]
episode 7697 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 440it [00:12, 37.03it/s]
episode 7698 avg_loss: 0.002 total_reward [train:15.000 test:-] e-greedy:0.900: : 634it [00:17, 36.68it/s]
episode 7699 avg_loss: 0.002 total_reward [tr

episode 7767 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 385it [00:10, 35.54it/s]
episode 7768 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 486it [00:13, 35.61it/s]
episode 7769 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:06, 37.25it/s]
episode 7770 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 197it [00:05, 36.02it/s]
episode 7771 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 382it [00:10, 36.47it/s]
episode 7772 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 506it [00:13, 36.88it/s]
episode 7773 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 473it [00:12, 36.58it/s]
episode 7774 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 491it [00:13, 36.70it/s]
episode 7775 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 239it [00:06, 35.86it/s]
episode 7776 avg_loss: 0.002 total_reward 

episode 7844 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 401it [00:11, 35.87it/s]
episode 7845 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 581it [00:16, 35.99it/s]
episode 7846 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 333it [00:09, 35.20it/s]
episode 7847 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:06, 36.28it/s]
episode 7848 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 287it [00:08, 35.30it/s]
episode 7849 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 245it [00:06, 35.18it/s]
episode 7850 avg_loss: 0.002 total_reward [train:5.000 test:7.000] e-greedy:0.900: : 349it [00:12, 28.59it/s]
episode 7851 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 423it [00:11, 35.51it/s]
episode 7852 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 313it [00:08, 35.71it/s]
episode 7853 avg_loss: 0.002 total_reward 

episode 7921 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 308it [00:08, 36.33it/s]
episode 7922 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 246it [00:06, 36.07it/s]
episode 7923 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 268it [00:07, 37.63it/s]
episode 7924 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 212it [00:05, 37.25it/s]
episode 7925 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 208it [00:05, 37.65it/s]
episode 7926 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 289it [00:08, 35.77it/s]
episode 7927 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 255it [00:07, 34.71it/s]
episode 7928 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 208it [00:05, 37.91it/s]
episode 7929 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 36.22it/s]
episode 7930 avg_loss: 0.002 total_reward [tra

episode 7998 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:06, 36.40it/s]
episode 7999 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 35.89it/s]
episode 8000 avg_loss: 0.002 total_reward [train:7.000 test:3.000] e-greedy:0.900: : 341it [00:10, 31.30it/s]
episode 8001 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 226it [00:06, 36.75it/s]
episode 8002 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 393it [00:10, 36.51it/s]
episode 8003 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 280it [00:07, 37.13it/s]
episode 8004 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 416it [00:11, 37.03it/s]
episode 8005 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 36.30it/s]
episode 8006 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 387it [00:10, 36.57it/s]
episode 8007 avg_loss: 0.002 total_reward 

episode 8075 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 361it [00:09, 36.43it/s]
episode 8076 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 293it [00:08, 36.62it/s]
episode 8077 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 188it [00:05, 37.42it/s]
episode 8078 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 237it [00:06, 36.23it/s]
episode 8079 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 323it [00:08, 36.62it/s]
episode 8080 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 439it [00:11, 36.79it/s]
episode 8081 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:08, 36.57it/s]
episode 8082 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 469it [00:12, 36.90it/s]
episode 8083 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 307it [00:08, 36.58it/s]
episode 8084 avg_loss: 0.003 total_reward [tr

episode 8152 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 265it [00:07, 36.54it/s]
episode 8153 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 311it [00:08, 35.25it/s]
episode 8154 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 389it [00:10, 36.02it/s]
episode 8155 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 36.43it/s]
episode 8156 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 266it [00:07, 36.18it/s]
episode 8157 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 388it [00:10, 36.25it/s]
episode 8158 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 222it [00:06, 36.74it/s]
episode 8159 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 426it [00:11, 37.26it/s]
episode 8160 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 213it [00:05, 37.04it/s]
episode 8161 avg_loss: 0.002 total_reward [tra

episode 8229 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 271it [00:07, 36.95it/s]
episode 8230 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 236it [00:06, 37.09it/s]
episode 8231 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 376it [00:10, 36.74it/s]
episode 8232 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 330it [00:09, 36.34it/s]
episode 8233 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 216it [00:05, 36.66it/s]
episode 8234 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:09, 36.69it/s]
episode 8235 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 265it [00:07, 36.60it/s]
episode 8236 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:07, 36.87it/s]
episode 8237 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 255it [00:06, 36.98it/s]
episode 8238 avg_loss: 0.002 total_reward [tra

episode 8306 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 206it [00:05, 35.60it/s]
episode 8307 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 210it [00:05, 36.14it/s]
episode 8308 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 286it [00:07, 36.83it/s]
episode 8309 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 199it [00:05, 36.40it/s]
episode 8310 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 310it [00:08, 37.18it/s]
episode 8311 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 301it [00:08, 36.66it/s]
episode 8312 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 235it [00:06, 36.27it/s]
episode 8313 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 358it [00:09, 36.91it/s]
episode 8314 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 255it [00:06, 36.76it/s]
episode 8315 avg_loss: 0.002 total_reward [tra

episode 8383 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 221it [00:06, 35.98it/s]
episode 8384 avg_loss: 0.002 total_reward [train:13.000 test:-] e-greedy:0.900: : 938it [00:26, 35.79it/s]
episode 8385 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 360it [00:09, 37.63it/s]
episode 8386 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 203it [00:05, 36.66it/s]
episode 8387 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 35.03it/s]
episode 8388 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 289it [00:08, 35.53it/s]
episode 8389 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 200it [00:05, 35.73it/s]
episode 8390 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 352it [00:10, 36.84it/s]
episode 8391 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 260it [00:07, 37.26it/s]
episode 8392 avg_loss: 0.003 total_reward [tr

episode 8460 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 174it [00:05, 34.70it/s]
episode 8461 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 161it [00:04, 34.81it/s]
episode 8462 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 204it [00:05, 35.00it/s]
episode 8463 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 364it [00:09, 37.30it/s]
episode 8464 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 337it [00:09, 36.88it/s]
episode 8465 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 351it [00:09, 36.42it/s]
episode 8466 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 349it [00:09, 36.81it/s]
episode 8467 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:08, 36.42it/s]
episode 8468 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 253it [00:06, 36.16it/s]
episode 8469 avg_loss: 0.002 total_reward [tra

episode 8537 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 329it [00:09, 35.20it/s]
episode 8538 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 217it [00:06, 34.90it/s]
episode 8539 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 357it [00:09, 36.05it/s]
episode 8540 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 275it [00:07, 35.76it/s]
episode 8541 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 358it [00:09, 36.27it/s]
episode 8542 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:08, 36.03it/s]
episode 8543 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 334it [00:09, 35.14it/s]
episode 8544 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 264it [00:07, 35.84it/s]
episode 8545 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 253it [00:07, 35.06it/s]
episode 8546 avg_loss: 0.002 total_reward [tra

episode 8614 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 245it [00:06, 36.90it/s]
episode 8615 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 201it [00:05, 36.42it/s]
episode 8616 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:10, 37.52it/s]
episode 8617 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 350it [00:09, 36.97it/s]
episode 8618 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 235it [00:06, 35.75it/s]
episode 8619 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 306it [00:08, 34.45it/s]
episode 8620 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 275it [00:07, 36.34it/s]
episode 8621 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 520it [00:14, 37.49it/s]
episode 8622 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 265it [00:07, 36.84it/s]
episode 8623 avg_loss: 0.002 total_reward [tr

episode 8691 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 419it [00:11, 36.84it/s]
episode 8692 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 270it [00:07, 36.35it/s]
episode 8693 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 279it [00:07, 36.87it/s]
episode 8694 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 197it [00:05, 36.27it/s]
episode 8695 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 278it [00:07, 36.54it/s]
episode 8696 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 332it [00:09, 36.78it/s]
episode 8697 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 187it [00:05, 35.78it/s]
episode 8698 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 353it [00:09, 36.53it/s]
episode 8699 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 448it [00:12, 37.05it/s]
episode 8700 avg_loss: 0.002 total_reward [tra

episode 8768 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:06, 37.37it/s]
episode 8769 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 253it [00:06, 36.40it/s]
episode 8770 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 301it [00:08, 36.28it/s]
episode 8771 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.900: : 197it [00:05, 36.18it/s]
episode 8772 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 268it [00:07, 37.07it/s]
episode 8773 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 353it [00:09, 36.57it/s]
episode 8774 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 208it [00:05, 36.53it/s]
episode 8775 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 327it [00:09, 35.17it/s]
episode 8776 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 355it [00:09, 35.87it/s]
episode 8777 avg_loss: 0.003 total_reward [tra

episode 8845 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 269it [00:07, 34.67it/s]
episode 8846 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 299it [00:08, 35.31it/s]
episode 8847 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.900: : 218it [00:06, 35.39it/s]
episode 8848 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 411it [00:11, 35.47it/s]
episode 8849 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 433it [00:12, 35.63it/s]
episode 8850 avg_loss: 0.002 total_reward [train:5.000 test:5.000] e-greedy:0.900: : 308it [00:10, 34.87it/s]
episode 8851 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 262it [00:07, 35.00it/s]
episode 8852 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 258it [00:07, 34.80it/s]
episode 8853 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 278it [00:07, 35.08it/s]
episode 8854 avg_loss: 0.002 total_reward 

episode 8922 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 380it [00:10, 35.89it/s]
episode 8923 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 319it [00:08, 36.43it/s]
episode 8924 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 251it [00:06, 36.31it/s]
episode 8925 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 243it [00:06, 36.20it/s]
episode 8926 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 386it [00:10, 36.30it/s]
episode 8927 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 316it [00:08, 36.91it/s]
episode 8928 avg_loss: 0.002 total_reward [train:10.000 test:-] e-greedy:0.900: : 694it [00:18, 36.66it/s]
episode 8929 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 436it [00:12, 31.86it/s]
episode 8930 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 216it [00:06, 35.94it/s]
episode 8931 avg_loss: 0.002 total_reward [tr

episode 8999 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 504it [00:13, 37.18it/s]
episode 9000 avg_loss: 0.003 total_reward [train:7.000 test:6.000] e-greedy:0.900: : 375it [00:12, 30.15it/s]
episode 9001 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 36.62it/s]
episode 9002 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 223it [00:06, 36.26it/s]
episode 9003 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 361it [00:09, 36.40it/s]
episode 9004 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 247it [00:06, 36.18it/s]
episode 9005 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 308it [00:08, 36.37it/s]
episode 9006 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 355it [00:10, 34.94it/s]
episode 9007 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 286it [00:08, 35.63it/s]
episode 9008 avg_loss: 0.002 total_reward 

episode 9076 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 255it [00:07, 36.32it/s]
episode 9077 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 302it [00:08, 36.01it/s]
episode 9078 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 251it [00:06, 36.51it/s]
episode 9079 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 243it [00:06, 36.19it/s]
episode 9080 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.900: : 290it [00:07, 36.77it/s]
episode 9081 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 243it [00:06, 36.21it/s]
episode 9082 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 371it [00:10, 36.61it/s]
episode 9083 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 460it [00:12, 36.93it/s]
episode 9084 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 331it [00:09, 36.42it/s]
episode 9085 avg_loss: 0.003 total_reward [tra

episode 9153 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 309it [00:08, 36.46it/s]
episode 9154 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 307it [00:08, 36.12it/s]
episode 9155 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 285it [00:07, 36.62it/s]
episode 9156 avg_loss: 0.002 total_reward [train:9.000 test:-] e-greedy:0.900: : 462it [00:12, 36.57it/s]
episode 9157 avg_loss: 0.002 total_reward [train:5.000 test:-] e-greedy:0.900: : 287it [00:07, 36.62it/s]
episode 9158 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 335it [00:09, 36.45it/s]
episode 9159 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 346it [00:09, 36.65it/s]
episode 9160 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 296it [00:08, 37.02it/s]
episode 9161 avg_loss: 0.002 total_reward [train:6.000 test:-] e-greedy:0.900: : 361it [00:09, 36.67it/s]
episode 9162 avg_loss: 0.003 total_reward [tra

episode 9230 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 231it [00:06, 35.36it/s]
episode 9231 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 464it [00:12, 34.86it/s]
episode 9232 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 322it [00:09, 33.05it/s]
episode 9233 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:09, 36.54it/s]
episode 9234 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 333it [00:09, 35.42it/s]
episode 9235 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 247it [00:06, 36.06it/s]
episode 9236 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 389it [00:10, 36.12it/s]
episode 9237 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 342it [00:09, 36.30it/s]
episode 9238 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 248it [00:06, 36.18it/s]
episode 9239 avg_loss: 0.003 total_reward [tra

episode 9307 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 257it [00:07, 35.59it/s]
episode 9308 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 382it [00:10, 35.90it/s]
episode 9309 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 289it [00:07, 36.13it/s]
episode 9310 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 391it [00:10, 36.21it/s]
episode 9311 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 328it [00:09, 36.62it/s]
episode 9312 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 264it [00:07, 33.06it/s]
episode 9313 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 337it [00:09, 35.78it/s]
episode 9314 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 251it [00:07, 35.81it/s]
episode 9315 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 365it [00:10, 35.48it/s]
episode 9316 avg_loss: 0.003 total_reward [tra

episode 9384 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 387it [00:10, 36.00it/s]
episode 9385 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 255it [00:06, 36.68it/s]
episode 9386 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 300it [00:08, 36.65it/s]
episode 9387 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 370it [00:10, 36.25it/s]
episode 9388 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 528it [00:14, 36.00it/s]
episode 9389 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 355it [00:09, 36.14it/s]
episode 9390 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 468it [00:12, 36.35it/s]
episode 9391 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 284it [00:07, 37.10it/s]
episode 9392 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 473it [00:12, 36.57it/s]
episode 9393 avg_loss: 0.003 total_reward [t

episode 9461 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 437it [00:12, 35.86it/s]
episode 9462 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 405it [00:11, 35.73it/s]
episode 9463 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 392it [00:10, 36.63it/s]
episode 9464 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 215it [00:06, 35.57it/s]
episode 9465 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 390it [00:10, 35.48it/s]
episode 9466 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 419it [00:11, 35.68it/s]
episode 9467 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 241it [00:06, 35.14it/s]
episode 9468 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 363it [00:10, 35.50it/s]
episode 9469 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 227it [00:06, 36.24it/s]
episode 9470 avg_loss: 0.003 total_reward [tra

episode 9538 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 267it [00:07, 35.41it/s]
episode 9539 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 316it [00:08, 36.02it/s]
episode 9540 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 389it [00:10, 35.75it/s]
episode 9541 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 376it [00:10, 37.25it/s]
episode 9542 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 436it [00:12, 35.44it/s]
episode 9543 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 256it [00:07, 35.08it/s]
episode 9544 avg_loss: 0.002 total_reward [train:8.000 test:-] e-greedy:0.900: : 390it [00:10, 35.59it/s]
episode 9545 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 443it [00:12, 35.80it/s]
episode 9546 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 342it [00:09, 34.58it/s]
episode 9547 avg_loss: 0.003 total_reward [tra

episode 9615 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 229it [00:06, 36.20it/s]
episode 9616 avg_loss: 0.002 total_reward [train:7.000 test:-] e-greedy:0.900: : 341it [00:09, 36.61it/s]
episode 9617 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 324it [00:08, 37.56it/s]
episode 9618 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 267it [00:07, 36.50it/s]
episode 9619 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 272it [00:07, 36.79it/s]
episode 9620 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 308it [00:08, 36.94it/s]
episode 9621 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 298it [00:08, 36.57it/s]
episode 9622 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 256it [00:07, 36.89it/s]
episode 9623 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 291it [00:08, 36.35it/s]
episode 9624 avg_loss: 0.003 total_reward [tra

episode 9692 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 372it [00:10, 37.08it/s]
episode 9693 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 312it [00:08, 36.48it/s]
episode 9694 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 339it [00:09, 34.97it/s]
episode 9695 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.900: : 322it [00:09, 34.65it/s]
episode 9696 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 411it [00:11, 36.00it/s]
episode 9697 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 267it [00:07, 35.91it/s]
episode 9698 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 417it [00:11, 36.11it/s]
episode 9699 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 464it [00:12, 36.65it/s]
episode 9700 avg_loss: 0.003 total_reward [train:6.000 test:4.000] e-greedy:0.900: : 368it [00:12, 35.78it/s]
episode 9701 avg_loss: 0.003 total_reward 

episode 9769 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 359it [00:09, 36.55it/s]
episode 9770 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 310it [00:08, 36.27it/s]
episode 9771 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 313it [00:08, 36.24it/s]
episode 9772 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 411it [00:11, 36.28it/s]
episode 9773 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 502it [00:13, 36.64it/s]
episode 9774 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 785it [00:21, 36.77it/s]
episode 9775 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 254it [00:06, 36.53it/s]
episode 9776 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 244it [00:06, 36.65it/s]
episode 9777 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 507it [00:13, 36.87it/s]
episode 9778 avg_loss: 0.003 total_reward [

episode 9846 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.900: : 206it [00:05, 35.41it/s]
episode 9847 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 253it [00:07, 35.10it/s]
episode 9848 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 390it [00:11, 35.36it/s]
episode 9849 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 412it [00:11, 36.32it/s]
episode 9850 avg_loss: 0.003 total_reward [train:6.000 test:4.000] e-greedy:0.900: : 314it [00:10, 29.61it/s]
episode 9851 avg_loss: 0.003 total_reward [train:9.000 test:-] e-greedy:0.900: : 460it [00:13, 35.53it/s]
episode 9852 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 252it [00:07, 35.44it/s]
episode 9853 avg_loss: 0.003 total_reward [train:8.000 test:-] e-greedy:0.900: : 408it [00:11, 34.83it/s]
episode 9854 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 415it [00:11, 35.11it/s]
episode 9855 avg_loss: 0.003 total_reward 

episode 9923 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 242it [00:06, 36.77it/s]
episode 9924 avg_loss: 0.003 total_reward [train:10.000 test:-] e-greedy:0.900: : 470it [00:12, 36.21it/s]
episode 9925 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.900: : 257it [00:07, 34.86it/s]
episode 9926 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.900: : 377it [00:10, 36.52it/s]
episode 9927 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 314it [00:08, 36.14it/s]
episode 9928 avg_loss: 0.003 total_reward [train:11.000 test:-] e-greedy:0.900: : 737it [00:20, 36.70it/s]
episode 9929 avg_loss: 0.003 total_reward [train:6.000 test:-] e-greedy:0.900: : 338it [00:09, 36.62it/s]
episode 9930 each step reward:2.000: : 105it [00:02, 36.52it/s]

In [None]:
# Micro-benchmark: time how long NumPy takes to draw a random permutation.
import time

# perf_counter() is the monotonic, highest-resolution clock meant for measuring
# short intervals. time.time() is wall-clock time: it can jump when the system
# clock is adjusted and may be too coarse to resolve an operation this small.
start_t = time.perf_counter()
a = np.random.permutation(int(1e1))  # random ordering of the integers 0..9
print(time.perf_counter() - start_t)  # elapsed time in (fractional) seconds

# 