# DQNの解説

![image.png](attachment:image.png)

画像引用:  
https://qiita.com/sugulu/items/3c7d6cbe600d455e853b

### DQNの特徴
- Q学習における状態・行動価値テーブル(Qテーブル)を, ニューラルネットワークによる関数近似で置き換えたもの.
- 離散的な行動を扱うことができる.

参考:  
http://blog.syundo.org/post/20171208-reinforcement-learning-dqn-and-impl/

### OpenAI gymのインストール

GitHubのリポジトリを参考に, gymモジュールをインストールしてください(本ノートブックではAtari環境のBreakoutを使用します).  
https://github.com/openai/gym

In [1]:
import os
from logging import getLogger, StreamHandler, DEBUG, FileHandler

import gym
import matplotlib.pyplot as plt
import numpy as np
import renom as rm
from gym.core import Env
from PIL import Image
from renom.cuda import set_cuda_active
from renom.utility.initializer import Gaussian
from renom_rl.dqn import DQN
from renom_rl.env import BaseEnv

# Directory that receives the training log.
# NOTE(review): the original cell used `save_dir` without ever defining it
# (NameError). "." (the current working directory) is an assumed default;
# point it at another directory if the log should live elsewhere.
save_dir = "."
os.makedirs(save_dir, exist_ok=True)

logger = getLogger(__name__)
logger.setLevel(DEBUG)

# Re-running this cell would otherwise attach one more FileHandler to the same
# logger each time and duplicate every log line, so handlers left over from a
# previous run are removed and closed first.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

handler = FileHandler(os.path.join(save_dir, "train.log"))
handler.setLevel(DEBUG)
logger.addHandler(handler)

# Do not pass records on to the handlers of ancestor loggers; they are written
# to the log file only.
logger.propagate = False
logger.debug("Train Started.")

# Turn on ReNom's CUDA backend.
set_cuda_active(True)

# The "NoFrameskip" variant of Breakout is requested here; repeating an action
# over several emulator frames is done by CustomEnv.step instead.
ENV_ID = 'BreakoutNoFrameskip-v4'
env = gym.make(ENV_ID)

class CustomEnv(BaseEnv):
    """Adapter that exposes a gym environment through the ``BaseEnv`` interface.

    Every call to ``step`` repeats the chosen action for up to ``_FRAME_SKIP``
    emulator frames, keeps only the last frame (preprocessed to an 84x84
    grayscale array scaled by 1/255) and returns the ``_STACK_SIZE`` most
    recently kept frames stacked along axis 0, i.e. an array whose shape equals
    ``state_shape``.
    """

    # Number of emulator frames one agent action is repeated for.
    _FRAME_SKIP = 4
    # Number of kept frames stacked into one state.
    _STACK_SIZE = 4

    def __init__(self, env):
        """Wrap ``env``.

        Args:
            env: gym-style environment whose ``step`` returns the 4-tuple
                ``(observation, reward, done, info)``.
        """
        self.env = env
        # Hard-coded number of discrete actions.
        # NOTE(review): presumably equals env.action_space.n for Breakout - confirm.
        self.action_shape = 4
        self.state_shape = (self._STACK_SIZE, 84, 84)
        self.previous_frames = []
        super(CustomEnv, self).__init__()

    def _restart_episode(self):
        """Reset the wrapped env and fill the frame stack with its first frame.

        Starting from a freshly filled stack guarantees that no frame of the
        previous episode can appear in a state of the new one.
        """
        # Assumes env.reset() returns the raw observation, which matches the
        # 4-tuple step API used in step() - TODO confirm for the installed gym.
        first_frame = self._preprocess(self.env.reset())
        self.previous_frames = [first_frame] * self._STACK_SIZE

    def reset(self):
        """Start a new episode and return its initial stacked state.

        After resetting the wrapped environment, between 4 and 32 (inclusive)
        steps with randomly sampled actions are played, presumably so that
        episodes do not all begin from the identical state.

        Returns:
            numpy array of shape ``state_shape``.
        """
        self._restart_episode()
        n_step = np.random.randint(4, 32 + 1)
        for _ in range(n_step):
            _, _, terminal = self.step(self.env.action_space.sample())
            if terminal:
                # The episode ended during warm-up; never keep stepping a
                # finished environment, start over instead.
                self._restart_episode()
        return np.stack(self.previous_frames)

    def sample(self):
        """Return an action sampled from the wrapped env's action space."""
        return self.env.action_space.sample()

    def render(self):
        """Render the wrapped environment."""
        self.env.render()

    def _preprocess(self, state):
        """Convert one raw frame into an 84x84 grayscale array.

        Args:
            state: raw frame accepted by ``PIL.Image.fromarray``
                (presumably an RGB uint8 array - TODO confirm).

        Returns:
            2-D float array with 84 rows and 84 columns, values divided by 255.
        """
        # PIL's resize takes (width, height): the result is 84 wide, 110 high.
        resized_image = Image.fromarray(state).resize((84, 110)).convert('L')
        image_array = np.asarray(resized_image) / 255.
        # Drop the top 26 rows so that 84 rows remain.
        final_image = image_array[26:110]
        # To confirm that the image is processed correctly:
        # Image.fromarray(np.clip(final_image.reshape(84, 84)*255, 0, 255).astype(np.uint8)).save("test.png")
        return final_image

    def step(self, action):
        """Repeat ``action`` for up to ``_FRAME_SKIP`` frames and return the result.

        Args:
            action: action passed unchanged to the wrapped env's ``step``.

        Returns:
            Tuple ``(state, reward, terminal)``: ``state`` is the stack of the
            ``_STACK_SIZE`` most recent kept frames, ``reward`` is the sum of
            the rewards of the repeated frames and ``terminal`` tells whether
            the episode ended during this step.
        """
        reward_list = []
        terminal = False
        for _ in range(self._FRAME_SKIP):
            observation, reward, done, _ = self.env.step(action)
            reward_list.append(reward)
            if done:
                # Stop immediately: stepping an env after it reported the end
                # of the episode is not valid.
                terminal = True
                break

        # Only the last frame is used, so only that one is preprocessed.
        frame = self._preprocess(observation)

        frames = list(self.previous_frames) + [frame]
        missing = self._STACK_SIZE - len(frames)
        if missing > 0:
            # Stack not yet full (e.g. step() called before reset()): repeat
            # the oldest frame so the state always matches state_shape.
            frames = [frames[0]] * missing + frames
        self.previous_frames = frames[-self._STACK_SIZE:]

        state = np.stack(self.previous_frames)
        return state, np.sum(reward_list), terminal
    
# Wrap the raw gym environment so it can be handed to the DQN agent.
custom_env = CustomEnv(env)

# Convolutional part of the Q-network as (output channels, filter size, stride);
# every convolution is followed by a ReLU.
conv_specs = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]

q_layers = []
for n_channel, filter_size, stride_size in conv_specs:
    q_layers.append(rm.Conv2d(n_channel, filter=filter_size, stride=stride_size, ignore_bias=True))
    q_layers.append(rm.Relu())

# Fully connected head: one output unit per action of the wrapped environment.
q_layers.extend([
    rm.Flatten(),
    rm.Dense(512, ignore_bias=True),
    rm.Relu(),
    rm.Dense(custom_env.action_shape, ignore_bias=True),
])

q_network = rm.Sequential(q_layers)

In [None]:
# Build the DQN agent from the wrapped environment and the Q-network.
model = DQN(custom_env, q_network)

In [None]:
# Train the agent; render=True presumably draws the game while training.
# NOTE(review): greedy_step presumably is the number of steps over which the
# epsilon-greedy schedule is annealed - confirm against the ReNomRL DQN.fit docs.
model.fit(render=True, greedy_step=1000000)

Run random 5000 step for storing experiences


episode 001 avg_loss: 0.004 total_reward [train:2.000 test:-] e-greedy:0.000: : 190it [00:03, 48.42it/s]
episode 002 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.000: : 126it [00:02, 50.59it/s]
episode 003 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.001: : 250it [00:04, 51.31it/s]
episode 004 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.001: : 113it [00:02, 50.77it/s]
episode 005 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.001: : 142it [00:02, 50.81it/s]
episode 006 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.001: : 170it [00:03, 50.65it/s]
episode 007 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.001: : 127it [00:02, 50.78it/s]
episode 008 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.001: : 108it [00:02, 50.95it/s]
episode 009 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.001: : 127it [00:02, 50.53it/s]
episode 010 avg_loss: 0.002 total_reward [train:2.000 t

episode 078 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.011: : 172it [00:03, 51.62it/s]
episode 079 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.011: : 135it [00:02, 51.39it/s]
episode 080 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.011: : 209it [00:04, 50.52it/s]
episode 081 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.011: : 115it [00:02, 50.47it/s]
episode 082 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.012: : 196it [00:03, 51.33it/s]
episode 083 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.012: : 201it [00:03, 51.95it/s]
episode 084 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.012: : 144it [00:02, 51.11it/s]
episode 085 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.012: : 154it [00:02, 51.35it/s]
episode 086 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.012: : 151it [00:02, 51.41it/s]
episode 087 avg_loss: 0.002 total_reward [train:0.000 t

episode 155 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.023: : 247it [00:04, 49.55it/s]
episode 156 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.023: : 127it [00:02, 50.03it/s]
episode 157 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.023: : 261it [00:05, 51.10it/s]
episode 158 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.023: : 179it [00:03, 50.08it/s]
episode 159 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.023: : 113it [00:02, 50.77it/s]
episode 160 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.023: : 216it [00:04, 50.41it/s]
episode 161 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.024: : 224it [00:04, 49.13it/s]
episode 162 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.024: : 124it [00:02, 51.53it/s]
episode 163 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.024: : 216it [00:04, 51.00it/s]
episode 164 avg_loss: 0.002 total_reward [train:0.000 t

episode 233 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.034: : 131it [00:02, 46.95it/s]
episode 234 avg_loss: 0.001 total_reward [train:2.000 test:-] e-greedy:0.034: : 191it [00:03, 53.83it/s]
episode 235 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.034: : 203it [00:03, 53.99it/s]
episode 236 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.034: : 293it [00:05, 52.32it/s]
episode 237 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.035: : 118it [00:02, 50.89it/s]
episode 238 avg_loss: 0.001 total_reward [train:3.000 test:-] e-greedy:0.035: : 250it [00:04, 51.22it/s]
episode 239 avg_loss: 0.001 total_reward [train:0.000 test:-] e-greedy:0.035: : 108it [00:02, 53.00it/s]
episode 240 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.035: : 307it [00:06, 49.12it/s]
episode 241 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.035: : 128it [00:02, 52.75it/s]
episode 242 avg_loss: 0.001 total_reward [train:0.000 t

episode 310 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.045: : 110it [00:02, 54.01it/s]
episode 311 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.045: : 191it [00:03, 52.31it/s]
episode 312 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.046: : 166it [00:03, 51.05it/s]
episode 313 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.046: : 195it [00:03, 50.83it/s]
episode 314 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.046: : 207it [00:04, 51.45it/s]
episode 315 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.046: : 175it [00:03, 51.13it/s]
episode 316 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.046: : 129it [00:02, 51.03it/s]
episode 317 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.046: : 171it [00:03, 51.31it/s]
episode 318 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.047: : 185it [00:03, 51.86it/s]
episode 319 avg_loss: 0.002 total_reward [train:0.000 t

episode 387 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.057: : 118it [00:03, 37.44it/s]
episode 388 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.057: : 196it [00:05, 38.94it/s]
episode 389 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.057: : 175it [00:04, 38.08it/s]
episode 390 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.057: : 148it [00:03, 39.09it/s]
episode 391 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.058: : 218it [00:05, 39.36it/s]
episode 392 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.058: : 158it [00:03, 39.67it/s]
episode 393 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.058: : 118it [00:02, 36.28it/s]
episode 394 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.058: : 132it [00:03, 38.96it/s]
episode 395 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.058: : 201it [00:05, 38.36it/s]
episode 396 avg_loss: 0.002 total_reward [train:1.000 t

episode 464 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.068: : 116it [00:02, 40.14it/s]
episode 465 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.068: : 206it [00:05, 39.62it/s]
episode 466 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.068: : 126it [00:03, 39.33it/s]
episode 467 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.068: : 143it [00:03, 37.81it/s]
episode 468 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.069: : 150it [00:03, 39.03it/s]
episode 469 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.069: : 143it [00:03, 38.37it/s]
episode 470 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.069: : 113it [00:02, 38.37it/s]
episode 471 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.069: : 293it [00:07, 39.60it/s]
episode 472 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.069: : 140it [00:03, 40.10it/s]
episode 473 avg_loss: 0.002 total_reward [train:2.000 t

episode 542 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.079: : 159it [00:03, 41.10it/s]
episode 543 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.079: : 106it [00:02, 41.23it/s]
episode 544 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.079: : 108it [00:02, 41.09it/s]
episode 545 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.079: : 181it [00:04, 41.35it/s]
episode 546 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.079: : 190it [00:04, 41.24it/s]
episode 547 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.080: : 134it [00:03, 42.96it/s]
episode 548 avg_loss: 0.001 total_reward [train:1.000 test:-] e-greedy:0.080: : 141it [00:03, 40.76it/s]
episode 549 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.080: : 182it [00:04, 41.12it/s]
episode 550 avg_loss: 0.001 total_reward [train:0.000 test:0.000] e-greedy:0.080: : 132it [00:04, 32.74it/s]
episode 551 avg_loss: 0.002 total_reward [train:2.0

episode 619 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.090: : 111it [00:02, 40.86it/s]
episode 620 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.090: : 121it [00:02, 41.06it/s]
episode 621 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.091: : 222it [00:05, 40.48it/s]
episode 622 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.091: : 199it [00:04, 40.28it/s]
episode 623 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.091: : 128it [00:03, 41.40it/s]
episode 624 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.091: : 195it [00:04, 40.85it/s]
episode 625 avg_loss: 0.002 total_reward [train:4.000 test:-] e-greedy:0.091: : 263it [00:06, 44.58it/s]
episode 626 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.092: : 146it [00:03, 40.62it/s]
episode 627 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.092: : 114it [00:02, 40.90it/s]
episode 628 avg_loss: 0.003 total_reward [train:1.000 t

episode 696 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.101: : 215it [00:05, 40.99it/s]
episode 697 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.101: : 155it [00:03, 40.75it/s]
episode 698 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.101: : 212it [00:05, 41.11it/s]
episode 699 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.102: : 203it [00:04, 40.90it/s]
episode 700 avg_loss: 0.002 total_reward [train:1.000 test:0.000] e-greedy:0.102: : 130it [00:04, 30.63it/s]
episode 701 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.102: : 152it [00:03, 41.06it/s]
episode 702 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.102: : 162it [00:03, 40.97it/s]
episode 703 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.102: : 171it [00:04, 41.42it/s]
episode 704 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.102: : 117it [00:02, 41.80it/s]
episode 705 avg_loss: 0.002 total_reward [train:1.0

episode 773 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.112: : 247it [00:06, 41.09it/s]
episode 774 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.112: : 137it [00:03, 40.48it/s]
episode 775 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.112: : 109it [00:02, 42.17it/s]
episode 776 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.112: : 212it [00:05, 41.54it/s]
episode 777 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.112: : 130it [00:03, 41.32it/s]
episode 778 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.112: : 209it [00:05, 41.02it/s]
episode 779 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.112: : 143it [00:03, 41.27it/s]
episode 780 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.113: : 243it [00:06, 40.49it/s]
episode 781 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.113: : 110it [00:02, 35.74it/s]
episode 782 avg_loss: 0.002 total_reward [train:1.000 t

episode 850 avg_loss: 0.002 total_reward [train:1.000 test:0.000] e-greedy:0.122: : 138it [00:04, 34.20it/s]
episode 851 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.122: : 116it [00:02, 39.12it/s]
episode 852 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.123: : 121it [00:03, 37.58it/s]
episode 853 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.123: : 142it [00:03, 39.36it/s]
episode 854 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.123: : 110it [00:02, 39.24it/s]
episode 855 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.123: : 151it [00:03, 35.34it/s]
episode 856 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.123: : 160it [00:03, 45.40it/s]
episode 857 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.123: : 129it [00:03, 40.92it/s]
episode 858 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.123: : 206it [00:05, 37.59it/s]
episode 859 avg_loss: 0.002 total_reward [train:1.0

episode 927 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.132: : 108it [00:02, 39.73it/s]
episode 928 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.132: : 105it [00:02, 38.92it/s]
episode 929 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.132: : 121it [00:03, 38.95it/s]
episode 930 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.132: : 150it [00:03, 39.16it/s]
episode 931 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.132: : 157it [00:04, 39.11it/s]
episode 932 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.133: : 147it [00:03, 38.58it/s]
episode 933 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.133: : 159it [00:04, 39.08it/s]
episode 934 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.133: : 130it [00:03, 41.45it/s]
episode 935 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.133: : 105it [00:02, 38.35it/s]
episode 936 avg_loss: 0.003 total_reward [train:0.000 t

episode 1004 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.142: : 122it [00:03, 39.05it/s]
episode 1005 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.142: : 259it [00:06, 37.98it/s]
episode 1006 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.143: : 179it [00:04, 41.22it/s]
episode 1007 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.143: : 193it [00:04, 39.86it/s]
episode 1008 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.143: : 153it [00:03, 39.90it/s]
episode 1009 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.143: : 160it [00:03, 40.31it/s]
episode 1010 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.143: : 147it [00:03, 40.18it/s]
episode 1011 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.143: : 121it [00:03, 40.16it/s]
episode 1012 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.143: : 134it [00:03, 41.40it/s]
episode 1013 avg_loss: 0.002 total_reward [tra

episode 1081 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.153: : 125it [00:03, 40.71it/s]
episode 1082 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.153: : 193it [00:04, 40.68it/s]
episode 1083 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.154: : 181it [00:04, 40.78it/s]
episode 1084 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.154: : 122it [00:02, 40.83it/s]
episode 1085 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.154: : 118it [00:02, 40.54it/s]
episode 1086 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.154: : 109it [00:02, 40.81it/s]
episode 1087 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.154: : 110it [00:02, 40.11it/s]
episode 1088 avg_loss: 0.002 total_reward [train:1.000 test:-] e-greedy:0.154: : 152it [00:03, 40.72it/s]
episode 1089 avg_loss: 0.002 total_reward [train:3.000 test:-] e-greedy:0.154: : 223it [00:05, 40.12it/s]
episode 1090 avg_loss: 0.003 total_reward [tra

episode 1158 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.163: : 104it [00:02, 39.25it/s]
episode 1159 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.163: : 159it [00:03, 39.78it/s]
episode 1160 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.163: : 97it [00:02, 38.80it/s]
episode 1161 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.164: : 219it [00:05, 39.98it/s]
episode 1162 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.164: : 108it [00:02, 40.11it/s]
episode 1163 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.164: : 138it [00:03, 39.18it/s]
episode 1164 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.164: : 121it [00:03, 39.96it/s]
episode 1165 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.164: : 108it [00:02, 39.56it/s]
episode 1166 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.164: : 120it [00:03, 38.99it/s]
episode 1167 avg_loss: 0.002 total_reward [trai

episode 1235 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.174: : 188it [00:04, 41.48it/s]
episode 1236 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.174: : 111it [00:02, 39.86it/s]
episode 1237 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.174: : 125it [00:03, 40.83it/s]
episode 1238 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.174: : 134it [00:03, 40.02it/s]
episode 1239 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.174: : 143it [00:03, 40.36it/s]
episode 1240 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.175: : 94it [00:02, 43.25it/s]
episode 1241 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.175: : 278it [00:07, 38.92it/s]
episode 1242 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.175: : 200it [00:05, 38.76it/s]
episode 1243 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.175: : 137it [00:03, 39.30it/s]
episode 1244 avg_loss: 0.002 total_reward [trai

episode 1312 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.186: : 108it [00:02, 40.24it/s]
episode 1313 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.186: : 99it [00:02, 41.18it/s]
episode 1314 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.186: : 124it [00:03, 40.05it/s]
episode 1315 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.186: : 137it [00:03, 41.31it/s]
episode 1316 avg_loss: 0.002 total_reward [train:0.000 test:-] e-greedy:0.186: : 126it [00:03, 42.00it/s]
episode 1317 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.186: : 134it [00:03, 40.83it/s]
episode 1318 avg_loss: 0.002 total_reward [train:2.000 test:-] e-greedy:0.187: : 189it [00:04, 40.21it/s]
episode 1319 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.187: : 132it [00:03, 41.15it/s]
episode 1320 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.187: : 231it [00:05, 40.62it/s]
episode 1321 avg_loss: 0.003 total_reward [trai

episode 1389 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.198: : 164it [00:04, 39.74it/s]
episode 1390 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.198: : 138it [00:03, 35.96it/s]
episode 1391 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.198: : 198it [00:05, 40.11it/s]
episode 1392 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.198: : 204it [00:05, 39.21it/s]
episode 1393 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.198: : 121it [00:03, 39.60it/s]
episode 1394 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.198: : 101it [00:02, 39.09it/s]
episode 1395 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.199: : 159it [00:03, 40.63it/s]
episode 1396 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.199: : 169it [00:04, 40.19it/s]
episode 1397 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.199: : 315it [00:07, 40.52it/s]
episode 1398 avg_loss: 0.003 total_reward [tra

episode 1466 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.211: : 151it [00:03, 40.84it/s]
episode 1467 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.211: : 151it [00:03, 40.34it/s]
episode 1468 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.211: : 238it [00:05, 40.36it/s]
episode 1469 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.211: : 166it [00:04, 40.37it/s]
episode 1470 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.211: : 158it [00:03, 40.32it/s]
episode 1471 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.211: : 237it [00:06, 39.08it/s]
episode 1472 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.212: : 230it [00:05, 38.99it/s]
episode 1473 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.212: : 203it [00:05, 38.96it/s]
episode 1474 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.212: : 244it [00:06, 39.89it/s]
episode 1475 avg_loss: 0.003 total_reward [tra

episode 1543 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.224: : 139it [00:03, 47.06it/s]
episode 1544 avg_loss: 0.003 total_reward [train:0.000 test:-] e-greedy:0.224: : 100it [00:02, 39.14it/s]
episode 1545 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.224: : 174it [00:04, 38.76it/s]
episode 1546 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.224: : 226it [00:05, 38.68it/s]
episode 1547 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.224: : 164it [00:04, 36.31it/s]
episode 1548 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.225: : 210it [00:05, 35.59it/s]
episode 1549 avg_loss: 0.004 total_reward [train:2.000 test:-] e-greedy:0.225: : 192it [00:05, 37.92it/s]
episode 1550 avg_loss: 0.003 total_reward [train:0.000 test:6.000] e-greedy:0.225: : 109it [00:03, 40.06it/s]
episode 1551 avg_loss: 0.005 total_reward [train:3.000 test:-] e-greedy:0.225: : 243it [00:06, 36.08it/s]
episode 1552 avg_loss: 0.004 total_reward 

episode 1620 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.238: : 194it [00:04, 39.73it/s]
episode 1621 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.238: : 181it [00:04, 39.20it/s]
episode 1622 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.238: : 195it [00:04, 40.15it/s]
episode 1623 avg_loss: 0.003 total_reward [train:5.000 test:-] e-greedy:0.238: : 299it [00:07, 39.43it/s]
episode 1624 avg_loss: 0.004 total_reward [train:2.000 test:-] e-greedy:0.238: : 206it [00:05, 39.03it/s]
episode 1625 avg_loss: 0.003 total_reward [train:2.000 test:-] e-greedy:0.239: : 191it [00:04, 39.73it/s]
episode 1626 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.239: : 241it [00:06, 39.91it/s]
episode 1627 avg_loss: 0.003 total_reward [train:4.000 test:-] e-greedy:0.239: : 284it [00:07, 39.70it/s]
episode 1628 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.239: : 172it [00:04, 40.46it/s]
episode 1629 avg_loss: 0.003 total_reward [tra

episode 1697 avg_loss: 0.003 total_reward [train:1.000 test:-] e-greedy:0.251: : 141it [00:03, 39.70it/s]
episode 1698 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.252: : 244it [00:06, 40.09it/s]
episode 1699 avg_loss: 0.003 total_reward [train:7.000 test:-] e-greedy:0.252: : 464it [00:11, 39.88it/s]
episode 1700 avg_loss: 0.005 total_reward [train:0.000 test:7.000] e-greedy:0.252: : 117it [00:04, 26.13it/s]
episode 1701 avg_loss: 0.005 total_reward [train:0.000 test:-] e-greedy:0.252: : 129it [00:03, 38.38it/s]
episode 1702 avg_loss: 0.005 total_reward [train:7.000 test:-] e-greedy:0.253: : 377it [00:09, 40.05it/s]
episode 1703 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.253: : 229it [00:05, 39.52it/s]
episode 1704 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.253: : 373it [00:09, 39.53it/s]
episode 1705 avg_loss: 0.004 total_reward [train:7.000 test:-] e-greedy:0.253: : 419it [00:10, 40.36it/s]
episode 1706 avg_loss: 0.004 total_reward 

episode 1774 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.265: : 237it [00:06, 38.92it/s]
episode 1775 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.265: : 247it [00:06, 38.37it/s]
episode 1776 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.265: : 113it [00:02, 39.69it/s]
episode 1777 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.265: : 153it [00:03, 39.10it/s]
episode 1778 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.265: : 225it [00:05, 38.94it/s]
episode 1779 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.266: : 109it [00:02, 40.73it/s]
episode 1780 avg_loss: 0.004 total_reward [train:3.000 test:-] e-greedy:0.266: : 223it [00:05, 39.41it/s]
episode 1781 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.266: : 161it [00:04, 38.86it/s]
episode 1782 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.266: : 136it [00:03, 38.55it/s]
episode 1783 avg_loss: 0.003 total_reward [tra

episode 1851 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.278: : 150it [00:03, 38.69it/s]
episode 1852 avg_loss: 0.004 total_reward [train:4.000 test:-] e-greedy:0.278: : 251it [00:06, 38.50it/s]
episode 1853 avg_loss: 0.004 total_reward [train:4.000 test:-] e-greedy:0.278: : 231it [00:06, 34.57it/s]
episode 1854 avg_loss: 0.003 total_reward [train:3.000 test:-] e-greedy:0.279: : 235it [00:06, 38.84it/s]
episode 1855 avg_loss: 0.004 total_reward [train:0.000 test:-] e-greedy:0.279: : 101it [00:02, 38.00it/s]
episode 1856 avg_loss: 0.004 total_reward [train:1.000 test:-] e-greedy:0.279: : 185it [00:04, 39.57it/s]
episode 1857 avg_loss: 0.009 total_reward [train:2.000 test:-] e-greedy:0.279: : 204it [00:05, 37.01it/s]
episode 1858 avg_loss: 0.009 total_reward [train:2.000 test:-] e-greedy:0.279: : 190it [00:04, 38.37it/s]
episode 1859 avg_loss: 0.009 total_reward [train:1.000 test:-] e-greedy:0.279: : 139it [00:03, 37.64it/s]
episode 1860 avg_loss: 0.009 total_reward [tra

In [None]:
# Save the Q-network to "dqn_exp5.h5" (relative path, so it is resolved
# against the current working directory).
q_network.save("dqn_exp5.h5")
# NOTE(review): leftover commented-out statement; purpose not evident from here.
# model = DQN(custom_env, q_network)

In [None]:
# Run the agent's test routine; render=True presumably displays the game.
model.test(render=True)

In [None]:
import time

# Measure how long it takes to draw a random permutation of 10 elements.
# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() can be too coarse for an operation this small
# and may jump if the system clock is adjusted.
start_t = time.perf_counter()
a = np.random.permutation(10)
elapsed = time.perf_counter() - start_t
print(elapsed)

# 