In [1]:
import gym
import gym_Aircraft
from gym_Aircraft.envs.Aircraft_env import (
    NOT_DONE,            # 0
    CRASHED,             # 1
    AVOIDED_TIMEOUT,     # 2
    AVOIDED_IN_ADVANCE,  # 3
)

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils as torch_utils

import numpy as np
import matplotlib.pyplot as plt

from collections import Counter
import random

In [2]:
env = gym.make("acav-v0")
# env = gym.make("CartPole-v1")
# env.action_space.seed(960501) 



In [3]:
dim_obs = env.observation_space.shape[0]
dim_act = env.action_space.n
max_episode = 5000
max_replay = 50000
batch_size = 256 
gamma = 1.  # ㅋㅋㅋ .9에서 1로 바꾸니까 갑자기 잘됨
eps = 1.  # 얘도 1로 시작해야 되네
eps_decay = .99
eps_decay_step = 10
learning_rate = 1e-7  # 학습하다가 발산하면 얘부터 만져봐야 됨
max_grad_norm = 10

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class DQN(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(dim_obs, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, dim_act),
            nn.LeakyReLU(.5),  # 와 이거 혁신이네
        )
        
    def forward(self, *input):
        return self.model(*input).squeeze()

In [5]:
net = DQN().to(device)
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

memory = []
record = {'score': [], 'average loss': [], 'actions': []}

global_step = 0
train_step = 0
for episode in range(max_episode):
    s0 = env.reset()
    done = False
    net_reward = 0
    episode_loss = 0
    episode_norm = 0
    episode_step = 0
    episode_actions = []
    
    while not done:
        if train_step % eps_decay_step == eps_decay_step - 1:
            eps *= eps_decay
            eps = max(.01, eps)
            
        if random.random() > eps:
            a = np.argmax(net(torch.from_numpy(s0).float().to(device)).detach().cpu())
            a = int(a) 
        else:
            a = env.action_space.sample()
        
        # 의도적인 탐험
#         if episode < 50: a = 1
#         elif episode < 100: a = 2
#         elif episode < 150:
#             a = 1 if episode_step < 50 else 0
#         elif episode < 200:
#             a = 2 if episode_step < 70 else 0
            
        s1, reward, done, _ = env.step(a)
        s0 = s1
        net_reward += reward
        if not done: 
            episode_actions.append(a)
        
        memory.append([s0, a, reward, s1, done])
        if len(memory) > max_replay:
            del memory[0]
        
        if global_step == max_replay - 1:
            print('Training now begins')
        
# https://stackoverflow.com/questions/52770780/why-is-my-deep-q-net-and-double-deep-q-net-unstable
        if len(memory) == max_replay and global_step % 2 == 0:  # soft update
            y_batch = np.zeros(batch_size)
            s0_batch = np.zeros((batch_size, dim_obs))
            for j, sample in enumerate(random.sample(memory, batch_size)):
                mem_s0, mem_a, mem_reward, mem_s1, is_terminal = sample
                s0_batch[j] = mem_s0
                if is_terminal is NOT_DONE:
                    # y_j = r_j + gamma * max_a' Q(s_j+1, a')
                    y_batch[j] = mem_reward + gamma * torch.max(net(torch.from_numpy(mem_s1).float().to(device)).detach()).cpu().numpy()
                else:
                    y_batch[j] = mem_reward
            # optimize Q w.r.t ||max_a Q(s, a) - y||
            loss = criterion(
                torch.max(net(torch.from_numpy(s0_batch).float().to(device)), 1)[0],
                torch.from_numpy(y_batch).float().to(device)
            )
            loss.backward()
            episode_norm += torch_utils.clip_grad_norm_(net.parameters(), max_grad_norm)
            optimizer.step()
            train_step += 1
            episode_loss += loss
        episode_step += 1
        global_step += 1
    
    # episode log
    # score가 아니라 얼마나 잘 깨는지 봐야 됨
    # score는 단순히 사람 편하자고 계산하는 수임
    print(f'Epi. {episode+1:4d} score: {net_reward:4.0f}', end=' ')
    if len(memory) == max_replay:
        average_episode_loss = episode_loss/episode_step
        record['score'].append(net_reward)
        record['average loss'].append(average_episode_loss)
        record['actions'].append(episode_actions)
        c = Counter(episode_actions)
        print(
            f'E[L]: {average_episode_loss:6.1f} '
            f'e: {eps:.4f} norm: {episode_norm/episode_step:.0f} '
            f'actions: {c[0]:3d} {c[1]:3d} {c[2]:3d} {done}'
        )
    else:
        print()

Epi.    1 score:  -13 
Epi.    2 score:   -7 
Epi.    3 score:   -9 
Epi.    4 score:  -13 
Epi.    5 score:   -8 
Epi.    6 score:  -23 
Epi.    7 score:   -8 
Epi.    8 score:  -18 
Epi.    9 score:   -8 
Epi.   10 score:  -15 
Epi.   11 score:  -21 
Epi.   12 score:  -15 
Epi.   13 score:  -12 
Epi.   14 score:  -16 
Epi.   15 score:   -9 
Epi.   16 score:  -21 
Epi.   17 score:  -17 
Epi.   18 score:  -10 
Epi.   19 score:  -18 
Epi.   20 score:  -33 
Epi.   21 score:  -25 
Epi.   22 score:  -19 
Epi.   23 score:  -10 
Epi.   24 score:  -12 
Epi.   25 score:  -24 
Epi.   26 score:  -11 
Epi.   27 score:  -12 
Epi.   28 score:  -15 
Epi.   29 score:  -12 
Epi.   30 score:  -11 
Epi.   31 score:  -32 
Epi.   32 score:  -11 
Epi.   33 score:   -9 
Epi.   34 score:  -12 
Epi.   35 score:  -12 
Epi.   36 score:  -13 
Epi.   37 score:  -16 
Epi.   38 score:   -7 
Epi.   39 score:  -18 
Epi.   40 score:  -10 
Epi.   41 score:  -18 
Epi.   42 score:  -13 
Epi.   43 score:  -12 
Epi.   44 s

Epi.  362 score:  -20 
Epi.  363 score:  -24 
Epi.  364 score:  -10 
Epi.  365 score:  -11 
Epi.  366 score:   -9 
Epi.  367 score:  -12 
Epi.  368 score:  -13 
Epi.  369 score:  -11 
Epi.  370 score:  -24 
Epi.  371 score:  -24 
Epi.  372 score:  -16 
Epi.  373 score:   -9 
Epi.  374 score:   -7 
Epi.  375 score:  -28 
Epi.  376 score:   -7 
Epi.  377 score:  -10 
Epi.  378 score:   -7 
Epi.  379 score:   -9 
Epi.  380 score:   -9 
Epi.  381 score:  -12 
Epi.  382 score:   -8 
Epi.  383 score:  -15 
Epi.  384 score:  -10 
Epi.  385 score:  -20 
Epi.  386 score:  -26 
Epi.  387 score:  -13 
Epi.  388 score:   -8 
Epi.  389 score:   -8 
Epi.  390 score:  -15 
Epi.  391 score:  -28 
Epi.  392 score:  -23 
Epi.  393 score:   -7 
Epi.  394 score:  -19 
Epi.  395 score:   -9 
Epi.  396 score:  -16 
Epi.  397 score:  -32 
Epi.  398 score:  -12 
Epi.  399 score:  -28 
Epi.  400 score:  -23 
Epi.  401 score:  -16 
Epi.  402 score:   -9 
Epi.  403 score:  -22 
Epi.  404 score:   -7 
Epi.  405 s

Epi.  568 score: -519 E[L]:   30.9 e: 0.0100 norm: 3550 actions:   1 233   1 3
Epi.  569 score: -488 E[L]:   41.4 e: 0.0100 norm: 4129 actions:   0 227   1 3
Epi.  570 score: -493 E[L]:   38.1 e: 0.0100 norm: 4061 actions:   0 228   1 3
Epi.  571 score: -472 E[L]:   29.8 e: 0.0100 norm: 3678 actions:   0 221   4 3
Epi.  572 score: -479 E[L]:   38.4 e: 0.0100 norm: 4236 actions:   0 225   1 3
Epi.  573 score: -472 E[L]:   37.6 e: 0.0100 norm: 4218 actions:   1 221   3 3
Epi.  574 score: -479 E[L]:   37.3 e: 0.0100 norm: 4255 actions:   0 225   1 3
Epi.  575 score: -500 E[L]:   35.5 e: 0.0100 norm: 4184 actions:   0 229   2 3
Epi.  576 score: -506 E[L]:   33.4 e: 0.0100 norm: 4201 actions:   1 229   2 3
Epi.  577 score: -514 E[L]:   31.3 e: 0.0100 norm: 4063 actions:   3 231   1 3
Epi.  578 score: -529 E[L]:   27.6 e: 0.0100 norm: 3928 actions:   0 236   1 3
Epi.  579 score: -470 E[L]:   31.6 e: 0.0100 norm: 4246 actions:   1 220   4 3
Epi.  580 score: -480 E[L]:   30.0 e: 0.0100 norm: 4

Epi.  672 score: -232 E[L]:    1.4 e: 0.0100 norm: 1356 actions:   7  95 134 3
Epi.  673 score: -224 E[L]:    1.4 e: 0.0100 norm: 1291 actions:  11  92 124 3
Epi.  674 score: -227 E[L]:    1.4 e: 0.0100 norm: 1260 actions:   7  95 131 3
Epi.  675 score: -214 E[L]:    1.4 e: 0.0100 norm: 1241 actions:   8  95 128 3
Epi.  676 score: -223 E[L]:    1.4 e: 0.0100 norm: 1202 actions:   9  94 127 3
Epi.  677 score: -201 E[L]:    1.5 e: 0.0100 norm: 1212 actions:   9  98 123 3
Epi.  678 score: -206 E[L]:    1.5 e: 0.0100 norm: 1180 actions:   8  96 127 3
Epi.  679 score: -205 E[L]:    1.5 e: 0.0100 norm: 1156 actions:  11  95 121 3
Epi.  680 score: -220 E[L]:    1.6 e: 0.0100 norm: 1162 actions:   9  95 124 3
Epi.  681 score: -221 E[L]:    1.6 e: 0.0100 norm: 1148 actions:   9  94 128 3
Epi.  682 score: -216 E[L]:    1.7 e: 0.0100 norm: 1157 actions:   8  95 130 3
Epi.  683 score: -207 E[L]:    1.7 e: 0.0100 norm: 1123 actions:   8  94 131 3
Epi.  684 score: -211 E[L]:    1.7 e: 0.0100 norm: 1

Epi.  777 score: -218 E[L]:   14.8 e: 0.0100 norm: 927 actions:  18  59 161 3
Epi.  778 score: -249 E[L]:   13.0 e: 0.0100 norm: 928 actions:  19  51 161 3
Epi.  779 score: -238 E[L]:   14.0 e: 0.0100 norm: 1019 actions:  19  52 155 3
Epi.  780 score: -236 E[L]:   12.9 e: 0.0100 norm: 954 actions:  17  53 164 3
Epi.  781 score: -234 E[L]:   12.9 e: 0.0100 norm: 931 actions:  17  52 157 3
Epi.  782 score: -233 E[L]:   14.6 e: 0.0100 norm: 904 actions:  17  51 159 3
Epi.  783 score: -248 E[L]:   13.9 e: 0.0100 norm: 1029 actions:  16  50 167 3
Epi.  784 score: -239 E[L]:   13.8 e: 0.0100 norm: 862 actions:  20  49 159 3
Epi.  785 score: -234 E[L]:   13.9 e: 0.0100 norm: 882 actions:  18  54 166 3
Epi.  786 score: -239 E[L]:   13.5 e: 0.0100 norm: 1010 actions:  17  50 165 3
Epi.  787 score: -219 E[L]:   12.6 e: 0.0100 norm: 956 actions:  17  55 161 3
Epi.  788 score: -216 E[L]:   11.3 e: 0.0100 norm: 1024 actions:  18  53 159 3
Epi.  789 score: -213 E[L]:   12.8 e: 0.0100 norm: 886 actio

Epi.  882 score: -266 E[L]:   18.5 e: 0.0100 norm: 1096 actions:  18  30 180 3
Epi.  883 score: -298 E[L]:   19.6 e: 0.0100 norm: 1074 actions:  19  25 182 3
Epi.  884 score: -301 E[L]:   19.3 e: 0.0100 norm: 1087 actions:  16  29 189 3
Epi.  885 score: -265 E[L]:   18.9 e: 0.0100 norm: 1107 actions:  18  29 179 3
Epi.  886 score: -292 E[L]:   19.1 e: 0.0100 norm: 1126 actions:  16  26 184 3
Epi.  887 score: -287 E[L]:   20.1 e: 0.0100 norm: 1207 actions:  18  28 186 3
Epi.  888 score: -271 E[L]:   20.8 e: 0.0100 norm: 1117 actions:  16  30 184 3
Epi.  889 score: -281 E[L]:   18.5 e: 0.0100 norm: 1058 actions:  16  27 183 3
Epi.  890 score: -305 E[L]:   18.5 e: 0.0100 norm: 1046 actions:  17  25 186 3
Epi.  891 score: -273 E[L]:   20.4 e: 0.0100 norm: 1200 actions:  19  31 179 3
Epi.  892 score: -283 E[L]:   19.0 e: 0.0100 norm: 1179 actions:  16  31 187 3
Epi.  893 score: -286 E[L]:   21.6 e: 0.0100 norm: 1271 actions:  16  29 189 3
Epi.  894 score: -266 E[L]:   17.3 e: 0.0100 norm: 1

Epi.  986 score: -259 E[L]:   30.4 e: 0.0100 norm: 1470 actions:  18  33 175 3
Epi.  987 score: -241 E[L]:   30.3 e: 0.0100 norm: 1397 actions:  16  37 173 3
Epi.  988 score: -252 E[L]:   29.4 e: 0.0100 norm: 1444 actions:  18  34 174 3
Epi.  989 score: -272 E[L]:   29.1 e: 0.0100 norm: 1330 actions:  19  34 181 3
Epi.  990 score: -246 E[L]:   29.3 e: 0.0100 norm: 1436 actions:  16  39 174 3
Epi.  991 score: -248 E[L]:   30.1 e: 0.0100 norm: 1467 actions:  16  36 174 3
Epi.  992 score: -250 E[L]:   33.8 e: 0.0100 norm: 1424 actions:  18  36 173 3
Epi.  993 score: -262 E[L]:   31.2 e: 0.0100 norm: 1520 actions:  19  34 178 3
Epi.  994 score: -257 E[L]:   32.5 e: 0.0100 norm: 1526 actions:  19  33 174 3
Epi.  995 score: -253 E[L]:   31.4 e: 0.0100 norm: 1535 actions:  18  34 174 3
Epi.  996 score: -253 E[L]:   29.0 e: 0.0100 norm: 1395 actions:  18  38 177 3
Epi.  997 score: -268 E[L]:   31.8 e: 0.0100 norm: 1416 actions:  17  36 183 3
Epi.  998 score: -247 E[L]:   32.0 e: 0.0100 norm: 1

Epi. 1090 score: -322 E[L]:   39.3 e: 0.0100 norm: 1672 actions:  13  29 195 3
Epi. 1091 score: -318 E[L]:   40.0 e: 0.0100 norm: 1608 actions:  15  25 188 3
Epi. 1092 score: -317 E[L]:   38.3 e: 0.0100 norm: 1481 actions:  13  28 192 3
Epi. 1093 score: -324 E[L]:   42.1 e: 0.0100 norm: 1717 actions:  18  28 192 3
Epi. 1094 score: -320 E[L]:   40.8 e: 0.0100 norm: 1666 actions:  15  25 190 3
Epi. 1095 score: -321 E[L]:   37.9 e: 0.0100 norm: 1521 actions:  15  27 191 3
Epi. 1096 score: -303 E[L]:   39.4 e: 0.0100 norm: 1633 actions:  15  25 185 3
Epi. 1097 score: -324 E[L]:   44.5 e: 0.0100 norm: 1815 actions:  14  27 193 3
Epi. 1098 score: -327 E[L]:   33.0 e: 0.0100 norm: 1573 actions:  13  28 195 3
Epi. 1099 score: -318 E[L]:   38.6 e: 0.0100 norm: 1556 actions:  15  27 192 3
Epi. 1100 score: -288 E[L]:   44.3 e: 0.0100 norm: 1925 actions:  15  28 183 3
Epi. 1101 score: -319 E[L]:   37.1 e: 0.0100 norm: 1502 actions:  13  27 193 3
Epi. 1102 score: -315 E[L]:   34.8 e: 0.0100 norm: 1

Epi. 1194 score: -290 E[L]:   38.6 e: 0.0100 norm: 1680 actions:  15  28 184 3
Epi. 1195 score: -306 E[L]:   42.5 e: 0.0100 norm: 1867 actions:  14  26 188 3
Epi. 1196 score: -284 E[L]:   32.3 e: 0.0100 norm: 1728 actions:  13  34 185 3
Epi. 1197 score: -277 E[L]:   41.6 e: 0.0100 norm: 1854 actions:  15  31 180 3
Epi. 1198 score: -293 E[L]:   50.1 e: 0.0100 norm: 1763 actions:  14  31 187 3
Epi. 1199 score: -284 E[L]:   49.6 e: 0.0100 norm: 1986 actions:  16  30 180 3
Epi. 1200 score: -306 E[L]:   42.9 e: 0.0100 norm: 1763 actions:  16  28 190 3
Epi. 1201 score: -297 E[L]:   44.6 e: 0.0100 norm: 1817 actions:  13  31 189 3
Epi. 1202 score: -277 E[L]:   48.8 e: 0.0100 norm: 1737 actions:  17  30 179 3
Epi. 1203 score: -310 E[L]:   51.1 e: 0.0100 norm: 1972 actions:  15  27 185 3
Epi. 1204 score: -285 E[L]:   43.2 e: 0.0100 norm: 1806 actions:  15  31 182 3
Epi. 1205 score: -280 E[L]:   42.2 e: 0.0100 norm: 1628 actions:  18  30 178 3
Epi. 1206 score: -288 E[L]:   49.9 e: 0.0100 norm: 1

Epi. 1298 score: -313 E[L]:   55.9 e: 0.0100 norm: 1981 actions:  12  26 188 3
Epi. 1299 score: -343 E[L]:   57.4 e: 0.0100 norm: 1933 actions:  13  25 199 3
Epi. 1300 score: -319 E[L]:   52.4 e: 0.0100 norm: 1847 actions:  14  26 191 3
Epi. 1301 score: -304 E[L]:   46.6 e: 0.0100 norm: 1824 actions:  12  28 192 3
Epi. 1302 score: -325 E[L]:   56.0 e: 0.0100 norm: 2083 actions:  12  29 197 3
Epi. 1303 score: -303 E[L]:   54.7 e: 0.0100 norm: 1904 actions:  17  25 184 3
Epi. 1304 score: -304 E[L]:   51.5 e: 0.0100 norm: 1794 actions:  15  25 187 3
Epi. 1305 score: -326 E[L]:   57.2 e: 0.0100 norm: 2057 actions:  11  29 197 3
Epi. 1306 score: -323 E[L]:   73.3 e: 0.0100 norm: 2362 actions:  15  23 189 3
Epi. 1307 score: -314 E[L]:   50.8 e: 0.0100 norm: 1827 actions:  13  24 189 3
Epi. 1308 score: -306 E[L]:   56.7 e: 0.0100 norm: 1904 actions:  14  26 187 3
Epi. 1309 score: -307 E[L]:   50.7 e: 0.0100 norm: 1916 actions:  11  27 189 3
Epi. 1310 score: -294 E[L]:   51.8 e: 0.0100 norm: 2

Epi. 1402 score: -316 E[L]:   52.8 e: 0.0100 norm: 2036 actions:  14  23 188 3
Epi. 1403 score: -332 E[L]:   52.7 e: 0.0100 norm: 1872 actions:  15  26 193 3
Epi. 1404 score: -316 E[L]:   53.4 e: 0.0100 norm: 1884 actions:  10  25 191 3
Epi. 1405 score: -329 E[L]:   64.3 e: 0.0100 norm: 2072 actions:  14  22 194 3
Epi. 1406 score: -320 E[L]:   56.7 e: 0.0100 norm: 2001 actions:  16  20 190 3
Epi. 1407 score: -322 E[L]:   54.7 e: 0.0100 norm: 1873 actions:  11  24 194 3
Epi. 1408 score: -318 E[L]:   52.8 e: 0.0100 norm: 2235 actions:  12  25 191 3
Epi. 1409 score: -339 E[L]:   55.1 e: 0.0100 norm: 2022 actions:  12  24 199 3
Epi. 1410 score: -346 E[L]:   68.4 e: 0.0100 norm: 2118 actions:  12  23 200 3
Epi. 1411 score: -323 E[L]:   64.5 e: 0.0100 norm: 2314 actions:  14  22 190 3
Epi. 1412 score: -319 E[L]:   61.3 e: 0.0100 norm: 2297 actions:  14  22 192 3
Epi. 1413 score: -328 E[L]:   59.9 e: 0.0100 norm: 1820 actions:  12  23 195 3
Epi. 1414 score: -322 E[L]:   57.3 e: 0.0100 norm: 2

Epi. 1506 score: -327 E[L]:   74.2 e: 0.0100 norm: 2556 actions:  13  22 193 3
Epi. 1507 score: -333 E[L]:   68.6 e: 0.0100 norm: 2147 actions:  13  20 195 3
Epi. 1508 score: -334 E[L]:   66.1 e: 0.0100 norm: 2391 actions:  12  20 196 3
Epi. 1509 score: -323 E[L]:   58.3 e: 0.0100 norm: 2300 actions:  11  21 194 3
Epi. 1510 score: -322 E[L]:   67.6 e: 0.0100 norm: 2262 actions:  12  21 193 3
Epi. 1511 score: -327 E[L]:   60.3 e: 0.0100 norm: 1884 actions:  10  21 196 3
Epi. 1512 score: -333 E[L]:   69.4 e: 0.0100 norm: 2177 actions:  13  19 195 3
Epi. 1513 score: -354 E[L]:   64.3 e: 0.0100 norm: 2218 actions:   9  22 205 3
Epi. 1514 score: -316 E[L]:   62.8 e: 0.0100 norm: 2163 actions:  10  22 194 3
Epi. 1515 score: -359 E[L]:   66.8 e: 0.0100 norm: 2330 actions:  10  22 205 3
Epi. 1516 score: -326 E[L]:   70.7 e: 0.0100 norm: 2310 actions:  13  22 192 3
Epi. 1517 score: -334 E[L]:   60.1 e: 0.0100 norm: 2219 actions:  12  21 196 3
Epi. 1518 score: -328 E[L]:   62.7 e: 0.0100 norm: 2

Epi. 1610 score: -333 E[L]:   63.8 e: 0.0100 norm: 2213 actions:  12  18 196 3
Epi. 1611 score: -367 E[L]:   74.8 e: 0.0100 norm: 2298 actions:   9  21 207 3
Epi. 1612 score: -340 E[L]:   68.6 e: 0.0100 norm: 2205 actions:  12  20 196 3
Epi. 1613 score: -335 E[L]:   72.0 e: 0.0100 norm: 2245 actions:  13  18 195 3
Epi. 1614 score: -356 E[L]:   66.3 e: 0.0100 norm: 2352 actions:  12  19 204 3
Epi. 1615 score: -341 E[L]:   74.0 e: 0.0100 norm: 2676 actions:  11  18 198 3
Epi. 1616 score: -337 E[L]:   69.4 e: 0.0100 norm: 2276 actions:  10  19 198 3
Epi. 1617 score: -334 E[L]:   71.6 e: 0.0100 norm: 2357 actions:  10  20 197 3
Epi. 1618 score: -338 E[L]:   66.9 e: 0.0100 norm: 2129 actions:  13  17 196 3
Epi. 1619 score: -342 E[L]:   76.3 e: 0.0100 norm: 2377 actions:  16  17 198 3
Epi. 1620 score: -356 E[L]:   61.9 e: 0.0100 norm: 2004 actions:  12  21 202 3
Epi. 1621 score: -339 E[L]:   68.7 e: 0.0100 norm: 2330 actions:  10  22 198 3
Epi. 1622 score: -322 E[L]:   72.3 e: 0.0100 norm: 2

Epi. 1714 score: -349 E[L]:   73.5 e: 0.0100 norm: 2283 actions:  13  20 200 3
Epi. 1715 score: -349 E[L]:   75.8 e: 0.0100 norm: 2285 actions:  11  17 201 3
Epi. 1716 score: -340 E[L]:   76.7 e: 0.0100 norm: 2492 actions:  11  18 198 3
Epi. 1717 score: -350 E[L]:   81.0 e: 0.0100 norm: 2564 actions:  12  15 199 3
Epi. 1718 score: -348 E[L]:   81.9 e: 0.0100 norm: 2528 actions:  11  19 200 3
Epi. 1719 score: -351 E[L]:   73.4 e: 0.0100 norm: 2398 actions:  11  19 200 3
Epi. 1720 score: -352 E[L]:   63.2 e: 0.0100 norm: 2218 actions:  11  17 200 3
Epi. 1721 score: -357 E[L]:   67.8 e: 0.0100 norm: 2040 actions:  14  17 201 3
Epi. 1722 score: -364 E[L]:   75.4 e: 0.0100 norm: 2456 actions:  13  18 204 3
Epi. 1723 score: -337 E[L]:   67.2 e: 0.0100 norm: 2490 actions:  13  18 196 3
Epi. 1724 score: -368 E[L]:   78.8 e: 0.0100 norm: 2487 actions:  11  19 206 3
Epi. 1725 score: -368 E[L]:   81.4 e: 0.0100 norm: 2422 actions:  12  18 204 3
Epi. 1726 score: -344 E[L]:   69.5 e: 0.0100 norm: 2

Epi. 1818 score: -379 E[L]:   76.5 e: 0.0100 norm: 2238 actions:  10  18 210 3
Epi. 1819 score: -354 E[L]:   83.0 e: 0.0100 norm: 2470 actions:  13  16 199 3
Epi. 1820 score: -327 E[L]:   76.4 e: 0.0100 norm: 2532 actions:  12  19 195 3
Epi. 1821 score: -351 E[L]:   78.4 e: 0.0100 norm: 2315 actions:  13  15 199 3
Epi. 1822 score: -355 E[L]:   80.9 e: 0.0100 norm: 2524 actions:  11  17 201 3
Epi. 1823 score: -371 E[L]:   81.6 e: 0.0100 norm: 2659 actions:  12  17 206 3
Epi. 1824 score: -365 E[L]:   94.3 e: 0.0100 norm: 2772 actions:  12  15 203 3
Epi. 1825 score: -371 E[L]:   81.4 e: 0.0100 norm: 2751 actions:  12  16 206 3
Epi. 1826 score: -358 E[L]:   82.4 e: 0.0100 norm: 2262 actions:  11  16 202 3
Epi. 1827 score: -392 E[L]:   82.8 e: 0.0100 norm: 2490 actions:  11  18 207 3
Epi. 1828 score: -366 E[L]:   75.5 e: 0.0100 norm: 2263 actions:  14  18 203 3
Epi. 1829 score: -364 E[L]:   81.8 e: 0.0100 norm: 2511 actions:  13  16 203 3
Epi. 1830 score: -365 E[L]:   70.7 e: 0.0100 norm: 2

Epi. 1922 score: -341 E[L]:   78.4 e: 0.0100 norm: 2529 actions:   7  20 199 3
Epi. 1923 score: -365 E[L]:   85.0 e: 0.0100 norm: 2671 actions:  13  17 201 3
Epi. 1924 score: -385 E[L]:   88.4 e: 0.0100 norm: 2852 actions:  11  17 209 3
Epi. 1925 score: -362 E[L]:   84.2 e: 0.0100 norm: 2566 actions:  12  14 200 3
Epi. 1926 score: -367 E[L]:   74.7 e: 0.0100 norm: 2389 actions:  12  18 202 3
Epi. 1927 score: -392 E[L]:   75.9 e: 0.0100 norm: 2288 actions:  10  15 211 3
Epi. 1928 score: -361 E[L]:   90.9 e: 0.0100 norm: 2543 actions:  11  14 201 3
Epi. 1929 score: -388 E[L]:   83.6 e: 0.0100 norm: 2448 actions:  11  13 206 3
Epi. 1930 score: -385 E[L]:   87.0 e: 0.0100 norm: 2848 actions:  13  12 204 3
Epi. 1931 score: -358 E[L]:   84.8 e: 0.0100 norm: 2657 actions:  11  18 199 3
Epi. 1932 score: -345 E[L]:   80.2 e: 0.0100 norm: 2375 actions:  11  16 198 3
Epi. 1933 score: -388 E[L]:   90.2 e: 0.0100 norm: 2883 actions:  11  16 210 3
Epi. 1934 score: -356 E[L]:   72.4 e: 0.0100 norm: 2

Epi. 2026 score: -369 E[L]:  104.9 e: 0.0100 norm: 3009 actions:   9  13 205 3
Epi. 2027 score: -385 E[L]:   78.1 e: 0.0100 norm: 2424 actions:  12  16 208 3
Epi. 2028 score: -361 E[L]:   84.0 e: 0.0100 norm: 2511 actions:   9  13 203 3
Epi. 2029 score: -348 E[L]:   84.6 e: 0.0100 norm: 2397 actions:  11  17 198 3
Epi. 2030 score: -355 E[L]:   78.3 e: 0.0100 norm: 2619 actions:  12  16 201 3
Epi. 2031 score: -351 E[L]:   85.9 e: 0.0100 norm: 2294 actions:  12  17 199 3
Epi. 2032 score: -344 E[L]:   81.7 e: 0.0100 norm: 2471 actions:  10  17 198 3
Epi. 2033 score: -359 E[L]:   83.8 e: 0.0100 norm: 2493 actions:  10  15 200 3
Epi. 2034 score: -358 E[L]:   75.3 e: 0.0100 norm: 2702 actions:  11  15 200 3
Epi. 2035 score: -344 E[L]:   91.8 e: 0.0100 norm: 2704 actions:  11  16 198 3
Epi. 2036 score: -372 E[L]:   84.7 e: 0.0100 norm: 2397 actions:  11  16 206 3
Epi. 2037 score: -382 E[L]:   85.1 e: 0.0100 norm: 2901 actions:  11  17 208 3
Epi. 2038 score: -351 E[L]:   90.0 e: 0.0100 norm: 2

Epi. 2130 score: -376 E[L]:   87.7 e: 0.0100 norm: 2485 actions:  13  16 206 3
Epi. 2131 score: -349 E[L]:   91.6 e: 0.0100 norm: 2569 actions:  12  15 199 3
Epi. 2132 score: -355 E[L]:   83.2 e: 0.0100 norm: 2799 actions:  13  15 200 3
Epi. 2133 score: -371 E[L]:   72.0 e: 0.0100 norm: 2561 actions:  12  15 205 3
Epi. 2134 score: -349 E[L]:   90.3 e: 0.0100 norm: 2774 actions:  13  15 198 3
Epi. 2135 score: -349 E[L]:   83.6 e: 0.0100 norm: 2637 actions:  11  15 200 3
Epi. 2136 score: -355 E[L]:   88.7 e: 0.0100 norm: 2814 actions:  10  15 202 3
Epi. 2137 score: -385 E[L]:   95.7 e: 0.0100 norm: 2727 actions:  13  15 210 3
Epi. 2138 score: -368 E[L]:  110.9 e: 0.0100 norm: 3065 actions:  13  15 205 3
Epi. 2139 score: -385 E[L]:   85.2 e: 0.0100 norm: 2522 actions:  11  15 210 3
Epi. 2140 score: -387 E[L]:   79.4 e: 0.0100 norm: 2771 actions:  15  14 209 3
Epi. 2141 score: -363 E[L]:   85.9 e: 0.0100 norm: 2581 actions:  13  19 203 3
Epi. 2142 score: -361 E[L]:   85.9 e: 0.0100 norm: 2

Epi. 2234 score: -382 E[L]:   94.9 e: 0.0100 norm: 2787 actions:  11  15 209 3
Epi. 2235 score: -344 E[L]:   93.7 e: 0.0100 norm: 2908 actions:  13  17 196 3
Epi. 2236 score: -364 E[L]:   86.0 e: 0.0100 norm: 2667 actions:  10  16 200 3
Epi. 2237 score: -393 E[L]:   82.6 e: 0.0100 norm: 2582 actions:  11  16 211 3
Epi. 2238 score: -360 E[L]:   80.3 e: 0.0100 norm: 2888 actions:   8  17 202 3
Epi. 2239 score: -387 E[L]:   96.4 e: 0.0100 norm: 3059 actions:   8  18 211 3
Epi. 2240 score: -363 E[L]:   85.4 e: 0.0100 norm: 2963 actions:   8  16 205 3
Epi. 2241 score: -369 E[L]:   85.6 e: 0.0100 norm: 2761 actions:  11  16 202 3
Epi. 2242 score: -369 E[L]:   89.8 e: 0.0100 norm: 2805 actions:   9  16 205 3
Epi. 2243 score: -383 E[L]:   93.1 e: 0.0100 norm: 2949 actions:   9  19 210 3
Epi. 2244 score: -344 E[L]:   86.8 e: 0.0100 norm: 2944 actions:  12  17 197 3
Epi. 2245 score: -376 E[L]:   95.9 e: 0.0100 norm: 2886 actions:   9  18 206 3
Epi. 2246 score: -387 E[L]:   88.5 e: 0.0100 norm: 2

Epi. 2338 score: -384 E[L]:   81.2 e: 0.0100 norm: 2905 actions:  10  17 208 3
Epi. 2339 score: -366 E[L]:   85.8 e: 0.0100 norm: 2901 actions:  11  16 203 3
Epi. 2340 score: -387 E[L]:   92.4 e: 0.0100 norm: 2682 actions:  11  18 209 3
Epi. 2341 score: -387 E[L]:   80.6 e: 0.0100 norm: 2656 actions:  10  17 211 3
Epi. 2342 score: -358 E[L]:   91.1 e: 0.0100 norm: 3102 actions:   9  17 202 3
Epi. 2343 score: -354 E[L]:   98.9 e: 0.0100 norm: 2921 actions:  10  15 200 3
Epi. 2344 score: -363 E[L]:   82.6 e: 0.0100 norm: 2786 actions:  10  15 202 3
Epi. 2345 score: -376 E[L]:   95.7 e: 0.0100 norm: 2834 actions:   9  19 208 3
Epi. 2346 score: -380 E[L]:  105.9 e: 0.0100 norm: 3217 actions:  10  17 209 3
Epi. 2347 score: -365 E[L]:   81.5 e: 0.0100 norm: 2532 actions:   6  18 206 3
Epi. 2348 score: -366 E[L]:   98.0 e: 0.0100 norm: 3012 actions:  10  16 203 3
Epi. 2349 score: -384 E[L]:   89.4 e: 0.0100 norm: 2665 actions:  11  17 209 3
Epi. 2350 score: -365 E[L]:  102.3 e: 0.0100 norm: 3

Epi. 2442 score: -360 E[L]:   88.0 e: 0.0100 norm: 2582 actions:   7  18 204 3
Epi. 2443 score: -377 E[L]:   78.6 e: 0.0100 norm: 2539 actions:  10  17 207 3
Epi. 2444 score: -347 E[L]:   85.8 e: 0.0100 norm: 2842 actions:  11  17 198 3
Epi. 2445 score: -361 E[L]:   97.7 e: 0.0100 norm: 2977 actions:  11  15 201 3
Epi. 2446 score: -379 E[L]:  100.2 e: 0.0100 norm: 2770 actions:  10  18 208 3
Epi. 2447 score: -360 E[L]:  101.5 e: 0.0100 norm: 3161 actions:  10  15 201 3
Epi. 2448 score: -380 E[L]:   81.5 e: 0.0100 norm: 2610 actions:   8  20 208 3
Epi. 2449 score: -366 E[L]:  101.8 e: 0.0100 norm: 2752 actions:   9  16 204 3
Epi. 2450 score: -355 E[L]:   89.3 e: 0.0100 norm: 2707 actions:   8  15 203 3
Epi. 2451 score: -357 E[L]:   97.4 e: 0.0100 norm: 2733 actions:  11  15 199 3
Epi. 2452 score: -396 E[L]:  109.0 e: 0.0100 norm: 3219 actions:  10  18 210 3
Epi. 2453 score: -403 E[L]:   88.9 e: 0.0100 norm: 2578 actions:  11  13 209 3
Epi. 2454 score: -371 E[L]:  101.9 e: 0.0100 norm: 2

Epi. 2546 score: -407 E[L]:  103.8 e: 0.0100 norm: 2981 actions:  12  11 211 3
Epi. 2547 score: -375 E[L]:   90.9 e: 0.0100 norm: 2598 actions:  10  15 206 3
Epi. 2548 score: -402 E[L]:   96.6 e: 0.0100 norm: 2747 actions:  10  12 210 3
Epi. 2549 score: -393 E[L]:   87.2 e: 0.0100 norm: 2851 actions:   9  15 209 3
Epi. 2550 score: -374 E[L]:   90.8 e: 0.0100 norm: 2934 actions:  11  15 203 3
Epi. 2551 score: -366 E[L]:   92.9 e: 0.0100 norm: 2932 actions:  10  16 203 3
Epi. 2552 score: -383 E[L]:  111.2 e: 0.0100 norm: 2828 actions:   7  13 206 3
Epi. 2553 score: -385 E[L]:   67.7 e: 0.0100 norm: 2519 actions:  10  16 206 3
Epi. 2554 score: -381 E[L]:   76.4 e: 0.0100 norm: 2937 actions:   9  14 207 3
Epi. 2555 score: -358 E[L]:   98.7 e: 0.0100 norm: 2675 actions:   8  16 202 3
Epi. 2556 score: -395 E[L]:   86.0 e: 0.0100 norm: 2777 actions:   9  14 210 3
Epi. 2557 score: -394 E[L]:   88.3 e: 0.0100 norm: 2784 actions:  10  15 209 3
Epi. 2558 score: -397 E[L]:  112.9 e: 0.0100 norm: 3

Epi. 2650 score: -365 E[L]:   92.3 e: 0.0100 norm: 2820 actions:   8  14 205 3
Epi. 2651 score: -365 E[L]:   87.5 e: 0.0100 norm: 2816 actions:   8  16 204 3
Epi. 2652 score: -362 E[L]:   84.0 e: 0.0100 norm: 2902 actions:   8  15 203 3
Epi. 2653 score: -376 E[L]:  113.5 e: 0.0100 norm: 3230 actions:  13  14 204 3
Epi. 2654 score: -360 E[L]:   88.2 e: 0.0100 norm: 2655 actions:   8  15 202 3
Epi. 2655 score: -364 E[L]:   97.4 e: 0.0100 norm: 2807 actions:   8  14 204 3
Epi. 2656 score: -359 E[L]:   93.0 e: 0.0100 norm: 2841 actions:   6  16 204 3
Epi. 2657 score: -364 E[L]:   95.2 e: 0.0100 norm: 2794 actions:   6  16 204 3
Epi. 2658 score: -378 E[L]:   96.5 e: 0.0100 norm: 2734 actions:  11  14 205 3
Epi. 2659 score: -362 E[L]:   99.5 e: 0.0100 norm: 2930 actions:   8  16 202 3
Epi. 2660 score: -408 E[L]:   95.6 e: 0.0100 norm: 2669 actions:   8  16 213 3
Epi. 2661 score: -366 E[L]:   81.7 e: 0.0100 norm: 2591 actions:  10  14 202 3
Epi. 2662 score: -370 E[L]:  110.7 e: 0.0100 norm: 3

Epi. 2754 score: -367 E[L]:  101.8 e: 0.0100 norm: 3076 actions:  11  13 202 3
Epi. 2755 score: -364 E[L]:   87.5 e: 0.0100 norm: 2748 actions:   7  15 204 3
Epi. 2756 score: -365 E[L]:   94.7 e: 0.0100 norm: 2935 actions:  10  17 202 3
Epi. 2757 score: -361 E[L]:  100.8 e: 0.0100 norm: 3096 actions:   7  16 202 3
Epi. 2758 score: -381 E[L]:   96.5 e: 0.0100 norm: 2996 actions:   5  19 213 3
Epi. 2759 score: -365 E[L]:   91.3 e: 0.0100 norm: 2787 actions:   9  15 202 3
Epi. 2760 score: -368 E[L]:  101.2 e: 0.0100 norm: 3159 actions:   9  14 205 3
Epi. 2761 score: -365 E[L]:   90.1 e: 0.0100 norm: 3072 actions:   8  14 204 3
Epi. 2762 score: -371 E[L]:  111.4 e: 0.0100 norm: 3039 actions:  10  13 204 3
Epi. 2763 score: -360 E[L]:   95.9 e: 0.0100 norm: 2965 actions:   9  17 200 3
Epi. 2764 score: -371 E[L]:   91.7 e: 0.0100 norm: 2962 actions:  10  15 204 3
Epi. 2765 score: -368 E[L]:   89.3 e: 0.0100 norm: 2777 actions:   9  14 203 3
Epi. 2766 score: -368 E[L]:   95.5 e: 0.0100 norm: 3

Epi. 2858 score: -403 E[L]:  105.1 e: 0.0100 norm: 2937 actions:  11  11 207 3
Epi. 2859 score: -378 E[L]:   91.4 e: 0.0100 norm: 2765 actions:   6  14 208 3
Epi. 2860 score: -398 E[L]:  102.6 e: 0.0100 norm: 3059 actions:  10  12 209 3
Epi. 2861 score: -379 E[L]:   89.2 e: 0.0100 norm: 2680 actions:   9  12 205 3
Epi. 2862 score: -400 E[L]:   96.8 e: 0.0100 norm: 2936 actions:   8  14 211 3
Epi. 2863 score: -390 E[L]:  103.4 e: 0.0100 norm: 2945 actions:   8  17 211 3
Epi. 2864 score: -374 E[L]:   91.8 e: 0.0100 norm: 2738 actions:  10  12 203 3
Epi. 2865 score: -373 E[L]:   81.8 e: 0.0100 norm: 2886 actions:   5  19 209 3
Epi. 2866 score: -364 E[L]:   86.6 e: 0.0100 norm: 2939 actions:   7  15 204 3
Epi. 2867 score: -369 E[L]:  105.7 e: 0.0100 norm: 2999 actions:   8  18 204 3
Epi. 2868 score: -379 E[L]:   98.7 e: 0.0100 norm: 2930 actions:   9  16 206 3
Epi. 2869 score: -375 E[L]:  107.2 e: 0.0100 norm: 3036 actions:   7  16 207 3
Epi. 2870 score: -370 E[L]:   95.3 e: 0.0100 norm: 2

Epi. 2962 score: -368 E[L]:   89.3 e: 0.0100 norm: 3066 actions:   7  16 205 3
Epi. 2963 score: -368 E[L]:  102.3 e: 0.0100 norm: 2912 actions:   8  14 204 3
Epi. 2964 score: -370 E[L]:   89.6 e: 0.0100 norm: 2745 actions:   8  14 204 3
Epi. 2965 score: -371 E[L]:   91.5 e: 0.0100 norm: 3014 actions:   8  13 204 3
Epi. 2966 score: -377 E[L]:   98.1 e: 0.0100 norm: 2836 actions:   6  21 206 3
Epi. 2967 score: -370 E[L]:   93.6 e: 0.0100 norm: 3068 actions:   9  13 204 3
Epi. 2968 score: -359 E[L]:  101.3 e: 0.0100 norm: 3180 actions:   6  18 202 3
Epi. 2969 score: -365 E[L]:   99.0 e: 0.0100 norm: 2894 actions:   7  15 204 3
Epi. 2970 score: -367 E[L]:   88.8 e: 0.0100 norm: 2813 actions:  11  14 202 3
Epi. 2971 score: -370 E[L]:  108.5 e: 0.0100 norm: 3306 actions:   9  16 203 3
Epi. 2972 score: -369 E[L]:   92.2 e: 0.0100 norm: 3072 actions:   8  15 203 3
Epi. 2973 score: -367 E[L]:   98.8 e: 0.0100 norm: 3275 actions:   9  13 203 3
Epi. 2974 score: -360 E[L]:  102.7 e: 0.0100 norm: 3

Epi. 3066 score: -402 E[L]:  107.4 e: 0.0100 norm: 3222 actions:   8  14 215 3
Epi. 3067 score: -390 E[L]:  100.6 e: 0.0100 norm: 3260 actions:   7  14 208 3
Epi. 3068 score: -372 E[L]:  110.4 e: 0.0100 norm: 3115 actions:   8  13 206 3
Epi. 3069 score: -419 E[L]:   98.8 e: 0.0100 norm: 2930 actions:   7  10 214 3
Epi. 3070 score: -387 E[L]:  114.9 e: 0.0100 norm: 3415 actions:   9  10 206 3
Epi. 3071 score: -396 E[L]:   98.9 e: 0.0100 norm: 3225 actions:   9  11 210 3
Epi. 3072 score: -372 E[L]:  101.5 e: 0.0100 norm: 2696 actions:   6  16 206 3
Epi. 3073 score: -426 E[L]:  102.3 e: 0.0100 norm: 3089 actions:   7  11 216 3
Epi. 3074 score: -401 E[L]:   86.2 e: 0.0100 norm: 3016 actions:   6  14 215 3
Epi. 3075 score: -379 E[L]:   76.6 e: 0.0100 norm: 3017 actions:   6  16 207 3
Epi. 3076 score: -407 E[L]:   99.7 e: 0.0100 norm: 3043 actions:   6  15 216 3
Epi. 3077 score: -357 E[L]:   91.9 e: 0.0100 norm: 3242 actions:   7  17 201 3
Epi. 3078 score: -369 E[L]:   94.6 e: 0.0100 norm: 2

Epi. 3170 score: -385 E[L]:  115.9 e: 0.0100 norm: 3175 actions:  10  14 205 3
Epi. 3171 score: -383 E[L]:   97.3 e: 0.0100 norm: 3072 actions:   6  18 209 3
Epi. 3172 score: -376 E[L]:  108.9 e: 0.0100 norm: 3254 actions:   9  15 205 3
Epi. 3173 score: -356 E[L]:   93.6 e: 0.0100 norm: 3392 actions:   7  16 203 3
Epi. 3174 score: -370 E[L]:  114.1 e: 0.0100 norm: 3469 actions:   8  15 205 3
Epi. 3175 score: -389 E[L]:   95.5 e: 0.0100 norm: 3049 actions:  10  11 207 3
Epi. 3176 score: -372 E[L]:  108.5 e: 0.0100 norm: 3039 actions:   6  16 206 3
Epi. 3177 score: -385 E[L]:  105.4 e: 0.0100 norm: 3431 actions:  11  15 207 3
Epi. 3178 score: -386 E[L]:   88.2 e: 0.0100 norm: 3084 actions:   6  16 211 3
Epi. 3179 score: -377 E[L]:  106.5 e: 0.0100 norm: 3146 actions:   7  16 208 3
Epi. 3180 score: -379 E[L]:  101.8 e: 0.0100 norm: 3236 actions:  11  11 204 3
Epi. 3181 score: -387 E[L]:   88.6 e: 0.0100 norm: 3117 actions:  10  12 208 3
Epi. 3182 score: -383 E[L]:  105.1 e: 0.0100 norm: 3

Epi. 3274 score: -377 E[L]:  107.7 e: 0.0100 norm: 3210 actions:   9  13 206 3
Epi. 3275 score: -417 E[L]:   99.5 e: 0.0100 norm: 2858 actions:   9  11 216 3
Epi. 3276 score: -386 E[L]:   90.9 e: 0.0100 norm: 2787 actions:   5  16 213 3
Epi. 3277 score: -378 E[L]:   96.2 e: 0.0100 norm: 3064 actions:  10  12 204 3
Epi. 3278 score: -384 E[L]:  107.2 e: 0.0100 norm: 3376 actions:  10  12 204 3
Epi. 3279 score: -385 E[L]:  114.0 e: 0.0100 norm: 3606 actions:   9  11 206 3
Epi. 3280 score: -375 E[L]:   89.2 e: 0.0100 norm: 2670 actions:   7  13 206 3
Epi. 3281 score: -363 E[L]:   95.2 e: 0.0100 norm: 3023 actions:   6  16 203 3
Epi. 3282 score: -368 E[L]:   97.1 e: 0.0100 norm: 3005 actions:   7  14 205 3
Epi. 3283 score: -389 E[L]:   96.5 e: 0.0100 norm: 2953 actions:   9  12 209 3
Epi. 3284 score: -407 E[L]:  102.1 e: 0.0100 norm: 3068 actions:   7  13 216 3
Epi. 3285 score: -374 E[L]:  107.2 e: 0.0100 norm: 3217 actions:   7  14 205 3
Epi. 3286 score: -368 E[L]:  101.8 e: 0.0100 norm: 3

Epi. 3378 score: -391 E[L]:  100.0 e: 0.0100 norm: 3116 actions:   8  15 209 3
Epi. 3379 score: -391 E[L]:   97.1 e: 0.0100 norm: 3046 actions:   7  16 212 3
Epi. 3380 score: -391 E[L]:  102.0 e: 0.0100 norm: 3097 actions:   8  13 210 3
Epi. 3381 score: -377 E[L]:  109.6 e: 0.0100 norm: 2993 actions:   8  13 204 3
Epi. 3382 score: -393 E[L]:   95.7 e: 0.0100 norm: 2895 actions:   9  13 208 3
Epi. 3383 score: -384 E[L]:  102.4 e: 0.0100 norm: 3565 actions:   8  13 206 3
Epi. 3384 score: -375 E[L]:   99.1 e: 0.0100 norm: 3162 actions:   7  13 206 3
Epi. 3385 score: -385 E[L]:  118.7 e: 0.0100 norm: 3308 actions:   8  14 206 3
Epi. 3386 score: -404 E[L]:  101.3 e: 0.0100 norm: 3021 actions:   4  16 218 3
Epi. 3387 score: -391 E[L]:   91.3 e: 0.0100 norm: 2894 actions:   8  11 208 3
Epi. 3388 score: -381 E[L]:   90.9 e: 0.0100 norm: 2984 actions:   9  13 207 3
Epi. 3389 score: -380 E[L]:   94.7 e: 0.0100 norm: 2826 actions:   8  12 205 3
Epi. 3390 score: -401 E[L]:   92.5 e: 0.0100 norm: 3

Epi. 3482 score: -378 E[L]:  100.5 e: 0.0100 norm: 2717 actions:   7  14 207 3
Epi. 3483 score: -380 E[L]:   99.8 e: 0.0100 norm: 2989 actions:   8  13 207 3
Epi. 3484 score: -375 E[L]:  109.1 e: 0.0100 norm: 3331 actions:   8  13 205 3
Epi. 3485 score: -381 E[L]:   91.9 e: 0.0100 norm: 3372 actions:   8  13 207 3
Epi. 3486 score: -402 E[L]:  101.7 e: 0.0100 norm: 3323 actions:   5  17 216 3
Epi. 3487 score: -367 E[L]:   92.4 e: 0.0100 norm: 3074 actions:   7  16 203 3
Epi. 3488 score: -394 E[L]:  103.2 e: 0.0100 norm: 3260 actions:   6  18 213 3
Epi. 3489 score: -379 E[L]:  101.5 e: 0.0100 norm: 3225 actions:  10  16 204 3
Epi. 3490 score: -368 E[L]:   96.4 e: 0.0100 norm: 3200 actions:  10  15 201 3
Epi. 3491 score: -365 E[L]:   92.9 e: 0.0100 norm: 3048 actions:   8  13 204 3
Epi. 3492 score: -388 E[L]:   89.0 e: 0.0100 norm: 2910 actions:   7  16 210 3
Epi. 3493 score: -381 E[L]:  104.5 e: 0.0100 norm: 3004 actions:   7  14 208 3
Epi. 3494 score: -392 E[L]:   95.8 e: 0.0100 norm: 2

Epi. 3586 score: -379 E[L]:  102.7 e: 0.0100 norm: 3200 actions:   9  11 206 3
Epi. 3587 score: -407 E[L]:   89.2 e: 0.0100 norm: 3270 actions:   7  15 215 3
Epi. 3588 score: -392 E[L]:  104.2 e: 0.0100 norm: 3284 actions:   7  14 209 3
Epi. 3589 score: -382 E[L]:   98.8 e: 0.0100 norm: 3242 actions:   6  14 206 3
Epi. 3590 score: -375 E[L]:  110.2 e: 0.0100 norm: 3343 actions:   7  14 206 3
Epi. 3591 score: -373 E[L]:   95.3 e: 0.0100 norm: 3324 actions:   7  15 205 3
Epi. 3592 score: -373 E[L]:   99.7 e: 0.0100 norm: 3538 actions:   5  18 207 3
Epi. 3593 score: -379 E[L]:   93.6 e: 0.0100 norm: 3002 actions:   7  14 207 3
Epi. 3594 score: -379 E[L]:  106.1 e: 0.0100 norm: 3220 actions:   6  14 208 3
Epi. 3595 score: -377 E[L]:  104.8 e: 0.0100 norm: 3146 actions:   8  13 206 3
Epi. 3596 score: -380 E[L]:   90.0 e: 0.0100 norm: 3170 actions:   8  18 207 3
Epi. 3597 score: -370 E[L]:  108.0 e: 0.0100 norm: 3023 actions:   6  15 206 3
Epi. 3598 score: -372 E[L]:   96.4 e: 0.0100 norm: 2

Epi. 3690 score: -408 E[L]:   98.8 e: 0.0100 norm: 3199 actions:   8  14 213 3
Epi. 3691 score: -391 E[L]:   92.8 e: 0.0100 norm: 3196 actions:   7  15 211 3
Epi. 3692 score: -389 E[L]:   94.6 e: 0.0100 norm: 2450 actions:   9  12 206 3
Epi. 3693 score: -360 E[L]:   94.3 e: 0.0100 norm: 3164 actions:   2  19 205 3
Epi. 3694 score: -369 E[L]:   91.7 e: 0.0100 norm: 3133 actions:   3  17 209 3
Epi. 3695 score: -391 E[L]:  106.0 e: 0.0100 norm: 3495 actions:   9  11 207 3
Epi. 3696 score: -366 E[L]:   85.9 e: 0.0100 norm: 3373 actions:   4  16 206 3
Epi. 3697 score: -366 E[L]:   84.3 e: 0.0100 norm: 3208 actions:   7  14 204 3
Epi. 3698 score: -401 E[L]:   97.6 e: 0.0100 norm: 3137 actions:   6  15 214 3
Epi. 3699 score: -368 E[L]:  101.6 e: 0.0100 norm: 3474 actions:   7  16 204 3
Epi. 3700 score: -375 E[L]:   79.0 e: 0.0100 norm: 3309 actions:   7  14 206 3
Epi. 3701 score: -404 E[L]:  107.5 e: 0.0100 norm: 3413 actions:   6  15 215 3
Epi. 3702 score: -371 E[L]:   85.6 e: 0.0100 norm: 3

Epi. 3794 score: -387 E[L]:  100.8 e: 0.0100 norm: 2858 actions:  10  12 205 3
Epi. 3795 score: -390 E[L]:   87.7 e: 0.0100 norm: 3202 actions:   5  18 212 3
Epi. 3796 score: -404 E[L]:  105.3 e: 0.0100 norm: 3416 actions:   7  15 213 3
Epi. 3797 score: -389 E[L]:  120.5 e: 0.0100 norm: 3830 actions:   8  15 207 3
Epi. 3798 score: -387 E[L]:  104.6 e: 0.0100 norm: 3247 actions:   9  11 207 3
Epi. 3799 score: -387 E[L]:  104.5 e: 0.0100 norm: 3192 actions:   6  14 207 3
Epi. 3800 score: -404 E[L]:  110.0 e: 0.0100 norm: 3461 actions:   7  15 213 3
Epi. 3801 score: -385 E[L]:   94.9 e: 0.0100 norm: 3030 actions:   4  15 211 3
Epi. 3802 score: -392 E[L]:  103.6 e: 0.0100 norm: 3514 actions:   6  16 211 3
Epi. 3803 score: -383 E[L]:  111.3 e: 0.0100 norm: 3383 actions:   6  13 207 3
Epi. 3804 score: -380 E[L]:   97.9 e: 0.0100 norm: 3333 actions:   6  15 207 3
Epi. 3805 score: -373 E[L]:  114.0 e: 0.0100 norm: 3159 actions:   5  16 204 3
Epi. 3806 score: -389 E[L]:   94.3 e: 0.0100 norm: 3

Epi. 3898 score: -398 E[L]:  107.0 e: 0.0100 norm: 3495 actions:   5  17 215 3
Epi. 3899 score: -375 E[L]:  103.8 e: 0.0100 norm: 3445 actions:   7  14 206 3
Epi. 3900 score: -374 E[L]:  116.0 e: 0.0100 norm: 3700 actions:   9  13 205 3
Epi. 3901 score: -406 E[L]:  116.0 e: 0.0100 norm: 3450 actions:   6  15 215 3
Epi. 3902 score: -387 E[L]:   99.9 e: 0.0100 norm: 3440 actions:   7  11 207 3
Epi. 3903 score: -393 E[L]:  111.4 e: 0.0100 norm: 3429 actions:   6  15 210 3
Epi. 3904 score: -413 E[L]:  103.5 e: 0.0100 norm: 3432 actions:   5  15 218 3
Epi. 3905 score: -399 E[L]:  105.1 e: 0.0100 norm: 3145 actions:   6  11 211 3
Epi. 3906 score: -412 E[L]:  104.2 e: 0.0100 norm: 3345 actions:   6  14 216 3
Epi. 3907 score: -388 E[L]:  116.4 e: 0.0100 norm: 3730 actions:   9  10 207 3
Epi. 3908 score: -386 E[L]:   94.8 e: 0.0100 norm: 3210 actions:   7  13 207 3
Epi. 3909 score: -371 E[L]:   93.4 e: 0.0100 norm: 3379 actions:   8  14 203 3
Epi. 3910 score: -372 E[L]:   96.2 e: 0.0100 norm: 3

Epi. 4002 score: -364 E[L]:  105.8 e: 0.0100 norm: 3032 actions:   6  15 205 3
Epi. 4003 score: -370 E[L]:   99.3 e: 0.0100 norm: 3405 actions:   9  14 205 3
Epi. 4004 score: -402 E[L]:  104.0 e: 0.0100 norm: 3320 actions:   6  15 214 3
Epi. 4005 score: -395 E[L]:   93.9 e: 0.0100 norm: 3230 actions:   9  15 211 3
Epi. 4006 score: -360 E[L]:   96.5 e: 0.0100 norm: 3458 actions:   6  16 203 3
Epi. 4007 score: -392 E[L]:  107.1 e: 0.0100 norm: 3465 actions:   6  14 210 3
Epi. 4008 score: -378 E[L]:  102.4 e: 0.0100 norm: 3413 actions:   9  12 206 3
Epi. 4009 score: -381 E[L]:  107.8 e: 0.0100 norm: 3407 actions:   7  12 207 3
Epi. 4010 score: -401 E[L]:  107.1 e: 0.0100 norm: 3371 actions:   7  14 213 3
Epi. 4011 score: -377 E[L]:  110.7 e: 0.0100 norm: 3500 actions:   8  12 206 3
Epi. 4012 score: -402 E[L]:  100.8 e: 0.0100 norm: 3351 actions:   6  13 213 3
Epi. 4013 score: -365 E[L]:  105.8 e: 0.0100 norm: 3361 actions:   5  15 205 3
Epi. 4014 score: -385 E[L]:  101.8 e: 0.0100 norm: 3

Epi. 4106 score: -385 E[L]:  115.4 e: 0.0100 norm: 3359 actions:   9  14 207 3
Epi. 4107 score: -399 E[L]:   98.5 e: 0.0100 norm: 2923 actions:   6  13 212 3
Epi. 4108 score: -387 E[L]:  112.6 e: 0.0100 norm: 3672 actions:   8  11 208 3
Epi. 4109 score: -388 E[L]:  109.7 e: 0.0100 norm: 3604 actions:   7  11 208 3
Epi. 4110 score: -390 E[L]:  104.8 e: 0.0100 norm: 3324 actions:   7  12 209 3
Epi. 4111 score: -387 E[L]:   97.1 e: 0.0100 norm: 3070 actions:   8  11 208 3
Epi. 4112 score: -398 E[L]:  114.7 e: 0.0100 norm: 3733 actions:   6  14 210 3
Epi. 4113 score: -384 E[L]:  100.2 e: 0.0100 norm: 3476 actions:   7  11 207 3
Epi. 4114 score: -381 E[L]:   98.6 e: 0.0100 norm: 3585 actions:   7  12 207 3
Epi. 4115 score: -392 E[L]:   95.7 e: 0.0100 norm: 3117 actions:   6  13 210 3
Epi. 4116 score: -391 E[L]:  114.6 e: 0.0100 norm: 3086 actions:   6  11 210 3
Epi. 4117 score: -387 E[L]:  118.6 e: 0.0100 norm: 3567 actions:   7  11 208 3
Epi. 4118 score: -394 E[L]:   93.9 e: 0.0100 norm: 3

Epi. 4210 score: -401 E[L]:  101.7 e: 0.0100 norm: 3315 actions:   7  17 213 3
Epi. 4211 score: -369 E[L]:   99.6 e: 0.0100 norm: 2971 actions:   8  14 205 3
Epi. 4212 score: -409 E[L]:  103.4 e: 0.0100 norm: 3205 actions:   3  16 219 3
Epi. 4213 score: -399 E[L]:  110.4 e: 0.0100 norm: 3450 actions:   6  14 213 3
Epi. 4214 score: -388 E[L]:   99.0 e: 0.0100 norm: 3432 actions:   9  16 207 3
Epi. 4215 score: -418 E[L]:  102.7 e: 0.0100 norm: 3400 actions:   5  13 218 3
Epi. 4216 score: -377 E[L]:   96.5 e: 0.0100 norm: 3296 actions:   6  14 207 3
Epi. 4217 score: -360 E[L]:   95.5 e: 0.0100 norm: 3430 actions:   6  16 204 3
Epi. 4218 score: -391 E[L]:  108.3 e: 0.0100 norm: 3103 actions:   6  16 211 3
Epi. 4219 score: -370 E[L]:  109.3 e: 0.0100 norm: 3287 actions:  10  15 202 3
Epi. 4220 score: -397 E[L]:  110.9 e: 0.0100 norm: 3276 actions:  10  13 211 3
Epi. 4221 score: -387 E[L]:  110.6 e: 0.0100 norm: 3686 actions:   6  13 207 3
Epi. 4222 score: -390 E[L]:  111.2 e: 0.0100 norm: 3

Epi. 4314 score: -401 E[L]:  108.1 e: 0.0100 norm: 3588 actions:   7  14 211 3
Epi. 4315 score: -373 E[L]:   97.6 e: 0.0100 norm: 3277 actions:   5  14 207 3
Epi. 4316 score: -374 E[L]:  106.1 e: 0.0100 norm: 3217 actions:   9  12 204 3
Epi. 4317 score: -405 E[L]:  107.7 e: 0.0100 norm: 3530 actions:   8  12 212 3
Epi. 4318 score: -404 E[L]:   98.3 e: 0.0100 norm: 3283 actions:   6  10 210 3
Epi. 4319 score: -401 E[L]:  108.9 e: 0.0100 norm: 3733 actions:   7  15 211 3
Epi. 4320 score: -387 E[L]:  103.5 e: 0.0100 norm: 3462 actions:   6  12 209 3
Epi. 4321 score: -385 E[L]:  100.6 e: 0.0100 norm: 3569 actions:   7  12 207 3
Epi. 4322 score: -387 E[L]:  111.3 e: 0.0100 norm: 3847 actions:   7  14 207 3
Epi. 4323 score: -407 E[L]:  102.9 e: 0.0100 norm: 3393 actions:   8  13 213 3
Epi. 4324 score: -378 E[L]:  102.3 e: 0.0100 norm: 3508 actions:   7  15 206 3
Epi. 4325 score: -399 E[L]:   99.2 e: 0.0100 norm: 3049 actions:   6  14 211 3
Epi. 4326 score: -383 E[L]:   98.3 e: 0.0100 norm: 3

Epi. 4418 score: -384 E[L]:   95.6 e: 0.0100 norm: 3351 actions:   7  13 206 3
Epi. 4419 score: -395 E[L]:   91.2 e: 0.0100 norm: 3088 actions:   9  13 210 3
Epi. 4420 score: -394 E[L]:  104.2 e: 0.0100 norm: 3474 actions:   7  12 210 3
Epi. 4421 score: -415 E[L]:  112.7 e: 0.0100 norm: 3454 actions:   6  15 217 3
Epi. 4422 score: -399 E[L]:  108.9 e: 0.0100 norm: 4219 actions:   4  15 215 3
Epi. 4423 score: -379 E[L]:  108.2 e: 0.0100 norm: 3689 actions:   6  13 207 3
Epi. 4424 score: -369 E[L]:  105.3 e: 0.0100 norm: 3283 actions:   5  16 205 3
Epi. 4425 score: -382 E[L]:  103.9 e: 0.0100 norm: 3310 actions:   6  13 208 3
Epi. 4426 score: -399 E[L]:  105.6 e: 0.0100 norm: 3577 actions:   4  15 215 3
Epi. 4427 score: -400 E[L]:   84.6 e: 0.0100 norm: 3139 actions:   7  13 212 3
Epi. 4428 score: -398 E[L]:  108.9 e: 0.0100 norm: 3881 actions:   5  16 211 3
Epi. 4429 score: -372 E[L]:   97.2 e: 0.0100 norm: 3305 actions:   5  16 206 3
Epi. 4430 score: -366 E[L]:  110.4 e: 0.0100 norm: 3

Epi. 4522 score: -395 E[L]:  100.5 e: 0.0100 norm: 3571 actions:   5  12 209 3
Epi. 4523 score: -390 E[L]:  112.8 e: 0.0100 norm: 3747 actions:   8  13 208 3
Epi. 4524 score: -417 E[L]:  111.9 e: 0.0100 norm: 4024 actions:   5  14 218 3
Epi. 4525 score: -390 E[L]:  114.8 e: 0.0100 norm: 3497 actions:   7  12 209 3
Epi. 4526 score: -381 E[L]:  111.8 e: 0.0100 norm: 3812 actions:   5  13 207 3
Epi. 4527 score: -392 E[L]:  105.9 e: 0.0100 norm: 3308 actions:   6  11 209 3
Epi. 4528 score: -423 E[L]:  105.0 e: 0.0100 norm: 3769 actions:   4  14 220 3
Epi. 4529 score: -389 E[L]:   93.7 e: 0.0100 norm: 3197 actions:   8  13 208 3
Epi. 4530 score: -387 E[L]:   92.9 e: 0.0100 norm: 3127 actions:   6  12 209 3
Epi. 4531 score: -382 E[L]:   98.3 e: 0.0100 norm: 3821 actions:   5  13 208 3
Epi. 4532 score: -390 E[L]:  105.6 e: 0.0100 norm: 3516 actions:   5  14 211 3
Epi. 4533 score: -380 E[L]:  124.9 e: 0.0100 norm: 3924 actions:   5  13 208 3
Epi. 4534 score: -378 E[L]:  105.1 e: 0.0100 norm: 3

KeyboardInterrupt: 

In [1]:
k = list(map(lambda x: x.grad, net.parameters()))
l = list(map(lambda x: x.data, net.parameters()))
fig = plt.figure(figsize=(15, 15))
fig.suptitle('Pretty well trained', fontsize=25)

ax = plt.subplot(211)
plt.hist([
    k[0].cpu().numpy().reshape(-1),
    k[2].cpu().numpy().reshape(-1),
], bins=20, color=['r', 'orange'], label=['Layer 1 (input)', 'Layer 2 (output)'])
ax.set_title(r'Gradients $\partial \mathcal{L} / \partial w$', fontsize=22)
ax.legend(fontsize=15)

ax = plt.subplot(212)
plt.hist([
    l[0].cpu().numpy().reshape(-1),
    l[2].cpu().numpy().reshape(-1),
], bins=20, color=['r', 'orange'], label=['Layer 1 (input)', 'Layer 2 (output)'])
ax.set_title('Weights', fontsize=22)
ax.legend(fontsize=15)

plt.show()

NameError: name 'net' is not defined

In [2]:
fig, host = plt.subplots()
fig.set_size_inches(15, 15)
fig.subplots_adjust(right=.75)

par1 = host.twinx()
# par2 = host.twinx()

# par2.spines['right'].set_position(('axes', 1.2))
# par2.spines['right'].set_visible(True)

p1, = host.plot(record['score'], 'r', label='score')
p2, = par1.plot(record['average loss'], 'orange', label='average loss')

host.set_xlabel('episode', fontsize=22)
host.set_ylabel('score', fontsize=22)
par1.set_ylabel('average loss', fontsize=22)

host.yaxis.label.set_color(p1.get_color())
par1.yaxis.label.set_color(p2.get_color())

tkw = dict(size=4, width=1.5)
host.tick_params(axis='x', **tkw) 
host.tick_params(axis='y', colors=p1.get_color(), **tkw)
par1.tick_params(axis='y', colors=p2.get_color(), **tkw)

lines = [p1, p2]
host.legend(lines, [l.get_label() for l in lines], fontsize=20)  # 이렇게 안 하면 child에 그려진 애들은 legend가 안 붙나봄
plt.show() 

NameError: name 'plt' is not defined

In [None]:
# r: range
# vc: closing velocity
# los: line of sight angle
# daz: azimuthal rate (horizontal look angle, right +)
# dlos: los rate
print('Env Reset')
obs0 = env.reset() 
print('observations')
for label, val in zip(['range', 'vc', 'los', 'daz', 'dlos'], obs0):
    print(f'  {label:8}:\t{val}')

In [None]:
print('Env Get Started')
obs, reward, done, info = env.step(a)
a =  np.argmax(net(torch.from_numpy(obs).float().to(device)).detach().cpu())
print('action:\t', a.item()) 
print('reward:\t', reward)
print('done:\t', done) 
print('observations')
for label, val in zip(['range', 'vc', 'los', 'daz', 'dlos'], obs):
    print(f'  {label:10}:\t{val}')

print('\ninfos')
for label, val in zip(['hdot_cmd', 'range', 'elev', 'azim', 'Pm_NED', 'Pt_NED', 'h'], info):
    print(f'  {label:10}:\t{val}')