In [1]:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from collections import deque

from itertools import count
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class memory():

    def __init__(self, size):
        self.mem = deque(maxlen=size)

    def append_sample(self, state, action, reward, next_state, done):
        self.mem.append((state, action, reward, next_state, done))

    def get_train_data(self,batch_size):
        return random.sample(self.mem, batch_size)

    def __len__(self):
        return len(self.mem)



In [2]:
class NN(nn.Module):

    def __init__(self, state_size, action_size):
        super(NN,self).__init__()
        self.fc1 = nn.Linear(state_size, 32)
        self.fc2 = nn.Linear(32,32)
        self.out = nn.Linear(32, action_size)
        nn.init.xavier_normal_(self.fc1.weight)
        nn.init.xavier_normal_(self.fc2.weight)
        nn.init.xavier_normal_(self.out.weight)



    def forward(self,x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.out(x)
        return x
        

In [3]:
action = 0
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n


WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.




In [4]:
class Agent():
    def __init__(self, mem_size, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        
        self.epsilon = 1
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9
        self.batch_size = 64
        self.GAMMA =0.9
        
        self.worker_NN = NN(state_size, action_size).to(device)
        self.target_NN = NN(state_size, action_size).to(device)
        self.target_NN.load_state_dict(self.worker_NN.state_dict())
        
        self.optimizer = optim.Adam(self.worker_NN.parameters(),lr=0.001)
        self.loss_func = nn.MSELoss()
        self.mem = memory(mem_size)
    
    def get_action(self,state):
        if np.random.rand() <= self.epsilon:
            self.epsilon = self.epsilon * self.epsilon_decay
            return env.action_space.sample()
        else:
            state = torch.Tensor(state).to(device)
            a=self.worker_NN(state)
            out = torch.argmax(a)
            return  out.cpu().numpy()
    
    def train_model(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        
        mini_batch = self.mem.get_train_data(self.batch_size)
        
        states = np.zeros((self.batch_size, self.state_size))
        next_states = np.zeros((self.batch_size, self.state_size))
        actions, rewards, dones = [],[],[]
        
        for i in range(self.batch_size):
            states[i] = mini_batch[i][0]
            actions.append(mini_batch[i][1])
            rewards.append(mini_batch[i][2])
            next_states[i] = mini_batch[i][3]
            dones.append(mini_batch[i][4])
        
        states = torch.Tensor(states).to(device)
        next_states = torch.Tensor(next_states).to(device)
        rewards = torch.Tensor(rewards).to(device)
        
        q_eval = self.worker_NN(states)
        #q_eval = q_eval.max(1)[0]
        
        q_val = torch.zeros(self.batch_size).to(device)
        q_target = torch.zeros(self.batch_size).to(device)
        q_next = self.target_NN(next_states)
        
        '''
        for w in range(self.batch_size):
            q_val[w] = q_eval[w][actions[w]]
            if dones[w]:
                q_target[w] = rewards[w]
            else:
                #print(q_next[w])
                #print(q_next[w].max())
                q_target[w] = rewards[w] + (self.GAMMA * q_next[w].max())
        '''  
        for w in range(self.batch_size):
            q_val[w] = q_eval[w][actions[w]]
            
        
        q_target = rewards + (self.GAMMA * q_next.max(1)[0])
        
        #print("q_eval_size : {}".format(q_eval.shape))
        #print("q_target_size : {}".format(q_target.shape))
        
        #loss = F.mse_loss(q_eval, q_target)
        loss = F.mse_loss(q_val, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        
    def weight_copy(self):
        self.target_NN.load_state_dict(self.worker_NN.state_dict())
            
    

In [5]:
agent = Agent(mem_size=2000, state_size=state_size, action_size =action_size)

In [6]:
for i_episode in range(1500):
    state = env.reset()
    score = 0
    for t in range(600):
        env.render()
        #action = env.action_space.sample()
        action = agent.get_action(state)
        next_state, reward, done, info = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        reward = reward if not done or score > 499 else -100 #안끝났거나 score가 499보다 큰경우가 아니고서야 reward는 -100

        agent.mem.append_sample(state, action, reward, next_state, done)
        state=next_state
        score +=reward
        if len(agent.mem) > 100:
            agent.train_model()

        if done:
            if t+1 > 10:
                print("{} : Episode finished after {} timesteps and ".format(i_episode, t+1))
            if (i_episode % 10) == 0:
                agent.weight_copy()
            break
env.close()


0 : Episode finished after 15 timesteps and 




1 : Episode finished after 19 timesteps and 




2 : Episode finished after 23 timesteps and 




3 : Episode finished after 29 timesteps and 




4 : Episode finished after 61 timesteps and 




5 : Episode finished after 28 timesteps and 




6 : Episode finished after 24 timesteps and 




7 : Episode finished after 21 timesteps and 




9 : Episode finished after 11 timesteps and 




10 : Episode finished after 11 timesteps and 




11 : Episode finished after 16 timesteps and 




12 : Episode finished after 18 timesteps and 




13 : Episode finished after 32 timesteps and 




14 : Episode finished after 71 timesteps and 




15 : Episode finished after 33 timesteps and 




16 : Episode finished after 75 timesteps and 




17 : Episode finished after 29 timesteps and 




18 : Episode finished after 41 timesteps and 




19 : Episode finished after 30 timesteps and 




20 : Episode finished after 53 timesteps and 




21 : Episode finished after 74 timesteps and 




22 : Episode finished after 38 timesteps and 




23 : Episode finished after 24 timesteps and 




24 : Episode finished after 29 timesteps and 




25 : Episode finished after 35 timesteps and 




26 : Episode finished after 32 timesteps and 




27 : Episode finished after 58 timesteps and 




28 : Episode finished after 36 timesteps and 




29 : Episode finished after 61 timesteps and 




30 : Episode finished after 47 timesteps and 




31 : Episode finished after 60 timesteps and 




32 : Episode finished after 49 timesteps and 




33 : Episode finished after 87 timesteps and 




34 : Episode finished after 121 timesteps and 




35 : Episode finished after 74 timesteps and 




36 : Episode finished after 65 timesteps and 




37 : Episode finished after 231 timesteps and 




38 : Episode finished after 93 timesteps and 




39 : Episode finished after 93 timesteps and 




40 : Episode finished after 137 timesteps and 




41 : Episode finished after 135 timesteps and 




42 : Episode finished after 197 timesteps and 




43 : Episode finished after 116 timesteps and 




44 : Episode finished after 192 timesteps and 




45 : Episode finished after 500 timesteps and 




46 : Episode finished after 500 timesteps and 




47 : Episode finished after 500 timesteps and 




48 : Episode finished after 206 timesteps and 




49 : Episode finished after 388 timesteps and 




50 : Episode finished after 220 timesteps and 




51 : Episode finished after 162 timesteps and 




52 : Episode finished after 172 timesteps and 




53 : Episode finished after 211 timesteps and 




54 : Episode finished after 284 timesteps and 




55 : Episode finished after 335 timesteps and 




56 : Episode finished after 157 timesteps and 




57 : Episode finished after 390 timesteps and 




58 : Episode finished after 274 timesteps and 




59 : Episode finished after 322 timesteps and 




60 : Episode finished after 230 timesteps and 




61 : Episode finished after 279 timesteps and 




62 : Episode finished after 272 timesteps and 




63 : Episode finished after 220 timesteps and 




64 : Episode finished after 292 timesteps and 




65 : Episode finished after 283 timesteps and 




66 : Episode finished after 250 timesteps and 




67 : Episode finished after 500 timesteps and 




68 : Episode finished after 375 timesteps and 




69 : Episode finished after 383 timesteps and 




70 : Episode finished after 201 timesteps and 




71 : Episode finished after 224 timesteps and 




72 : Episode finished after 243 timesteps and 




73 : Episode finished after 236 timesteps and 




74 : Episode finished after 211 timesteps and 




75 : Episode finished after 176 timesteps and 




76 : Episode finished after 167 timesteps and 




77 : Episode finished after 145 timesteps and 




78 : Episode finished after 149 timesteps and 




79 : Episode finished after 130 timesteps and 




80 : Episode finished after 162 timesteps and 




81 : Episode finished after 158 timesteps and 




82 : Episode finished after 187 timesteps and 




83 : Episode finished after 196 timesteps and 




84 : Episode finished after 170 timesteps and 




85 : Episode finished after 283 timesteps and 




86 : Episode finished after 227 timesteps and 




87 : Episode finished after 218 timesteps and 




88 : Episode finished after 180 timesteps and 




89 : Episode finished after 165 timesteps and 




90 : Episode finished after 184 timesteps and 




91 : Episode finished after 162 timesteps and 




92 : Episode finished after 153 timesteps and 




93 : Episode finished after 160 timesteps and 




94 : Episode finished after 189 timesteps and 




95 : Episode finished after 162 timesteps and 




96 : Episode finished after 158 timesteps and 




97 : Episode finished after 163 timesteps and 




98 : Episode finished after 131 timesteps and 




99 : Episode finished after 164 timesteps and 




100 : Episode finished after 131 timesteps and 




101 : Episode finished after 222 timesteps and 




102 : Episode finished after 198 timesteps and 




103 : Episode finished after 273 timesteps and 




104 : Episode finished after 166 timesteps and 




105 : Episode finished after 185 timesteps and 




106 : Episode finished after 170 timesteps and 




107 : Episode finished after 148 timesteps and 




108 : Episode finished after 190 timesteps and 




109 : Episode finished after 500 timesteps and 




110 : Episode finished after 190 timesteps and 




111 : Episode finished after 156 timesteps and 




112 : Episode finished after 162 timesteps and 




113 : Episode finished after 144 timesteps and 




114 : Episode finished after 143 timesteps and 




115 : Episode finished after 141 timesteps and 




116 : Episode finished after 154 timesteps and 




117 : Episode finished after 125 timesteps and 




118 : Episode finished after 160 timesteps and 




119 : Episode finished after 120 timesteps and 




120 : Episode finished after 139 timesteps and 




121 : Episode finished after 150 timesteps and 




122 : Episode finished after 141 timesteps and 




123 : Episode finished after 198 timesteps and 




124 : Episode finished after 201 timesteps and 




125 : Episode finished after 231 timesteps and 




126 : Episode finished after 132 timesteps and 




127 : Episode finished after 186 timesteps and 




128 : Episode finished after 188 timesteps and 




129 : Episode finished after 187 timesteps and 




130 : Episode finished after 216 timesteps and 




131 : Episode finished after 161 timesteps and 




132 : Episode finished after 163 timesteps and 




133 : Episode finished after 214 timesteps and 




134 : Episode finished after 159 timesteps and 




135 : Episode finished after 208 timesteps and 




136 : Episode finished after 167 timesteps and 




137 : Episode finished after 185 timesteps and 




138 : Episode finished after 211 timesteps and 




139 : Episode finished after 159 timesteps and 




140 : Episode finished after 139 timesteps and 




141 : Episode finished after 188 timesteps and 




142 : Episode finished after 194 timesteps and 




143 : Episode finished after 154 timesteps and 




144 : Episode finished after 218 timesteps and 




145 : Episode finished after 160 timesteps and 




146 : Episode finished after 231 timesteps and 




147 : Episode finished after 200 timesteps and 




148 : Episode finished after 208 timesteps and 




149 : Episode finished after 165 timesteps and 




150 : Episode finished after 156 timesteps and 




151 : Episode finished after 300 timesteps and 




152 : Episode finished after 251 timesteps and 




153 : Episode finished after 180 timesteps and 




154 : Episode finished after 159 timesteps and 




155 : Episode finished after 232 timesteps and 




156 : Episode finished after 500 timesteps and 




157 : Episode finished after 209 timesteps and 




158 : Episode finished after 198 timesteps and 




159 : Episode finished after 211 timesteps and 




160 : Episode finished after 198 timesteps and 




161 : Episode finished after 179 timesteps and 




162 : Episode finished after 156 timesteps and 




163 : Episode finished after 207 timesteps and 




164 : Episode finished after 201 timesteps and 




165 : Episode finished after 156 timesteps and 




166 : Episode finished after 137 timesteps and 




167 : Episode finished after 164 timesteps and 




168 : Episode finished after 135 timesteps and 




169 : Episode finished after 192 timesteps and 




170 : Episode finished after 196 timesteps and 




171 : Episode finished after 165 timesteps and 




172 : Episode finished after 135 timesteps and 




173 : Episode finished after 159 timesteps and 




174 : Episode finished after 159 timesteps and 




175 : Episode finished after 157 timesteps and 




176 : Episode finished after 156 timesteps and 




177 : Episode finished after 136 timesteps and 




178 : Episode finished after 203 timesteps and 




179 : Episode finished after 170 timesteps and 




180 : Episode finished after 171 timesteps and 




181 : Episode finished after 147 timesteps and 




182 : Episode finished after 348 timesteps and 




183 : Episode finished after 179 timesteps and 




184 : Episode finished after 188 timesteps and 




185 : Episode finished after 167 timesteps and 




186 : Episode finished after 177 timesteps and 




187 : Episode finished after 176 timesteps and 




188 : Episode finished after 173 timesteps and 




189 : Episode finished after 219 timesteps and 




190 : Episode finished after 156 timesteps and 




191 : Episode finished after 239 timesteps and 




192 : Episode finished after 175 timesteps and 




193 : Episode finished after 166 timesteps and 




194 : Episode finished after 205 timesteps and 




195 : Episode finished after 182 timesteps and 




196 : Episode finished after 261 timesteps and 




197 : Episode finished after 253 timesteps and 




198 : Episode finished after 176 timesteps and 




199 : Episode finished after 189 timesteps and 




200 : Episode finished after 193 timesteps and 




201 : Episode finished after 198 timesteps and 




202 : Episode finished after 207 timesteps and 




203 : Episode finished after 181 timesteps and 




204 : Episode finished after 208 timesteps and 




205 : Episode finished after 188 timesteps and 




206 : Episode finished after 185 timesteps and 




207 : Episode finished after 211 timesteps and 




208 : Episode finished after 183 timesteps and 




209 : Episode finished after 184 timesteps and 




210 : Episode finished after 183 timesteps and 




211 : Episode finished after 187 timesteps and 




212 : Episode finished after 232 timesteps and 




213 : Episode finished after 257 timesteps and 




214 : Episode finished after 215 timesteps and 




215 : Episode finished after 223 timesteps and 




216 : Episode finished after 242 timesteps and 




217 : Episode finished after 233 timesteps and 




218 : Episode finished after 230 timesteps and 




219 : Episode finished after 187 timesteps and 




220 : Episode finished after 213 timesteps and 




221 : Episode finished after 205 timesteps and 




222 : Episode finished after 294 timesteps and 




223 : Episode finished after 212 timesteps and 




224 : Episode finished after 218 timesteps and 




225 : Episode finished after 250 timesteps and 




226 : Episode finished after 207 timesteps and 




227 : Episode finished after 184 timesteps and 




228 : Episode finished after 238 timesteps and 




229 : Episode finished after 166 timesteps and 




230 : Episode finished after 194 timesteps and 




231 : Episode finished after 244 timesteps and 




232 : Episode finished after 205 timesteps and 




233 : Episode finished after 179 timesteps and 




234 : Episode finished after 185 timesteps and 




235 : Episode finished after 167 timesteps and 




236 : Episode finished after 171 timesteps and 




237 : Episode finished after 212 timesteps and 




238 : Episode finished after 208 timesteps and 




239 : Episode finished after 194 timesteps and 




240 : Episode finished after 185 timesteps and 




241 : Episode finished after 171 timesteps and 




242 : Episode finished after 164 timesteps and 




243 : Episode finished after 163 timesteps and 




244 : Episode finished after 184 timesteps and 




245 : Episode finished after 185 timesteps and 




246 : Episode finished after 163 timesteps and 




247 : Episode finished after 194 timesteps and 




248 : Episode finished after 198 timesteps and 




249 : Episode finished after 145 timesteps and 




250 : Episode finished after 178 timesteps and 




251 : Episode finished after 167 timesteps and 




252 : Episode finished after 214 timesteps and 




253 : Episode finished after 219 timesteps and 




254 : Episode finished after 187 timesteps and 




255 : Episode finished after 361 timesteps and 




256 : Episode finished after 430 timesteps and 




257 : Episode finished after 247 timesteps and 




258 : Episode finished after 256 timesteps and 




259 : Episode finished after 270 timesteps and 




260 : Episode finished after 308 timesteps and 




261 : Episode finished after 303 timesteps and 




262 : Episode finished after 248 timesteps and 




263 : Episode finished after 269 timesteps and 




264 : Episode finished after 223 timesteps and 




265 : Episode finished after 194 timesteps and 




266 : Episode finished after 241 timesteps and 




267 : Episode finished after 192 timesteps and 




268 : Episode finished after 317 timesteps and 




269 : Episode finished after 268 timesteps and 




270 : Episode finished after 234 timesteps and 




271 : Episode finished after 315 timesteps and 




272 : Episode finished after 267 timesteps and 




273 : Episode finished after 500 timesteps and 




274 : Episode finished after 500 timesteps and 




275 : Episode finished after 500 timesteps and 




276 : Episode finished after 500 timesteps and 




277 : Episode finished after 342 timesteps and 




278 : Episode finished after 500 timesteps and 




279 : Episode finished after 500 timesteps and 




280 : Episode finished after 500 timesteps and 




281 : Episode finished after 500 timesteps and 




282 : Episode finished after 500 timesteps and 




283 : Episode finished after 500 timesteps and 




284 : Episode finished after 500 timesteps and 




285 : Episode finished after 500 timesteps and 




286 : Episode finished after 500 timesteps and 




287 : Episode finished after 500 timesteps and 




288 : Episode finished after 500 timesteps and 




289 : Episode finished after 500 timesteps and 




290 : Episode finished after 500 timesteps and 




291 : Episode finished after 500 timesteps and 




292 : Episode finished after 500 timesteps and 




293 : Episode finished after 263 timesteps and 




294 : Episode finished after 210 timesteps and 




295 : Episode finished after 159 timesteps and 




296 : Episode finished after 189 timesteps and 




297 : Episode finished after 165 timesteps and 




298 : Episode finished after 206 timesteps and 




299 : Episode finished after 154 timesteps and 




300 : Episode finished after 169 timesteps and 




301 : Episode finished after 209 timesteps and 




302 : Episode finished after 268 timesteps and 




303 : Episode finished after 239 timesteps and 




304 : Episode finished after 217 timesteps and 




305 : Episode finished after 171 timesteps and 




306 : Episode finished after 169 timesteps and 




307 : Episode finished after 201 timesteps and 




308 : Episode finished after 194 timesteps and 




309 : Episode finished after 191 timesteps and 




310 : Episode finished after 163 timesteps and 




311 : Episode finished after 199 timesteps and 




312 : Episode finished after 172 timesteps and 




313 : Episode finished after 234 timesteps and 




314 : Episode finished after 186 timesteps and 




315 : Episode finished after 170 timesteps and 




316 : Episode finished after 209 timesteps and 




317 : Episode finished after 200 timesteps and 




318 : Episode finished after 198 timesteps and 




319 : Episode finished after 186 timesteps and 




320 : Episode finished after 214 timesteps and 




321 : Episode finished after 155 timesteps and 




322 : Episode finished after 252 timesteps and 




323 : Episode finished after 325 timesteps and 




324 : Episode finished after 266 timesteps and 




325 : Episode finished after 226 timesteps and 




326 : Episode finished after 227 timesteps and 




327 : Episode finished after 208 timesteps and 




328 : Episode finished after 214 timesteps and 




329 : Episode finished after 214 timesteps and 




330 : Episode finished after 294 timesteps and 




331 : Episode finished after 172 timesteps and 




332 : Episode finished after 250 timesteps and 




333 : Episode finished after 154 timesteps and 




334 : Episode finished after 196 timesteps and 




335 : Episode finished after 163 timesteps and 




336 : Episode finished after 182 timesteps and 




337 : Episode finished after 207 timesteps and 




338 : Episode finished after 209 timesteps and 




339 : Episode finished after 196 timesteps and 




340 : Episode finished after 165 timesteps and 




341 : Episode finished after 180 timesteps and 




342 : Episode finished after 134 timesteps and 




343 : Episode finished after 138 timesteps and 




344 : Episode finished after 142 timesteps and 




345 : Episode finished after 184 timesteps and 




346 : Episode finished after 148 timesteps and 




347 : Episode finished after 178 timesteps and 




348 : Episode finished after 165 timesteps and 




349 : Episode finished after 158 timesteps and 




350 : Episode finished after 163 timesteps and 




351 : Episode finished after 175 timesteps and 




352 : Episode finished after 245 timesteps and 




353 : Episode finished after 255 timesteps and 




354 : Episode finished after 316 timesteps and 




355 : Episode finished after 321 timesteps and 




356 : Episode finished after 216 timesteps and 




357 : Episode finished after 213 timesteps and 




358 : Episode finished after 314 timesteps and 




359 : Episode finished after 412 timesteps and 




360 : Episode finished after 279 timesteps and 




361 : Episode finished after 273 timesteps and 




362 : Episode finished after 201 timesteps and 




363 : Episode finished after 192 timesteps and 




364 : Episode finished after 199 timesteps and 




365 : Episode finished after 203 timesteps and 




366 : Episode finished after 250 timesteps and 




367 : Episode finished after 190 timesteps and 




368 : Episode finished after 171 timesteps and 




369 : Episode finished after 182 timesteps and 




370 : Episode finished after 165 timesteps and 




371 : Episode finished after 163 timesteps and 




372 : Episode finished after 174 timesteps and 




373 : Episode finished after 183 timesteps and 




374 : Episode finished after 151 timesteps and 




375 : Episode finished after 169 timesteps and 




376 : Episode finished after 155 timesteps and 




377 : Episode finished after 148 timesteps and 




378 : Episode finished after 161 timesteps and 




379 : Episode finished after 135 timesteps and 




380 : Episode finished after 146 timesteps and 




381 : Episode finished after 152 timesteps and 




382 : Episode finished after 178 timesteps and 




383 : Episode finished after 164 timesteps and 




384 : Episode finished after 170 timesteps and 




385 : Episode finished after 146 timesteps and 




386 : Episode finished after 155 timesteps and 




387 : Episode finished after 182 timesteps and 




388 : Episode finished after 175 timesteps and 




389 : Episode finished after 163 timesteps and 




390 : Episode finished after 195 timesteps and 




391 : Episode finished after 138 timesteps and 




392 : Episode finished after 147 timesteps and 




393 : Episode finished after 110 timesteps and 




394 : Episode finished after 125 timesteps and 




395 : Episode finished after 96 timesteps and 




396 : Episode finished after 102 timesteps and 




397 : Episode finished after 103 timesteps and 




398 : Episode finished after 123 timesteps and 




399 : Episode finished after 103 timesteps and 




400 : Episode finished after 120 timesteps and 




401 : Episode finished after 157 timesteps and 




402 : Episode finished after 178 timesteps and 




403 : Episode finished after 166 timesteps and 




404 : Episode finished after 163 timesteps and 




405 : Episode finished after 137 timesteps and 




406 : Episode finished after 182 timesteps and 




407 : Episode finished after 196 timesteps and 




408 : Episode finished after 151 timesteps and 




409 : Episode finished after 175 timesteps and 




410 : Episode finished after 170 timesteps and 




411 : Episode finished after 121 timesteps and 




412 : Episode finished after 213 timesteps and 




413 : Episode finished after 209 timesteps and 




414 : Episode finished after 184 timesteps and 




415 : Episode finished after 205 timesteps and 




416 : Episode finished after 402 timesteps and 




417 : Episode finished after 267 timesteps and 




418 : Episode finished after 342 timesteps and 




419 : Episode finished after 282 timesteps and 




420 : Episode finished after 250 timesteps and 




421 : Episode finished after 112 timesteps and 




422 : Episode finished after 317 timesteps and 




423 : Episode finished after 412 timesteps and 




424 : Episode finished after 54 timesteps and 




425 : Episode finished after 180 timesteps and 




426 : Episode finished after 126 timesteps and 




427 : Episode finished after 125 timesteps and 




428 : Episode finished after 141 timesteps and 




429 : Episode finished after 79 timesteps and 




430 : Episode finished after 173 timesteps and 




431 : Episode finished after 58 timesteps and 




432 : Episode finished after 59 timesteps and 




433 : Episode finished after 83 timesteps and 




434 : Episode finished after 68 timesteps and 




435 : Episode finished after 114 timesteps and 




436 : Episode finished after 82 timesteps and 




437 : Episode finished after 81 timesteps and 




438 : Episode finished after 72 timesteps and 




439 : Episode finished after 107 timesteps and 




440 : Episode finished after 117 timesteps and 




441 : Episode finished after 96 timesteps and 




442 : Episode finished after 79 timesteps and 




443 : Episode finished after 105 timesteps and 




444 : Episode finished after 106 timesteps and 




445 : Episode finished after 113 timesteps and 




446 : Episode finished after 126 timesteps and 




447 : Episode finished after 121 timesteps and 




448 : Episode finished after 134 timesteps and 




449 : Episode finished after 134 timesteps and 




450 : Episode finished after 145 timesteps and 




451 : Episode finished after 111 timesteps and 




452 : Episode finished after 120 timesteps and 




453 : Episode finished after 147 timesteps and 




454 : Episode finished after 120 timesteps and 




455 : Episode finished after 125 timesteps and 




456 : Episode finished after 117 timesteps and 




457 : Episode finished after 132 timesteps and 




458 : Episode finished after 141 timesteps and 




459 : Episode finished after 121 timesteps and 




460 : Episode finished after 123 timesteps and 




461 : Episode finished after 143 timesteps and 




462 : Episode finished after 155 timesteps and 




463 : Episode finished after 141 timesteps and 




464 : Episode finished after 165 timesteps and 




465 : Episode finished after 152 timesteps and 




466 : Episode finished after 168 timesteps and 




467 : Episode finished after 148 timesteps and 




468 : Episode finished after 160 timesteps and 




469 : Episode finished after 144 timesteps and 




470 : Episode finished after 163 timesteps and 




471 : Episode finished after 145 timesteps and 




472 : Episode finished after 160 timesteps and 




473 : Episode finished after 129 timesteps and 




474 : Episode finished after 146 timesteps and 




475 : Episode finished after 136 timesteps and 




476 : Episode finished after 134 timesteps and 




477 : Episode finished after 140 timesteps and 




478 : Episode finished after 155 timesteps and 




479 : Episode finished after 144 timesteps and 




480 : Episode finished after 165 timesteps and 




481 : Episode finished after 135 timesteps and 




482 : Episode finished after 154 timesteps and 




483 : Episode finished after 150 timesteps and 




484 : Episode finished after 147 timesteps and 




485 : Episode finished after 145 timesteps and 




486 : Episode finished after 163 timesteps and 




487 : Episode finished after 145 timesteps and 




488 : Episode finished after 165 timesteps and 




489 : Episode finished after 155 timesteps and 




490 : Episode finished after 159 timesteps and 




491 : Episode finished after 141 timesteps and 




492 : Episode finished after 167 timesteps and 




493 : Episode finished after 148 timesteps and 




494 : Episode finished after 143 timesteps and 




495 : Episode finished after 157 timesteps and 




496 : Episode finished after 162 timesteps and 




497 : Episode finished after 153 timesteps and 




498 : Episode finished after 163 timesteps and 




499 : Episode finished after 165 timesteps and 




500 : Episode finished after 144 timesteps and 




501 : Episode finished after 140 timesteps and 




502 : Episode finished after 166 timesteps and 




503 : Episode finished after 149 timesteps and 




504 : Episode finished after 149 timesteps and 




505 : Episode finished after 147 timesteps and 




506 : Episode finished after 168 timesteps and 




507 : Episode finished after 177 timesteps and 




508 : Episode finished after 153 timesteps and 




509 : Episode finished after 159 timesteps and 




510 : Episode finished after 169 timesteps and 




511 : Episode finished after 163 timesteps and 




512 : Episode finished after 154 timesteps and 




513 : Episode finished after 148 timesteps and 




514 : Episode finished after 147 timesteps and 




515 : Episode finished after 144 timesteps and 




516 : Episode finished after 153 timesteps and 




517 : Episode finished after 158 timesteps and 




518 : Episode finished after 155 timesteps and 




519 : Episode finished after 153 timesteps and 




520 : Episode finished after 166 timesteps and 




521 : Episode finished after 163 timesteps and 




522 : Episode finished after 141 timesteps and 




523 : Episode finished after 138 timesteps and 




524 : Episode finished after 161 timesteps and 




525 : Episode finished after 159 timesteps and 




526 : Episode finished after 143 timesteps and 




527 : Episode finished after 145 timesteps and 




528 : Episode finished after 143 timesteps and 




529 : Episode finished after 144 timesteps and 




530 : Episode finished after 150 timesteps and 




531 : Episode finished after 142 timesteps and 




532 : Episode finished after 138 timesteps and 




533 : Episode finished after 151 timesteps and 




534 : Episode finished after 155 timesteps and 




535 : Episode finished after 137 timesteps and 




536 : Episode finished after 146 timesteps and 




537 : Episode finished after 145 timesteps and 




538 : Episode finished after 150 timesteps and 




539 : Episode finished after 139 timesteps and 




540 : Episode finished after 144 timesteps and 




541 : Episode finished after 140 timesteps and 




542 : Episode finished after 151 timesteps and 




543 : Episode finished after 143 timesteps and 




544 : Episode finished after 139 timesteps and 




545 : Episode finished after 147 timesteps and 




546 : Episode finished after 144 timesteps and 




547 : Episode finished after 149 timesteps and 




548 : Episode finished after 156 timesteps and 




549 : Episode finished after 162 timesteps and 




550 : Episode finished after 140 timesteps and 




551 : Episode finished after 158 timesteps and 




552 : Episode finished after 145 timesteps and 




553 : Episode finished after 132 timesteps and 




554 : Episode finished after 138 timesteps and 




555 : Episode finished after 139 timesteps and 




556 : Episode finished after 143 timesteps and 




557 : Episode finished after 143 timesteps and 




558 : Episode finished after 131 timesteps and 




559 : Episode finished after 135 timesteps and 




560 : Episode finished after 139 timesteps and 




561 : Episode finished after 172 timesteps and 




562 : Episode finished after 156 timesteps and 




563 : Episode finished after 162 timesteps and 




564 : Episode finished after 179 timesteps and 




565 : Episode finished after 181 timesteps and 




566 : Episode finished after 165 timesteps and 




567 : Episode finished after 182 timesteps and 




568 : Episode finished after 186 timesteps and 




569 : Episode finished after 164 timesteps and 




570 : Episode finished after 184 timesteps and 




571 : Episode finished after 166 timesteps and 




572 : Episode finished after 156 timesteps and 




573 : Episode finished after 153 timesteps and 




574 : Episode finished after 154 timesteps and 




575 : Episode finished after 144 timesteps and 




576 : Episode finished after 146 timesteps and 




577 : Episode finished after 143 timesteps and 




578 : Episode finished after 138 timesteps and 




579 : Episode finished after 137 timesteps and 




580 : Episode finished after 147 timesteps and 




581 : Episode finished after 153 timesteps and 




582 : Episode finished after 141 timesteps and 




583 : Episode finished after 154 timesteps and 




584 : Episode finished after 159 timesteps and 




585 : Episode finished after 163 timesteps and 




586 : Episode finished after 154 timesteps and 




587 : Episode finished after 152 timesteps and 




588 : Episode finished after 160 timesteps and 




589 : Episode finished after 151 timesteps and 




590 : Episode finished after 143 timesteps and 




591 : Episode finished after 151 timesteps and 




592 : Episode finished after 158 timesteps and 




593 : Episode finished after 146 timesteps and 




594 : Episode finished after 153 timesteps and 




595 : Episode finished after 163 timesteps and 




596 : Episode finished after 147 timesteps and 




597 : Episode finished after 164 timesteps and 




598 : Episode finished after 158 timesteps and 




599 : Episode finished after 161 timesteps and 




600 : Episode finished after 154 timesteps and 




601 : Episode finished after 157 timesteps and 




602 : Episode finished after 152 timesteps and 




603 : Episode finished after 158 timesteps and 




604 : Episode finished after 156 timesteps and 




605 : Episode finished after 144 timesteps and 




606 : Episode finished after 156 timesteps and 




607 : Episode finished after 153 timesteps and 




608 : Episode finished after 160 timesteps and 




609 : Episode finished after 151 timesteps and 




610 : Episode finished after 157 timesteps and 




611 : Episode finished after 158 timesteps and 




612 : Episode finished after 153 timesteps and 




613 : Episode finished after 165 timesteps and 




614 : Episode finished after 156 timesteps and 




615 : Episode finished after 153 timesteps and 




616 : Episode finished after 141 timesteps and 




617 : Episode finished after 141 timesteps and 




618 : Episode finished after 148 timesteps and 




619 : Episode finished after 150 timesteps and 




620 : Episode finished after 152 timesteps and 




621 : Episode finished after 150 timesteps and 




622 : Episode finished after 155 timesteps and 




623 : Episode finished after 140 timesteps and 




624 : Episode finished after 140 timesteps and 




625 : Episode finished after 155 timesteps and 




626 : Episode finished after 141 timesteps and 




627 : Episode finished after 149 timesteps and 




628 : Episode finished after 156 timesteps and 




629 : Episode finished after 157 timesteps and 




630 : Episode finished after 151 timesteps and 




631 : Episode finished after 147 timesteps and 




632 : Episode finished after 148 timesteps and 




633 : Episode finished after 145 timesteps and 




634 : Episode finished after 145 timesteps and 




635 : Episode finished after 137 timesteps and 




636 : Episode finished after 135 timesteps and 




637 : Episode finished after 141 timesteps and 




638 : Episode finished after 139 timesteps and 




639 : Episode finished after 145 timesteps and 




640 : Episode finished after 151 timesteps and 




641 : Episode finished after 149 timesteps and 




642 : Episode finished after 157 timesteps and 




643 : Episode finished after 152 timesteps and 




644 : Episode finished after 155 timesteps and 




645 : Episode finished after 154 timesteps and 




646 : Episode finished after 148 timesteps and 




647 : Episode finished after 153 timesteps and 




648 : Episode finished after 151 timesteps and 




649 : Episode finished after 149 timesteps and 




650 : Episode finished after 153 timesteps and 




651 : Episode finished after 143 timesteps and 




652 : Episode finished after 148 timesteps and 




653 : Episode finished after 152 timesteps and 




654 : Episode finished after 148 timesteps and 




655 : Episode finished after 144 timesteps and 




656 : Episode finished after 139 timesteps and 




657 : Episode finished after 146 timesteps and 




658 : Episode finished after 149 timesteps and 




659 : Episode finished after 136 timesteps and 




660 : Episode finished after 145 timesteps and 




661 : Episode finished after 146 timesteps and 




662 : Episode finished after 158 timesteps and 




663 : Episode finished after 163 timesteps and 




664 : Episode finished after 149 timesteps and 




665 : Episode finished after 152 timesteps and 




666 : Episode finished after 150 timesteps and 




667 : Episode finished after 153 timesteps and 




668 : Episode finished after 163 timesteps and 




669 : Episode finished after 157 timesteps and 




670 : Episode finished after 160 timesteps and 




671 : Episode finished after 168 timesteps and 




672 : Episode finished after 175 timesteps and 




673 : Episode finished after 176 timesteps and 




674 : Episode finished after 184 timesteps and 




675 : Episode finished after 187 timesteps and 




676 : Episode finished after 177 timesteps and 




677 : Episode finished after 184 timesteps and 




678 : Episode finished after 195 timesteps and 




679 : Episode finished after 196 timesteps and 




680 : Episode finished after 500 timesteps and 




681 : Episode finished after 500 timesteps and 




682 : Episode finished after 500 timesteps and 




683 : Episode finished after 500 timesteps and 




684 : Episode finished after 500 timesteps and 




685 : Episode finished after 500 timesteps and 




686 : Episode finished after 500 timesteps and 




687 : Episode finished after 500 timesteps and 




688 : Episode finished after 500 timesteps and 




689 : Episode finished after 500 timesteps and 




690 : Episode finished after 500 timesteps and 




691 : Episode finished after 339 timesteps and 




692 : Episode finished after 126 timesteps and 




693 : Episode finished after 466 timesteps and 




694 : Episode finished after 403 timesteps and 




695 : Episode finished after 268 timesteps and 




696 : Episode finished after 353 timesteps and 




697 : Episode finished after 433 timesteps and 




698 : Episode finished after 243 timesteps and 




699 : Episode finished after 275 timesteps and 




700 : Episode finished after 131 timesteps and 




701 : Episode finished after 500 timesteps and 




702 : Episode finished after 49 timesteps and 




703 : Episode finished after 500 timesteps and 




704 : Episode finished after 41 timesteps and 




705 : Episode finished after 46 timesteps and 




706 : Episode finished after 451 timesteps and 




707 : Episode finished after 419 timesteps and 




708 : Episode finished after 44 timesteps and 




709 : Episode finished after 445 timesteps and 




710 : Episode finished after 89 timesteps and 




711 : Episode finished after 33 timesteps and 




712 : Episode finished after 408 timesteps and 




713 : Episode finished after 256 timesteps and 




714 : Episode finished after 446 timesteps and 




715 : Episode finished after 369 timesteps and 




716 : Episode finished after 299 timesteps and 




717 : Episode finished after 276 timesteps and 




718 : Episode finished after 254 timesteps and 




719 : Episode finished after 264 timesteps and 




720 : Episode finished after 266 timesteps and 




721 : Episode finished after 274 timesteps and 




722 : Episode finished after 249 timesteps and 




723 : Episode finished after 333 timesteps and 




724 : Episode finished after 211 timesteps and 




725 : Episode finished after 203 timesteps and 




726 : Episode finished after 205 timesteps and 




727 : Episode finished after 203 timesteps and 




728 : Episode finished after 13 timesteps and 




729 : Episode finished after 227 timesteps and 




730 : Episode finished after 244 timesteps and 




731 : Episode finished after 336 timesteps and 




732 : Episode finished after 125 timesteps and 




733 : Episode finished after 109 timesteps and 




734 : Episode finished after 116 timesteps and 




735 : Episode finished after 147 timesteps and 




736 : Episode finished after 138 timesteps and 




737 : Episode finished after 140 timesteps and 




738 : Episode finished after 140 timesteps and 




739 : Episode finished after 140 timesteps and 




740 : Episode finished after 114 timesteps and 




741 : Episode finished after 124 timesteps and 




742 : Episode finished after 138 timesteps and 




743 : Episode finished after 123 timesteps and 




744 : Episode finished after 125 timesteps and 




745 : Episode finished after 136 timesteps and 




746 : Episode finished after 137 timesteps and 




747 : Episode finished after 131 timesteps and 




748 : Episode finished after 131 timesteps and 




749 : Episode finished after 129 timesteps and 




750 : Episode finished after 132 timesteps and 




751 : Episode finished after 144 timesteps and 




752 : Episode finished after 215 timesteps and 




753 : Episode finished after 237 timesteps and 




754 : Episode finished after 189 timesteps and 




755 : Episode finished after 197 timesteps and 




756 : Episode finished after 235 timesteps and 




757 : Episode finished after 179 timesteps and 




758 : Episode finished after 140 timesteps and 




759 : Episode finished after 149 timesteps and 




760 : Episode finished after 152 timesteps and 




761 : Episode finished after 251 timesteps and 




762 : Episode finished after 193 timesteps and 




763 : Episode finished after 203 timesteps and 




764 : Episode finished after 203 timesteps and 




765 : Episode finished after 210 timesteps and 




766 : Episode finished after 198 timesteps and 




767 : Episode finished after 215 timesteps and 




768 : Episode finished after 222 timesteps and 




769 : Episode finished after 214 timesteps and 




770 : Episode finished after 225 timesteps and 




771 : Episode finished after 212 timesteps and 




772 : Episode finished after 219 timesteps and 




773 : Episode finished after 225 timesteps and 




774 : Episode finished after 229 timesteps and 




775 : Episode finished after 236 timesteps and 




776 : Episode finished after 240 timesteps and 




777 : Episode finished after 252 timesteps and 




778 : Episode finished after 274 timesteps and 




779 : Episode finished after 277 timesteps and 




780 : Episode finished after 280 timesteps and 




781 : Episode finished after 259 timesteps and 




782 : Episode finished after 262 timesteps and 




783 : Episode finished after 297 timesteps and 




784 : Episode finished after 302 timesteps and 




785 : Episode finished after 382 timesteps and 




786 : Episode finished after 500 timesteps and 




787 : Episode finished after 500 timesteps and 




788 : Episode finished after 500 timesteps and 




789 : Episode finished after 500 timesteps and 




790 : Episode finished after 500 timesteps and 




791 : Episode finished after 500 timesteps and 




792 : Episode finished after 140 timesteps and 




793 : Episode finished after 500 timesteps and 




794 : Episode finished after 500 timesteps and 




795 : Episode finished after 500 timesteps and 




796 : Episode finished after 500 timesteps and 




797 : Episode finished after 29 timesteps and 




798 : Episode finished after 25 timesteps and 




799 : Episode finished after 26 timesteps and 




800 : Episode finished after 227 timesteps and 




801 : Episode finished after 264 timesteps and 




802 : Episode finished after 61 timesteps and 




803 : Episode finished after 21 timesteps and 




804 : Episode finished after 44 timesteps and 




805 : Episode finished after 16 timesteps and 




806 : Episode finished after 500 timesteps and 




807 : Episode finished after 500 timesteps and 




808 : Episode finished after 500 timesteps and 




809 : Episode finished after 347 timesteps and 




810 : Episode finished after 500 timesteps and 




811 : Episode finished after 370 timesteps and 




812 : Episode finished after 411 timesteps and 




813 : Episode finished after 322 timesteps and 




814 : Episode finished after 292 timesteps and 




815 : Episode finished after 270 timesteps and 




816 : Episode finished after 206 timesteps and 




817 : Episode finished after 192 timesteps and 




818 : Episode finished after 64 timesteps and 




819 : Episode finished after 58 timesteps and 




820 : Episode finished after 54 timesteps and 




821 : Episode finished after 188 timesteps and 




822 : Episode finished after 500 timesteps and 




823 : Episode finished after 113 timesteps and 




824 : Episode finished after 120 timesteps and 




825 : Episode finished after 136 timesteps and 




826 : Episode finished after 204 timesteps and 




827 : Episode finished after 182 timesteps and 




828 : Episode finished after 121 timesteps and 




829 : Episode finished after 148 timesteps and 




830 : Episode finished after 160 timesteps and 




831 : Episode finished after 304 timesteps and 




832 : Episode finished after 275 timesteps and 




833 : Episode finished after 303 timesteps and 




834 : Episode finished after 265 timesteps and 




835 : Episode finished after 245 timesteps and 




836 : Episode finished after 253 timesteps and 




837 : Episode finished after 235 timesteps and 




838 : Episode finished after 232 timesteps and 




839 : Episode finished after 211 timesteps and 




840 : Episode finished after 192 timesteps and 




841 : Episode finished after 245 timesteps and 




842 : Episode finished after 249 timesteps and 




843 : Episode finished after 223 timesteps and 




844 : Episode finished after 217 timesteps and 




845 : Episode finished after 214 timesteps and 




846 : Episode finished after 241 timesteps and 




847 : Episode finished after 208 timesteps and 




848 : Episode finished after 205 timesteps and 




849 : Episode finished after 193 timesteps and 




850 : Episode finished after 191 timesteps and 




851 : Episode finished after 206 timesteps and 




852 : Episode finished after 208 timesteps and 




853 : Episode finished after 218 timesteps and 




854 : Episode finished after 206 timesteps and 




855 : Episode finished after 194 timesteps and 




856 : Episode finished after 194 timesteps and 




857 : Episode finished after 229 timesteps and 




858 : Episode finished after 214 timesteps and 




859 : Episode finished after 213 timesteps and 




860 : Episode finished after 195 timesteps and 




861 : Episode finished after 176 timesteps and 




862 : Episode finished after 181 timesteps and 




863 : Episode finished after 204 timesteps and 




864 : Episode finished after 213 timesteps and 




865 : Episode finished after 195 timesteps and 




866 : Episode finished after 220 timesteps and 




867 : Episode finished after 184 timesteps and 




868 : Episode finished after 173 timesteps and 




869 : Episode finished after 207 timesteps and 




870 : Episode finished after 208 timesteps and 




871 : Episode finished after 178 timesteps and 




872 : Episode finished after 200 timesteps and 




873 : Episode finished after 178 timesteps and 




874 : Episode finished after 185 timesteps and 




875 : Episode finished after 217 timesteps and 




876 : Episode finished after 222 timesteps and 




877 : Episode finished after 223 timesteps and 




878 : Episode finished after 180 timesteps and 




879 : Episode finished after 195 timesteps and 




880 : Episode finished after 192 timesteps and 




881 : Episode finished after 360 timesteps and 




882 : Episode finished after 500 timesteps and 




883 : Episode finished after 500 timesteps and 




884 : Episode finished after 245 timesteps and 




885 : Episode finished after 285 timesteps and 




886 : Episode finished after 317 timesteps and 




887 : Episode finished after 299 timesteps and 




888 : Episode finished after 284 timesteps and 




889 : Episode finished after 313 timesteps and 




890 : Episode finished after 317 timesteps and 




891 : Episode finished after 260 timesteps and 




892 : Episode finished after 287 timesteps and 




893 : Episode finished after 289 timesteps and 




894 : Episode finished after 277 timesteps and 




895 : Episode finished after 235 timesteps and 




896 : Episode finished after 296 timesteps and 




897 : Episode finished after 359 timesteps and 




898 : Episode finished after 296 timesteps and 




899 : Episode finished after 249 timesteps and 




900 : Episode finished after 151 timesteps and 




901 : Episode finished after 124 timesteps and 




902 : Episode finished after 46 timesteps and 




903 : Episode finished after 500 timesteps and 




904 : Episode finished after 61 timesteps and 




905 : Episode finished after 500 timesteps and 




906 : Episode finished after 500 timesteps and 




907 : Episode finished after 407 timesteps and 




908 : Episode finished after 500 timesteps and 




909 : Episode finished after 370 timesteps and 




910 : Episode finished after 295 timesteps and 




911 : Episode finished after 495 timesteps and 




912 : Episode finished after 360 timesteps and 




913 : Episode finished after 315 timesteps and 




914 : Episode finished after 332 timesteps and 




915 : Episode finished after 312 timesteps and 




KeyboardInterrupt: 