### importing modules

In [22]:
import matplotlib.pyplot as plt

%matplotlib inline
# Plot styling applied to every figure drawn after this cell.
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (10, 5),
    'font.size': 15,
})

In [23]:
import numpy as np
import gym
from collections import deque
import random
import pickle
import time
import copy

In [24]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

## class for DQN

In [25]:
class DQN:
    """Q-learning agent with a replay buffer and a separate target network.

    Two identically shaped Keras models are kept: ``model`` is trained on
    sampled transitions, ``target_model`` supplies the bootstrap values and
    is only refreshed when ``update_target_model`` is called.

    Parameters
    ----------
    o_space : int
        Length of a flattened observation (input width of the network).
    a_space : int
        Number of discrete actions (output width of the network).
    lr : float
        Learning rate handed to the Adam optimizer.
    units : int, optional
        Width of each of the two hidden layers.
    mem_len : int, optional
        Maximum number of transitions kept in the replay buffer.
    """

    def __init__(self, o_space, a_space, lr, units=32, mem_len=10000):
        self.a_space = a_space
        self.state_size = o_space

        self.lr = lr
        self.units = units

        # hyperparameters
        self.discount = 0.99
        self.epsilon = 1.0          # current exploration probability
        self.epsilon_decay = 0.999  # multiplicative decay per stored transition
        self.epsilon_min = 0.01
        self.batch_size = 50
        self.train_start = 1000     # no training until the buffer holds this many
        # Bounded buffer: once full, the deque drops its oldest entry on append.
        self.memory = deque(maxlen=mem_len)

        # create main model and target model
        self.model = self.init_model()
        self.target_model = self.init_model()

        # initialize target model
        self.update_target_model()

    def init_model(self):
        """Build and compile a two-hidden-layer MLP mapping a state to Q-values."""
        model = Sequential()
        model.add(Dense(self.units, input_dim=self.state_size, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.units, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.a_space, activation='linear',
                        kernel_initializer='he_uniform'))
        # NOTE(review): `lr` is the legacy keyword; recent Keras releases
        # expect `learning_rate` -- confirm against the installed version.
        model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, s):
        """Pick an action epsilon-greedily.

        ``s`` is passed straight to ``model.predict`` and only the first row
        of the prediction is used, so it should be a batch holding a single
        state.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.a_space)
        else:
            q_value = self.model.predict(s)
            return np.argmax(q_value[0])

    def add_experience(self, s, a, r, s_next, terminal):
        """Store one transition and decay epsilon while it is above its floor."""
        self.memory.append([s, a, r, s_next, terminal])
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def train_model(self):
        """Fit the online network on one random mini-batch from the buffer.

        Does nothing until the buffer holds at least ``train_start``
        transitions. The regression target equals the online prediction
        except at the action taken, where it becomes ``r`` for terminal
        transitions and ``r + discount * max_a Q_target(s_next, a)`` otherwise.
        """
        if len(self.memory) < self.train_start:
            return
        # Clamp so a buffer smaller than the nominal batch size still works;
        # every array and loop below uses this clamped size.
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        states = np.zeros((batch_size, self.state_size))
        next_states = np.zeros((batch_size, self.state_size))
        actions = np.zeros(batch_size, dtype=int)
        rewards = np.zeros(batch_size)
        terminals = np.zeros(batch_size, dtype=bool)

        for i, (s, a, r, s_next, terminal) in enumerate(mini_batch):
            states[i] = s
            actions[i] = a
            rewards[i] = r
            next_states[i] = s_next
            terminals[i] = terminal

        target = self.model.predict(states)
        target_val = self.target_model.predict(next_states)

        # Terminal transitions have no future return to bootstrap from.
        bootstrapped = rewards + self.discount * np.amax(target_val, axis=1)
        target[np.arange(batch_size), actions] = np.where(
            terminals, rewards, bootstrapped)

        self.model.fit(states, target, batch_size=batch_size, epochs=1, verbose=0)

In [26]:
# Reward value per Gym environment id. Presumably the value written into
# relabelled terminal transitions -- the consumer is not in this cell, verify.
terminal_r_dict = dict((
    ('CartPole-v1', -1.0),
    ('LunarLander-v2', 100.0),
    ('MountainCar-v0', 1.0),
))

# Memory size per Gym environment id (presumably the replay-buffer capacity
# passed as `mem_len` -- verify against the caller).
memory_env = dict((
    ('CartPole-v1', 10000),
    ('LunarLander-v2', 10000),
    ('MountainCar-v0', 10000),
))

In [27]:
class DQN_HER:
    """DQN agent with an extra per-episode buffer for hindsight relabelling.

    Besides the main replay buffer (``memory``) the agent keeps
    ``her_memory``. ``modify_her_list`` rewrites a copy of that buffer
    against a goal taken from its last transition, appends the rewritten
    transitions to ``memory`` and clears ``her_memory``.

    Parameters
    ----------
    o_space : int
        Length of a flattened observation (input width of the network).
    a_space : int
        Number of discrete actions (output width of the network).
    lr : float
        Learning rate handed to the Adam optimizer.
    units : int, optional
        Width of each of the two hidden layers.
    mem_len : int, optional
        Maximum number of transitions kept in the main replay buffer.
    her_mem_len : int, optional
        Maximum number of transitions kept in the hindsight buffer.
    """

    def __init__(self, o_space, a_space, lr, units=32, mem_len=10000,
                 her_mem_len=1000):
        self.a_space = a_space
        self.state_size = o_space

        self.lr = lr
        self.units = units
        # hyperparameters
        self.discount = 0.99
        self.epsilon = 1.0          # current exploration probability
        self.epsilon_decay = 0.999  # multiplicative decay per stored transition
        self.epsilon_min = 0.01
        self.batch_size = 50
        self.train_start = 1000     # no training until the buffer holds this many
        self.memory = deque(maxlen=mem_len)
        self.her_memory = deque(maxlen=her_mem_len)

        # create main model and target model
        self.model = self.init_model()
        self.target_model = self.init_model()

        # initialize target model
        self.update_target_model()

    def init_model(self):
        """Build and compile a two-hidden-layer MLP mapping a state to Q-values."""
        model = Sequential()
        model.add(Dense(self.units, input_dim=self.state_size, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.units, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.a_space, activation='linear',
                        kernel_initializer='he_uniform'))
        # NOTE(review): `lr` is the legacy keyword; recent Keras releases
        # expect `learning_rate` -- confirm against the installed version.
        model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, s):
        """Pick an action epsilon-greedily.

        ``s`` is passed straight to ``model.predict`` and only the first row
        of the prediction is used, so it should be a batch holding a single
        state.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.a_space)
        else:
            q_value = self.model.predict(s)
            return np.argmax(q_value[0])

    def add_experience(self, s, a, r, s_next, terminal):
        """Store one transition in the main buffer and decay epsilon."""
        self.memory.append([s, a, r, s_next, terminal])
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def add_her_experience(self, s, a, r, s_next, terminal):
        """Store one transition in the hindsight buffer (epsilon untouched)."""
        self.her_memory.append([s, a, r, s_next, terminal])

    def modify_her_list(self, terminal_reward):
        """Relabel the hindsight buffer and append it to the main buffer.

        The goal is the next-state of the most recent hindsight transition.
        Does nothing when the hindsight buffer is empty.

        Rewards are read from ``self.memory[-1 - i]``, so this assumes the
        last ``len(her_memory)`` entries of ``memory`` are the same
        transitions, stored in the same order -- TODO confirm at the caller.
        """
        if not self.her_memory:
            # Nothing to relabel; indexing [-1] below would raise IndexError.
            return

        new_her_experience = copy.deepcopy(self.her_memory)
        her_goal = self.her_memory[-1][3]

        for i in range(len(new_her_experience)):
            experience = new_her_experience[-1 - i]
            experience[0] = her_goal
            experience[2] = self.memory[-1 - i][2]
            experience[3] = her_goal
            experience[4] = False

            # NOTE(review): slot 3 was just overwritten with `her_goal`, so
            # this difference is always zero and every relabelled transition
            # ends up terminal with `terminal_reward`. Behaviour is kept
            # as-is; the check was probably meant to run against the original
            # next-state -- confirm the intended relabelling rule.
            if np.sum(np.abs(experience[3] - her_goal)) == 0:
                experience[2] = terminal_reward
                experience[4] = True

        self.memory.extend(new_her_experience)

        self.reset_her()

    def reset_her(self):
        """Empty the hindsight buffer."""
        self.her_memory.clear()

    def train_model(self):
        """Fit the online network on one random mini-batch from the buffer.

        Does nothing until the buffer holds at least ``train_start``
        transitions. The regression target equals the online prediction
        except at the action taken, where it becomes ``r`` for terminal
        transitions and ``r + discount * max_a Q_target(s_next, a)`` otherwise.
        """
        if len(self.memory) < self.train_start:
            return
        # Clamp so a buffer smaller than the nominal batch size still works;
        # every array and loop below uses this clamped size.
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        states = np.zeros((batch_size, self.state_size))
        next_states = np.zeros((batch_size, self.state_size))
        actions = np.zeros(batch_size, dtype=int)
        rewards = np.zeros(batch_size)
        terminals = np.zeros(batch_size, dtype=bool)

        for i, (s, a, r, s_next, terminal) in enumerate(mini_batch):
            states[i] = s
            actions[i] = a
            rewards[i] = r
            next_states[i] = s_next
            terminals[i] = terminal

        target = self.model.predict(states)
        target_val = self.target_model.predict(next_states)

        # Terminal transitions have no future return to bootstrap from.
        bootstrapped = rewards + self.discount * np.amax(target_val, axis=1)
        target[np.arange(batch_size), actions] = np.where(
            terminals, rewards, bootstrapped)

        self.model.fit(states, target, batch_size=batch_size, epochs=1, verbose=0)

## main

#### settings

In [55]:
# Experiment settings read by the training loop.
lr = 1e-3                      # learning rate passed to the agent constructor
num_episodes = 500             # episodes run for each seed
num_seeds = 10
seeds = np.arange(num_seeds)   # one independent run per seed: 0 .. num_seeds - 1

### classic replay

In [56]:
%%time

# Maps each seed to the list of per-episode scores obtained with that seed.
result_dict = {}

env_name = 'CartPole-v1'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source visible from this cell. The agent draws from
    # the `random` module (random actions and mini-batch sampling) as well as
    # from numpy, and the environment keeps its own generator.
    # `seed` comes from np.arange, so convert it to a plain int where a
    # built-in int is required.
    # NOTE(review): Keras weight initialisation is not seeded here, so runs
    # may still differ between executions.
    np.random.seed(seed)
    random.seed(int(seed))
    env.seed(int(seed))
    env.action_space.np_random.seed(seed)

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN(o_space, a_space, lr, units=32)

    scores, episodes = [], []

    for num_episode in range(num_episodes):
        terminal = False
        score = 0
        s = env.reset()
        # The agent expects a batch holding a single state.
        s = np.reshape(s, [1, o_space])

        while not terminal:
            a = agent.get_action(s)
            s_next, r, terminal, _ = env.step(a)
            s_next = np.reshape(s_next, [1, o_space])

            # The environment reward is stored unchanged; no extra penalty is
            # applied on the terminal step.
            # save the sample <s, a, r, s'> to the replay memory
            agent.add_experience(s, a, r, s_next, terminal)
            # every time step do the training
            agent.train_model()
            score += r
            s = s_next

        # End of episode: refresh the target network and log the result.
        agent.update_target_model()

        scores.append(score)
        episodes.append(num_episode)
        print("episode:", num_episode, "  score:", score, "  memory length:",
              len(agent.memory), "  epsilon:", agent.epsilon)

    result_dict[seed] = scores

print(result_dict)

episode: 0   score: 23.0   memory length: 23   epsilon: 0.9772512378214517
episode: 1   score: 30.0   memory length: 53   epsilon: 0.9483548639781193
episode: 2   score: 12.0   memory length: 65   epsilon: 0.9370369888620198
episode: 3   score: 23.0   memory length: 88   epsilon: 0.9157205572498949
episode: 4   score: 14.0   memory length: 102   epsilon: 0.9029834676116293
episode: 5   score: 17.0   memory length: 119   epsilon: 0.887754942528593
episode: 6   score: 12.0   memory length: 131   epsilon: 0.8771602801771059
episode: 7   score: 13.0   memory length: 144   epsilon: 0.8658253647948594
episode: 8   score: 15.0   memory length: 159   epsilon: 0.8529285032149548
episode: 9   score: 19.0   memory length: 178   epsilon: 0.8368678892362568
episode: 10   score: 13.0   memory length: 191   epsilon: 0.8260536436246144
episode: 11   score: 19.0   memory length: 210   epsilon: 0.8104990823150267
episode: 12   score: 19.0   memory length: 229   epsilon: 0.7952374128525983
episode: 13   

episode: 108   score: 14.0   memory length: 1482   epsilon: 0.22701447560002463
episode: 109   score: 9.0   memory length: 1491   epsilon: 0.2249794988001053
episode: 110   score: 10.0   memory length: 1501   epsilon: 0.22273980093919937
episode: 111   score: 11.0   memory length: 1512   epsilon: 0.22030187713925398
episode: 112   score: 16.0   memory length: 1528   epsilon: 0.21680336036122028
episode: 113   score: 14.0   memory length: 1542   epsilon: 0.21378776372211966
episode: 114   score: 31.0   memory length: 1573   epsilon: 0.20725880007153075
episode: 115   score: 46.0   memory length: 1619   epsilon: 0.1979362954770861
episode: 116   score: 92.0   memory length: 1711   epsilon: 0.1805304042562526
episode: 117   score: 90.0   memory length: 1801   epsilon: 0.1649849368967147
episode: 118   score: 127.0   memory length: 1928   epsilon: 0.14529855648899012
episode: 119   score: 37.0   memory length: 1965   epsilon: 0.1400181593009795
episode: 120   score: 70.0   memory length: 2

episode: 209   score: 178.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: 199.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 211   score: 169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 212   score: 193.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 213   score: 189.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 214   score: 162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 215   score: 172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 216   score: 163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 217   score: 190.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 218   score: 159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 219   score: 168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 220   score: 177.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 308   score: 328.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 309   score: 292.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 310   score: 231.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 311   score: 205.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 312   score: 322.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 313   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 314   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 315   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 316   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 317   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 318   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 319   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 407   score: 87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 408   score: 69.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 409   score: 71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 410   score: 73.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 411   score: 73.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 412   score: 47.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 413   score: 48.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 414   score: 33.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 415   score: 44.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 416   score: 103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 417   score: 96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 418   score: 125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 419  

episode: 49   score: 11.0   memory length: 680   epsilon: 0.506444656987029
episode: 50   score: 11.0   memory length: 691   epsilon: 0.5009015368198305
episode: 51   score: 11.0   memory length: 702   epsilon: 0.49541908701565013
episode: 52   score: 10.0   memory length: 712   epsilon: 0.4904871306580321
episode: 53   score: 15.0   memory length: 727   epsilon: 0.4831811023432807
episode: 54   score: 10.0   memory length: 737   epsilon: 0.47837097658906735
episode: 55   score: 11.0   memory length: 748   epsilon: 0.4731351274767304
episode: 56   score: 17.0   memory length: 765   epsilon: 0.46515585607821586
episode: 57   score: 10.0   memory length: 775   epsilon: 0.46052517380982005
episode: 58   score: 11.0   memory length: 786   epsilon: 0.45548464994757865
episode: 59   score: 11.0   memory length: 797   epsilon: 0.4504992954490349
episode: 60   score: 10.0   memory length: 807   epsilon: 0.44601452099741573
episode: 61   score: 8.0   memory length: 815   epsilon: 0.442458868290

episode: 153   score: 236.0   memory length: 5063   epsilon: 0.009998671593271896
episode: 154   score: 195.0   memory length: 5258   epsilon: 0.009998671593271896
episode: 155   score: 253.0   memory length: 5511   epsilon: 0.009998671593271896
episode: 156   score: 201.0   memory length: 5712   epsilon: 0.009998671593271896
episode: 157   score: 179.0   memory length: 5891   epsilon: 0.009998671593271896
episode: 158   score: 188.0   memory length: 6079   epsilon: 0.009998671593271896
episode: 159   score: 195.0   memory length: 6274   epsilon: 0.009998671593271896
episode: 160   score: 204.0   memory length: 6478   epsilon: 0.009998671593271896
episode: 161   score: 171.0   memory length: 6649   epsilon: 0.009998671593271896
episode: 162   score: 141.0   memory length: 6790   epsilon: 0.009998671593271896
episode: 163   score: 184.0   memory length: 6974   epsilon: 0.009998671593271896
episode: 164   score: 152.0   memory length: 7126   epsilon: 0.009998671593271896
episode: 165   s

episode: 253   score: 170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 254   score: 156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 255   score: 202.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 256   score: 214.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 257   score: 204.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 258   score: 195.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 259   score: 189.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: 238.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: 205.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: 167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 263   score: 184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 264   score: 211.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 352   score: 435.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: 293.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 354   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 356   score: 203.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 357   score: 203.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 358   score: 195.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 359   score: 195.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 360   score: 236.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 361   score: 269.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 362   score: 251.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 363   score: 228.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 451   score: 435.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 452   score: 354.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 453   score: 370.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: 320.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: 319.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: 439.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: 415.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 459   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 460   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 461   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 462   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 55   score: 12.0   memory length: 1309   epsilon: 0.2699131774597243
episode: 56   score: 15.0   memory length: 1324   epsilon: 0.2658926982385883
episode: 57   score: 10.0   memory length: 1334   epsilon: 0.26324570457626995
episode: 58   score: 12.0   memory length: 1346   epsilon: 0.2601040725539001
episode: 59   score: 11.0   memory length: 1357   epsilon: 0.25725719064833996
episode: 60   score: 9.0   memory length: 1366   epsilon: 0.25495111561414624
episode: 61   score: 13.0   memory length: 1379   epsilon: 0.25165656456412355
episode: 62   score: 24.0   memory length: 1403   epsilon: 0.24568575753695765
episode: 63   score: 9.0   memory length: 1412   epsilon: 0.24348340979971822
episode: 64   score: 8.0   memory length: 1420   epsilon: 0.24154234643875414
episode: 65   score: 12.0   memory length: 1432   epsilon: 0.2386597270564102
episode: 66   score: 9.0   memory length: 1441   epsilon: 0.2365203612457005
episode: 67   score: 11.0   memory length: 1452   epsilon: 0.

episode: 156   score: 229.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 157   score: 254.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 158   score: 224.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 159   score: 276.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 160   score: 264.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 161   score: 304.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 162   score: 235.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 163   score: 185.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 164   score: 194.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 165   score: 173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 166   score: 208.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 167   score: 218.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 255   score: 174.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 256   score: 253.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 257   score: 174.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 258   score: 234.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 259   score: 222.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: 177.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: 330.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: 190.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 263   score: 285.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 264   score: 200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: 370.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: 208.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 354   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 356   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 357   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 358   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 359   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 360   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 361   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 362   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 363   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 453   score: 218.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 459   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 460   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 461   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 462   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 463   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 464   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 62   score: 15.0   memory length: 1013   epsilon: 0.36294395959361136
episode: 63   score: 11.0   memory length: 1024   epsilon: 0.35897147818971
episode: 64   score: 8.0   memory length: 1032   epsilon: 0.35610973748828684
episode: 65   score: 14.0   memory length: 1046   epsilon: 0.3511564778813719
episode: 66   score: 10.0   memory length: 1056   epsilon: 0.34766067307894
episode: 67   score: 16.0   memory length: 1072   epsilon: 0.3421396275316961
episode: 68   score: 11.0   memory length: 1083   epsilon: 0.33839485296807126
episode: 69   score: 10.0   memory length: 1093   epsilon: 0.3350260916703695
episode: 70   score: 10.0   memory length: 1103   epsilon: 0.33169086679493115
episode: 71   score: 16.0   memory length: 1119   epsilon: 0.32642343068557345
episode: 72   score: 13.0   memory length: 1132   epsilon: 0.3222052939901265
episode: 73   score: 14.0   memory length: 1146   epsilon: 0.3177236235951741
episode: 74   score: 12.0   memory length: 1158   epsilon: 0.313

episode: 165   score: 186.0   memory length: 8003   epsilon: 0.009998671593271896
episode: 166   score: 151.0   memory length: 8154   epsilon: 0.009998671593271896
episode: 167   score: 185.0   memory length: 8339   epsilon: 0.009998671593271896
episode: 168   score: 181.0   memory length: 8520   epsilon: 0.009998671593271896
episode: 169   score: 171.0   memory length: 8691   epsilon: 0.009998671593271896
episode: 170   score: 173.0   memory length: 8864   epsilon: 0.009998671593271896
episode: 171   score: 174.0   memory length: 9038   epsilon: 0.009998671593271896
episode: 172   score: 208.0   memory length: 9246   epsilon: 0.009998671593271896
episode: 173   score: 187.0   memory length: 9433   epsilon: 0.009998671593271896
episode: 174   score: 168.0   memory length: 9601   epsilon: 0.009998671593271896
episode: 175   score: 166.0   memory length: 9767   epsilon: 0.009998671593271896
episode: 176   score: 169.0   memory length: 9936   epsilon: 0.009998671593271896
episode: 177   s

episode: 264   score: 139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: 381.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: 222.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: 148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: 384.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: 367.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 270   score: 380.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 271   score: 366.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 272   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 273   score: 129.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 274   score: 152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 275   score: 145.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 363   score: 416.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: 369.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: 359.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: 416.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: 317.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: 329.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: 322.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: 448.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: 419.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: 471.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 374   score: 382.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 462   score: 440.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 463   score: 332.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 464   score: 423.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 465   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 466   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: 395.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 66   score: 12.0   memory length: 1053   epsilon: 0.34870574454404424
episode: 67   score: 10.0   memory length: 1063   epsilon: 0.3452343370855593
episode: 68   score: 10.0   memory length: 1073   epsilon: 0.34179748790416437
episode: 69   score: 14.0   memory length: 1087   epsilon: 0.3370433025720758
episode: 70   score: 12.0   memory length: 1099   epsilon: 0.3330209538162239
episode: 71   score: 11.0   memory length: 1110   epsilon: 0.3293759846379912
episode: 72   score: 13.0   memory length: 1123   epsilon: 0.3251196941980479
episode: 73   score: 10.0   memory length: 1133   epsilon: 0.32188308869613635
episode: 74   score: 11.0   memory length: 1144   epsilon: 0.31836002528572027
episode: 75   score: 8.0   memory length: 1152   epsilon: 0.3158220413582485
episode: 76   score: 12.0   memory length: 1164   epsilon: 0.3120529517919121
episode: 77   score: 9.0   memory length: 1173   epsilon: 0.30925568295888084
episode: 78   score: 12.0   memory length: 1185   epsilon: 0.

episode: 169   score: 98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 170   score: 145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 171   score: 157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 172   score: 123.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 173   score: 166.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 174   score: 259.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: 294.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: 289.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: 200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: 345.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 179   score: 229.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 180   score: 396.0   memory length: 10000   epsilon: 0.009998671593271896
episo

episode: 268   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 270   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 271   score: 208.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 272   score: 159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 273   score: 155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 274   score: 191.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 275   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 276   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 277   score: 170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: 191.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: 212.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 367   score: 204.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: 238.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: 232.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: 263.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: 218.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: 265.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: 242.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 374   score: 320.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 375   score: 273.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 376   score: 255.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 377   score: 226.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 378   score: 233.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 466   score: 162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: 171.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: 139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: 128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: 162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: 201.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: 152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: 154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 474   score: 239.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: 139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: 170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 477   score: 233.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 74   score: 14.0   memory length: 1048   epsilon: 0.35045451608208705
episode: 75   score: 8.0   memory length: 1056   epsilon: 0.34766067307894
episode: 76   score: 9.0   memory length: 1065   epsilon: 0.34454421364572524
episode: 77   score: 12.0   memory length: 1077   epsilon: 0.340432347370627
episode: 78   score: 11.0   memory length: 1088   epsilon: 0.33670625926950376
episode: 79   score: 9.0   memory length: 1097   epsilon: 0.33368799612046873
episode: 80   score: 10.0   memory length: 1107   epsilon: 0.33036609214652046
episode: 81   score: 9.0   memory length: 1116   epsilon: 0.3274046627873518
episode: 82   score: 11.0   memory length: 1127   epsilon: 0.32382116483926726
episode: 83   score: 10.0   memory length: 1137   epsilon: 0.3205974863526735
episode: 84   score: 11.0   memory length: 1148   epsilon: 0.31708849407160733
episode: 85   score: 12.0   memory length: 1160   epsilon: 0.31330429038059615
episode: 86   score: 12.0   memory length: 1172   epsilon: 0.30

episode: 177   score: 124.0   memory length: 9165   epsilon: 0.009998671593271896
episode: 178   score: 91.0   memory length: 9256   epsilon: 0.009998671593271896
episode: 179   score: 124.0   memory length: 9380   epsilon: 0.009998671593271896
episode: 180   score: 216.0   memory length: 9596   epsilon: 0.009998671593271896
episode: 181   score: 101.0   memory length: 9697   epsilon: 0.009998671593271896
episode: 182   score: 128.0   memory length: 9825   epsilon: 0.009998671593271896
episode: 183   score: 140.0   memory length: 9965   epsilon: 0.009998671593271896
episode: 184   score: 120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: 86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: 255.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: 93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: 188.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189  

episode: 276   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 277   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 280   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 375   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 376   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 377   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 378   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 379   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 380   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 474   score: 188.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: 208.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: 206.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 477   score: 198.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 478   score: 200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 479   score: 193.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 480   score: 180.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 481   score: 197.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 482   score: 227.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: 211.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: 195.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: 215.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 80   score: 10.0   memory length: 1299   epsilon: 0.2726272140335106
episode: 81   score: 11.0   memory length: 1310   epsilon: 0.2696432642822646
episode: 82   score: 11.0   memory length: 1321   epsilon: 0.2666919743524145
episode: 83   score: 10.0   memory length: 1331   epsilon: 0.2640370238006374
episode: 84   score: 10.0   memory length: 1341   epsilon: 0.2614085035996406
episode: 85   score: 10.0   memory length: 1351   epsilon: 0.2588061506321157
episode: 86   score: 11.0   memory length: 1362   epsilon: 0.25597347469571885
episode: 87   score: 11.0   memory length: 1373   epsilon: 0.253171802863904
episode: 88   score: 9.0   memory length: 1382   epsilon: 0.2509023495884683
episode: 89   score: 9.0   memory length: 1391   epsilon: 0.248653239882542
episode: 90   score: 10.0   memory length: 1401   epsilon: 0.2461778670932771
episode: 91   score: 10.0   memory length: 1411   epsilon: 0.24372713693665488
episode: 92   score: 14.0   memory length: 1425   epsilon: 0.24033

episode: 182   score: 213.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: 201.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: 187.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: 196.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: 414.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: 178.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: 236.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: 167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: 164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: 193.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: 169.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 281   score: 383.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: 280.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: 471.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 380   score: 364.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: 304.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: 379.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: 280.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: 363.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: 275.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: 277.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: 477.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: 340.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: 399.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 479   score: 433.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 480   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 481   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 482   score: 220.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: 277.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: 211.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: 221.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: 403.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 84   score: 14.0   memory length: 1249   epsilon: 0.2866122801967262
episode: 85   score: 12.0   memory length: 1261   epsilon: 0.28319178633180314
episode: 86   score: 10.0   memory length: 1271   epsilon: 0.2803725781752547
episode: 87   score: 11.0   memory length: 1282   epsilon: 0.27730385413804465
episode: 88   score: 14.0   memory length: 1296   epsilon: 0.2734467341692626
episode: 89   score: 11.0   memory length: 1307   epsilon: 0.27045381463518003
episode: 90   score: 15.0   memory length: 1322   epsilon: 0.26642528237806207
episode: 91   score: 13.0   memory length: 1335   epsilon: 0.2629824588716937
episode: 92   score: 14.0   memory length: 1349   epsilon: 0.259324540388352
episode: 93   score: 19.0   memory length: 1368   epsilon: 0.2544414683340336
episode: 94   score: 16.0   memory length: 1384   epsilon: 0.25040079579164093
episode: 95   score: 11.0   memory length: 1395   epsilon: 0.24766011784808684
episode: 96   score: 29.0   memory length: 1424   epsilon: 

episode: 185   score: 163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: 173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: 158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: 125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: 129.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: 134.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: 134.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: 142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: 133.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: 162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: 136.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: 154.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 284   score: 92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: 184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: 222.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: 467.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: 252.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: 257.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: 184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: 176.0   memory length: 10000   epsilon: 0.009998671593271896
episo

episode: 383   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 482   score: 151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: 173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 89   score: 12.0   memory length: 1350   epsilon: 0.25906521584796366
episode: 90   score: 10.0   memory length: 1360   epsilon: 0.2564861905907097
episode: 91   score: 12.0   memory length: 1372   epsilon: 0.253425228091996
episode: 92   score: 15.0   memory length: 1387   epsilon: 0.24965034435625258
episode: 93   score: 8.0   memory length: 1395   epsilon: 0.24766011784808684
episode: 94   score: 11.0   memory length: 1406   epsilon: 0.24494943707593364
episode: 95   score: 10.0   memory length: 1416   epsilon: 0.24251093608728802
episode: 96   score: 10.0   memory length: 1426   epsilon: 0.24009671066809293
episode: 97   score: 9.0   memory length: 1435   epsilon: 0.2379444636157624
episode: 98   score: 12.0   memory length: 1447   epsilon: 0.23510478215678413
episode: 99   score: 11.0   memory length: 1458   epsilon: 0.23253152160126508
episode: 100   score: 10.0   memory length: 1468   epsilon: 0.23021664244871493
episode: 101   score: 11.0   memory length: 1479   epsilo

episode: 191   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: 179.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: 135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: 431.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: 467.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: 437.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 290   score: 484.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: 208.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: 129.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: 217.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: 418.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 389   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 488   score: 204.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: 258.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: 371.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 94   score: 16.0   memory length: 1230   epsilon: 0.29211275315428575
episode: 95   score: 13.0   memory length: 1243   epsilon: 0.28833798882226386
episode: 96   score: 9.0   memory length: 1252   epsilon: 0.2857533029063643
episode: 97   score: 12.0   memory length: 1264   epsilon: 0.2823430602649749
episode: 98   score: 10.0   memory length: 1274   epsilon: 0.2795323012780908
episode: 99   score: 12.0   memory length: 1286   epsilon: 0.2761963014356792
episode: 100   score: 15.0   memory length: 1301   epsilon: 0.2720822322326576
episode: 101   score: 10.0   memory length: 1311   epsilon: 0.26937362101798235
episode: 102   score: 10.0   memory length: 1321   epsilon: 0.2666919743524145
episode: 103   score: 28.0   memory length: 1349   epsilon: 0.259324540388352
episode: 104   score: 12.0   memory length: 1361   epsilon: 0.256229704400119
episode: 105   score: 11.0   memory length: 1372   epsilon: 0.253425228091996
episode: 106   score: 11.0   memory length: 1383   epsilon:

episode: 195   score: 206.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: 215.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: 173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: 225.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: 249.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: 240.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: 207.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: 190.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: 221.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: 282.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: 218.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: 313.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 294   score: 489.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: 412.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: 329.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: 236.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: 227.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: 336.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: 176.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: 184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: 444.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: 279.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: 203.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: 478.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 393   score: 223.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: 193.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: 192.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: 255.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: 236.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: 204.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: 200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: 246.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: 354.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: 316.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: 235.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 492   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: 305.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: 433.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: 137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: 500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: 418.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: 359.0   memory length: 10000   epsilon: 0.009998671593271896
{0: [23.0, 30.0, 12.0, 23.0, 14.0, 17.0, 12.0, 13.0, 15.0, 19.0, 13.0, 19.0, 19.0, 36.0, 11.0, 11.0, 13.0, 12.0, 16.0, 18.0, 13.0, 13.0, 48.0, 24.0, 26.0, 15.0, 15.0, 12.0, 10.0, 15.0, 10.0, 11.0, 16.0, 10.0, 16.0, 21.0, 15.0, 10.0, 14.0, 23.0, 13.0, 10.0, 19.0, 12.0, 10.0, 18.0, 10.0, 9.0, 19.0, 13.0, 16.0, 10.0, 11.0, 17.0, 15.0, 16

In [57]:
%%time

# Train a fresh DQN agent on MountainCar-v0 once per seed and collect the
# per-episode scores in `result_dict` (seed -> list of episode scores).
# Relies on `seeds`, `lr`, `num_episodes` and `DQN` defined in earlier cells.
result_dict = {}

env_name = 'MountainCar-v0'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source the run draws from. The agent's exploration
    # uses the stdlib `random` module (random.randrange in DQN.get_action),
    # so seeding numpy alone does not make a run repeatable; the environment
    # keeps its own generator as well.
    # NOTE(review): Keras weight initialisation is not seeded here, so runs
    # may still differ slightly unless the backend is seeded too.
    random.seed(seed)
    np.random.seed(seed)
    env.seed(seed)
    env.action_space.np_random.seed(seed)

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN(o_space, a_space, lr, units=64)

    scores, episodes = [], []

    try:
        for num_episode in range(num_episodes):
            terminal = False
            score = 0
            s = env.reset()
            s = np.reshape(s, [1, o_space])

            while not terminal:
                a = agent.get_action(s)
                s_next, r, terminal, _ = env.step(a)
                s_next = np.reshape(s_next, [1, o_space])

                # Store the raw transition <s, a, r, s', terminal>; no reward
                # shaping or terminal penalty is applied in this cell.
                # NOTE(review): `terminal` is presumably also set when the
                # environment's step limit is hit - confirm whether such
                # time-outs should be stored as true terminal states.
                agent.add_experience(s, a, r, s_next, terminal)
                # Train on every environment step.
                agent.train_model()
                score += r
                s = s_next

            # End of episode: sync the target network and log progress.
            agent.update_target_model()

            scores.append(score)
            episodes.append(num_episode)
            print("episode:", num_episode, "  score:", score, "  memory length:",
                  len(agent.memory), "  epsilon:", agent.epsilon)
    finally:
        # Release the environment's resources even if training is interrupted.
        env.close()

    result_dict[seed] = scores

print(result_dict)

episode: 0   score: -200.0   memory length: 200   epsilon: 0.818648829478636
episode: 1   score: -200.0   memory length: 400   epsilon: 0.6701859060067403
episode: 2   score: -200.0   memory length: 600   epsilon: 0.5486469074854965
episode: 3   score: -200.0   memory length: 800   epsilon: 0.4491491486100748
episode: 4   score: -200.0   memory length: 1000   epsilon: 0.3676954247709635
episode: 5   score: -200.0   memory length: 1200   epsilon: 0.3010134290933992
episode: 6   score: -200.0   memory length: 1400   epsilon: 0.24642429138466176
episode: 7   score: -200.0   memory length: 1600   epsilon: 0.20173495769715546
episode: 8   score: -200.0   memory length: 1800   epsilon: 0.1651500869836984
episode: 9   score: -200.0   memory length: 2000   epsilon: 0.1351999253974994
episode: 10   score: -200.0   memory length: 2200   epsilon: 0.11068126067226178
episode: 11   score: -200.0   memory length: 2400   epsilon: 0.09060908449456685
episode: 12   score: -200.0   memory length: 2600  

episode: 100   score: -181.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 104   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 105   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: -167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: -172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: -173.0   memory length: 10000   epsilon: 0.00999867159

episode: 198   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 207   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 208   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: -140.0   memory length: 10000   epsilon: 0.00999867159

episode: 296   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 306   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 307   score: -86.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 394   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: -150.0   memory length: 10000   epsilon: 0.009998671593271

episode: 492   score: -108.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -102.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 0   score: -200.0   memory length: 200   epsilon: 0.818648829478636
episode: 1   score: -200.0   memory length: 400   epsilon: 0.6701859060067403
episode: 2   score: -200.0   memory length: 600   epsilon: 0.5486469074854965
episode: 3   score: -200.0   memory length: 800   epsilon: 0.4491491486100748
episode: 4   scor

episode: 92   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -176.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -187.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -186.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -171.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 190   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -124.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -133.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -129.0   memory length: 10000   epsilon: 0.00999867159

episode: 288   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -97.0   memory length: 10000   epsilon: 0.009998671593271

episode: 386   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -89.0   memory length: 10000   epsilon: 0.009998671593271

episode: 484   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -102.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -98.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 84   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 85   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 86   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 87   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 182   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: -162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: -131.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -128.0   memory length: 10000   epsilon: 0.009998671593271

episode: 280   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -86.0   memory length: 10000   epsilon: 0.009998671593271

episode: 378   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 379   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 380   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: -164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -141.0   memory length: 10000   epsilon: 0.00999867159327

episode: 476   score: -122.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 477   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 478   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 479   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 480   score: -170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 481   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 482   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -100.0   memory length: 10000   epsilon: 0.00999867159327

episode: 76   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -191.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -195.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 80   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 81   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 82   score: -181.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 83   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 84   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 85   score: -189.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 86   score: -197.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 87   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 174   score: -164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 179   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 180   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 181   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 182   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: -126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: -120.0   memory length: 10000   epsilon: 0.00999867159

episode: 272   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 273   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 274   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 275   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 276   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 277   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 280   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -143.0   memory length: 10000   epsilon: 0.009998671593271

episode: 370   score: -169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: -170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 374   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 375   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 376   score: -178.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 377   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 378   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 379   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 380   score: -170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: -144.0   memory length: 10000   epsilon: 0.00999867159327

episode: 468   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 474   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: -122.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 477   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 478   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 479   score: -106.0   memory length: 10000   epsilon: 0.009998671593271

episode: 68   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 69   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 70   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 71   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 72   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 73   score: -192.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 74   score: -167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 75   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 76   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -187.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 167   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 168   score: -197.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 169   score: -172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 170   score: -169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 171   score: -162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 172   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 173   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 174   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: -169.0   memory length: 10000   epsilon: 0.00999867159327

episode: 265   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 270   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 271   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 272   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 273   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 274   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 275   score: -158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 276   score: -108.0   memory length: 10000   epsilon: 0.00999867159

episode: 363   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: -120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 374   score: -150.0   memory length: 10000   epsilon: 0.00999867159327

episode: 461   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 462   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 463   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 464   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 465   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 466   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: -109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: -151.0   memory length: 10000   epsilon: 0.00999867159327

episode: 61   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 62   score: -178.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 63   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 64   score: -168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 65   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 66   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 67   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 68   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 69   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 70   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 71   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 72   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 160   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 161   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 162   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 163   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 164   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 165   score: -102.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 166   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 167   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 168   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 169   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 170   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 171   score: -94.0   memory length: 10000   epsilon: 0.009998671593271

episode: 258   score: -172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 259   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: -187.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 263   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 264   score: -168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: -183.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: -194.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: -92.0   memory length: 10000   epsilon: 0.0099986715932718

episode: 356   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 357   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 358   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 359   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 360   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 361   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 362   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 363   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: -146.0   memory length: 10000   epsilon: 0.009998671593

episode: 454   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 459   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 460   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 461   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 462   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 463   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 464   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 465   score: -104.0   memory length: 10000   epsilon: 0.0099986715932

episode: 54   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 55   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 56   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 57   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 58   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 59   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 60   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 61   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 62   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 63   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 64   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 65   score: -183.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 153   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 154   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 155   score: -171.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 156   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 157   score: -164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 158   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 159   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 160   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 161   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 162   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 163   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 164   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 251   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 252   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 253   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 254   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 255   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 256   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 257   score: -120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 258   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 259   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: -101.0   memory length: 10000   epsilon: 0.00999867159327

episode: 349   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 351   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 352   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 354   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 356   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 357   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 358   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 359   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 360   score: -154.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 447   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 448   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 449   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 450   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 451   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 452   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 453   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: -99.0   memory length: 10000   epsilon: 0.009998671593271

episode: 47   score: -200.0   memory length: 9600   epsilon: 0.009998671593271896
episode: 48   score: -200.0   memory length: 9800   epsilon: 0.009998671593271896
episode: 49   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 50   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 51   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 52   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 53   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 54   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 55   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 56   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 57   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 58   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episod

episode: 146   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 147   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 148   score: -194.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 149   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 150   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 151   score: -175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 152   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 153   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 154   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 155   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 156   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 157   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 244   score: -163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 246   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 249   score: -184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 250   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 251   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 252   score: -167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 253   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 254   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 255   score: -149.0   memory length: 10000   epsilon: 0.00999867159

episode: 342   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 343   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 344   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 345   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 347   score: -152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 348   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 349   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 351   score: -102.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 352   score: -172.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: -113.0   memory length: 10000   epsilon: 0.0099986715932

episode: 440   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 442   score: -152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 443   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 444   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 445   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 446   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 447   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 448   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 449   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 450   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 451   score: -145.0   memory length: 10000   epsilon: 0.00999867159327

episode: 40   score: -200.0   memory length: 8200   epsilon: 0.009998671593271896
episode: 41   score: -200.0   memory length: 8400   epsilon: 0.009998671593271896
episode: 42   score: -200.0   memory length: 8600   epsilon: 0.009998671593271896
episode: 43   score: -200.0   memory length: 8800   epsilon: 0.009998671593271896
episode: 44   score: -200.0   memory length: 9000   epsilon: 0.009998671593271896
episode: 45   score: -200.0   memory length: 9200   epsilon: 0.009998671593271896
episode: 46   score: -200.0   memory length: 9400   epsilon: 0.009998671593271896
episode: 47   score: -200.0   memory length: 9600   epsilon: 0.009998671593271896
episode: 48   score: -200.0   memory length: 9800   epsilon: 0.009998671593271896
episode: 49   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 50   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 51   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 52  

episode: 139   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 140   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 141   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 142   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 143   score: -182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 144   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 145   score: -173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 146   score: -197.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 147   score: -199.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 148   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 149   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 150   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 237   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 238   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 239   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 240   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 241   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 242   score: -136.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 243   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 244   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 246   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: -142.0   memory length: 10000   epsilon: 0.009998671593

episode: 335   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 339   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 340   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 341   score: -168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 342   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 343   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 344   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 345   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: -87.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 433   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 438   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: -125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 442   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 443   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 444   score: -85.0   memory length: 10000   epsilon: 0.009998671593271

episode: 33   score: -200.0   memory length: 6800   epsilon: 0.009998671593271896
episode: 34   score: -200.0   memory length: 7000   epsilon: 0.009998671593271896
episode: 35   score: -200.0   memory length: 7200   epsilon: 0.009998671593271896
episode: 36   score: -200.0   memory length: 7400   epsilon: 0.009998671593271896
episode: 37   score: -200.0   memory length: 7600   epsilon: 0.009998671593271896
episode: 38   score: -145.0   memory length: 7745   epsilon: 0.009998671593271896
episode: 39   score: -200.0   memory length: 7945   epsilon: 0.009998671593271896
episode: 40   score: -183.0   memory length: 8128   epsilon: 0.009998671593271896
episode: 41   score: -200.0   memory length: 8328   epsilon: 0.009998671593271896
episode: 42   score: -200.0   memory length: 8528   epsilon: 0.009998671593271896
episode: 43   score: -200.0   memory length: 8728   epsilon: 0.009998671593271896
episode: 44   score: -200.0   memory length: 8928   epsilon: 0.009998671593271896
episode: 45   sc

episode: 132   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 133   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 134   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 135   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 136   score: -102.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 137   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 138   score: -175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 139   score: -169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 140   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 141   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 142   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 143   score: -178.0   memory length: 10000   epsilon: 0.009998671593

episode: 230   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 231   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 232   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 233   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 234   score: -129.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 235   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 236   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 237   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 238   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 239   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 240   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 241   score: -138.0   memory length: 10000   epsilon: 0.0099986715932

episode: 328   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 329   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 330   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 331   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 332   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 333   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 334   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 335   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: -174.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 339   score: -151.0   memory length: 10000   epsilon: 0.0099986715932

episode: 426   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 427   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 428   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 429   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 430   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 431   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 432   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 433   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: -140.0   memory length: 10000   epsilon: 0.0099986715932

In [58]:
%%time

# Train a fresh DQN agent on LunarLander-v2 once per seed and collect the
# per-episode scores in `result_dict` (seed -> list of episode scores).
# Relies on `seeds`, `lr`, `num_episodes` and the `DQN` class defined in
# earlier cells.
result_dict = {}

env_name = 'LunarLander-v2'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every source of randomness this loop touches so that a given seed
    # reproduces the same run:
    #   * np.random  -> epsilon test in DQN.get_action (np.random.rand)
    #   * random     -> exploratory action in DQN.get_action (random.randrange)
    #   * env        -> the environment's own random number generator
    # NOTE(review): Keras weight initialisation is not seeded here, so runs may
    # still differ between kernels - confirm whether backend seeding is needed.
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)
    env.action_space.np_random.seed(seed)

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN(o_space, a_space, lr, units=128)

    scores, episodes = [], []
    # Register the list up front: it is filled in place below, so partial
    # results stay reachable if the cell is interrupted mid-run.
    result_dict[seed] = scores

    for num_episode in range(num_episodes):
        terminal = False
        score = 0
        s = env.reset()
        # reshape the observation into a single-row batch of width o_space
        s = np.reshape(s, [1, o_space])

        while not terminal:

            a = agent.get_action(s)
            s_next, r, terminal, _ = env.step(a)
            s_next = np.reshape(s_next, [1, o_space])

            # the environment reward is stored as-is (no extra terminal penalty)

            # save the sample <s, a, r, s', terminal> to the replay memory
            agent.add_experience(s, a, r, s_next, terminal)
            # every time step do the training
            agent.train_model()
            score += r
            s = s_next

            if terminal:
                # sync the target network with the main network once per episode
                agent.update_target_model()

                scores.append(score)
                episodes.append(num_episode)
                print("episode:", num_episode, "  score:", score, "  memory length:",
                      len(agent.memory), "  epsilon:", agent.epsilon)

    # release the environment created for this seed before making the next one
    env.close()

print(result_dict)

episode: 0   score: -105.73637539219824   memory length: 64   epsilon: 0.9379749638258457
episode: 1   score: -279.2038802864672   memory length: 163   epsilon: 0.8495219033622532
episode: 2   score: -422.2187524496111   memory length: 252   epsilon: 0.7771467460721305
episode: 3   score: -252.78896238724397   memory length: 320   epsilon: 0.7260327850203407
episode: 4   score: -282.40551244256494   memory length: 406   epsilon: 0.6661748299656206
episode: 5   score: -289.04306299049824   memory length: 472   epsilon: 0.623606226269926
episode: 6   score: -372.3548237724995   memory length: 543   epsilon: 0.5808448013500086
episode: 7   score: -402.80578696153617   memory length: 605   epsilon: 0.5459091539334175
episode: 8   score: -532.5273298937791   memory length: 688   epsilon: 0.5024072518560504
episode: 9   score: -310.3956934043989   memory length: 745   epsilon: 0.47455737640838375
episode: 10   score: -497.0439284418464   memory length: 811   epsilon: 0.44423313721693997
epis

episode: 87   score: -33.55507514431528   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: 168.49125249185602   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -82.88247936212736   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -118.64380783571596   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -44.30993599685888   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: 8.339285803442436   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: 172.02040694136707   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: 237.4255656264892   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -39.08641837257983   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: 198.88414142328116   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: 217.06646765171752   memory le

episode: 173   score: -210.55494929378654   memory length: 10000   epsilon: 0.009998671593271896
episode: 174   score: -61.03014959376882   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: 228.72747936968986   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: 187.12374041134342   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: -38.046096617070305   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: -96.19037176114053   memory length: 10000   epsilon: 0.009998671593271896
episode: 179   score: -80.01851906672987   memory length: 10000   epsilon: 0.009998671593271896
episode: 180   score: -155.26823585756992   memory length: 10000   epsilon: 0.009998671593271896
episode: 181   score: 195.40125140522477   memory length: 10000   epsilon: 0.009998671593271896
episode: 182   score: 2.6366852304959707   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: -120.5349385647

episode: 259   score: 266.9029739504291   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: 240.47463473269843   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: 226.08553422506333   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: 301.11360034639625   memory length: 10000   epsilon: 0.009998671593271896
episode: 263   score: 290.83642079059433   memory length: 10000   epsilon: 0.009998671593271896
episode: 264   score: 282.5946845333133   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: 136.80993178190698   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: 273.9388837553225   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: 254.3189210191104   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: 266.1818190049306   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: 287.19236261244794   me

episode: 345   score: 229.60720795143044   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: 9.502975813384921   memory length: 10000   epsilon: 0.009998671593271896
episode: 347   score: 254.5566676408245   memory length: 10000   epsilon: 0.009998671593271896
episode: 348   score: 238.61334886212134   memory length: 10000   epsilon: 0.009998671593271896
episode: 349   score: 270.1516287856824   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: 262.9162611057591   memory length: 10000   epsilon: 0.009998671593271896
episode: 351   score: 291.0980776428392   memory length: 10000   epsilon: 0.009998671593271896
episode: 352   score: 273.72299985403197   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: 253.3647227878405   memory length: 10000   epsilon: 0.009998671593271896
episode: 354   score: 251.53384403920631   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: 289.02771889691536   mem

episode: 431   score: 26.73573768652227   memory length: 10000   epsilon: 0.009998671593271896
episode: 432   score: -292.8153511451591   memory length: 10000   epsilon: 0.009998671593271896
episode: 433   score: 13.867619799505874   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: 304.41091412007773   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: 293.86881339908376   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: 250.89214713165586   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: -254.5509733684332   memory length: 10000   epsilon: 0.009998671593271896
episode: 438   score: 274.9432688297844   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: -51.69005946790896   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: 271.9961693403067   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: 261.443069602779   me

episode: 18   score: -251.78765535910196   memory length: 3177   epsilon: 0.04164435058460235
episode: 19   score: -204.92389952163677   memory length: 3673   epsilon: 0.025353519424892185
episode: 20   score: -162.5915782418411   memory length: 4012   epsilon: 0.01806087399251748
episode: 21   score: -119.36327276905097   memory length: 4323   epsilon: 0.013231393970007657
episode: 22   score: -241.16904914921463   memory length: 4782   epsilon: 0.009998671593271896
episode: 23   score: -62.41578370332687   memory length: 5782   epsilon: 0.009998671593271896
episode: 24   score: -351.2482721118643   memory length: 6770   epsilon: 0.009998671593271896
episode: 25   score: -115.25691765767984   memory length: 7770   epsilon: 0.009998671593271896
episode: 26   score: -65.90949701252207   memory length: 8770   epsilon: 0.009998671593271896
episode: 27   score: -105.91447122529496   memory length: 9770   epsilon: 0.009998671593271896
episode: 28   score: -108.30566408154222   memory length

episode: 105   score: -89.92257805686201   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -29.19283180153066   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: 229.64902094695955   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: 263.3939755714206   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: 26.637145547327094   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: 251.66431441288552   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: 284.64946094539357   memory length: 10000   epsilon: 0.009998671593271896
episode: 112   score: 255.64781693304195   memory length: 10000   epsilon: 0.009998671593271896
episode: 113   score: 1.4646315822870122   memory length: 10000   epsilon: 0.009998671593271896
episode: 114   score: -148.05759949649772   memory length: 10000   epsilon: 0.009998671593271896
episode: 115   score: 231.64406470082116

episode: 191   score: 210.57769790174882   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: 79.81612278822091   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: 195.317631369416   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -395.6632691969507   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: 238.21507772330824   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: 206.67474364502993   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -58.60445996547871   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: 76.23417077145554   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -206.3478810224786   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: 282.0172889329073   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: 245.7807211472064   mem

episode: 277   score: 233.9715103877415   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: 259.7607557954697   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: 250.06372028001766   memory length: 10000   epsilon: 0.009998671593271896
episode: 280   score: 301.08190418651145   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: 276.0831260843536   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: -20.154900394846834   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: 218.89832172936065   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: 251.918653884288   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: 259.21298866465565   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -384.1320541526487   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: 228.74596755041628   m

episode: 363   score: 273.7198984569603   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: 254.89964000767776   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: 239.87265391353293   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: 264.84919654111   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: -149.57587159282463   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: 256.4511335316728   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: 250.19774594027598   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: 252.70826169798866   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: 275.35222647642   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: 246.0936013029388   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: 252.49823695748867   memor

episode: 449   score: -224.36399839486933   memory length: 10000   epsilon: 0.009998671593271896
episode: 450   score: 267.94086717635975   memory length: 10000   epsilon: 0.009998671593271896
episode: 451   score: 237.94165096914043   memory length: 10000   epsilon: 0.009998671593271896
episode: 452   score: 253.9982972080143   memory length: 10000   epsilon: 0.009998671593271896
episode: 453   score: 270.11913554625147   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: 237.4109131890107   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: 273.60980541497565   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: 268.4927035140223   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: 273.86785805396573   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: 244.7166140293254   memory length: 10000   epsilon: 0.009998671593271896
episode: 459   score: 271.1499316923535   m

episode: 36   score: -88.64429476494556   memory length: 10000   epsilon: 0.009998671593271896
episode: 37   score: -90.20764413080593   memory length: 10000   epsilon: 0.009998671593271896
episode: 38   score: -314.9194109004415   memory length: 10000   epsilon: 0.009998671593271896
episode: 39   score: -57.14686147062129   memory length: 10000   epsilon: 0.009998671593271896
episode: 40   score: -136.8855220654492   memory length: 10000   epsilon: 0.009998671593271896
episode: 41   score: -170.4268410284127   memory length: 10000   epsilon: 0.009998671593271896
episode: 42   score: -42.41268283405772   memory length: 10000   epsilon: 0.009998671593271896
episode: 43   score: -66.15592288780269   memory length: 10000   epsilon: 0.009998671593271896
episode: 44   score: -76.27696809200752   memory length: 10000   epsilon: 0.009998671593271896
episode: 45   score: -80.98286681141288   memory length: 10000   epsilon: 0.009998671593271896
episode: 46   score: -146.6353682600352   memory l

episode: 122   score: -29.308249891355423   memory length: 10000   epsilon: 0.009998671593271896
episode: 123   score: -22.780180642351905   memory length: 10000   epsilon: 0.009998671593271896
episode: 124   score: -47.12545410774268   memory length: 10000   epsilon: 0.009998671593271896
episode: 125   score: -32.208174891626136   memory length: 10000   epsilon: 0.009998671593271896
episode: 126   score: -21.827762411189838   memory length: 10000   epsilon: 0.009998671593271896
episode: 127   score: -39.15791039045884   memory length: 10000   epsilon: 0.009998671593271896
episode: 128   score: -22.5425353067725   memory length: 10000   epsilon: 0.009998671593271896
episode: 129   score: -82.94981582794894   memory length: 10000   epsilon: 0.009998671593271896
episode: 130   score: -35.30655808160472   memory length: 10000   epsilon: 0.009998671593271896
episode: 131   score: -25.888722706127   memory length: 10000   epsilon: 0.009998671593271896
episode: 132   score: -1.76411174538703

episode: 208   score: 275.10661037006184   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: 261.7371804491354   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: 293.8514025603596   memory length: 10000   epsilon: 0.009998671593271896
episode: 211   score: -177.31077710762014   memory length: 10000   epsilon: 0.009998671593271896
episode: 212   score: 242.79601844058055   memory length: 10000   epsilon: 0.009998671593271896
episode: 213   score: -169.18640131961268   memory length: 10000   epsilon: 0.009998671593271896
episode: 214   score: 263.39450490588155   memory length: 10000   epsilon: 0.009998671593271896
episode: 215   score: -195.21316938879335   memory length: 10000   epsilon: 0.009998671593271896
episode: 216   score: 224.8099801381278   memory length: 10000   epsilon: 0.009998671593271896
episode: 217   score: 206.44892122680073   memory length: 10000   epsilon: 0.009998671593271896
episode: 218   score: 106.86206803968439

episode: 294   score: 278.6898936206669   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: 224.8797995816608   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: 289.578555206292   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: 248.54431806435932   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: 293.79741087978834   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: 244.07523286139144   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: 257.37146752584636   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: 243.2182379525291   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: 268.9957950769222   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: 257.3293350541743   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: 246.0552221559104   memor

episode: 380   score: 117.51059863531015   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: 248.33946256947453   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: 245.8968056998911   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: -199.8859111020153   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: 234.1074085638433   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: 161.01378469605095   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: 256.43874215859796   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: 239.58847516662144   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: 240.54113195946692   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: 242.27109852148558   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: 273.95290169278746  

episode: 466   score: 262.55967901144254   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: 293.17986592765567   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: 274.40894709905535   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: 241.1428340090803   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: 229.34829761664784   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: 229.65511455084177   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: -27.62905112201028   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: 273.63287943941793   memory length: 10000   epsilon: 0.009998671593271896
episode: 474   score: 271.1716982052647   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: 303.58180988063145   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: 293.3194615145487   

episode: 53   score: -132.1922647696743   memory length: 10000   epsilon: 0.009998671593271896
episode: 54   score: -39.3103441498624   memory length: 10000   epsilon: 0.009998671593271896
episode: 55   score: -55.14696136027575   memory length: 10000   epsilon: 0.009998671593271896
episode: 56   score: -28.07753900300095   memory length: 10000   epsilon: 0.009998671593271896
episode: 57   score: -73.74489893772758   memory length: 10000   epsilon: 0.009998671593271896
episode: 58   score: -50.30564271694357   memory length: 10000   epsilon: 0.009998671593271896
episode: 59   score: -36.30703200568512   memory length: 10000   epsilon: 0.009998671593271896
episode: 60   score: -62.66975914203178   memory length: 10000   epsilon: 0.009998671593271896
episode: 61   score: -21.270282446859678   memory length: 10000   epsilon: 0.009998671593271896
episode: 62   score: -28.4555284392573   memory length: 10000   epsilon: 0.009998671593271896
episode: 63   score: -24.230612602144422   memory l

episode: 139   score: 21.95945458328394   memory length: 10000   epsilon: 0.009998671593271896
episode: 140   score: 36.739535044853   memory length: 10000   epsilon: 0.009998671593271896
episode: 141   score: -50.29824901905011   memory length: 10000   epsilon: 0.009998671593271896
episode: 142   score: -27.811933117881175   memory length: 10000   epsilon: 0.009998671593271896
episode: 143   score: 2.8288964840178696   memory length: 10000   epsilon: 0.009998671593271896
episode: 144   score: -0.2868698580654996   memory length: 10000   epsilon: 0.009998671593271896
episode: 145   score: -165.67441358445657   memory length: 10000   epsilon: 0.009998671593271896
episode: 146   score: -212.0262227438601   memory length: 10000   epsilon: 0.009998671593271896
episode: 147   score: -125.53319041721942   memory length: 10000   epsilon: 0.009998671593271896
episode: 148   score: -61.06210915510722   memory length: 10000   epsilon: 0.009998671593271896
episode: 149   score: -23.07472262880687

episode: 225   score: -132.91487341822307   memory length: 10000   epsilon: 0.009998671593271896
episode: 226   score: 230.65821822231288   memory length: 10000   epsilon: 0.009998671593271896
episode: 227   score: 287.40789038599905   memory length: 10000   epsilon: 0.009998671593271896
episode: 228   score: -87.56520601397665   memory length: 10000   epsilon: 0.009998671593271896
episode: 229   score: 235.33951255214578   memory length: 10000   epsilon: 0.009998671593271896
episode: 230   score: 219.09740908390842   memory length: 10000   epsilon: 0.009998671593271896
episode: 231   score: 230.3265676651893   memory length: 10000   epsilon: 0.009998671593271896
episode: 232   score: 244.5559968322132   memory length: 10000   epsilon: 0.009998671593271896
episode: 233   score: 244.10400643098401   memory length: 10000   epsilon: 0.009998671593271896
episode: 234   score: 264.1194295844838   memory length: 10000   epsilon: 0.009998671593271896
episode: 235   score: 226.49376030406688  

episode: 311   score: 218.85018700434924   memory length: 10000   epsilon: 0.009998671593271896
episode: 312   score: 236.2522544063901   memory length: 10000   epsilon: 0.009998671593271896
episode: 313   score: -266.0747341614276   memory length: 10000   epsilon: 0.009998671593271896
episode: 314   score: 255.2798667391208   memory length: 10000   epsilon: 0.009998671593271896
episode: 315   score: 215.75940242468337   memory length: 10000   epsilon: 0.009998671593271896
episode: 316   score: 220.7219199321118   memory length: 10000   epsilon: 0.009998671593271896
episode: 317   score: 236.39260921143259   memory length: 10000   epsilon: 0.009998671593271896
episode: 318   score: 2.112153813096768   memory length: 10000   epsilon: 0.009998671593271896
episode: 319   score: 269.7161252906111   memory length: 10000   epsilon: 0.009998671593271896
episode: 320   score: 257.7590416098102   memory length: 10000   epsilon: 0.009998671593271896
episode: 321   score: 269.73095223211294   mem

episode: 397   score: 246.45908396010097   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: 42.663558271661884   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: 52.807725777428345   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -392.95219922552246   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: 268.1114120870276   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: 260.3366038900272   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: 285.7432369042769   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: 271.67789389500274   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: 257.16043586976264   memory length: 10000   epsilon: 0.009998671593271896
episode: 406   score: 285.9835667055077   memory length: 10000   epsilon: 0.009998671593271896
episode: 407   score: -189.2072099120214   

episode: 483   score: 245.48365063728284   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: 262.92754824376436   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -252.4191800886896   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: 213.7378728614139   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: 1.5474512918230232   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: 284.3068810151954   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: 211.40975001997964   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: 266.75275970870143   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -124.60575502023829   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -322.6590405367821   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: 252.76401835765333 

episode: 70   score: -201.3192426941642   memory length: 10000   epsilon: 0.009998671593271896
episode: 71   score: -97.89715701641344   memory length: 10000   epsilon: 0.009998671593271896
episode: 72   score: -42.14713671135937   memory length: 10000   epsilon: 0.009998671593271896
episode: 73   score: 5.1660032670932345   memory length: 10000   epsilon: 0.009998671593271896
episode: 74   score: -27.43238206105833   memory length: 10000   epsilon: 0.009998671593271896
episode: 75   score: 13.512770147688602   memory length: 10000   epsilon: 0.009998671593271896
episode: 76   score: -41.82246882749245   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -46.02620295992993   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -53.00134123975821   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -120.24764328186258   memory length: 10000   epsilon: 0.009998671593271896
episode: 80   score: -36.947423352751336   memory

episode: 156   score: 2.0219509851571775   memory length: 10000   epsilon: 0.009998671593271896
episode: 157   score: -34.52789943643154   memory length: 10000   epsilon: 0.009998671593271896
episode: 158   score: -265.3683549270552   memory length: 10000   epsilon: 0.009998671593271896
episode: 159   score: -42.939362802458874   memory length: 10000   epsilon: 0.009998671593271896
episode: 160   score: -28.81100509346729   memory length: 10000   epsilon: 0.009998671593271896
episode: 161   score: 186.0823580396529   memory length: 10000   epsilon: 0.009998671593271896
episode: 162   score: -32.211492322330706   memory length: 10000   epsilon: 0.009998671593271896
episode: 163   score: -34.764750107013455   memory length: 10000   epsilon: 0.009998671593271896
episode: 164   score: -327.2500229698329   memory length: 10000   epsilon: 0.009998671593271896
episode: 165   score: -123.62211074891425   memory length: 10000   epsilon: 0.009998671593271896
episode: 166   score: -314.0137825820

episode: 242   score: -229.1934885633101   memory length: 10000   epsilon: 0.009998671593271896
episode: 243   score: -231.7178549497438   memory length: 10000   epsilon: 0.009998671593271896
episode: 244   score: 242.70556280810885   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: -233.66636792085018   memory length: 10000   epsilon: 0.009998671593271896
episode: 246   score: -23.744984236237826   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: -237.45480748139997   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: -117.0491362382806   memory length: 10000   epsilon: 0.009998671593271896
episode: 249   score: 231.2564384025117   memory length: 10000   epsilon: 0.009998671593271896
episode: 250   score: 269.16407788632216   memory length: 10000   epsilon: 0.009998671593271896
episode: 251   score: 248.05044144111537   memory length: 10000   epsilon: 0.009998671593271896
episode: 252   score: -31.624899717573

episode: 328   score: 253.90445295172267   memory length: 10000   epsilon: 0.009998671593271896
episode: 329   score: 262.48052528846335   memory length: 10000   epsilon: 0.009998671593271896
episode: 330   score: 12.240918959222341   memory length: 10000   epsilon: 0.009998671593271896
episode: 331   score: -31.68449563234205   memory length: 10000   epsilon: 0.009998671593271896
episode: 332   score: -47.40633278585979   memory length: 10000   epsilon: 0.009998671593271896
episode: 333   score: 216.73067539500806   memory length: 10000   epsilon: 0.009998671593271896
episode: 334   score: 245.62297680420016   memory length: 10000   epsilon: 0.009998671593271896
episode: 335   score: 253.09345907300064   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: 24.99213872627172   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: -6.617535358822252   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: 55.191235474052256 

episode: 414   score: 255.29739877586735   memory length: 10000   epsilon: 0.009998671593271896
episode: 415   score: 260.7606547371388   memory length: 10000   epsilon: 0.009998671593271896
episode: 416   score: -330.3016785411812   memory length: 10000   epsilon: 0.009998671593271896
episode: 417   score: 182.8746715203041   memory length: 10000   epsilon: 0.009998671593271896
episode: 418   score: 202.03775088387047   memory length: 10000   epsilon: 0.009998671593271896
episode: 419   score: 256.51202516499814   memory length: 10000   epsilon: 0.009998671593271896
episode: 420   score: -561.7955938138998   memory length: 10000   epsilon: 0.009998671593271896
episode: 421   score: 282.0216744263833   memory length: 10000   epsilon: 0.009998671593271896
episode: 422   score: 287.6023052804437   memory length: 10000   epsilon: 0.009998671593271896
episode: 423   score: 268.373149397616   memory length: 10000   epsilon: 0.009998671593271896
episode: 424   score: -257.65402519885254   me

episode: 0   score: -564.8242303433267   memory length: 106   epsilon: 0.8993769480309576
episode: 1   score: -436.55169590965755   memory length: 213   epsilon: 0.8080700157548294
episode: 2   score: -531.4055707621948   memory length: 297   epsilon: 0.7429336049129575
episode: 3   score: -481.9970974600579   memory length: 379   epsilon: 0.6844158170335664
episode: 4   score: -66.85004529191102   memory length: 474   epsilon: 0.6223596374236124
episode: 5   score: -370.6933006742089   memory length: 546   epsilon: 0.5791040088995179
episode: 6   score: -89.23425706715538   memory length: 636   epsilon: 0.5292373811410898
episode: 7   score: -694.0960659975322   memory length: 731   epsilon: 0.48125127508828036
episode: 8   score: -445.27980669657376   memory length: 801   epsilon: 0.44869999946146477
episode: 9   score: -475.438348466763   memory length: 872   epsilon: 0.4179321037441544
episode: 10   score: -10.175004484310321   memory length: 950   epsilon: 0.3865572425889805
episo

episode: 87   score: -106.6858073670412   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: -39.62287107198615   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -30.002060940302254   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -72.98294480856241   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -75.13398362499778   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -30.22475471690009   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -21.629750937723713   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: 6.642487107675815   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: 20.733229413133927   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -0.2094787581628156   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -45.97948075077643   memory

episode: 173   score: -9.713522460946521   memory length: 10000   epsilon: 0.009998671593271896
episode: 174   score: 15.103544669789159   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: 235.58524545858162   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: 221.03318525423143   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: -21.243326405446   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: 258.50612958594877   memory length: 10000   epsilon: 0.009998671593271896
episode: 179   score: 296.87476599168156   memory length: 10000   epsilon: 0.009998671593271896
episode: 180   score: -12.995694037452651   memory length: 10000   epsilon: 0.009998671593271896
episode: 181   score: -45.707587189077515   memory length: 10000   epsilon: 0.009998671593271896
episode: 182   score: 259.49950315957506   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: -20.99088497868042

episode: 259   score: 262.8942016430445   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: 73.22091382520577   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: -102.21989121070031   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: 244.5164394102916   memory length: 10000   epsilon: 0.009998671593271896
episode: 263   score: -42.99561599363423   memory length: 10000   epsilon: 0.009998671593271896
episode: 264   score: -262.60579661254917   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: -255.75301565165753   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: 280.0326939246078   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: 219.7073061779819   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: 37.18301622881023   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: -91.18670629361138   

episode: 345   score: 73.12076790805082   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: 251.2620157011665   memory length: 10000   epsilon: 0.009998671593271896
episode: 347   score: 244.03353658051415   memory length: 10000   epsilon: 0.009998671593271896
episode: 348   score: 255.3004062815323   memory length: 10000   epsilon: 0.009998671593271896
episode: 349   score: 259.9426930832308   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: 294.6069424879919   memory length: 10000   epsilon: 0.009998671593271896
episode: 351   score: 286.82618875000907   memory length: 10000   epsilon: 0.009998671593271896
episode: 352   score: 265.073386708895   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: 232.19612079215102   memory length: 10000   epsilon: 0.009998671593271896
episode: 354   score: 282.5627815245141   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: 266.8202808521561   memory

episode: 431   score: 267.5477133919667   memory length: 10000   epsilon: 0.009998671593271896
episode: 432   score: 240.63296317855256   memory length: 10000   epsilon: 0.009998671593271896
episode: 433   score: 250.32587384031584   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: 254.89520282551965   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: 268.02604903492994   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: 255.62937598637282   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: 270.9884611614043   memory length: 10000   epsilon: 0.009998671593271896
episode: 438   score: 290.46767318283435   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: 267.64583546700266   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: 279.4784119110326   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: 259.65611716686806   

episode: 18   score: -194.68491588035545   memory length: 2326   epsilon: 0.09757209133261925
episode: 19   score: -392.2792188786185   memory length: 2491   epsilon: 0.08272393237606315
episode: 20   score: -74.68220878656359   memory length: 2693   epsilon: 0.06758647443056524
episode: 21   score: -50.88086508036785   memory length: 3106   epsilon: 0.04471018119326632
episode: 22   score: -37.83900876891491   memory length: 4106   epsilon: 0.0164397290654448
episode: 23   score: 238.76803252479687   memory length: 4674   epsilon: 0.009998671593271896
episode: 24   score: -152.33452283060905   memory length: 5463   epsilon: 0.009998671593271896
episode: 25   score: -121.94205275742492   memory length: 6158   epsilon: 0.009998671593271896
episode: 26   score: -361.06610011183903   memory length: 6424   epsilon: 0.009998671593271896
episode: 27   score: 215.04054330800824   memory length: 7056   epsilon: 0.009998671593271896
episode: 28   score: -83.85699818071042   memory length: 8056 

episode: 105   score: 158.78503087796304   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: 165.88137305466793   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: 132.40320426938885   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: 135.2255083567655   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: 129.42507471534014   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: -86.37642875492766   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: -104.56810164696867   memory length: 10000   epsilon: 0.009998671593271896
episode: 112   score: 181.61793831519034   memory length: 10000   epsilon: 0.009998671593271896
episode: 113   score: -238.8310733390445   memory length: 10000   epsilon: 0.009998671593271896
episode: 114   score: -97.65523830828887   memory length: 10000   epsilon: 0.009998671593271896
episode: 115   score: -23.92890306129564

episode: 191   score: -25.278252657860204   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: 262.1921549128879   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -193.65458126759734   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: 173.64498887106447   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: 113.36951454957384   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -25.118774892820582   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -52.94929096203702   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -65.19735397500574   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -48.195170504547434   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -39.46083118824593   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -256.1194671342

episode: 277   score: 224.09095977002204   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: 226.40336484257142   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: 231.10279065419505   memory length: 10000   epsilon: 0.009998671593271896
episode: 280   score: -508.53538907310445   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: 223.54139863798258   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: 2.98716506671839   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: 198.5003546352363   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: 282.4070054236165   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: 204.52139079643197   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -74.22143572491888   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: 280.92071743236386   

episode: 363   score: 189.11198363186202   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: 258.6794311761369   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: 237.07265517071306   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: 221.42754955755152   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: 302.74972596154623   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: 245.15047499407257   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: 225.08900737002364   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: 289.50883776016667   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: 270.98063197414353   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: 280.03295842374143   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: 260.8387392141302  

episode: 449   score: 260.1085790002114   memory length: 10000   epsilon: 0.009998671593271896
episode: 450   score: 292.215874570729   memory length: 10000   epsilon: 0.009998671593271896
episode: 451   score: 218.23848532465365   memory length: 10000   epsilon: 0.009998671593271896
episode: 452   score: 269.46732916771447   memory length: 10000   epsilon: 0.009998671593271896
episode: 453   score: 226.82849128589854   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: 268.81436967440715   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: 266.4965700094501   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: -16.665520238548922   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: -45.85116650677634   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: 2.9644615766194216   memory length: 10000   epsilon: 0.009998671593271896
episode: 459   score: 224.4727080140854   m

episode: 36   score: -138.48296087153545   memory length: 10000   epsilon: 0.009998671593271896
episode: 37   score: -58.33435589098565   memory length: 10000   epsilon: 0.009998671593271896
episode: 38   score: -105.80758390439078   memory length: 10000   epsilon: 0.009998671593271896
episode: 39   score: -81.99055178029664   memory length: 10000   epsilon: 0.009998671593271896
episode: 40   score: -108.29545429314608   memory length: 10000   epsilon: 0.009998671593271896
episode: 41   score: -110.61672060499953   memory length: 10000   epsilon: 0.009998671593271896
episode: 42   score: -89.52283625231908   memory length: 10000   epsilon: 0.009998671593271896
episode: 43   score: -109.25140045918705   memory length: 10000   epsilon: 0.009998671593271896
episode: 44   score: 6.595585616383849   memory length: 10000   epsilon: 0.009998671593271896
episode: 45   score: -75.33931624377908   memory length: 10000   epsilon: 0.009998671593271896
episode: 46   score: -86.75118312320748   memo

episode: 122   score: -47.35062704120439   memory length: 10000   epsilon: 0.009998671593271896
episode: 123   score: 120.91044403958972   memory length: 10000   epsilon: 0.009998671593271896
episode: 124   score: -6.074451865969138   memory length: 10000   epsilon: 0.009998671593271896
episode: 125   score: -93.01815583984853   memory length: 10000   epsilon: 0.009998671593271896
episode: 126   score: -133.71433052651525   memory length: 10000   epsilon: 0.009998671593271896
episode: 127   score: -43.482337720830294   memory length: 10000   epsilon: 0.009998671593271896
episode: 128   score: 214.19878063349438   memory length: 10000   epsilon: 0.009998671593271896
episode: 129   score: 215.77519655845097   memory length: 10000   epsilon: 0.009998671593271896
episode: 130   score: 174.9616692296309   memory length: 10000   epsilon: 0.009998671593271896
episode: 131   score: -149.52138056977424   memory length: 10000   epsilon: 0.009998671593271896
episode: 132   score: 12.0033037317584

episode: 208   score: -106.26103538479035   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: 265.53914410097866   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: 263.5140357709577   memory length: 10000   epsilon: 0.009998671593271896
episode: 211   score: 286.6644878029035   memory length: 10000   epsilon: 0.009998671593271896
episode: 212   score: 225.37170894310648   memory length: 10000   epsilon: 0.009998671593271896
episode: 213   score: 259.3460858381171   memory length: 10000   epsilon: 0.009998671593271896
episode: 214   score: 264.33185050574207   memory length: 10000   epsilon: 0.009998671593271896
episode: 215   score: 239.63598393247867   memory length: 10000   epsilon: 0.009998671593271896
episode: 216   score: 243.46789839722814   memory length: 10000   epsilon: 0.009998671593271896
episode: 217   score: 17.202138573204195   memory length: 10000   epsilon: 0.009998671593271896
episode: 218   score: -53.67504942784044  

episode: 294   score: 229.8418930872552   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: 254.3356057613938   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: 252.56197508305513   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: 243.95750308306793   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: 280.3033041255112   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: 223.10812702306418   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: 271.3546749719527   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: 259.6386942749969   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -83.25454721643113   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: 268.7773317562667   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: 248.543981995548   memor

episode: 380   score: -216.26602184145008   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: 106.43952175419213   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: 263.87453963762084   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: -248.1994884582788   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: -58.75971764902586   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: 173.48561578728618   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: 228.5324371761276   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -37.45951746535651   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: 213.60190504843067   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: 208.8337811899799   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -133.1077638598957 

episode: 466   score: 264.93143406182026   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: 256.3247742001556   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: 278.06908521875175   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: 243.8949160218056   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: 286.81976955864815   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: 261.53926567453885   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: 240.7586904780905   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: 264.57494955727486   memory length: 10000   epsilon: 0.009998671593271896
episode: 474   score: 275.78011630666776   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: 219.84336292911803   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: 236.1754000424481   m

episode: 53   score: -115.05953704011128   memory length: 10000   epsilon: 0.009998671593271896
episode: 54   score: -151.20825105789942   memory length: 10000   epsilon: 0.009998671593271896
episode: 55   score: -76.7680408310779   memory length: 10000   epsilon: 0.009998671593271896
episode: 56   score: -327.61943347992883   memory length: 10000   epsilon: 0.009998671593271896
episode: 57   score: -81.98269044896206   memory length: 10000   epsilon: 0.009998671593271896
episode: 58   score: -26.851634501896285   memory length: 10000   epsilon: 0.009998671593271896
episode: 59   score: -300.94044534033947   memory length: 10000   epsilon: 0.009998671593271896
episode: 60   score: -58.311027043847616   memory length: 10000   epsilon: 0.009998671593271896
episode: 61   score: -30.08983805104587   memory length: 10000   epsilon: 0.009998671593271896
episode: 62   score: -59.159975433414864   memory length: 10000   epsilon: 0.009998671593271896
episode: 63   score: -248.54320555659123   m

episode: 139   score: 259.7418968977445   memory length: 10000   epsilon: 0.009998671593271896
episode: 140   score: -143.46516671020788   memory length: 10000   epsilon: 0.009998671593271896
episode: 141   score: 130.16989909685213   memory length: 10000   epsilon: 0.009998671593271896
episode: 142   score: 233.8368660927246   memory length: 10000   epsilon: 0.009998671593271896
episode: 143   score: 21.263903023580426   memory length: 10000   epsilon: 0.009998671593271896
episode: 144   score: -19.759751604810745   memory length: 10000   epsilon: 0.009998671593271896
episode: 145   score: 253.4791540496362   memory length: 10000   epsilon: 0.009998671593271896
episode: 146   score: 254.53991883771909   memory length: 10000   epsilon: 0.009998671593271896
episode: 147   score: -44.64491936525471   memory length: 10000   epsilon: 0.009998671593271896
episode: 148   score: 238.29188665953595   memory length: 10000   epsilon: 0.009998671593271896
episode: 149   score: 236.3132825149073  

episode: 225   score: -28.96551840979278   memory length: 10000   epsilon: 0.009998671593271896
episode: 226   score: 268.4580395199986   memory length: 10000   epsilon: 0.009998671593271896
episode: 227   score: 245.18441030022927   memory length: 10000   epsilon: 0.009998671593271896
episode: 228   score: 3.518238848612029   memory length: 10000   epsilon: 0.009998671593271896
episode: 229   score: -197.95646375805939   memory length: 10000   epsilon: 0.009998671593271896
episode: 230   score: 50.102163883608284   memory length: 10000   epsilon: 0.009998671593271896
episode: 231   score: -52.238589448196144   memory length: 10000   epsilon: 0.009998671593271896
episode: 232   score: 58.85003246928204   memory length: 10000   epsilon: 0.009998671593271896
episode: 233   score: 260.86306090681677   memory length: 10000   epsilon: 0.009998671593271896
episode: 234   score: 206.00070904786455   memory length: 10000   epsilon: 0.009998671593271896
episode: 235   score: 249.1663116319424  

episode: 311   score: 303.1294234307597   memory length: 10000   epsilon: 0.009998671593271896
episode: 312   score: -214.7313790905024   memory length: 10000   epsilon: 0.009998671593271896
episode: 313   score: 293.9883849969139   memory length: 10000   epsilon: 0.009998671593271896
episode: 314   score: 297.15581852067413   memory length: 10000   epsilon: 0.009998671593271896
episode: 315   score: 282.13808561885344   memory length: 10000   epsilon: 0.009998671593271896
episode: 316   score: 241.61420758211628   memory length: 10000   epsilon: 0.009998671593271896
episode: 317   score: 259.37494315528147   memory length: 10000   epsilon: 0.009998671593271896
episode: 318   score: 231.85518832719612   memory length: 10000   epsilon: 0.009998671593271896
episode: 319   score: 260.9654531602227   memory length: 10000   epsilon: 0.009998671593271896
episode: 320   score: 265.16057302993323   memory length: 10000   epsilon: 0.009998671593271896
episode: 321   score: 233.48217132488514   

episode: 397   score: 258.2938970633619   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: 286.55676608212593   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: 267.09669764604337   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: 123.02631348584974   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: 269.790887060612   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: 227.7263446278265   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -210.00319189169215   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: 263.7316630485154   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: 239.48400607072492   memory length: 10000   epsilon: 0.009998671593271896
episode: 406   score: 250.32350112412615   memory length: 10000   epsilon: 0.009998671593271896
episode: 407   score: 252.18987018479592   m

episode: 483   score: 265.2435282624436   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: -225.9559292902277   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: 257.3477351644426   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: 271.77955216285795   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: 226.35547824862155   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: 260.15874758027974   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: 266.07388550098347   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: 262.3549657986903   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: 251.21182177252703   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: 285.8859307885723   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: 278.695523643441   mem

episode: 70   score: 181.41772901335466   memory length: 10000   epsilon: 0.009998671593271896
episode: 71   score: 202.37377015204692   memory length: 10000   epsilon: 0.009998671593271896
episode: 72   score: -10.942063486305857   memory length: 10000   epsilon: 0.009998671593271896
episode: 73   score: -77.82633253929741   memory length: 10000   epsilon: 0.009998671593271896
episode: 74   score: 175.67910290132522   memory length: 10000   epsilon: 0.009998671593271896
episode: 75   score: 167.69617932070497   memory length: 10000   epsilon: 0.009998671593271896
episode: 76   score: -14.300702792169888   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -15.49948559087765   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -34.46877422491573   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -10.7189737666951   memory length: 10000   epsilon: 0.009998671593271896
episode: 80   score: -26.537098578551255   memory

episode: 156   score: 187.50901108251165   memory length: 10000   epsilon: 0.009998671593271896
episode: 157   score: -52.830453956083126   memory length: 10000   epsilon: 0.009998671593271896
episode: 158   score: -45.43999864507567   memory length: 10000   epsilon: 0.009998671593271896
episode: 159   score: -90.2206338042214   memory length: 10000   epsilon: 0.009998671593271896
episode: 160   score: -463.9394544853245   memory length: 10000   epsilon: 0.009998671593271896
episode: 161   score: -141.47606021440572   memory length: 10000   epsilon: 0.009998671593271896
episode: 162   score: -243.19015875568996   memory length: 10000   epsilon: 0.009998671593271896
episode: 163   score: -216.96556409679295   memory length: 10000   epsilon: 0.009998671593271896
episode: 164   score: -210.43295633594488   memory length: 10000   epsilon: 0.009998671593271896
episode: 165   score: 199.87718540505617   memory length: 10000   epsilon: 0.009998671593271896
episode: 166   score: -142.316499546

episode: 242   score: 240.2754289100553   memory length: 10000   epsilon: 0.009998671593271896
episode: 243   score: -514.8125643027089   memory length: 10000   epsilon: 0.009998671593271896
episode: 244   score: 269.6576451631339   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: -434.25131019766087   memory length: 10000   epsilon: 0.009998671593271896
episode: 246   score: 267.54130601230514   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: 228.08211312650053   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: 248.64529148984013   memory length: 10000   epsilon: 0.009998671593271896
episode: 249   score: -209.79972663603962   memory length: 10000   epsilon: 0.009998671593271896
episode: 250   score: 270.9833261611916   memory length: 10000   epsilon: 0.009998671593271896
episode: 251   score: 258.65702531256744   memory length: 10000   epsilon: 0.009998671593271896
episode: 252   score: 267.353728623813   

episode: 328   score: -67.41238703861677   memory length: 10000   epsilon: 0.009998671593271896
episode: 329   score: 280.43386377681674   memory length: 10000   epsilon: 0.009998671593271896
episode: 330   score: 269.0647398938415   memory length: 10000   epsilon: 0.009998671593271896
episode: 331   score: 242.35607283735368   memory length: 10000   epsilon: 0.009998671593271896
episode: 332   score: -9.822540017804556   memory length: 10000   epsilon: 0.009998671593271896
episode: 333   score: 238.53095059497778   memory length: 10000   epsilon: 0.009998671593271896
episode: 334   score: 257.6988722675485   memory length: 10000   epsilon: 0.009998671593271896
episode: 335   score: 269.46566541614015   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: 251.7518944698003   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: 235.1435375586184   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: 280.1548336391397   me

episode: 414   score: 228.7459007224764   memory length: 10000   epsilon: 0.009998671593271896
episode: 415   score: 227.35112848125348   memory length: 10000   epsilon: 0.009998671593271896
episode: 416   score: 157.6369406501322   memory length: 10000   epsilon: 0.009998671593271896
episode: 417   score: 232.47882370981668   memory length: 10000   epsilon: 0.009998671593271896
episode: 418   score: 269.35624301046903   memory length: 10000   epsilon: 0.009998671593271896
episode: 419   score: 266.11606533644317   memory length: 10000   epsilon: 0.009998671593271896
episode: 420   score: 260.1706635407478   memory length: 10000   epsilon: 0.009998671593271896
episode: 421   score: 277.0904228321508   memory length: 10000   epsilon: 0.009998671593271896
episode: 422   score: 13.406986179884811   memory length: 10000   epsilon: 0.009998671593271896
episode: 423   score: 255.10871807399016   memory length: 10000   epsilon: 0.009998671593271896
episode: 424   score: 267.06012631380446   m

In [83]:
%%time

seeds = np.arange(5)

result_dict = {}

env_name = 'Acrobot-v1'

# Train one fresh DQN agent per seed on `env_name` and collect the score of
# every episode in result_dict[seed].
# `lr` and `num_episodes` are not defined in this cell; they must already
# exist in the kernel from an earlier cell.
for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source this loop draws from. DQN.get_action explores
    # with Python's `random` module, and the environment keeps its own RNG,
    # so seeding NumPy and the action space alone does not make a run
    # repeatable. `random.seed` / `env.seed` expect a built-in int, not a
    # NumPy integer, hence the conversion.
    py_seed = int(seed)
    np.random.seed(seed)
    random.seed(py_seed)
    env.seed(py_seed)
    env.action_space.np_random.seed(seed)
    # NOTE(review): the Keras weight initialisers used by DQN.init_model are
    # not seeded here, so runs may still differ in their initial weights.

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN(o_space, a_space, lr, units=128)

    scores, episodes = [], []
    # Register the list up front so the scores gathered so far are still
    # reachable through result_dict if the run is interrupted.
    result_dict[seed] = scores

    for num_episode in range(num_episodes):
        terminal = False
        score = 0
        s = env.reset()
        s = np.reshape(s, [1, o_space])

        while not terminal:
            a = agent.get_action(s)
            s_next, r, terminal, _ = env.step(a)
            s_next = np.reshape(s_next, [1, o_space])

            # save the sample <s, a, r, s', terminal> to the replay memory
            # NOTE(review): episodes that stop at the step cap (score -500.0
            # in the logs) are stored with terminal=True as well - confirm
            # that this is the intended treatment.
            agent.add_experience(s, a, r, s_next, terminal)
            # every time step do the training
            agent.train_model()
            score += r
            s = s_next

        # End of episode: sync the target network and log the result.
        agent.update_target_model()

        scores.append(score)
        episodes.append(num_episode)
        print("episode:", num_episode, "  score:", score, "  memory length:",
              len(agent.memory), "  epsilon:", agent.epsilon)

print(result_dict)

episode: 0   score: -358.0   memory length: 359   epsilon: 0.698248920785887
episode: 1   score: -360.0   memory length: 720   epsilon: 0.4865769398194536
episode: 2   score: -312.0   memory length: 1033   epsilon: 0.35575362775079855
episode: 3   score: -500.0   memory length: 1533   epsilon: 0.21572150942606796
episode: 4   score: -315.0   memory length: 1849   epsilon: 0.1572489411773005
episode: 5   score: -334.0   memory length: 2184   epsilon: 0.1124673042411007
episode: 6   score: -207.0   memory length: 2392   epsilon: 0.09133723000062824
episode: 7   score: -232.0   memory length: 2625   epsilon: 0.07234467887111937
episode: 8   score: -500.0   memory length: 3125   epsilon: 0.043868290040190716
episode: 9   score: -284.0   memory length: 3410   epsilon: 0.03298487560678867
episode: 10   score: -259.0   memory length: 3670   epsilon: 0.02542973235819944
episode: 11   score: -487.0   memory length: 4158   epsilon: 0.015606303325022058
episode: 12   score: -464.0   memory length

episode: 100   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 104   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 105   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
epi

episode: 199   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: -108.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 207   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 208   score: -109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896

episode: 298   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -73.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 306   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 307   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 308   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 309   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 397   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -108.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 406   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 407   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 408   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 496   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 0   score: -500.0   memory length: 500   epsilon: 0.6063789448611848
episode: 1   score: -500.0   memory length: 1000   epsilon: 0.3676954247709635
episode: 2   score: -500.0   memory length: 1500   epsilon: 0.22296276370290227
episode: 3   score: -500.0   memory length: 2000   epsilon: 0.1351999253974994
episode: 4   score: -244.0   memory length: 2245   epsilon: 0.10580862417961168
episode: 5   score: -249.0   memory length: 2495   epsilon: 0.08239353265934016
episode: 6   score: -165.0   memory length: 2661   epsilon: 0.06978533527277099
episode: 7   score: -417.0   memory length: 3079   epsilon: 0.04593442113789762
episode: 8   score: -209.0   mem

episode: 96   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 104   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 105   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode:

episode: 195   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 294   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -123.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 393   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: -120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 492   score: -127.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: -65.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 0   score: -500.0   memory length: 500   epsilon: 0.6063789448611848
episode: 1   score: -500.0   memory length: 1000   epsilon: 0.3676954247709635
episode: 2   score: -185.0   memory length: 1186   epsilon: 0.30525939279728115
episode: 3   score: -468.0   memory length: 1655   epsilon: 0.1909338870261128
episode: 4   score

episode: 92   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -127.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episod

episode: 191   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -237.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -166.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -76.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 290   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -133.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -105.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 389   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 488   score: -61.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -68.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 88   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -78.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -73.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100

episode: 187   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: -78.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -203.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -99.0   memory length: 10000   epsilon: 0.009998671593271

episode: 286   score: -65.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: -73.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 385   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -120.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 484   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -69.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -78.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -78.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 84   score: -119.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 85   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 86   score: -131.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 87   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96

episode: 183   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: -69.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: -61.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: -65.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
epi

episode: 282   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -122.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896


episode: 381   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -165.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
epi

episode: 480   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 481   score: -109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 482   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
ep