### importing modules

In [3]:
import matplotlib.pyplot as plt

%matplotlib inline
# Plot defaults for the notebook: ggplot style sheet, 10x5 figures, font size 15.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (10, 5)
plt.rcParams['font.size'] = 15

In [4]:
import numpy as np
import gym
from collections import deque
import random
import pickle
import time
import copy

In [5]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

Using TensorFlow backend.


## class for DQN

In [6]:
class DQN:
    """Q-learning agent backed by a small dense network, a replay buffer and a target network.

    Transitions are stored as ``[s, a, r, s_next, terminal]`` lists in a bounded
    deque. ``train_model`` samples a mini-batch from that deque and fits the
    online network towards one-step bootstrapped targets computed with the
    target network.

    Parameters
    ----------
    o_space : int
        Number of input features of the network (length of one observation).
    a_space : int
        Number of discrete actions (width of the output layer).
    lr : float
        Learning rate passed to the Adam optimizer.
    units : int
        Width of each of the two hidden layers.
    mem_len : int
        Maximum number of transitions kept in the replay buffer.
    discount : float
        Discount factor applied to the bootstrapped next-state value.
    epsilon, epsilon_decay, epsilon_min : float
        Initial exploration rate, its multiplicative decay (applied once per
        stored transition) and the threshold below which decay stops.
    batch_size : int
        Maximum number of transitions sampled per training step.
    train_start : int
        Minimum replay-buffer length before ``train_model`` does anything.
    """

    def __init__(self, o_space, a_space, lr, units=32, mem_len=10000,
                 discount=0.99, epsilon=1.0, epsilon_decay=0.999,
                 epsilon_min=0.01, batch_size=50, train_start=1000):
        self.a_space = a_space
        self.state_size = o_space

        self.lr = lr
        self.units = units

        # hyperparameters
        self.discount = discount
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.train_start = train_start
        self.memory = deque(maxlen=mem_len)

        # create main model and target model
        self.model = self.init_model()
        self.target_model = self.init_model()

        # start with both networks holding identical weights
        self.update_target_model()

    def init_model(self):
        """Build and compile the Q-network (two ReLU hidden layers, linear output, MSE loss)."""
        model = Sequential()
        model.add(Dense(self.units, input_dim=self.state_size, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.units, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.a_space, activation='linear',
                        kernel_initializer='he_uniform'))
        # NOTE(review): newer Keras releases spell this argument `learning_rate`;
        # confirm against the installed version before upgrading.
        model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, s):
        """Return an epsilon-greedy action index for state ``s``.

        ``s`` is handed to ``model.predict`` unchanged, so it must already have
        the batch dimension the network expects (the training loop reshapes
        observations to ``[1, o_space]``).
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.a_space)
        q_value = self.model.predict(s)
        return np.argmax(q_value[0])

    def add_experience(self, s, a, r, s_next, terminal):
        """Store one transition and decay epsilon while it is above ``epsilon_min``."""
        self.memory.append([s, a, r, s_next, terminal])
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def train_model(self):
        """Run one gradient step on a random mini-batch from the replay buffer.

        Does nothing until the buffer holds at least ``train_start``
        transitions. The mini-batch is capped at the buffer length, and that
        capped size is used for every loop and for ``fit``, so a buffer smaller
        than ``batch_size`` no longer raises ``IndexError``.
        """
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        if batch_size == 0:
            return
        mini_batch = random.sample(self.memory, batch_size)

        states = np.zeros((batch_size, self.state_size))
        next_states = np.zeros((batch_size, self.state_size))
        for i, (s, _, _, s_next, _) in enumerate(mini_batch):
            states[i] = s
            next_states[i] = s_next

        # Start from the online network's own predictions so that only the
        # entry of the action actually taken contributes to the loss.
        target = self.model.predict(states)
        target_val = self.target_model.predict(next_states)

        for i, (_, a, r, _, terminal) in enumerate(mini_batch):
            if terminal:
                target[i][a] = r
            else:
                target[i][a] = r + self.discount * np.amax(target_val[i])

        self.model.fit(states, target, batch_size=batch_size, epochs=1, verbose=0)

In [26]:
# Per-environment reward handed to DQN_HER.modify_her_list as `terminal_reward`
# (looked up by `env_name` in the training cell).
terminal_r_dict = dict([
    ('CartPole-v1', -1.0),
    ('LunarLander-v2', 100.0),
    ('MountainCar-v0', 1.0),
    ('Acrobot-v1', -1.0),
])

# Same keys, one value each.
# NOTE(review): presumably the replay-buffer capacity per environment; nothing
# in the visible cells reads this mapping, so confirm before relying on it.
memory_env = {env_id: 10000 for env_id in terminal_r_dict}

In [16]:
class DQN_HER:
    """DQN agent with an extra per-episode buffer used for hindsight relabelling.

    Behaves like a plain replay-buffer DQN (online network, target network,
    epsilon-greedy exploration). In addition, the caller records every
    transition of the current episode with ``add_her_experience`` and, once the
    episode is over, calls ``modify_her_list`` to push relabelled copies of
    those transitions into the main replay buffer.

    Parameters
    ----------
    o_space : int
        Number of input features of the network (length of one observation).
    a_space : int
        Number of discrete actions (width of the output layer).
    lr : float
        Learning rate passed to the Adam optimizer.
    units : int
        Width of each of the two hidden layers.
    mem_len : int
        Maximum number of transitions kept in the replay buffer.
    discount : float
        Discount factor applied to the bootstrapped next-state value.
    epsilon, epsilon_decay, epsilon_min : float
        Initial exploration rate, its multiplicative decay (applied once per
        stored transition) and the threshold below which decay stops.
    batch_size : int
        Maximum number of transitions sampled per training step.
    train_start : int
        Minimum replay-buffer length before ``train_model`` does anything.
    her_mem_len : int
        Maximum number of transitions kept in the per-episode HER buffer.
    """

    def __init__(self, o_space, a_space, lr, units=32, mem_len=10000,
                 discount=0.99, epsilon=1.0, epsilon_decay=0.999,
                 epsilon_min=0.01, batch_size=50, train_start=1000,
                 her_mem_len=1000):
        self.a_space = a_space
        self.state_size = o_space

        self.lr = lr
        self.units = units

        # hyperparameters
        self.discount = discount
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.train_start = train_start
        self.memory = deque(maxlen=mem_len)
        self.her_memory = deque(maxlen=her_mem_len)

        # create main model and target model
        self.model = self.init_model()
        self.target_model = self.init_model()

        # start with both networks holding identical weights
        self.update_target_model()

    def init_model(self):
        """Build and compile the Q-network (two ReLU hidden layers, linear output, MSE loss)."""
        model = Sequential()
        model.add(Dense(self.units, input_dim=self.state_size, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.units, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.a_space, activation='linear',
                        kernel_initializer='he_uniform'))
        # NOTE(review): newer Keras releases spell this argument `learning_rate`;
        # confirm against the installed version before upgrading.
        model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, s):
        """Return an epsilon-greedy action index for state ``s``.

        ``s`` is handed to ``model.predict`` unchanged, so it must already have
        the batch dimension the network expects (the training loop reshapes
        observations to ``[1, o_space]``).
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.a_space)
        q_value = self.model.predict(s)
        return np.argmax(q_value[0])

    def add_experience(self, s, a, r, s_next, terminal):
        """Store one transition in the replay buffer and decay epsilon while above ``epsilon_min``."""
        self.memory.append([s, a, r, s_next, terminal])
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def add_her_experience(self, s, a, r, s_next, terminal):
        """Record one transition of the current episode for later relabelling."""
        self.her_memory.append([s, a, r, s_next, terminal])

    def modify_her_list(self, terminal_reward):
        """Relabel the recorded episode and append it to the replay buffer.

        The next-state of the last recorded transition is taken as the
        hindsight goal. For every recorded transition a new one is built with:

        * state and next-state both replaced by the goal,
        * the original action,
        * ``terminal_reward`` and ``terminal=True`` if the transition's original
          next-state equals the goal, otherwise the original reward and
          ``terminal=False``.

        The HER buffer is cleared afterwards. An empty buffer is a no-op.

        Rewards and next-states are read from ``her_memory`` itself, so the
        method does not depend on the tail of ``self.memory`` lining up with
        the recorded episode.

        NOTE(review): overwriting the *state* with the goal means every
        relabelled sample is a goal -> goal transition; this mirrors the
        previous implementation but looks unusual for HER - confirm intent.
        """
        if not self.her_memory:
            return

        her_goal = self.her_memory[-1][3]

        relabelled = []
        for _, action, reward, s_next, _ in self.her_memory:
            reached_goal = bool(np.sum(np.abs(s_next - her_goal)) == 0)
            relabelled.append([
                her_goal,
                action,
                terminal_reward if reached_goal else reward,
                her_goal,
                reached_goal,
            ])

        self.memory.extend(relabelled)
        self.reset_her()

    def reset_her(self):
        """Drop every transition recorded for the current episode."""
        self.her_memory.clear()

    def train_model(self):
        """Run one gradient step on a random mini-batch from the replay buffer.

        Does nothing until the buffer holds at least ``train_start``
        transitions. The mini-batch is capped at the buffer length, and that
        capped size is used for every loop and for ``fit``, so a buffer smaller
        than ``batch_size`` no longer raises ``IndexError``.
        """
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        if batch_size == 0:
            return
        mini_batch = random.sample(self.memory, batch_size)

        states = np.zeros((batch_size, self.state_size))
        next_states = np.zeros((batch_size, self.state_size))
        for i, (s, _, _, s_next, _) in enumerate(mini_batch):
            states[i] = s
            next_states[i] = s_next

        # Start from the online network's own predictions so that only the
        # entry of the action actually taken contributes to the loss.
        target = self.model.predict(states)
        target_val = self.target_model.predict(next_states)

        for i, (_, a, r, _, terminal) in enumerate(mini_batch):
            if terminal:
                target[i][a] = r
            else:
                target[i][a] = r + self.discount * np.amax(target_val[i])

        self.model.fit(states, target, batch_size=batch_size, epochs=1, verbose=0)

## main

#### settings

In [27]:
# Experiment-wide settings shared by the training cells.
num_episodes = 500               # episodes run per seed
num_seeds = 5                    # number of independent runs
seeds = np.arange(num_seeds)     # seed values 0 .. num_seeds - 1
lr = 1e-3                        # learning rate handed to the agent

### HER

In [22]:
%%time

# Train one DQN_HER agent per seed on `env_name` and collect the per-episode
# scores in `result_dict` (seed -> list of episode scores).
result_dict = {}

env_name = 'CartPole-v1'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source the run draws from. The agent explores with
    # `random.randrange` and samples replay batches with `random.sample`, so
    # seeding NumPy alone does not make a run repeatable.
    # `seeds` holds NumPy integers; `random.seed` and the environment want a
    # plain Python int.
    seed_value = int(seed)
    np.random.seed(seed_value)
    random.seed(seed_value)
    env.seed(seed_value)
    env.action_space.np_random.seed(seed)
    # NOTE(review): the network's initial weights come from the Keras backend,
    # which is not seeded here - confirm whether that matters for comparisons.

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN_HER(o_space, a_space, lr, units=32)

    # Register the score list up front so partial results survive an
    # interrupted run.
    scores, episodes = [], []
    result_dict[seed] = scores

    for num_episode in range(num_episodes):
        terminal = False
        score = 0
        s = env.reset()
        s = np.reshape(s, [1, o_space])

        while not terminal:
            a = agent.get_action(s)
            s_next, r, terminal, _ = env.step(a)
            s_next = np.reshape(s_next, [1, o_space])

            # Store the transition, with the environment's reward unchanged,
            # in both the replay buffer and the per-episode HER buffer.
            agent.add_experience(s, a, r, s_next, terminal)
            agent.add_her_experience(s, a, r, s_next, terminal)

            # One training step per environment step.
            agent.train_model()

            score += r
            s = s_next

        # End of episode: sync the target network and log progress.
        agent.update_target_model()
        scores.append(score)
        episodes.append(num_episode)
        print("episode:", num_episode, "  score:", score, "  memory length:",
              len(agent.memory), "  epsilon:", agent.epsilon)

        # Push the relabelled copy of this episode into the replay buffer.
        agent.modify_her_list(terminal_reward=terminal_r_dict[env_name])

    env.close()

print(result_dict)

episode: 0   score: 32.0   memory length: 32   epsilon: 0.9684910757595269
episode: 1   score: 17.0   memory length: 81   epsilon: 0.9521577859830145
episode: 2   score: 22.0   memory length: 120   epsilon: 0.9314288037569908
episode: 3   score: 21.0   memory length: 163   epsilon: 0.9120631656822724
episode: 4   score: 16.0   memory length: 200   epsilon: 0.8975790935118436
episode: 5   score: 24.0   memory length: 240   epsilon: 0.8762831198969288
episode: 6   score: 11.0   memory length: 275   epsilon: 0.8666920568517111
episode: 7   score: 18.0   memory length: 304   epsilon: 0.8512234991370281
episode: 8   score: 11.0   memory length: 333   epsilon: 0.8419067177676068
episode: 9   score: 16.0   memory length: 360   epsilon: 0.8285367691502946
episode: 10   score: 36.0   memory length: 412   epsilon: 0.7992255563671304
episode: 11   score: 12.0   memory length: 460   epsilon: 0.7896874231428072
episode: 12   score: 13.0   memory length: 485   epsilon: 0.7794828569739644
episode: 13

episode: 107   score: 23.0   memory length: 3267   epsilon: 0.19285376940251647
episode: 108   score: 19.0   memory length: 3309   epsilon: 0.18922233964839782
episode: 109   score: 20.0   memory length: 3348   epsilon: 0.18547363030035172
episode: 110   score: 14.0   memory length: 3382   epsilon: 0.182893810249391
episode: 111   score: 29.0   memory length: 3425   epsilon: 0.1776634806674152
episode: 112   score: 19.0   memory length: 3473   epsilon: 0.1743180835205783
episode: 113   score: 24.0   memory length: 3516   epsilon: 0.17018221033225317
episode: 114   score: 27.0   memory length: 3567   epsilon: 0.16564652979915298
episode: 115   score: 17.0   memory length: 3611   epsilon: 0.16285295447419557
episode: 116   score: 16.0   memory length: 3644   epsilon: 0.16026675865517317
episode: 117   score: 23.0   memory length: 3683   epsilon: 0.15662088827739984
episode: 118   score: 53.0   memory length: 3759   epsilon: 0.14853218119844558
episode: 119   score: 21.0   memory length: 

episode: 209   score: 106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: 149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 211   score: 164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 212   score: 258.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 213   score: 129.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 214   score: 178.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 215   score: 85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 216   score: 164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 217   score: 142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 218   score: 96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 219   score: 160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 220   score: 208.0   memory length: 10000   epsilon: 0.009998671593271896
episod

episode: 308   score: 121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 309   score: 30.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 310   score: 215.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 311   score: 117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 312   score: 136.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 313   score: 215.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 314   score: 166.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 315   score: 165.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 316   score: 126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 317   score: 163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 318   score: 120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 319   score: 144.0   memory length: 10000   epsilon: 0.009998671593271896
episo

episode: 408   score: 143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 409   score: 158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 410   score: 113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 411   score: 157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 412   score: 121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 413   score: 120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 414   score: 133.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 415   score: 135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 416   score: 138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 417   score: 160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 418   score: 109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 419   score: 106.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 24   score: 14.0   memory length: 750   epsilon: 0.6823646221455009
episode: 25   score: 15.0   memory length: 779   epsilon: 0.6722004915521225
episode: 26   score: 11.0   memory length: 805   epsilon: 0.6648431464805192
episode: 27   score: 11.0   memory length: 827   epsilon: 0.6575663287622622
episode: 28   score: 13.0   memory length: 851   epsilon: 0.649069069067341
episode: 29   score: 12.0   memory length: 876   epsilon: 0.6413229363226717
episode: 30   score: 10.0   memory length: 898   epsilon: 0.6349384896673435
episode: 31   score: 15.0   memory length: 923   epsilon: 0.6254807928315229
episode: 32   score: 14.0   memory length: 952   epsilon: 0.6167807534338766
episode: 33   score: 10.0   memory length: 976   epsilon: 0.6106406271491208
episode: 34   score: 16.0   memory length: 1002   epsilon: 0.6009433131399452
episode: 35   score: 12.0   memory length: 1030   epsilon: 0.5937715237303958
episode: 36   score: 9.0   memory length: 1051   epsilon: 0.588448905989608

episode: 130   score: 36.0   memory length: 4810   epsilon: 0.08854783998018396
episode: 131   score: 37.0   memory length: 4883   epsilon: 0.08532986055537657
episode: 132   score: 48.0   memory length: 4968   epsilon: 0.08132881992457128
episode: 133   score: 48.0   memory length: 5064   epsilon: 0.07751538450049153
episode: 134   score: 84.0   memory length: 5196   epsilon: 0.07126707206103876
episode: 135   score: 32.0   memory length: 5312   epsilon: 0.06902152328662714
episode: 136   score: 72.0   memory length: 5416   epsilon: 0.06422434623348014
episode: 137   score: 44.0   memory length: 5532   epsilon: 0.06145838929272435
episode: 138   score: 39.0   memory length: 5615   epsilon: 0.059106496128382716
episode: 139   score: 38.0   memory length: 5692   epsilon: 0.0569015068533215
episode: 140   score: 105.0   memory length: 5835   epsilon: 0.05122713070280569
episode: 141   score: 79.0   memory length: 6019   epsilon: 0.047334043005085494
episode: 142   score: 36.0   memory le

episode: 231   score: 224.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 232   score: 153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 233   score: 217.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 234   score: 246.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 235   score: 288.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 236   score: 117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 237   score: 310.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 238   score: 148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 239   score: 175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 240   score: 275.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 241   score: 199.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 242   score: 293.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 330   score: 97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 331   score: 105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 332   score: 120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 333   score: 92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 334   score: 152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 335   score: 117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: 122.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: 123.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: 164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 339   score: 110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 340   score: 141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 341   score: 127.0   memory length: 10000   epsilon: 0.009998671593271896
episod

episode: 429   score: 182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 430   score: 156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 431   score: 154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 432   score: 174.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 433   score: 149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: 142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: 202.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: 163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: 158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 438   score: 138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: 236.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: 189.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 30   score: 18.0   memory length: 1426   epsilon: 0.48560427251675453
episode: 31   score: 14.0   memory length: 1458   epsilon: 0.47884982641548285
episode: 32   score: 14.0   memory length: 1486   epsilon: 0.47218933035690447
episode: 33   score: 9.0   memory length: 1509   epsilon: 0.46795658559511777
episode: 34   score: 17.0   memory length: 1535   epsilon: 0.4600646486360102
episode: 35   score: 15.0   memory length: 1567   epsilon: 0.45321177699177073
episode: 36   score: 13.0   memory length: 1595   epsilon: 0.44735524511437874
episode: 37   score: 9.0   memory length: 1617   epsilon: 0.44334511517564335
episode: 38   score: 10.0   memory length: 1636   epsilon: 0.4389315614456469
episode: 39   score: 13.0   memory length: 1659   epsilon: 0.43325956258749154
episode: 40   score: 16.0   memory length: 1688   epsilon: 0.4263791588948904
episode: 41   score: 10.0   memory length: 1714   epsilon: 0.42213450329202495
episode: 42   score: 11.0   memory length: 1735   epsilon

episode: 134   score: 55.0   memory length: 7161   epsilon: 0.02705711698284466
episode: 135   score: 70.0   memory length: 7286   epsilon: 0.025227005109553523
episode: 136   score: 98.0   memory length: 7454   epsilon: 0.022870915077561477
episode: 137   score: 52.0   memory length: 7604   epsilon: 0.02171145501244838
episode: 138   score: 74.0   memory length: 7730   epsilon: 0.02016206719415933
episode: 139   score: 61.0   memory length: 7865   epsilon: 0.018968362448321854
episode: 140   score: 57.0   memory length: 7983   epsilon: 0.017916891694903438
episode: 141   score: 123.0   memory length: 8163   epsilon: 0.015842281291520016
episode: 142   score: 121.0   memory length: 8407   epsilon: 0.014035949454503718
episode: 143   score: 106.0   memory length: 8634   epsilon: 0.012623609383108334
episode: 144   score: 128.0   memory length: 8868   epsilon: 0.011106214078031718
episode: 145   score: 89.0   memory length: 9085   epsilon: 0.010160018355927366
episode: 146   score: 76.0 

episode: 234   score: 237.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 235   score: 334.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 236   score: 295.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 237   score: 219.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 238   score: 399.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 239   score: 213.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 240   score: 235.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 241   score: 257.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 242   score: 311.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 243   score: 270.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 244   score: 187.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: 262.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 334   score: 12.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 335   score: 13.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: 142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: 21.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: 154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 339   score: 163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 340   score: 198.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 341   score: 162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 342   score: 131.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 343   score: 181.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 344   score: 186.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 345   score: 141.0   memory length: 10000   epsilon: 0.009998671593271896
episode

episode: 433   score: 244.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: 189.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: 169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: 168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: 201.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 438   score: 175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: 177.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: 194.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: 193.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 442   score: 185.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 443   score: 257.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 444   score: 212.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 36   score: 12.0   memory length: 1372   epsilon: 0.5004006352830107
episode: 37   score: 12.0   memory length: 1396   epsilon: 0.4944287442607058
episode: 38   score: 10.0   memory length: 1418   epsilon: 0.4895066468838467
episode: 39   score: 21.0   memory length: 1449   epsilon: 0.4793291555710539
episode: 40   score: 19.0   memory length: 1489   epsilon: 0.4703034042831738
episode: 41   score: 20.0   memory length: 1528   epsilon: 0.46098615996978987
episode: 42   score: 20.0   memory length: 1568   epsilon: 0.45185350084291465
episode: 43   score: 16.0   memory length: 1604   epsilon: 0.44467781503197196
episode: 44   score: 14.0   memory length: 1634   epsilon: 0.43849262988420123
episode: 45   score: 14.0   memory length: 1662   epsilon: 0.43239347672187917
episode: 46   score: 15.0   memory length: 1691   epsilon: 0.42595277973599555
episode: 47   score: 11.0   memory length: 1717   epsilon: 0.4212906564199442
episode: 48   score: 15.0   memory length: 1743   epsilon:

episode: 140   score: 64.0   memory length: 7426   epsilon: 0.023591383270687993
episode: 141   score: 68.0   memory length: 7558   epsilon: 0.022039747042716367
episode: 142   score: 43.0   memory length: 7669   epsilon: 0.0211116705177843
episode: 143   score: 44.0   memory length: 7756   epsilon: 0.02020245189549843
episode: 144   score: 78.0   memory length: 7878   epsilon: 0.018685820084884133
episode: 145   score: 49.0   memory length: 8005   epsilon: 0.017791849081300208
episode: 146   score: 66.0   memory length: 8120   epsilon: 0.0166549490687396
episode: 147   score: 82.0   memory length: 8268   epsilon: 0.015343108050509247
episode: 148   score: 221.0   memory length: 8571   epsilon: 0.01229946557870811
episode: 149   score: 76.0   memory length: 8868   epsilon: 0.011398910574533858
episode: 150   score: 93.0   memory length: 9037   epsilon: 0.010386129937474547
episode: 151   score: 82.0   memory length: 9212   epsilon: 0.009998671593271896
episode: 152   score: 108.0   mem

episode: 240   score: 416.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 241   score: 286.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 242   score: 276.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 243   score: 275.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 244   score: 230.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: 276.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 246   score: 213.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: 370.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: 358.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 249   score: 375.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 250   score: 219.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 251   score: 330.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 339   score: 149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 340   score: 175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 341   score: 225.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 342   score: 168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 343   score: 125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 344   score: 145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 345   score: 162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: 219.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 347   score: 233.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 348   score: 222.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 349   score: 160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: 206.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 438   score: 229.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: 174.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: 97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: 244.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 442   score: 192.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 443   score: 225.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 444   score: 202.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 445   score: 185.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 446   score: 154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 447   score: 107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 448   score: 164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 449   score: 166.0   memory length: 10000   epsilon: 0.009998671593271896
episo

episode: 42   score: 18.0   memory length: 1522   epsilon: 0.46283472370715234
episode: 43   score: 9.0   memory length: 1549   epsilon: 0.4586858344239834
episode: 44   score: 11.0   memory length: 1569   epsilon: 0.4536654424342049
episode: 45   score: 14.0   memory length: 1594   epsilon: 0.44735524511437874
episode: 46   score: 16.0   memory length: 1624   epsilon: 0.4402509941152613
episode: 47   score: 12.0   memory length: 1652   epsilon: 0.43499694211384704
episode: 48   score: 24.0   memory length: 1688   epsilon: 0.424676198829174
episode: 49   score: 12.0   memory length: 1724   epsilon: 0.4196080198534616
episode: 50   score: 15.0   memory length: 1751   epsilon: 0.4133577680476022
episode: 51   score: 13.0   memory length: 1779   epsilon: 0.40801624104358897
episode: 52   score: 10.0   memory length: 1802   epsilon: 0.4039543904876318
episode: 53   score: 13.0   memory length: 1825   epsilon: 0.3987343766111031
episode: 54   score: 12.0   memory length: 1850   epsilon: 0.3

episode: 146   score: 47.0   memory length: 7311   epsilon: 0.02520177810444397
episode: 147   score: 49.0   memory length: 7407   epsilon: 0.023996069242762572
episode: 148   score: 68.0   memory length: 7524   epsilon: 0.022417816287487683
episode: 149   score: 67.0   memory length: 7659   epsilon: 0.02096433143107068
episode: 150   score: 86.0   memory length: 7812   epsilon: 0.019235921868246145
episode: 151   score: 73.0   memory length: 7971   epsilon: 0.017881075828405323
episode: 152   score: 70.0   memory length: 8114   epsilon: 0.01667162068942903
episode: 153   score: 72.0   memory length: 8256   epsilon: 0.015512899287729736
episode: 154   score: 64.0   memory length: 8392   epsilon: 0.014550711148242284
episode: 155   score: 65.0   memory length: 8521   epsilon: 0.013634554560149977
episode: 156   score: 85.0   memory length: 8671   epsilon: 0.012522973263067018
episode: 157   score: 57.0   memory length: 8813   epsilon: 0.011828788924918175
episode: 158   score: 88.0   me

episode: 246   score: 178.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: 251.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: 163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 249   score: 98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 250   score: 105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 251   score: 92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 252   score: 136.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 253   score: 117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 254   score: 114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 255   score: 138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 256   score: 206.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 257   score: 209.0   memory length: 10000   epsilon: 0.009998671593271896
episod

episode: 345   score: 147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: 265.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 347   score: 250.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 348   score: 274.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 349   score: 220.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: 324.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 351   score: 106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 352   score: 244.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: 159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 354   score: 308.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: 305.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 356   score: 131.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 444   score: 223.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 445   score: 274.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 446   score: 213.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 447   score: 230.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 448   score: 191.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 449   score: 175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 450   score: 182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 451   score: 252.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 452   score: 223.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 453   score: 255.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: 168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: 109.0   memory length: 10000   epsilon: 0.009998671593271896
epis

In [23]:
%%time

# Train one DQN_HER agent per seed on MountainCar-v0 and collect the
# per-episode scores in `result_dict` (seed -> list of episode scores).
result_dict = {}

env_name = 'MountainCar-v0'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source reachable from this cell so that a seed
    # actually identifies a run. The DQN class defined earlier draws from both
    # np.random and Python's `random` module; the original cell only seeded
    # numpy and the action space.
    # NOTE(review): Keras weight initialisation is not seeded here, so runs can
    # still differ between kernels -- confirm whether that matters.
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)
    env.action_space.np_random.seed(seed)

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN_HER(o_space, a_space, lr, units=64, mem_len=10000)

    scores, episodes = [], []
    # Register the list up front: it is filled in place, so partial results
    # stay available in result_dict if the run is interrupted.
    result_dict[seed] = scores

    for num_episode in range(num_episodes):
        terminal = False
        score = 0
        s = env.reset()
        s = np.reshape(s, [1, o_space])

        while not terminal:
            a = agent.get_action(s)
            s_next, r, terminal, _ = env.step(a)
            s_next = np.reshape(s_next, [1, o_space])

            # save the sample <s, a, r, s'> through both experience hooks
            agent.add_experience(s, a, r, s_next, terminal)
            agent.add_her_experience(s, a, r, s_next, terminal)
            # one training step per environment step
            agent.train_model()
            score += r
            s = s_next

        # The loop only exits once `terminal` is True, so the end-of-episode
        # bookkeeping lives here instead of behind an `if terminal:` check.
        agent.update_target_model()

        scores.append(score)
        episodes.append(num_episode)
        print("episode:", num_episode, "  score:", score, "  memory length:",
              len(agent.memory), "  epsilon:", agent.epsilon)

        # Called once per finished episode with the environment's terminal
        # reward. NOTE(review): presumably relabels the episode's transitions
        # for HER -- confirm against DQN_HER.modify_her_list.
        agent.modify_her_list(terminal_reward=terminal_r_dict[env_name])

print(result_dict)

episode: 0   score: -200.0   memory length: 200   epsilon: 0.818648829478636
episode: 1   score: -200.0   memory length: 600   epsilon: 0.6701859060067403
episode: 2   score: -200.0   memory length: 1000   epsilon: 0.5486469074854965
episode: 3   score: -200.0   memory length: 1400   epsilon: 0.4491491486100748
episode: 4   score: -200.0   memory length: 1800   epsilon: 0.3676954247709635
episode: 5   score: -200.0   memory length: 2200   epsilon: 0.3010134290933992
episode: 6   score: -200.0   memory length: 2600   epsilon: 0.24642429138466176
episode: 7   score: -200.0   memory length: 3000   epsilon: 0.20173495769715546
episode: 8   score: -200.0   memory length: 3400   epsilon: 0.1651500869836984
episode: 9   score: -200.0   memory length: 3800   epsilon: 0.1351999253974994
episode: 10   score: -200.0   memory length: 4200   epsilon: 0.11068126067226178
episode: 11   score: -200.0   memory length: 4600   epsilon: 0.09060908449456685
episode: 12   score: -200.0   memory length: 5000

episode: 100   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 104   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 105   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 198   score: -127.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -180.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -170.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: -122.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 207   score: -177.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 208   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: -189.0   memory length: 10000   epsilon: 0.00999867159

episode: 296   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -185.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: -186.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 306   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 307   score: -200.0   memory length: 10000   epsilon: 0.009998671593

episode: 394   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -133.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: -169.0   memory length: 10000   epsilon: 0.00999867159

episode: 492   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 0   score: -200.0   memory length: 200   epsilon: 0.818648829478636
episode: 1   score: -200.0   memory length: 600   epsilon: 0.6701859060067403
episode: 2   score: -200.0   memory length: 1000   epsilon: 0.5486469074854965
episode: 3   score: -200.0   memory length: 1400   epsilon: 0.4491491486100748
episode: 4   sc

episode: 92   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896


episode: 190   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -177.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -166.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -196.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -181.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -198.0   memory length: 10000   epsilon: 0.00999867159

episode: 288   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -179.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: -171.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -176.0   memory length: 10000   epsilon: 0.009998671593

episode: 386   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -141.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -137.0   memory length: 10000   epsilon: 0.00999867159

episode: 484   score: -124.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -176.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -165.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -146.0   memory length: 10000   epsilon: 0.00999867159

episode: 84   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 85   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 86   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 87   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -192.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 182   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: -196.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: -158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: -169.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -164.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -170.0   memory length: 10000   epsilon: 0.00999867159

episode: 280   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -199.0   memory length: 10000   epsilon: 0.00999867159

episode: 378   score: -167.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 379   score: -119.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 380   score: -127.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: -174.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: -163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: -119.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -113.0   memory length: 10000   epsilon: 0.00999867159

episode: 476   score: -173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 477   score: -120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 478   score: -176.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 479   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 480   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 481   score: -194.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 482   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: -179.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -157.0   memory length: 10000   epsilon: 0.00999867159

episode: 76   score: -168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -196.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -191.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 80   score: -184.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 81   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 82   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 83   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 84   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 85   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 86   score: -158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 87   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 174   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 179   score: -125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 180   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 181   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 182   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: -85.0   memory length: 10000   epsilon: 0.00999867159327

episode: 272   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 273   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 274   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 275   score: -193.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 276   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 277   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: -176.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 280   score: -136.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 370   score: -199.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 374   score: -143.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 375   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 376   score: -165.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 377   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 378   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 379   score: -145.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 380   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 468   score: -175.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: -162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: -163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 474   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: -171.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: -120.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 477   score: -119.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 478   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 479   score: -132.0   memory length: 10000   epsilon: 0.00999867159

episode: 68   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 69   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 70   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 71   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 72   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 73   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 74   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 75   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 76   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
epis

episode: 166   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 167   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 168   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 169   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 170   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 171   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 172   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 173   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 174   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 264   score: -197.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: -173.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: -155.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 270   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 271   score: -150.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 272   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 273   score: -162.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 274   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 275   score: -200.0   memory length: 10000   epsilon: 0.00999867159

episode: 362   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 363   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: -151.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: -168.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: -115.0   memory length: 10000   epsilon: 0.009998671593271

episode: 460   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 461   score: -152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 462   score: -144.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 463   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 464   score: -154.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 465   score: -129.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 466   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: -158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: -148.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: -152.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: -142.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: -142.0   memory length: 10000   epsilon: 0.009998671593

In [24]:
%%time

# Train one DQN_HER agent per seed on LunarLander-v2 and collect the
# per-episode scores in `result_dict` (seed -> list of episode scores).
result_dict = {}

env_name = 'LunarLander-v2'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source reachable from this cell so that a seed
    # actually identifies a run. The DQN class defined earlier draws from both
    # np.random and Python's `random` module; the original cell only seeded
    # numpy and the action space.
    # NOTE(review): Keras weight initialisation is not seeded here, so runs can
    # still differ between kernels -- confirm whether that matters.
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)
    env.action_space.np_random.seed(seed)

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN_HER(o_space, a_space, lr, units=128)

    scores, episodes = [], []
    # Register the list up front: it is filled in place, so partial results
    # stay available in result_dict if the run is interrupted.
    result_dict[seed] = scores

    for num_episode in range(num_episodes):
        terminal = False
        score = 0
        s = env.reset()
        s = np.reshape(s, [1, o_space])

        while not terminal:
            a = agent.get_action(s)
            s_next, r, terminal, _ = env.step(a)
            s_next = np.reshape(s_next, [1, o_space])

            # save the sample <s, a, r, s'> through both experience hooks
            agent.add_experience(s, a, r, s_next, terminal)
            agent.add_her_experience(s, a, r, s_next, terminal)
            # one training step per environment step
            agent.train_model()
            score += r
            s = s_next

        # The loop only exits once `terminal` is True, so the end-of-episode
        # bookkeeping lives here instead of behind an `if terminal:` check.
        agent.update_target_model()

        scores.append(score)
        episodes.append(num_episode)
        print("episode:", num_episode, "  score:", score, "  memory length:",
              len(agent.memory), "  epsilon:", agent.epsilon)

        # Called once per finished episode with the environment's terminal
        # reward. NOTE(review): presumably relabels the episode's transitions
        # for HER -- confirm against DQN_HER.modify_her_list.
        agent.modify_her_list(terminal_reward=terminal_r_dict[env_name])

print(result_dict)

episode: 0   score: -105.10736552005467   memory length: 119   epsilon: 0.887754942528593
episode: 1   score: -351.18928229945084   memory length: 340   epsilon: 0.8016280363938307
episode: 2   score: -320.4547150739593   memory length: 533   epsilon: 0.7318672718448656
episode: 3   score: -276.5803324447238   memory length: 716   epsilon: 0.6675091808180759
episode: 4   score: -222.4292513732804   memory length: 867   epsilon: 0.6292468477910546
episode: 5   score: -484.86224686586723   memory length: 1009   epsilon: 0.5791040088995179
episode: 6   score: -408.8505772868754   memory length: 1280   epsilon: 0.47980896453558947
episode: 7   score: -461.37865388156047   memory length: 1647   epsilon: 0.4011351786721449
episode: 8   score: -349.20057727831215   memory length: 1923   epsilon: 0.36403497277104113
episode: 9   score: -229.4242276304255   memory length: 2157   epsilon: 0.3174058999715789
episode: 10   score: -306.43943732166144   memory length: 2498   epsilon: 0.2588061506321

episode: 87   score: 212.16571058522067   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: -145.75262346043172   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: 234.40016117718704   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: 18.964251365660516   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -61.31019001751572   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: 202.50612149415474   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -21.30568940712064   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: 98.70322798073776   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -119.31950460445628   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -90.07867670246871   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: 5.89300106181018   memory le

episode: 173   score: -183.21335451609082   memory length: 10000   epsilon: 0.009998671593271896
episode: 174   score: 181.74976131875627   memory length: 10000   epsilon: 0.009998671593271896
episode: 175   score: -135.34459071537896   memory length: 10000   epsilon: 0.009998671593271896
episode: 176   score: 236.8104072088414   memory length: 10000   epsilon: 0.009998671593271896
episode: 177   score: 47.13329387974821   memory length: 10000   epsilon: 0.009998671593271896
episode: 178   score: 195.12305490816132   memory length: 10000   epsilon: 0.009998671593271896
episode: 179   score: 234.80839938902037   memory length: 10000   epsilon: 0.009998671593271896
episode: 180   score: 197.6937158550723   memory length: 10000   epsilon: 0.009998671593271896
episode: 181   score: -70.56960788389962   memory length: 10000   epsilon: 0.009998671593271896
episode: 182   score: 246.70516788821323   memory length: 10000   epsilon: 0.009998671593271896
episode: 183   score: 12.47103276700578  

episode: 259   score: -51.02044597726547   memory length: 10000   epsilon: 0.009998671593271896
episode: 260   score: 182.60212990068788   memory length: 10000   epsilon: 0.009998671593271896
episode: 261   score: -116.98821998942414   memory length: 10000   epsilon: 0.009998671593271896
episode: 262   score: -209.7583788241551   memory length: 10000   epsilon: 0.009998671593271896
episode: 263   score: -108.96167235042417   memory length: 10000   epsilon: 0.009998671593271896
episode: 264   score: -177.44094619730845   memory length: 10000   epsilon: 0.009998671593271896
episode: 265   score: -212.26144080840447   memory length: 10000   epsilon: 0.009998671593271896
episode: 266   score: -63.34384126739114   memory length: 10000   epsilon: 0.009998671593271896
episode: 267   score: -24.13971260065455   memory length: 10000   epsilon: 0.009998671593271896
episode: 268   score: -122.66865316909848   memory length: 10000   epsilon: 0.009998671593271896
episode: 269   score: -146.62110763

episode: 345   score: 154.46064699381205   memory length: 10000   epsilon: 0.009998671593271896
episode: 346   score: -72.44206803298042   memory length: 10000   epsilon: 0.009998671593271896
episode: 347   score: 201.30908617357017   memory length: 10000   epsilon: 0.009998671593271896
episode: 348   score: -60.01442123862819   memory length: 10000   epsilon: 0.009998671593271896
episode: 349   score: 258.0045672414463   memory length: 10000   epsilon: 0.009998671593271896
episode: 350   score: 198.61119651944512   memory length: 10000   epsilon: 0.009998671593271896
episode: 351   score: 240.3337482905615   memory length: 10000   epsilon: 0.009998671593271896
episode: 352   score: 197.6216542270505   memory length: 10000   epsilon: 0.009998671593271896
episode: 353   score: 220.8256514065956   memory length: 10000   epsilon: 0.009998671593271896
episode: 354   score: 99.38763832044552   memory length: 10000   epsilon: 0.009998671593271896
episode: 355   score: 33.35713403599255   mem

episode: 431   score: 115.14130579430983   memory length: 10000   epsilon: 0.009998671593271896
episode: 432   score: -299.0201745922669   memory length: 10000   epsilon: 0.009998671593271896
episode: 433   score: -131.92647723632956   memory length: 10000   epsilon: 0.009998671593271896
episode: 434   score: -285.6418824623978   memory length: 10000   epsilon: 0.009998671593271896
episode: 435   score: -140.28474013842177   memory length: 10000   epsilon: 0.009998671593271896
episode: 436   score: -84.3208947877051   memory length: 10000   epsilon: 0.009998671593271896
episode: 437   score: -271.6516394619057   memory length: 10000   epsilon: 0.009998671593271896
episode: 438   score: -73.46332520296907   memory length: 10000   epsilon: 0.009998671593271896
episode: 439   score: -147.88167294070328   memory length: 10000   epsilon: 0.009998671593271896
episode: 440   score: 204.05679968995614   memory length: 10000   epsilon: 0.009998671593271896
episode: 441   score: 194.608318879739

episode: 18   score: -590.6121747812101   memory length: 6089   epsilon: 0.04426508598364348
episode: 19   score: -453.97126658570977   memory length: 6395   epsilon: 0.03760416009731858
episode: 20   score: -369.3526315288575   memory length: 6717   epsilon: 0.032073659986461445
episode: 21   score: -191.2391501762043   memory length: 7039   epsilon: 0.027247276679492435
episode: 22   score: -210.58099605874008   memory length: 7300   epsilon: 0.024702502271880755
episode: 23   score: -207.23250651189204   memory length: 7886   epsilon: 0.0151600000311334
episode: 24   score: -121.63728017304396   memory length: 8633   epsilon: 0.011699320882282814
episode: 25   score: -192.11141146954617   memory length: 9373   epsilon: 0.009998671593271896
episode: 26   score: -127.3829604898879   memory length: 10000   epsilon: 0.009998671593271896
episode: 27   score: 144.13587995464036   memory length: 10000   epsilon: 0.009998671593271896
episode: 28   score: -269.96804134348054   memory length:

episode: 105   score: -86.23807769355977   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -338.2413097762286   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -263.61578269507976   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: -240.42968861975203   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: -80.60513376960566   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: -129.2899147783348   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: -86.96872467711579   memory length: 10000   epsilon: 0.009998671593271896
episode: 112   score: -278.8594393320086   memory length: 10000   epsilon: 0.009998671593271896
episode: 113   score: -235.3406901627934   memory length: 10000   epsilon: 0.009998671593271896
episode: 114   score: -265.731851320637   memory length: 10000   epsilon: 0.009998671593271896
episode: 115   score: -309.501260260090

episode: 191   score: 181.38062389016807   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -104.71092508312826   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -47.21816697961145   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -34.78623927980345   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -36.330817819763766   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -30.72633677404609   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -282.0623680502065   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: 191.60947177766846   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -160.8570397631622   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -178.1696023074153   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -71.091985294414

episode: 277   score: -182.12091257238507   memory length: 10000   epsilon: 0.009998671593271896
episode: 278   score: -214.44928590392846   memory length: 10000   epsilon: 0.009998671593271896
episode: 279   score: -50.386332330116346   memory length: 10000   epsilon: 0.009998671593271896
episode: 280   score: -139.88206526291935   memory length: 10000   epsilon: 0.009998671593271896
episode: 281   score: 181.72590666861373   memory length: 10000   epsilon: 0.009998671593271896
episode: 282   score: 260.80746257585156   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -291.8288019186982   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: -78.21887270920972   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: 277.346958469857   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -42.58855297747354   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -24.101373597215

episode: 363   score: -38.92950153004561   memory length: 10000   epsilon: 0.009998671593271896
episode: 364   score: -21.829927595265033   memory length: 10000   epsilon: 0.009998671593271896
episode: 365   score: 71.09511734809264   memory length: 10000   epsilon: 0.009998671593271896
episode: 366   score: 5.733822703434137   memory length: 10000   epsilon: 0.009998671593271896
episode: 367   score: 6.165859247479964   memory length: 10000   epsilon: 0.009998671593271896
episode: 368   score: 73.42540644888201   memory length: 10000   epsilon: 0.009998671593271896
episode: 369   score: 52.8288552283554   memory length: 10000   epsilon: 0.009998671593271896
episode: 370   score: 213.59716765552895   memory length: 10000   epsilon: 0.009998671593271896
episode: 371   score: 86.78735175162554   memory length: 10000   epsilon: 0.009998671593271896
episode: 372   score: 252.3384008558927   memory length: 10000   epsilon: 0.009998671593271896
episode: 373   score: 199.6522549161657   memor

episode: 449   score: 206.88356786980984   memory length: 10000   epsilon: 0.009998671593271896
episode: 450   score: 5.836419860832148   memory length: 10000   epsilon: 0.009998671593271896
episode: 451   score: 73.47834073708538   memory length: 10000   epsilon: 0.009998671593271896
episode: 452   score: 22.030621396991098   memory length: 10000   epsilon: 0.009998671593271896
episode: 453   score: -11.866476298129482   memory length: 10000   epsilon: 0.009998671593271896
episode: 454   score: 238.4122429423849   memory length: 10000   epsilon: 0.009998671593271896
episode: 455   score: -299.4520277828668   memory length: 10000   epsilon: 0.009998671593271896
episode: 456   score: 104.21546605179226   memory length: 10000   epsilon: 0.009998671593271896
episode: 457   score: 88.8929846988103   memory length: 10000   epsilon: 0.009998671593271896
episode: 458   score: -367.63854595344566   memory length: 10000   epsilon: 0.009998671593271896
episode: 459   score: 56.918284233921575   

episode: 36   score: 0.6447229660848119   memory length: 10000   epsilon: 0.009998671593271896
episode: 37   score: -19.989701183149204   memory length: 10000   epsilon: 0.009998671593271896
episode: 38   score: -30.019169557338095   memory length: 10000   epsilon: 0.009998671593271896
episode: 39   score: -299.1065835606257   memory length: 10000   epsilon: 0.009998671593271896
episode: 40   score: -18.907673616621167   memory length: 10000   epsilon: 0.009998671593271896
episode: 41   score: 272.94908642890147   memory length: 10000   epsilon: 0.009998671593271896
episode: 42   score: 13.565212426669206   memory length: 10000   epsilon: 0.009998671593271896
episode: 43   score: -68.54347934249544   memory length: 10000   epsilon: 0.009998671593271896
episode: 44   score: 48.66230753776976   memory length: 10000   epsilon: 0.009998671593271896
episode: 45   score: -69.42230389441103   memory length: 10000   epsilon: 0.009998671593271896
episode: 46   score: -153.44260398613056   memor

episode: 122   score: 217.2228877098396   memory length: 10000   epsilon: 0.009998671593271896
episode: 123   score: 148.5896067068219   memory length: 10000   epsilon: 0.009998671593271896
episode: 124   score: 190.36839234945973   memory length: 10000   epsilon: 0.009998671593271896
episode: 125   score: -25.486363028712773   memory length: 10000   epsilon: 0.009998671593271896
episode: 126   score: -93.30359947288544   memory length: 10000   epsilon: 0.009998671593271896
episode: 127   score: 108.06811701795212   memory length: 10000   epsilon: 0.009998671593271896
episode: 128   score: 49.78696901734463   memory length: 10000   epsilon: 0.009998671593271896
episode: 129   score: 5.808190535690191   memory length: 10000   epsilon: 0.009998671593271896
episode: 130   score: 81.2188527138278   memory length: 10000   epsilon: 0.009998671593271896
episode: 131   score: 191.57536854505884   memory length: 10000   epsilon: 0.009998671593271896
episode: 132   score: -87.3525501421381   mem

episode: 208   score: -167.15553097596552   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: 189.1479256142402   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: 212.77559392368906   memory length: 10000   epsilon: 0.009998671593271896
episode: 211   score: 229.4775403929375   memory length: 10000   epsilon: 0.009998671593271896
episode: 212   score: 57.66915784829638   memory length: 10000   epsilon: 0.009998671593271896
episode: 213   score: -137.2617223325024   memory length: 10000   epsilon: 0.009998671593271896
episode: 214   score: 249.54494873805663   memory length: 10000   epsilon: 0.009998671593271896
episode: 215   score: 22.58750837715191   memory length: 10000   epsilon: 0.009998671593271896
episode: 216   score: 28.919111328485357   memory length: 10000   epsilon: 0.009998671593271896
episode: 217   score: 30.759685803665715   memory length: 10000   epsilon: 0.009998671593271896
episode: 218   score: 41.009605403315064   

episode: 294   score: -52.91269867471492   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -109.04943343252498   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -56.35538286086634   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: 39.28256975245043   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -95.74316798156794   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -342.7614921387884   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -187.04546276984348   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -70.77453993875508   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -55.81792141393393   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: 26.626220468742815   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -489.879485843117

episode: 380   score: 72.51980582092571   memory length: 10000   epsilon: 0.009998671593271896
episode: 381   score: -22.142478918864118   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: -5.75782457898056   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: 237.51444880222638   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: 235.62074166725705   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: -144.58483864048333   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: 251.37737169832315   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: 29.452949114863884   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: 256.9974971332983   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: 119.0581482549285   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: 138.5599082814219   

episode: 466   score: 16.321667886658773   memory length: 10000   epsilon: 0.009998671593271896
episode: 467   score: 259.9985256860973   memory length: 10000   epsilon: 0.009998671593271896
episode: 468   score: 108.62224825523008   memory length: 10000   epsilon: 0.009998671593271896
episode: 469   score: 243.9098925126096   memory length: 10000   epsilon: 0.009998671593271896
episode: 470   score: -51.39461557143474   memory length: 10000   epsilon: 0.009998671593271896
episode: 471   score: 267.24809983061607   memory length: 10000   epsilon: 0.009998671593271896
episode: 472   score: 181.03968489783063   memory length: 10000   epsilon: 0.009998671593271896
episode: 473   score: 239.22848186818877   memory length: 10000   epsilon: 0.009998671593271896
episode: 474   score: 96.8597271494785   memory length: 10000   epsilon: 0.009998671593271896
episode: 475   score: 91.35678111261534   memory length: 10000   epsilon: 0.009998671593271896
episode: 476   score: 155.28683943030032   me

episode: 53   score: -128.15251704434738   memory length: 10000   epsilon: 0.009998671593271896
episode: 54   score: -137.96734479055758   memory length: 10000   epsilon: 0.009998671593271896
episode: 55   score: -302.7336471759595   memory length: 10000   epsilon: 0.009998671593271896
episode: 56   score: -142.3868655499458   memory length: 10000   epsilon: 0.009998671593271896
episode: 57   score: -139.46947855171015   memory length: 10000   epsilon: 0.009998671593271896
episode: 58   score: -118.5942233801817   memory length: 10000   epsilon: 0.009998671593271896
episode: 59   score: -111.08190010163177   memory length: 10000   epsilon: 0.009998671593271896
episode: 60   score: -140.39611078145415   memory length: 10000   epsilon: 0.009998671593271896
episode: 61   score: -118.79721228624611   memory length: 10000   epsilon: 0.009998671593271896
episode: 62   score: -141.90387188473693   memory length: 10000   epsilon: 0.009998671593271896
episode: 63   score: -51.51248997042072   m

episode: 139   score: -19.618497869571556   memory length: 10000   epsilon: 0.009998671593271896
episode: 140   score: 28.346034339392332   memory length: 10000   epsilon: 0.009998671593271896
episode: 141   score: -73.38160323110944   memory length: 10000   epsilon: 0.009998671593271896
episode: 142   score: 151.60188319452618   memory length: 10000   epsilon: 0.009998671593271896
episode: 143   score: 40.04090843674483   memory length: 10000   epsilon: 0.009998671593271896
episode: 144   score: 281.34627876058073   memory length: 10000   epsilon: 0.009998671593271896
episode: 145   score: -65.95694712774426   memory length: 10000   epsilon: 0.009998671593271896
episode: 146   score: -42.57321173612467   memory length: 10000   epsilon: 0.009998671593271896
episode: 147   score: -89.99866834578454   memory length: 10000   epsilon: 0.009998671593271896
episode: 148   score: -67.12843327449198   memory length: 10000   epsilon: 0.009998671593271896
episode: 149   score: -104.9535659609241

episode: 225   score: 225.21970153420654   memory length: 10000   epsilon: 0.009998671593271896
episode: 226   score: -318.10286043768895   memory length: 10000   epsilon: 0.009998671593271896
episode: 227   score: 187.74151861525252   memory length: 10000   epsilon: 0.009998671593271896
episode: 228   score: 199.38873289053612   memory length: 10000   epsilon: 0.009998671593271896
episode: 229   score: 206.86123412118476   memory length: 10000   epsilon: 0.009998671593271896
episode: 230   score: 264.2160955829393   memory length: 10000   epsilon: 0.009998671593271896
episode: 231   score: 210.42904706768718   memory length: 10000   epsilon: 0.009998671593271896
episode: 232   score: -320.76476345329235   memory length: 10000   epsilon: 0.009998671593271896
episode: 233   score: 132.37798418378915   memory length: 10000   epsilon: 0.009998671593271896
episode: 234   score: 81.18481323339769   memory length: 10000   epsilon: 0.009998671593271896
episode: 235   score: 78.37189182609129 

episode: 311   score: 42.55969228322824   memory length: 10000   epsilon: 0.009998671593271896
episode: 312   score: 46.70713398218541   memory length: 10000   epsilon: 0.009998671593271896
episode: 313   score: 235.8292141226923   memory length: 10000   epsilon: 0.009998671593271896
episode: 314   score: 95.96669888100989   memory length: 10000   epsilon: 0.009998671593271896
episode: 315   score: 129.29428985410487   memory length: 10000   epsilon: 0.009998671593271896
episode: 316   score: -42.0901678395729   memory length: 10000   epsilon: 0.009998671593271896
episode: 317   score: 174.75128686346918   memory length: 10000   epsilon: 0.009998671593271896
episode: 318   score: 202.64271149828426   memory length: 10000   epsilon: 0.009998671593271896
episode: 319   score: 155.58031868040393   memory length: 10000   epsilon: 0.009998671593271896
episode: 320   score: 220.9602361558879   memory length: 10000   epsilon: 0.009998671593271896
episode: 321   score: -229.29898888324337   me

episode: 397   score: 25.97434108045654   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: 229.27290313550503   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: 152.4209721672143   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: 212.8952884428842   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: 227.28540699402072   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: 24.34132461529336   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: 51.52441905050703   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: 89.32541625484716   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: 148.15997183521117   memory length: 10000   epsilon: 0.009998671593271896
episode: 406   score: 104.01217736469022   memory length: 10000   epsilon: 0.009998671593271896
episode: 407   score: 176.56310088389344   mem

episode: 483   score: -42.48553979211684   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: 247.02009429781535   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -87.91714013281126   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -122.89938754473701   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -85.50872858108056   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -103.63294744611935   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: 119.53589128494716   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -47.88119334511538   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -163.09466582063808   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: 164.1085628559468   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -153.57588467028

episode: 70   score: -40.87848283378068   memory length: 10000   epsilon: 0.009998671593271896
episode: 71   score: -40.38024468242108   memory length: 10000   epsilon: 0.009998671593271896
episode: 72   score: -68.33402902673976   memory length: 10000   epsilon: 0.009998671593271896
episode: 73   score: -83.99650475179119   memory length: 10000   epsilon: 0.009998671593271896
episode: 74   score: -64.30846932655432   memory length: 10000   epsilon: 0.009998671593271896
episode: 75   score: -73.50535799574331   memory length: 10000   epsilon: 0.009998671593271896
episode: 76   score: -48.529310032783194   memory length: 10000   epsilon: 0.009998671593271896
episode: 77   score: -6.769506643242919   memory length: 10000   epsilon: 0.009998671593271896
episode: 78   score: -81.41727321958304   memory length: 10000   epsilon: 0.009998671593271896
episode: 79   score: -34.31849298335522   memory length: 10000   epsilon: 0.009998671593271896
episode: 80   score: -49.93176207920343   memory 

episode: 156   score: -234.95440402255002   memory length: 10000   epsilon: 0.009998671593271896
episode: 157   score: -55.534419689456556   memory length: 10000   epsilon: 0.009998671593271896
episode: 158   score: -93.0799960786513   memory length: 10000   epsilon: 0.009998671593271896
episode: 159   score: -54.544657362745326   memory length: 10000   epsilon: 0.009998671593271896
episode: 160   score: -81.52681074709292   memory length: 10000   epsilon: 0.009998671593271896
episode: 161   score: -38.874726400350006   memory length: 10000   epsilon: 0.009998671593271896
episode: 162   score: -305.8327237680066   memory length: 10000   epsilon: 0.009998671593271896
episode: 163   score: -33.07809464002327   memory length: 10000   epsilon: 0.009998671593271896
episode: 164   score: -199.67943510709603   memory length: 10000   epsilon: 0.009998671593271896
episode: 165   score: 10.089384690210878   memory length: 10000   epsilon: 0.009998671593271896
episode: 166   score: -130.079610003

episode: 242   score: 175.89627396756987   memory length: 10000   epsilon: 0.009998671593271896
episode: 243   score: 128.50855413661395   memory length: 10000   epsilon: 0.009998671593271896
episode: 244   score: -109.6597432706887   memory length: 10000   epsilon: 0.009998671593271896
episode: 245   score: -42.76482229098012   memory length: 10000   epsilon: 0.009998671593271896
episode: 246   score: -47.29430031864352   memory length: 10000   epsilon: 0.009998671593271896
episode: 247   score: -39.8010982846091   memory length: 10000   epsilon: 0.009998671593271896
episode: 248   score: -238.91262955381956   memory length: 10000   epsilon: 0.009998671593271896
episode: 249   score: -43.1871823948062   memory length: 10000   epsilon: 0.009998671593271896
episode: 250   score: -32.71374999149333   memory length: 10000   epsilon: 0.009998671593271896
episode: 251   score: -41.33713187804051   memory length: 10000   epsilon: 0.009998671593271896
episode: 252   score: 73.61964925640085  

episode: 328   score: -38.86669990519585   memory length: 10000   epsilon: 0.009998671593271896
episode: 329   score: -216.75154912876332   memory length: 10000   epsilon: 0.009998671593271896
episode: 330   score: 254.95519731121254   memory length: 10000   epsilon: 0.009998671593271896
episode: 331   score: 165.87104426727657   memory length: 10000   epsilon: 0.009998671593271896
episode: 332   score: 186.18020285520998   memory length: 10000   epsilon: 0.009998671593271896
episode: 333   score: -88.00843989075001   memory length: 10000   epsilon: 0.009998671593271896
episode: 334   score: 99.33462850636066   memory length: 10000   epsilon: 0.009998671593271896
episode: 335   score: 41.377851722584744   memory length: 10000   epsilon: 0.009998671593271896
episode: 336   score: 177.89777329943848   memory length: 10000   epsilon: 0.009998671593271896
episode: 337   score: -354.5372906056107   memory length: 10000   epsilon: 0.009998671593271896
episode: 338   score: -51.82837498705091

episode: 414   score: 44.558243271914506   memory length: 10000   epsilon: 0.009998671593271896
episode: 415   score: 236.20027525975925   memory length: 10000   epsilon: 0.009998671593271896
episode: 416   score: 202.51726756535163   memory length: 10000   epsilon: 0.009998671593271896
episode: 417   score: 231.75673407117952   memory length: 10000   epsilon: 0.009998671593271896
episode: 418   score: -246.78086539311488   memory length: 10000   epsilon: 0.009998671593271896
episode: 419   score: -84.83980524116724   memory length: 10000   epsilon: 0.009998671593271896
episode: 420   score: -3.492542286318283   memory length: 10000   epsilon: 0.009998671593271896
episode: 421   score: -56.14737588718812   memory length: 10000   epsilon: 0.009998671593271896
episode: 422   score: 183.50005735397514   memory length: 10000   epsilon: 0.009998671593271896
episode: 423   score: -176.26094424470773   memory length: 10000   epsilon: 0.009998671593271896
episode: 424   score: 233.750382201469

In [28]:
%%time

# Train one DQN_HER agent per seed on Acrobot and collect the per-episode
# returns in result_dict[seed].
result_dict = {}

env_name = 'Acrobot-v1'

for seed in seeds:
    env = gym.make(env_name)

    # Seed every random source this cell can reach so that a seed really
    # identifies a run: the agent code in this notebook draws exploratory
    # actions with the stdlib `random` module (see DQN.get_action) as well as
    # NumPy, and the environment has its own RNG for initial conditions.
    # NOTE(review): Keras/TensorFlow weight initialisation is not seeded in
    # this cell, so runs may still differ unless it is seeded elsewhere.
    random.seed(seed)
    np.random.seed(seed)
    env.seed(seed)
    env.action_space.np_random.seed(seed)

    o_space = env.observation_space.shape[0]
    a_space = env.action_space.n
    agent = DQN_HER(o_space, a_space, lr, units=128)

    scores, episodes = [], []
    # Register the list up front: the final content is the same as assigning
    # it after training, but partial results survive an interrupted run.
    result_dict[seed] = scores

    try:
        for num_episode in range(num_episodes):
            terminal = False
            score = 0
            s = env.reset()
            s = np.reshape(s, [1, o_space])

            while not terminal:
                a = agent.get_action(s)
                s_next, r, terminal, _ = env.step(a)
                s_next = np.reshape(s_next, [1, o_space])

                # Store the transition <s, a, r, s', terminal> with the raw
                # environment reward in both the replay and the HER buffers.
                agent.add_experience(s, a, r, s_next, terminal)
                agent.add_her_experience(s, a, r, s_next, terminal)
                # One training step per environment step.
                agent.train_model()
                score += r
                s = s_next

            # The loop above only exits once `terminal` is True, so the
            # end-of-episode bookkeeping runs exactly once per episode.
            agent.update_target_model()

            scores.append(score)
            episodes.append(num_episode)
            print("episode:", num_episode, "  score:", score, "  memory length:",
                  len(agent.memory), "  epsilon:", agent.epsilon)

            agent.modify_her_list(terminal_reward=terminal_r_dict[env_name])
    finally:
        # Release the environment even if training is interrupted.
        env.close()

print(result_dict)

episode: 0   score: -363.0   memory length: 364   epsilon: 0.6947646516921667
episode: 1   score: -388.0   memory length: 1117   epsilon: 0.47077417846163544
episode: 2   score: -500.0   memory length: 2006   epsilon: 0.28546754960345794
episode: 3   score: -500.0   memory length: 3006   epsilon: 0.1731015115206527
episode: 4   score: -471.0   memory length: 3978   epsilon: 0.10794718036101436
episode: 5   score: -200.0   memory length: 4651   epsilon: 0.08828246201521552
episode: 6   score: -344.0   memory length: 5197   epsilon: 0.0625126448964628
episode: 7   score: -337.0   memory length: 5880   epsilon: 0.04457618473551992
episode: 8   score: -223.0   memory length: 6442   epsilon: 0.03562642604596987
episode: 9   score: -500.0   memory length: 7166   epsilon: 0.021603114634930247
episode: 10   score: -232.0   memory length: 7899   epsilon: 0.01711098958080135
episode: 11   score: -500.0   memory length: 8632   epsilon: 0.010375743807537072
episode: 12   score: -287.0   memory len

episode: 100   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -156.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -108.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 104   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 105   score: -123.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -131.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 108   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 109   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 110   score: -260.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 111   score: -118.0   memory length: 10000   epsilon: 0.009998671593

episode: 199   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -200.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -189.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 207   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 208   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 209   score: -122.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 210   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896

episode: 298   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: -108.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 306   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 307   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 308   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 309   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896


episode: 397   score: -109.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -65.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: -119.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -130.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 405   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 406   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 407   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 408   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896

episode: 496   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 0   score: -500.0   memory length: 500   epsilon: 0.6063789448611848
episode: 1   score: -500.0   memory length: 1500   epsilon: 0.3676954247709635
episode: 2   score: -500.0   memory length: 2500   epsilon: 0.22296276370290227
episode: 3   score: -500.0   memory length: 3500   epsilon: 0.1351999253974994
episode: 4   score: -500.0   memory length: 4500   epsilon: 0.08198238810784661
episode: 5   score: -186.0   memory length: 5187   epsilon: 0.06799341638648719
episode: 6   score: -358.0   memory length: 5733   epsilon: 0.047476329612410224
episode: 7   score: -312.0   memory length: 6405   epsilon: 0.03471162545881162
episode: 8   score: -500.0   me

episode: 96   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -147.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -163.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 104   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 105   score: -500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 106   score: -127.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 107   score: -69.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 195   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -78.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 203   score: -182.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 204   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 205   score: -64.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 206   score: -189.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 294   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 302   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 303   score: -138.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 304   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 305   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 393   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -132.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 401   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 402   score: -259.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 403   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 404   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896


episode: 492   score: -159.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 0   score: -500.0   memory length: 500   epsilon: 0.6063789448611848
episode: 1   score: -188.0   memory length: 1189   epsilon: 0.5019048446041944
episode: 2   score: -268.0   memory length: 1647   epsilon: 0.38347558663089293
episode: 3   score: -353.0   memory length: 2270   epsilon: 0.26910424739696437
episode: 4   sco

episode: 92   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -133.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -160.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -158.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100   score: -106.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 101   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 102   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 103   score: -117.0   memory length: 10000   epsilon: 0.009998671593271896
ep

episode: 191   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -134.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -466.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 199   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 200   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 201   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 202   score: -103.0   memory length: 10000   epsilon: 0.009998671593271896


episode: 290   score: -134.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 298   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 299   score: -102.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 300   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 301   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896

episode: 389   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -69.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -137.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 397   score: -61.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 398   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 399   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 400   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896

episode: 488   score: -91.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -69.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -68.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 496   score: -68.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 497   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 498   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 499   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 88   score: -180.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -157.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 96   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 97   score: -275.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 98   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 99   score: -88.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 100

episode: 187   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: -127.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: -208.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: -93.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -69.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 195   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 196   score: -115.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 197   score: -77.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 198   score: -87.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 286   score: -90.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -75.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -66.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -68.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 294   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 295   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 296   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 297   score: -140.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 385   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: -135.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -146.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -96.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 393   score: -125.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 394   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 395   score: -112.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 396   score: -83.0   memory length: 10000   epsilon: 0.00999867159327189

episode: 484   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -81.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -124.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -79.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 492   score: -98.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 493   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 494   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 495   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896

episode: 84   score: -97.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 85   score: -101.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 86   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 87   score: -153.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 88   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 89   score: -110.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 90   score: -113.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 91   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 92   score: -139.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 93   score: -500.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 94   score: -149.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 95   score: -114.0   memory length: 10000   epsilon: 0.009998671593271896
episo

episode: 183   score: -63.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 184   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 185   score: -70.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 186   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 187   score: -161.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 188   score: -80.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 189   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 190   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 191   score: -99.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 192   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 193   score: -83.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 194   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 282   score: -104.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 283   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 284   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 285   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 286   score: -121.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 287   score: -107.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 288   score: -240.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 289   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 290   score: -126.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 291   score: -116.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 292   score: -111.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 293   score: -102.0   memory length: 10000   epsilon: 0.0099986715932

episode: 381   score: -62.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 382   score: -128.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 383   score: -86.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 384   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 385   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 386   score: -118.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 387   score: -95.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 388   score: -74.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 389   score: -76.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 390   score: -92.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 391   score: -85.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 392   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
e

episode: 480   score: -130.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 481   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 482   score: -89.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 483   score: -71.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 484   score: -72.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 485   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 486   score: -100.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 487   score: -84.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 488   score: -94.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 489   score: -105.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 490   score: -82.0   memory length: 10000   epsilon: 0.009998671593271896
episode: 491   score: -119.0   memory length: 10000   epsilon: 0.009998671593271896
