In [1]:
import gym
import pylab
import random
import numpy as np
from collections import deque
import tflearn

  return f(*args, **kwds)


## Task: fill in the blanks in the following agent code

In [3]:
class DeepQAgent:
    """Epsilon-greedy Q-value agent with a replay buffer and a target network.

    The agent keeps two networks built by ``build_model``: ``model`` (trained
    on every call to ``train_replay``) and ``target_model`` (a copy refreshed
    by ``update_target_model``). Transitions are stored in a bounded deque and
    sampled uniformly at random for training.
    """

    def __init__(self, state_size, action_size, render=True):
        """Create the agent, its replay memory and both networks.

        Args:
            state_size: Number of features in one observation.
            action_size: Number of outputs of the Q-network.
            render: Flag read by the training loop to decide whether to
                render the environment.
        """
        # Tip: if you are training this on AWS the best way is to turn off rendering
        # and load it later with the serialized model
        self.render = render
        self.state_size = state_size
        self.action_size = action_size

        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.005
        # Linear decay: epsilon reaches epsilon_min after 50000 stored transitions
        self.epsilon_decay = (self.epsilon - self.epsilon_min) / 50000
        self.batch_size = 64
        # Training does not start until this many transitions are stored
        self.train_start = 1000
        # replay memory
        self.memory = deque(maxlen=20000)

        self.model = self.build_model()
        self.target_model = self.build_model()
        self.update_target_model()

    def build_model(self):
        """Build and compile the Q-network.

        Two fully connected hidden layers of 64 ReLU units followed by a
        linear output layer with one unit per action, trained with mean
        squared error.

        Returns:
            The compiled model.
        """
        # NOTE(review): Input, Dense, Model and Adam are not imported in the
        # visible import cell (which imports tflearn instead). They look like
        # the Keras functional API; confirm the cell that imports them exists.
        inputs = Input(shape=(self.state_size,))
        net = Dense(64, activation='relu')(inputs)
        net = Dense(64, activation='relu')(net)
        # Output layer has the same dimensionality as the action space
        preds = Dense(self.action_size, activation='linear')(net)
        model = Model(inputs=inputs, outputs=preds)

        opt = Adam(lr=self.learning_rate)
        model.compile(optimizer=opt, loss='mean_squared_error')

        return model

    def update_target_model(self):
        """Copy the weights of the model being trained into the target model."""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, state):
        """Choose an action with the epsilon-greedy policy of the current network.

        Args:
            state: Observation in the shape expected by ``model.predict``.

        Returns:
            A random action index with probability ``epsilon``, otherwise the
            index of the largest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model.predict(state)
            return np.argmax(q_value[0])

    def replay_memory(self, state, action, reward, next_state, done):
        """Save <s, a, r, s', done> to replay memory and decay epsilon.

        Action 2 is stored as index 1, so the stored action is a valid index
        into the network output when the caller passes environment actions
        0 and 2.
        """
        if action == 2:
            action = 1
        # Stored as a list (not a tuple) so callers can rewrite the reward later
        self.memory.append([state, action, reward, next_state, done])
        if self.epsilon > self.epsilon_min:
            self.epsilon -= self.epsilon_decay

    def train_replay(self):
        """Train the model on a random sample of batch_size transitions from replay memory.

        Does nothing until at least ``train_start`` transitions are stored.
        """
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        update_input = np.zeros((batch_size, self.state_size))
        actions = np.zeros(batch_size, dtype=int)
        rewards = np.zeros(batch_size)
        for i, (state, action, reward, _next_state, _done) in enumerate(mini_batch):
            update_input[i] = np.reshape(state, self.state_size)
            actions[i] = action
            rewards[i] = reward

        # One batched forward pass instead of one predict() call per sample.
        update_target = np.array(self.model.predict(update_input), dtype=float)

        # The target for the action taken is the stored reward only; the other
        # outputs keep the network's own predictions so they add no error.
        # NOTE(review): there is no bootstrapped term
        # (reward + discount_factor * max Q_target(s')), so target_model and the
        # stored next_state/done are unused here. This appears deliberate (the
        # training loop rewrites stored rewards) - confirm.
        update_target[np.arange(batch_size), actions] = rewards

        self.model.fit(update_input, update_target, batch_size=batch_size, epochs=1, verbose=0)

    def load_model(self, name):
        """Load network weights from ``name`` and sync the target model.

        Model instances expose ``load_weights`` rather than ``load_model``.
        NOTE(review): assumes ``name`` was written by ``save_model`` in a
        format ``load_weights`` accepts (e.g. Keras HDF5) - confirm.
        """
        self.model.load_weights(name)
        self.update_target_model()

    def save_model(self, name):
        """Save the model being trained to ``name``."""
        self.model.save(name)

In [4]:
# Run configuration: number of training episodes and size of the network's
# action output (the training loop maps its two indices to env actions 0 and 2).
N_EPISODES = 4000
ACTION_SIZE = 2

# Environment and agent
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0]  # should be equal 2
agent = DeepQAgent(state_size, ACTION_SIZE)
# agent.load_model("./save_model/<your_saved_model_name>")

# Per-episode history filled in by the training loop
scores = []
episodes = []

In [5]:
# Main training loop: play N_EPISODES episodes, storing every transition in the
# agent's replay memory and training on a random mini-batch after each step.
for e in range(N_EPISODES):
    done = False
    score = 0
    state = env.reset()
    state = np.reshape(state, [1, state_size])

    # Environment action actually sent to env.step: 0 or 2. The agent's two
    # network outputs (0, 1) are mapped onto these so that environment action 1
    # (do nothing) is never taken.
    fake_action = 0

    # Steps elapsed since the agent last picked an action
    action_count = 0

    # Number of transitions stored for this episode so far
    counter = 0

    # Action-repeat schedule: early episodes hold each chosen action for more
    # steps; from episode 500 on the agent picks a new action every step.
    if e < 100:
        window = 8
    elif e < 150:
        window = 6
    elif e < 250:
        window = 4
    elif e < 500:
        window = 2
    else:
        window = 1

    while not done:
        if agent.render:
            env.render()

        # Select a new action only every `window` steps; otherwise repeat the
        # previous one
        action_count = action_count + 1

        if action_count == window:
            action = agent.get_action(state)
            action_count = 0

            if action == 0:
                fake_action = 0
            elif action == 1:
                fake_action = 2

        # Take 1 step with the selected action
        next_state, reward, done, info = env.step(fake_action)
        next_state = np.reshape(next_state, [1, state_size])

        # Save <s, a, r, s'> to replay memory
        agent.replay_memory(state, fake_action, reward, next_state, done)
        # Counted before the end-of-episode handling so that it equals the
        # number of transitions this episode has stored
        counter += 1
        # Continue to learn every time step
        agent.train_replay()
        score += reward
        state = next_state

        if done:
            # Copy the learning model to the target model after each episode
            agent.update_target_model()

            # Record the episode score for later plotting
            scores.append(score)
            episodes.append(e)
            print('episode: {:4d}, score: {:.4f}, memory length: {:5d}, epsilon: {:.4f}'.format(e, score, len(agent.memory), agent.epsilon), end='\r')

            # NOTE(review): score > -200 presumably means the episode ended
            # before the environment's step limit - confirm.
            if score > -200:
                # Rewrite the reward of every transition of this episode:
                # the last one gets discount**1, the one before discount**2, ...
                for i in range(1, counter + 1):
                    agent.memory[-i][2] = agent.discount_factor ** i
                print('episode: {:4d}, score: {:.4f}, memory length: {:5d}, epsilon: {:.4f}'.format(e, score, len(agent.memory), agent.epsilon))

    # Save model every 50 episodes
    if e % 50 == 0:
        agent.save_model("./save_model/ivanushka")

episode:   12, score: -140.0000, memory length:  2540, epsilon: 0.9495
episode:   18, score: -172.0000, memory length:  3712, epsilon: 0.9261
episode:   33, score: -186.0000, memory length:  6698, epsilon: 0.8667
episode:   37, score: -168.0000, memory length:  7466, epsilon: 0.8514
episode:   53, score: -87.0000, memory length: 10553, epsilon: 0.79007
episode:   60, score: -145.0000, memory length: 11898, epsilon: 0.7632
episode:   67, score: -179.0000, memory length: 13277, epsilon: 0.7358
episode:   73, score: -170.0000, memory length: 14447, epsilon: 0.7125
episode:   76, score: -167.0000, memory length: 15014, epsilon: 0.7012
episode:   80, score: -152.0000, memory length: 15766, epsilon: 0.6863
episode:   85, score: -189.0000, memory length: 16755, epsilon: 0.6666
episode:   91, score: -148.0000, memory length: 17903, epsilon: 0.6437
episode:  105, score: -173.0000, memory length: 20000, epsilon: 0.5885
episode:  108, score: -171.0000, memory length: 20000, epsilon: 0.5772
episod

episode:  250, score: -92.0000, memory length: 20000, epsilon: 0.0905
episode:  251, score: -121.0000, memory length: 20000, epsilon: 0.0881
episode:  252, score: -171.0000, memory length: 20000, epsilon: 0.0847
episode:  253, score: -166.0000, memory length: 20000, epsilon: 0.0814
episode:  254, score: -84.0000, memory length: 20000, epsilon: 0.0797
episode:  255, score: -176.0000, memory length: 20000, epsilon: 0.0762
episode:  256, score: -164.0000, memory length: 20000, epsilon: 0.0729
episode:  257, score: -179.0000, memory length: 20000, epsilon: 0.0694
episode:  258, score: -158.0000, memory length: 20000, epsilon: 0.0662
episode:  259, score: -164.0000, memory length: 20000, epsilon: 0.0630
episode:  260, score: -83.0000, memory length: 20000, epsilon: 0.0613
episode:  261, score: -161.0000, memory length: 20000, epsilon: 0.0581
episode:  262, score: -96.0000, memory length: 20000, epsilon: 0.0562
episode:  263, score: -174.0000, memory length: 20000, epsilon: 0.0527
episode:  

episode:  367, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  368, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode:  369, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode:  370, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode:  371, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode:  372, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode:  373, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode:  375, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode:  376, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode:  377, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode:  378, score: -123.0000, memory length: 20000, epsilon: 0.0050
episode:  379, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode:  380, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode:  381, score: -130.0000, memory length: 20000, epsilon: 0.0050
episod

episode:  488, score: -123.0000, memory length: 20000, epsilon: 0.0050
episode:  489, score: -124.0000, memory length: 20000, epsilon: 0.0050
episode:  490, score: -92.0000, memory length: 20000, epsilon: 0.0050
episode:  491, score: -165.0000, memory length: 20000, epsilon: 0.0050
episode:  492, score: -130.0000, memory length: 20000, epsilon: 0.0050
episode:  493, score: -128.0000, memory length: 20000, epsilon: 0.0050
episode:  494, score: -156.0000, memory length: 20000, epsilon: 0.0050
episode:  495, score: -161.0000, memory length: 20000, epsilon: 0.0050
episode:  496, score: -176.0000, memory length: 20000, epsilon: 0.0050
episode:  497, score: -163.0000, memory length: 20000, epsilon: 0.0050
episode:  498, score: -185.0000, memory length: 20000, epsilon: 0.0050
episode:  499, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode:  500, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode:  501, score: -133.0000, memory length: 20000, epsilon: 0.0050
episode

episode:  607, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode:  608, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:  609, score: -101.0000, memory length: 20000, epsilon: 0.0050
episode:  610, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode:  611, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  612, score: -88.0000, memory length: 20000, epsilon: 0.0050
episode:  613, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  614, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  615, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  616, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode:  617, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:  618, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  619, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode:  620, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode

episode:  723, score: -100.0000, memory length: 20000, epsilon: 0.0050
episode:  724, score: -86.0000, memory length: 20000, epsilon: 0.0050
episode:  725, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  726, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode:  727, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  728, score: -150.0000, memory length: 20000, epsilon: 0.0050
episode:  729, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode:  730, score: -156.0000, memory length: 20000, epsilon: 0.0050
episode:  731, score: -98.0000, memory length: 20000, epsilon: 0.0050
episode:  732, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:  733, score: -100.0000, memory length: 20000, epsilon: 0.0050
episode:  734, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode:  735, score: -170.0000, memory length: 20000, epsilon: 0.0050
episode:  736, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode:

episode:  840, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode:  841, score: -149.0000, memory length: 20000, epsilon: 0.0050
episode:  842, score: -90.0000, memory length: 20000, epsilon: 0.0050
episode:  843, score: -100.0000, memory length: 20000, epsilon: 0.0050
episode:  844, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  845, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode:  846, score: -145.0000, memory length: 20000, epsilon: 0.0050
episode:  847, score: -115.0000, memory length: 20000, epsilon: 0.0050
episode:  848, score: -105.0000, memory length: 20000, epsilon: 0.0050
episode:  849, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode:  850, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode:  851, score: -150.0000, memory length: 20000, epsilon: 0.0050
episode:  852, score: -98.0000, memory length: 20000, epsilon: 0.0050
episode:  853, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:

episode:  959, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  960, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  961, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:  962, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  963, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:  964, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  965, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  966, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode:  967, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode:  968, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode:  969, score: -105.0000, memory length: 20000, epsilon: 0.0050
episode:  970, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode:  971, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode:  972, score: -95.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 1076, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 1077, score: -154.0000, memory length: 20000, epsilon: 0.0050
episode: 1078, score: -105.0000, memory length: 20000, epsilon: 0.0050
episode: 1079, score: -156.0000, memory length: 20000, epsilon: 0.0050
episode: 1080, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 1081, score: -159.0000, memory length: 20000, epsilon: 0.0050
episode: 1082, score: -149.0000, memory length: 20000, epsilon: 0.0050
episode: 1083, score: -176.0000, memory length: 20000, epsilon: 0.0050
episode: 1084, score: -167.0000, memory length: 20000, epsilon: 0.0050
episode: 1085, score: -156.0000, memory length: 20000, epsilon: 0.0050
episode: 1086, score: -153.0000, memory length: 20000, epsilon: 0.0050
episode: 1087, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 1088, score: -173.0000, memory length: 20000, epsilon: 0.0050
episode: 1089, score: -182.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 1194, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode: 1195, score: -154.0000, memory length: 20000, epsilon: 0.0050
episode: 1196, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 1197, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 1198, score: -155.0000, memory length: 20000, epsilon: 0.0050
episode: 1199, score: -159.0000, memory length: 20000, epsilon: 0.0050
episode: 1200, score: -155.0000, memory length: 20000, epsilon: 0.0050
episode: 1201, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 1202, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 1203, score: -149.0000, memory length: 20000, epsilon: 0.0050
episode: 1204, score: -153.0000, memory length: 20000, epsilon: 0.0050
episode: 1205, score: -144.0000, memory length: 20000, epsilon: 0.0050
episode: 1206, score: -124.0000, memory length: 20000, epsilon: 0.0050
episode: 1207, score: -103.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 1311, score: -156.0000, memory length: 20000, epsilon: 0.0050
episode: 1312, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 1313, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 1314, score: -99.0000, memory length: 20000, epsilon: 0.0050
episode: 1315, score: -140.0000, memory length: 20000, epsilon: 0.0050
episode: 1316, score: -148.0000, memory length: 20000, epsilon: 0.0050
episode: 1317, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 1318, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 1319, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode: 1320, score: -136.0000, memory length: 20000, epsilon: 0.0050
episode: 1321, score: -136.0000, memory length: 20000, epsilon: 0.0050
episode: 1322, score: -152.0000, memory length: 20000, epsilon: 0.0050
episode: 1323, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 1324, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 1430, score: -125.0000, memory length: 20000, epsilon: 0.0050
episode: 1431, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 1432, score: -99.0000, memory length: 20000, epsilon: 0.0050
episode: 1433, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode: 1434, score: -148.0000, memory length: 20000, epsilon: 0.0050
episode: 1435, score: -101.0000, memory length: 20000, epsilon: 0.0050
episode: 1436, score: -139.0000, memory length: 20000, epsilon: 0.0050
episode: 1437, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 1438, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode: 1439, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode: 1440, score: -97.0000, memory length: 20000, epsilon: 0.0050
episode: 1441, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode: 1442, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode: 1443, score: -130.0000, memory length: 20000, epsilon: 0.0050
episode:

episode: 1546, score: -126.0000, memory length: 20000, epsilon: 0.0050
episode: 1547, score: -92.0000, memory length: 20000, epsilon: 0.0050
episode: 1548, score: -136.0000, memory length: 20000, epsilon: 0.0050
episode: 1549, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode: 1550, score: -89.0000, memory length: 20000, epsilon: 0.0050
episode: 1551, score: -135.0000, memory length: 20000, epsilon: 0.0050
episode: 1552, score: -105.0000, memory length: 20000, epsilon: 0.0050
episode: 1553, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 1554, score: -132.0000, memory length: 20000, epsilon: 0.0050
episode: 1555, score: -135.0000, memory length: 20000, epsilon: 0.0050
episode: 1556, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode: 1557, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode: 1558, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode: 1559, score: -152.0000, memory length: 20000, epsilon: 0.0050
episode:

episode: 1666, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode: 1667, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 1668, score: -157.0000, memory length: 20000, epsilon: 0.0050
episode: 1669, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode: 1670, score: -159.0000, memory length: 20000, epsilon: 0.0050
episode: 1671, score: -87.0000, memory length: 20000, epsilon: 0.0050
episode: 1672, score: -169.0000, memory length: 20000, epsilon: 0.0050
episode: 1673, score: -145.0000, memory length: 20000, epsilon: 0.0050
episode: 1674, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 1675, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 1676, score: -98.0000, memory length: 20000, epsilon: 0.0050
episode: 1677, score: -100.0000, memory length: 20000, epsilon: 0.0050
episode: 1678, score: -105.0000, memory length: 20000, epsilon: 0.0050
episode: 1679, score: -162.0000, memory length: 20000, epsilon: 0.0050
episode:

episode: 1905, score: -173.0000, memory length: 20000, epsilon: 0.0050
episode: 1906, score: -155.0000, memory length: 20000, epsilon: 0.0050
episode: 1907, score: -129.0000, memory length: 20000, epsilon: 0.0050
episode: 1908, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 1909, score: -158.0000, memory length: 20000, epsilon: 0.0050
episode: 1910, score: -144.0000, memory length: 20000, epsilon: 0.0050
episode: 1911, score: -121.0000, memory length: 20000, epsilon: 0.0050
episode: 1912, score: -146.0000, memory length: 20000, epsilon: 0.0050
episode: 1913, score: -149.0000, memory length: 20000, epsilon: 0.0050
episode: 1914, score: -167.0000, memory length: 20000, epsilon: 0.0050
episode: 1915, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 1916, score: -90.0000, memory length: 20000, epsilon: 0.0050
episode: 1917, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 1918, score: -154.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 2024, score: -176.0000, memory length: 20000, epsilon: 0.0050
episode: 2025, score: -162.0000, memory length: 20000, epsilon: 0.0050
episode: 2026, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode: 2027, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 2028, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode: 2029, score: -89.0000, memory length: 20000, epsilon: 0.0050
episode: 2030, score: -117.0000, memory length: 20000, epsilon: 0.0050
episode: 2031, score: -139.0000, memory length: 20000, epsilon: 0.0050
episode: 2032, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode: 2033, score: -121.0000, memory length: 20000, epsilon: 0.0050
episode: 2034, score: -139.0000, memory length: 20000, epsilon: 0.0050
episode: 2035, score: -169.0000, memory length: 20000, epsilon: 0.0050
episode: 2036, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 2037, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 2140, score: -160.0000, memory length: 20000, epsilon: 0.0050
episode: 2141, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 2142, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 2143, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 2144, score: -89.0000, memory length: 20000, epsilon: 0.0050
episode: 2145, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 2146, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode: 2147, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 2148, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 2149, score: -115.0000, memory length: 20000, epsilon: 0.0050
episode: 2150, score: -117.0000, memory length: 20000, epsilon: 0.0050
episode: 2151, score: -115.0000, memory length: 20000, epsilon: 0.0050
episode: 2152, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 2153, score: -143.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 2260, score: -85.0000, memory length: 20000, epsilon: 0.0050
episode: 2261, score: -177.0000, memory length: 20000, epsilon: 0.0050
episode: 2262, score: -88.0000, memory length: 20000, epsilon: 0.0050
episode: 2263, score: -84.0000, memory length: 20000, epsilon: 0.0050
episode: 2264, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 2265, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 2266, score: -153.0000, memory length: 20000, epsilon: 0.0050
episode: 2267, score: -124.0000, memory length: 20000, epsilon: 0.0050
episode: 2268, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode: 2269, score: -144.0000, memory length: 20000, epsilon: 0.0050
episode: 2270, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 2271, score: -87.0000, memory length: 20000, epsilon: 0.0050
episode: 2272, score: -142.0000, memory length: 20000, epsilon: 0.0050
episode: 2273, score: -91.0000, memory length: 20000, epsilon: 0.0050
episode: 22

episode: 2376, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 2377, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode: 2378, score: -90.0000, memory length: 20000, epsilon: 0.0050
episode: 2379, score: -90.0000, memory length: 20000, epsilon: 0.0050
episode: 2380, score: -156.0000, memory length: 20000, epsilon: 0.0050
episode: 2381, score: -157.0000, memory length: 20000, epsilon: 0.0050
episode: 2382, score: -141.0000, memory length: 20000, epsilon: 0.0050
episode: 2383, score: -85.0000, memory length: 20000, epsilon: 0.0050
episode: 2384, score: -148.0000, memory length: 20000, epsilon: 0.0050
episode: 2385, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 2386, score: -144.0000, memory length: 20000, epsilon: 0.0050
episode: 2387, score: -85.0000, memory length: 20000, epsilon: 0.0050
episode: 2388, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 2389, score: -159.0000, memory length: 20000, epsilon: 0.0050
episode: 2

episode: 2494, score: -87.0000, memory length: 20000, epsilon: 0.0050
episode: 2495, score: -196.0000, memory length: 20000, epsilon: 0.0050
episode: 2496, score: -133.0000, memory length: 20000, epsilon: 0.0050
episode: 2497, score: -164.0000, memory length: 20000, epsilon: 0.0050
episode: 2498, score: -128.0000, memory length: 20000, epsilon: 0.0050
episode: 2499, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 2500, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode: 2501, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode: 2502, score: -117.0000, memory length: 20000, epsilon: 0.0050
episode: 2503, score: -98.0000, memory length: 20000, epsilon: 0.0050
episode: 2504, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 2505, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode: 2506, score: -146.0000, memory length: 20000, epsilon: 0.0050
episode: 2507, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode:

episode: 2629, score: -124.0000, memory length: 20000, epsilon: 0.0050
episode: 2630, score: -130.0000, memory length: 20000, epsilon: 0.0050
episode: 2631, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode: 2632, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 2633, score: -191.0000, memory length: 20000, epsilon: 0.0050
episode: 2634, score: -182.0000, memory length: 20000, epsilon: 0.0050
episode: 2635, score: -176.0000, memory length: 20000, epsilon: 0.0050
episode: 2636, score: -99.0000, memory length: 20000, epsilon: 0.0050
episode: 2637, score: -117.0000, memory length: 20000, epsilon: 0.0050
episode: 2638, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 2639, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode: 2640, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 2641, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 2642, score: -182.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 2749, score: -159.0000, memory length: 20000, epsilon: 0.0050
episode: 2750, score: -147.0000, memory length: 20000, epsilon: 0.0050
episode: 2751, score: -150.0000, memory length: 20000, epsilon: 0.0050
episode: 2752, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode: 2753, score: -146.0000, memory length: 20000, epsilon: 0.0050
episode: 2754, score: -144.0000, memory length: 20000, epsilon: 0.0050
episode: 2755, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 2756, score: -117.0000, memory length: 20000, epsilon: 0.0050
episode: 2757, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode: 2758, score: -128.0000, memory length: 20000, epsilon: 0.0050
episode: 2759, score: -153.0000, memory length: 20000, epsilon: 0.0050
episode: 2760, score: -121.0000, memory length: 20000, epsilon: 0.0050
episode: 2761, score: -139.0000, memory length: 20000, epsilon: 0.0050
episode: 2762, score: -141.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 2867, score: -119.0000, memory length: 20000, epsilon: 0.0050
episode: 2868, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 2869, score: -154.0000, memory length: 20000, epsilon: 0.0050
episode: 2870, score: -119.0000, memory length: 20000, epsilon: 0.0050
episode: 2871, score: -199.0000, memory length: 20000, epsilon: 0.0050
episode: 2872, score: -129.0000, memory length: 20000, epsilon: 0.0050
episode: 2873, score: -129.0000, memory length: 20000, epsilon: 0.0050
episode: 2874, score: -115.0000, memory length: 20000, epsilon: 0.0050
episode: 2875, score: -155.0000, memory length: 20000, epsilon: 0.0050
episode: 2876, score: -130.0000, memory length: 20000, epsilon: 0.0050
episode: 2877, score: -126.0000, memory length: 20000, epsilon: 0.0050
episode: 2878, score: -165.0000, memory length: 20000, epsilon: 0.0050
episode: 2879, score: -123.0000, memory length: 20000, epsilon: 0.0050
episode: 2880, score: -116.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 2984, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 2985, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 2986, score: -128.0000, memory length: 20000, epsilon: 0.0050
episode: 2987, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 2988, score: -134.0000, memory length: 20000, epsilon: 0.0050
episode: 2989, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 2990, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 2991, score: -117.0000, memory length: 20000, epsilon: 0.0050
episode: 2992, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 2993, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode: 2994, score: -135.0000, memory length: 20000, epsilon: 0.0050
episode: 2995, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 2996, score: -89.0000, memory length: 20000, epsilon: 0.0050
episode: 2997, score: -160.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 3100, score: -115.0000, memory length: 20000, epsilon: 0.0050
episode: 3101, score: -122.0000, memory length: 20000, epsilon: 0.0050
episode: 3102, score: -129.0000, memory length: 20000, epsilon: 0.0050
episode: 3103, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode: 3104, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 3105, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 3106, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 3107, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode: 3108, score: -119.0000, memory length: 20000, epsilon: 0.0050
episode: 3109, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 3110, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 3111, score: -102.0000, memory length: 20000, epsilon: 0.0050
episode: 3112, score: -104.0000, memory length: 20000, epsilon: 0.0050
episode: 3113, score: -106.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 3235, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 3236, score: -121.0000, memory length: 20000, epsilon: 0.0050
episode: 3237, score: -159.0000, memory length: 20000, epsilon: 0.0050
episode: 3238, score: -130.0000, memory length: 20000, epsilon: 0.0050
episode: 3239, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 3240, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode: 3241, score: -145.0000, memory length: 20000, epsilon: 0.0050
episode: 3242, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 3243, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 3244, score: -148.0000, memory length: 20000, epsilon: 0.0050
episode: 3246, score: -121.0000, memory length: 20000, epsilon: 0.0050
episode: 3247, score: -147.0000, memory length: 20000, epsilon: 0.0050
episode: 3248, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 3249, score: -166.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 3354, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 3355, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 3356, score: -91.0000, memory length: 20000, epsilon: 0.0050
episode: 3357, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 3358, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 3359, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 3360, score: -125.0000, memory length: 20000, epsilon: 0.0050
episode: 3361, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 3362, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode: 3363, score: -129.0000, memory length: 20000, epsilon: 0.0050
episode: 3364, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 3365, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 3366, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 3367, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 3475, score: -90.0000, memory length: 20000, epsilon: 0.0050
episode: 3476, score: -126.0000, memory length: 20000, epsilon: 0.0050
episode: 3477, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 3478, score: -136.0000, memory length: 20000, epsilon: 0.0050
episode: 3479, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode: 3480, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 3481, score: -179.0000, memory length: 20000, epsilon: 0.0050
episode: 3482, score: -157.0000, memory length: 20000, epsilon: 0.0050
episode: 3483, score: -107.0000, memory length: 20000, epsilon: 0.0050
episode: 3484, score: -127.0000, memory length: 20000, epsilon: 0.0050
episode: 3485, score: -157.0000, memory length: 20000, epsilon: 0.0050
episode: 3486, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 3487, score: -126.0000, memory length: 20000, epsilon: 0.0050
episode: 3488, score: -139.0000, memory length: 20000, epsilon: 0.0050
episode

episode: 3592, score: -129.0000, memory length: 20000, epsilon: 0.0050
episode: 3593, score: -131.0000, memory length: 20000, epsilon: 0.0050
episode: 3594, score: -136.0000, memory length: 20000, epsilon: 0.0050
episode: 3595, score: -143.0000, memory length: 20000, epsilon: 0.0050
episode: 3596, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 3597, score: -127.0000, memory length: 20000, epsilon: 0.0050
episode: 3598, score: -125.0000, memory length: 20000, epsilon: 0.0050
episode: 3599, score: -145.0000, memory length: 20000, epsilon: 0.0050
episode: 3600, score: -162.0000, memory length: 20000, epsilon: 0.0050
episode: 3601, score: -105.0000, memory length: 20000, epsilon: 0.0050
episode: 3602, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode: 3603, score: -106.0000, memory length: 20000, epsilon: 0.0050
episode: 3604, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 3605, score: -102.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 3709, score: -140.0000, memory length: 20000, epsilon: 0.0050
episode: 3710, score: -114.0000, memory length: 20000, epsilon: 0.0050
episode: 3711, score: -86.0000, memory length: 20000, epsilon: 0.0050
episode: 3712, score: -151.0000, memory length: 20000, epsilon: 0.0050
episode: 3713, score: -118.0000, memory length: 20000, epsilon: 0.0050
episode: 3714, score: -172.0000, memory length: 20000, epsilon: 0.0050
episode: 3715, score: -97.0000, memory length: 20000, epsilon: 0.0050
episode: 3716, score: -165.0000, memory length: 20000, epsilon: 0.0050
episode: 3717, score: -153.0000, memory length: 20000, epsilon: 0.0050
episode: 3718, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 3719, score: -179.0000, memory length: 20000, epsilon: 0.0050
episode: 3720, score: -149.0000, memory length: 20000, epsilon: 0.0050
episode: 3721, score: -133.0000, memory length: 20000, epsilon: 0.0050
episode: 3722, score: -158.0000, memory length: 20000, epsilon: 0.0050
episode:

episode: 3825, score: -127.0000, memory length: 20000, epsilon: 0.0050
episode: 3826, score: -109.0000, memory length: 20000, epsilon: 0.0050
episode: 3827, score: -143.0000, memory length: 20000, epsilon: 0.0050
episode: 3828, score: -108.0000, memory length: 20000, epsilon: 0.0050
episode: 3829, score: -160.0000, memory length: 20000, epsilon: 0.0050
episode: 3830, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 3831, score: -149.0000, memory length: 20000, epsilon: 0.0050
episode: 3834, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 3835, score: -138.0000, memory length: 20000, epsilon: 0.0050
episode: 3836, score: -127.0000, memory length: 20000, epsilon: 0.0050
episode: 3837, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 3839, score: -111.0000, memory length: 20000, epsilon: 0.0050
episode: 3841, score: -161.0000, memory length: 20000, epsilon: 0.0050
episode: 3842, score: -106.0000, memory length: 20000, epsilon: 0.0050
episod

episode: 3952, score: -142.0000, memory length: 20000, epsilon: 0.0050
episode: 3953, score: -162.0000, memory length: 20000, epsilon: 0.0050
episode: 3954, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 3955, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 3956, score: -120.0000, memory length: 20000, epsilon: 0.0050
episode: 3957, score: -112.0000, memory length: 20000, epsilon: 0.0050
episode: 3958, score: -103.0000, memory length: 20000, epsilon: 0.0050
episode: 3959, score: -157.0000, memory length: 20000, epsilon: 0.0050
episode: 3960, score: -116.0000, memory length: 20000, epsilon: 0.0050
episode: 3961, score: -155.0000, memory length: 20000, epsilon: 0.0050
episode: 3962, score: -113.0000, memory length: 20000, epsilon: 0.0050
episode: 3963, score: -189.0000, memory length: 20000, epsilon: 0.0050
episode: 3964, score: -110.0000, memory length: 20000, epsilon: 0.0050
episode: 3965, score: -104.0000, memory length: 20000, epsilon: 0.0050
episod