https://www.youtube.com/watch?v=OYhFoMySoVs

In [1]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os # for creating directories

2022-11-01 13:38:57.855053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
# Create the CartPole environment; the training loop below calls env.reset()/env.step() on it.
env = gym.make('CartPole-v0') # initialise environment


In [20]:
# Display the observation space: the Box shown below has shape (4,) and dtype float32.
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [21]:
# Length of one observation vector; passed to DQNAgent as the network's input width.
state_size = env.observation_space.shape[0]
state_size

4

In [22]:
# Number of discrete actions; passed to DQNAgent as the network's output width.
action_size = env.action_space.n
action_size

2

In [5]:
# Number of stored transitions sampled from replay memory on each agent.replay() call.
batch_size = 32

In [6]:
# Number of episodes (games) to play. 1001 rather than 1000 so that the
# "every 50 episodes" checkpoint in the training loop also fires at episode 1000.
n_episodes = 1001 # n games we want agent to play (default 1001)


In [7]:
# Directory (relative to the working directory) where weight checkpoints are saved.
# The trailing slash matters: the training loop builds file paths by string concatenation.
output_dir = 'model_output/cartpole/'


In [8]:

# Create the checkpoint directory together with any missing parents.
# exist_ok=True replaces the separate existence check, so there is no
# check-then-create race and the cell can be re-run safely.
os.makedirs(output_dir, exist_ok=True)

In [9]:
class DQNAgent:
    """Deep Q-learning agent with experience replay and epsilon-greedy exploration.

    A small fully connected network approximates the Q-value of every action
    for a given state. Transitions are stored in a bounded replay memory and
    the network is trained on random samples drawn from it.
    """

    def __init__(self, state_size, action_size):
        """Store the problem dimensions, set the hyperparameters and build the network.

        Args:
            state_size: number of values in one observation (network input width).
            action_size: number of discrete actions (network output width).
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000) # replay memory; once 2000 transitions are stored the oldest are discarded
        self.gamma = 0.95 # discount rate applied to the estimated value of the next state
        self.epsilon = 1.0 # exploration rate: probability of taking a random action (starts fully random)
        self.epsilon_decay = 0.995 # multiplicative decay applied to epsilon after every replay() call
        self.epsilon_min = 0.01 # floor below which epsilon is never allowed to drop
        self.learning_rate = 0.001 # step size used by the Adam optimizer
        self.model = self._build_model() # private method

    def _build_model(self):
        """Build and compile the Q-network: two 24-unit ReLU layers and a linear output per action."""
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu')) # 1st hidden layer; states as input
        model.add(Dense(24, activation='relu')) # 2nd hidden layer
        model.add(Dense(self.action_size, activation='linear')) # one linear output (Q-value estimate) per action
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by newer Keras releases.
        model.compile(loss='mse',
                      optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory for later training."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: observation in the batch-of-one form the network expects.

        Returns:
            A random action index with probability epsilon, otherwise the
            index of the action with the highest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon: # explore: take a random action
            return random.randrange(self.action_size)
        # verbose=0 keeps Keras from printing a progress bar for every single prediction
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0]) # exploit: action with the highest predicted value

    def replay(self, batch_size):
        """Train the network on a random minibatch from memory, then decay epsilon.

        Does nothing (no training, no epsilon decay) while the memory holds
        fewer than ``batch_size`` transitions, instead of letting
        ``random.sample`` raise ``ValueError``.

        Args:
            batch_size: number of stored transitions to sample and train on.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward # terminal transition: the target is just the reward
            if not done:
                # non-terminal: reward plus the discounted best Q-value predicted for the next state
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state, verbose=0)[0]))
            target_f = self.model.predict(state, verbose=0) # current predictions for every action
            target_f[0][action] = target # only the action actually taken gets a new target
            self.model.fit(state, target_f, epochs=1, verbose=0) # one gradient step towards the updated target
        if self.epsilon > self.epsilon_min:
            # clamp so the decay can never take epsilon below the configured minimum
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

In [10]:
# Build the agent (and its Q-network) for this environment's observation and action sizes.
agent = DQNAgent(state_size, action_size) # initialise agent

2022-11-01 13:39:08.254317: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(name, **kwargs)


In [11]:
# Training loop: play n_episodes episodes, store every transition in the agent's
# replay memory, train once after each episode and checkpoint every 50 episodes.
for e in range(n_episodes): # iterate over new episodes of the game
    reset_result = env.reset() # reset state at start of each new episode of the game
    # gym >= 0.26 returns (observation, info) from reset(); older releases return the observation alone
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
    state = np.reshape(state, [1, state_size]) # batch-of-one shape expected by the network

    for time in range(5000):  # upper bound on steps per episode; the environment may end the episode earlier
        # env.render()  # uncomment to watch the agent play
        action = agent.act(state) # epsilon-greedy choice between the available actions
        step_result = env.step(action) # agent interacts with env, gets feedback
        if len(step_result) == 5:
            # gym >= 0.26: failure (terminated) and time limit (truncated) are reported separately
            next_state, reward, terminated, truncated, _ = step_result
        else:
            # older gym: a single `done` flag; the TimeLimit wrapper marks cut-offs in `info`
            next_state, reward, done, info = step_result
            truncated = bool(info.get('TimeLimit.truncated', False))
            terminated = done and not truncated
        done = terminated or truncated
        # Penalise only a genuine failure. Hitting the time limit means the agent
        # succeeded, so it keeps the normal reward instead of receiving -10.
        reward = reward if not terminated else -10
        next_state = np.reshape(next_state, [1, state_size])
        # Store `terminated` as the terminal flag: a time-limit cut-off is not a
        # terminal state, so replay() should still bootstrap from next_state.
        agent.remember(state, action, reward, next_state, terminated)
        state = next_state # set "current state" for upcoming iteration to the current next state
        if done: # episode over: failure or time limit reached
            # `time` is the zero-based index of the last step, so an episode of N steps prints N - 1
            print("episode: {}/{}, score: {}, e: {:.2}" # print the episode's score and agent's epsilon
                  .format(e, n_episodes, time, agent.epsilon))
            break # exit loop
    if len(agent.memory) > batch_size:
        agent.replay(batch_size) # train the agent on a random minibatch of remembered transitions
    if e % 50 == 0:
        agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")

episode: 0/1001, score: 99, e: 1.0
episode: 1/1001, score: 13, e: 0.99
episode: 2/1001, score: 15, e: 0.99
episode: 3/1001, score: 24, e: 0.99
episode: 4/1001, score: 13, e: 0.98


episode: 5/1001, score: 21, e: 0.98
episode: 6/1001, score: 13, e: 0.97
episode: 7/1001, score: 23, e: 0.97
episode: 8/1001, score: 34, e: 0.96
episode: 9/1001, score: 34, e: 0.96


episode: 10/1001, score: 12, e: 0.95
episode: 11/1001, score: 21, e: 0.95
episode: 12/1001, score: 55, e: 0.94
episode: 13/1001, score: 16, e: 0.94
episode: 14/1001, score: 27, e: 0.93


episode: 15/1001, score: 11, e: 0.93
episode: 16/1001, score: 22, e: 0.92
episode: 17/1001, score: 40, e: 0.92
episode: 18/1001, score: 25, e: 0.91
episode: 19/1001, score: 10, e: 0.91


episode: 20/1001, score: 17, e: 0.9
episode: 21/1001, score: 37, e: 0.9
episode: 22/1001, score: 7, e: 0.9
episode: 23/1001, score: 11, e: 0.89
episode: 24/1001, score: 17, e: 0.89


episode: 25/1001, score: 14, e: 0.88
episode: 26/1001, score: 16, e: 0.88
episode: 27/1001, score: 34, e: 0.87
episode: 28/1001, score: 8, e: 0.87


episode: 29/1001, score: 11, e: 0.86
episode: 30/1001, score: 19, e: 0.86
episode: 31/1001, score: 41, e: 0.86
episode: 32/1001, score: 38, e: 0.85
episode: 33/1001, score: 18, e: 0.85


episode: 34/1001, score: 9, e: 0.84
episode: 35/1001, score: 13, e: 0.84
episode: 36/1001, score: 10, e: 0.83
episode: 37/1001, score: 10, e: 0.83
episode: 38/1001, score: 26, e: 0.83


episode: 39/1001, score: 18, e: 0.82
episode: 40/1001, score: 10, e: 0.82
episode: 41/1001, score: 45, e: 0.81
episode: 42/1001, score: 15, e: 0.81


episode: 43/1001, score: 18, e: 0.81
episode: 44/1001, score: 9, e: 0.8
episode: 45/1001, score: 30, e: 0.8
episode: 46/1001, score: 16, e: 0.79
episode: 47/1001, score: 18, e: 0.79


episode: 48/1001, score: 13, e: 0.79
episode: 49/1001, score: 34, e: 0.78
episode: 50/1001, score: 23, e: 0.78
episode: 51/1001, score: 14, e: 0.77
episode: 52/1001, score: 18, e: 0.77


episode: 53/1001, score: 9, e: 0.77
episode: 54/1001, score: 19, e: 0.76
episode: 55/1001, score: 9, e: 0.76
episode: 56/1001, score: 12, e: 0.76


episode: 57/1001, score: 18, e: 0.75
episode: 58/1001, score: 11, e: 0.75
episode: 59/1001, score: 15, e: 0.74
episode: 60/1001, score: 21, e: 0.74
episode: 61/1001, score: 24, e: 0.74


episode: 62/1001, score: 11, e: 0.73
episode: 63/1001, score: 15, e: 0.73
episode: 64/1001, score: 11, e: 0.73
episode: 65/1001, score: 8, e: 0.72
episode: 66/1001, score: 11, e: 0.72


episode: 67/1001, score: 9, e: 0.71
episode: 68/1001, score: 9, e: 0.71
episode: 69/1001, score: 9, e: 0.71
episode: 70/1001, score: 52, e: 0.7


episode: 71/1001, score: 16, e: 0.7
episode: 72/1001, score: 16, e: 0.7
episode: 73/1001, score: 11, e: 0.69
episode: 74/1001, score: 24, e: 0.69
episode: 75/1001, score: 26, e: 0.69


episode: 76/1001, score: 46, e: 0.68
episode: 77/1001, score: 9, e: 0.68
episode: 78/1001, score: 11, e: 0.68
episode: 79/1001, score: 24, e: 0.67


episode: 80/1001, score: 12, e: 0.67
episode: 81/1001, score: 24, e: 0.67
episode: 82/1001, score: 23, e: 0.66
episode: 83/1001, score: 11, e: 0.66
episode: 84/1001, score: 24, e: 0.66


episode: 85/1001, score: 14, e: 0.65
episode: 86/1001, score: 13, e: 0.65
episode: 87/1001, score: 10, e: 0.65
episode: 88/1001, score: 11, e: 0.64


episode: 89/1001, score: 27, e: 0.64
episode: 90/1001, score: 10, e: 0.64
episode: 91/1001, score: 11, e: 0.63
episode: 92/1001, score: 12, e: 0.63
episode: 93/1001, score: 25, e: 0.63


episode: 94/1001, score: 21, e: 0.62
episode: 95/1001, score: 19, e: 0.62
episode: 96/1001, score: 11, e: 0.62
episode: 97/1001, score: 24, e: 0.61


episode: 98/1001, score: 16, e: 0.61
episode: 99/1001, score: 11, e: 0.61
episode: 100/1001, score: 11, e: 0.61
episode: 101/1001, score: 10, e: 0.6
episode: 102/1001, score: 9, e: 0.6


episode: 103/1001, score: 8, e: 0.6
episode: 104/1001, score: 14, e: 0.59
episode: 105/1001, score: 11, e: 0.59
episode: 106/1001, score: 17, e: 0.59
episode: 107/1001, score: 14, e: 0.58


episode: 108/1001, score: 14, e: 0.58
episode: 109/1001, score: 10, e: 0.58
episode: 110/1001, score: 10, e: 0.58
episode: 111/1001, score: 15, e: 0.57


episode: 112/1001, score: 11, e: 0.57
episode: 113/1001, score: 11, e: 0.57
episode: 114/1001, score: 13, e: 0.56
episode: 115/1001, score: 9, e: 0.56
episode: 116/1001, score: 8, e: 0.56


episode: 117/1001, score: 8, e: 0.56
episode: 118/1001, score: 12, e: 0.55
episode: 119/1001, score: 10, e: 0.55
episode: 120/1001, score: 9, e: 0.55
episode: 121/1001, score: 19, e: 0.55


episode: 122/1001, score: 11, e: 0.54
episode: 123/1001, score: 14, e: 0.54
episode: 124/1001, score: 11, e: 0.54
episode: 125/1001, score: 11, e: 0.53


episode: 126/1001, score: 9, e: 0.53
episode: 127/1001, score: 13, e: 0.53
episode: 128/1001, score: 19, e: 0.53
episode: 129/1001, score: 8, e: 0.52
episode: 130/1001, score: 13, e: 0.52


episode: 131/1001, score: 12, e: 0.52
episode: 132/1001, score: 9, e: 0.52
episode: 133/1001, score: 14, e: 0.51
episode: 134/1001, score: 10, e: 0.51


episode: 135/1001, score: 9, e: 0.51
episode: 136/1001, score: 24, e: 0.51
episode: 137/1001, score: 9, e: 0.5
episode: 138/1001, score: 10, e: 0.5
episode: 139/1001, score: 10, e: 0.5


episode: 140/1001, score: 12, e: 0.5
episode: 141/1001, score: 11, e: 0.49
episode: 142/1001, score: 12, e: 0.49
episode: 143/1001, score: 15, e: 0.49


episode: 144/1001, score: 9, e: 0.49
episode: 145/1001, score: 21, e: 0.48
episode: 146/1001, score: 15, e: 0.48
episode: 147/1001, score: 10, e: 0.48
episode: 148/1001, score: 8, e: 0.48


episode: 149/1001, score: 11, e: 0.47
episode: 150/1001, score: 8, e: 0.47
episode: 151/1001, score: 10, e: 0.47
episode: 152/1001, score: 10, e: 0.47


episode: 153/1001, score: 10, e: 0.46
episode: 154/1001, score: 22, e: 0.46
episode: 155/1001, score: 10, e: 0.46
episode: 156/1001, score: 11, e: 0.46
episode: 157/1001, score: 13, e: 0.46


episode: 158/1001, score: 8, e: 0.45
episode: 159/1001, score: 9, e: 0.45
episode: 160/1001, score: 14, e: 0.45
episode: 161/1001, score: 9, e: 0.45


episode: 162/1001, score: 10, e: 0.44
episode: 163/1001, score: 10, e: 0.44
episode: 164/1001, score: 19, e: 0.44
episode: 165/1001, score: 9, e: 0.44
episode: 166/1001, score: 14, e: 0.44


episode: 167/1001, score: 10, e: 0.43
episode: 168/1001, score: 10, e: 0.43
episode: 169/1001, score: 11, e: 0.43
episode: 170/1001, score: 9, e: 0.43


episode: 171/1001, score: 22, e: 0.42
episode: 172/1001, score: 15, e: 0.42
episode: 173/1001, score: 11, e: 0.42
episode: 174/1001, score: 12, e: 0.42
episode: 175/1001, score: 8, e: 0.42


episode: 176/1001, score: 13, e: 0.41
episode: 177/1001, score: 8, e: 0.41
episode: 178/1001, score: 14, e: 0.41
episode: 179/1001, score: 10, e: 0.41


episode: 180/1001, score: 10, e: 0.41
episode: 181/1001, score: 20, e: 0.4
episode: 182/1001, score: 31, e: 0.4
episode: 183/1001, score: 29, e: 0.4


episode: 184/1001, score: 10, e: 0.4
episode: 185/1001, score: 8, e: 0.4
episode: 186/1001, score: 19, e: 0.39
episode: 187/1001, score: 43, e: 0.39


episode: 188/1001, score: 64, e: 0.39
episode: 189/1001, score: 14, e: 0.39
episode: 190/1001, score: 13, e: 0.39
episode: 191/1001, score: 9, e: 0.38


episode: 192/1001, score: 11, e: 0.38
episode: 193/1001, score: 9, e: 0.38
episode: 194/1001, score: 12, e: 0.38
episode: 195/1001, score: 10, e: 0.38
episode: 196/1001, score: 8, e: 0.37


episode: 197/1001, score: 9, e: 0.37
episode: 198/1001, score: 10, e: 0.37
episode: 199/1001, score: 11, e: 0.37
episode: 200/1001, score: 14, e: 0.37


episode: 201/1001, score: 29, e: 0.37
episode: 202/1001, score: 21, e: 0.36
episode: 203/1001, score: 17, e: 0.36
episode: 204/1001, score: 19, e: 0.36
episode: 205/1001, score: 23, e: 0.36


episode: 206/1001, score: 15, e: 0.36
episode: 207/1001, score: 10, e: 0.35
episode: 208/1001, score: 43, e: 0.35


episode: 209/1001, score: 39, e: 0.35
episode: 210/1001, score: 36, e: 0.35
episode: 211/1001, score: 21, e: 0.35
episode: 212/1001, score: 29, e: 0.35


episode: 213/1001, score: 31, e: 0.34
episode: 214/1001, score: 75, e: 0.34
episode: 215/1001, score: 38, e: 0.34
episode: 216/1001, score: 36, e: 0.34


episode: 217/1001, score: 30, e: 0.34
episode: 218/1001, score: 34, e: 0.34
episode: 219/1001, score: 15, e: 0.33
episode: 220/1001, score: 18, e: 0.33


episode: 221/1001, score: 21, e: 0.33
episode: 222/1001, score: 12, e: 0.33
episode: 223/1001, score: 21, e: 0.33
episode: 224/1001, score: 26, e: 0.33


episode: 225/1001, score: 45, e: 0.32


episode: 226/1001, score: 38, e: 0.32
episode: 227/1001, score: 18, e: 0.32
episode: 228/1001, score: 11, e: 0.32
episode: 229/1001, score: 27, e: 0.32


episode: 230/1001, score: 19, e: 0.32
episode: 231/1001, score: 12, e: 0.31
episode: 232/1001, score: 28, e: 0.31
episode: 233/1001, score: 11, e: 0.31


episode: 234/1001, score: 47, e: 0.31
episode: 235/1001, score: 14, e: 0.31
episode: 236/1001, score: 9, e: 0.31
episode: 237/1001, score: 11, e: 0.3
episode: 238/1001, score: 10, e: 0.3


episode: 239/1001, score: 10, e: 0.3
episode: 240/1001, score: 8, e: 0.3
episode: 241/1001, score: 10, e: 0.3
episode: 242/1001, score: 12, e: 0.3


episode: 243/1001, score: 15, e: 0.3
episode: 244/1001, score: 40, e: 0.29
episode: 245/1001, score: 22, e: 0.29
episode: 246/1001, score: 13, e: 0.29


episode: 247/1001, score: 7, e: 0.29
episode: 248/1001, score: 8, e: 0.29
episode: 249/1001, score: 7, e: 0.29
episode: 250/1001, score: 15, e: 0.29
episode: 251/1001, score: 12, e: 0.28


episode: 252/1001, score: 14, e: 0.28
episode: 253/1001, score: 13, e: 0.28
episode: 254/1001, score: 10, e: 0.28
episode: 255/1001, score: 16, e: 0.28


episode: 256/1001, score: 13, e: 0.28
episode: 257/1001, score: 11, e: 0.28
episode: 258/1001, score: 8, e: 0.27
episode: 259/1001, score: 10, e: 0.27


episode: 260/1001, score: 12, e: 0.27
episode: 261/1001, score: 15, e: 0.27
episode: 262/1001, score: 33, e: 0.27
episode: 263/1001, score: 12, e: 0.27


episode: 264/1001, score: 15, e: 0.27
episode: 265/1001, score: 12, e: 0.26
episode: 266/1001, score: 19, e: 0.26
episode: 267/1001, score: 23, e: 0.26


episode: 268/1001, score: 25, e: 0.26
episode: 269/1001, score: 18, e: 0.26
episode: 270/1001, score: 21, e: 0.26
episode: 271/1001, score: 39, e: 0.26


episode: 272/1001, score: 24, e: 0.26
episode: 273/1001, score: 45, e: 0.25
episode: 274/1001, score: 17, e: 0.25
episode: 275/1001, score: 12, e: 0.25


episode: 276/1001, score: 17, e: 0.25
episode: 277/1001, score: 13, e: 0.25
episode: 278/1001, score: 12, e: 0.25
episode: 279/1001, score: 12, e: 0.25


episode: 280/1001, score: 65, e: 0.25
episode: 281/1001, score: 49, e: 0.24
episode: 282/1001, score: 25, e: 0.24


episode: 283/1001, score: 16, e: 0.24
episode: 284/1001, score: 23, e: 0.24
episode: 285/1001, score: 39, e: 0.24
episode: 286/1001, score: 52, e: 0.24


episode: 287/1001, score: 22, e: 0.24
episode: 288/1001, score: 27, e: 0.24
episode: 289/1001, score: 13, e: 0.23
episode: 290/1001, score: 22, e: 0.23


episode: 291/1001, score: 31, e: 0.23
episode: 292/1001, score: 51, e: 0.23
episode: 293/1001, score: 52, e: 0.23


episode: 294/1001, score: 60, e: 0.23
episode: 295/1001, score: 22, e: 0.23
episode: 296/1001, score: 62, e: 0.23


episode: 297/1001, score: 20, e: 0.23
episode: 298/1001, score: 14, e: 0.22
episode: 299/1001, score: 9, e: 0.22
episode: 300/1001, score: 19, e: 0.22


episode: 301/1001, score: 16, e: 0.22
episode: 302/1001, score: 18, e: 0.22
episode: 303/1001, score: 20, e: 0.22
episode: 304/1001, score: 37, e: 0.22


episode: 305/1001, score: 47, e: 0.22
episode: 306/1001, score: 22, e: 0.22
episode: 307/1001, score: 67, e: 0.21


episode: 308/1001, score: 55, e: 0.21
episode: 309/1001, score: 41, e: 0.21
episode: 310/1001, score: 31, e: 0.21
episode: 311/1001, score: 25, e: 0.21


episode: 312/1001, score: 52, e: 0.21


episode: 313/1001, score: 28, e: 0.21
episode: 314/1001, score: 13, e: 0.21
episode: 315/1001, score: 56, e: 0.21
episode: 316/1001, score: 39, e: 0.21


episode: 317/1001, score: 44, e: 0.2


episode: 318/1001, score: 44, e: 0.2
episode: 319/1001, score: 45, e: 0.2
episode: 320/1001, score: 30, e: 0.2
episode: 321/1001, score: 18, e: 0.2


episode: 322/1001, score: 21, e: 0.2
episode: 323/1001, score: 16, e: 0.2
episode: 324/1001, score: 13, e: 0.2
episode: 325/1001, score: 24, e: 0.2


episode: 326/1001, score: 10, e: 0.2
episode: 327/1001, score: 23, e: 0.19
episode: 328/1001, score: 16, e: 0.19
episode: 329/1001, score: 19, e: 0.19


episode: 330/1001, score: 93, e: 0.19
episode: 331/1001, score: 21, e: 0.19


episode: 332/1001, score: 43, e: 0.19
episode: 333/1001, score: 33, e: 0.19
episode: 334/1001, score: 13, e: 0.19
episode: 335/1001, score: 24, e: 0.19


episode: 336/1001, score: 80, e: 0.19
episode: 337/1001, score: 29, e: 0.18
episode: 338/1001, score: 25, e: 0.18


episode: 339/1001, score: 29, e: 0.18
episode: 340/1001, score: 23, e: 0.18
episode: 341/1001, score: 21, e: 0.18
episode: 342/1001, score: 25, e: 0.18


episode: 343/1001, score: 16, e: 0.18
episode: 344/1001, score: 20, e: 0.18
episode: 345/1001, score: 18, e: 0.18
episode: 346/1001, score: 57, e: 0.18


episode: 347/1001, score: 32, e: 0.18
episode: 348/1001, score: 17, e: 0.17
episode: 349/1001, score: 35, e: 0.17


episode: 350/1001, score: 74, e: 0.17
episode: 351/1001, score: 21, e: 0.17
episode: 352/1001, score: 40, e: 0.17


episode: 353/1001, score: 37, e: 0.17
episode: 354/1001, score: 65, e: 0.17
episode: 355/1001, score: 48, e: 0.17


episode: 356/1001, score: 38, e: 0.17
episode: 357/1001, score: 38, e: 0.17
episode: 358/1001, score: 36, e: 0.17


episode: 359/1001, score: 53, e: 0.17
episode: 360/1001, score: 55, e: 0.16
episode: 361/1001, score: 91, e: 0.16


episode: 362/1001, score: 45, e: 0.16
episode: 363/1001, score: 23, e: 0.16
episode: 364/1001, score: 33, e: 0.16
episode: 365/1001, score: 30, e: 0.16


episode: 366/1001, score: 25, e: 0.16
episode: 367/1001, score: 30, e: 0.16
episode: 368/1001, score: 16, e: 0.16


episode: 369/1001, score: 22, e: 0.16
episode: 370/1001, score: 68, e: 0.16
episode: 371/1001, score: 52, e: 0.16


episode: 372/1001, score: 35, e: 0.15
episode: 373/1001, score: 36, e: 0.15
episode: 374/1001, score: 24, e: 0.15
episode: 375/1001, score: 32, e: 0.15


episode: 376/1001, score: 65, e: 0.15
episode: 377/1001, score: 25, e: 0.15
episode: 378/1001, score: 49, e: 0.15


episode: 379/1001, score: 174, e: 0.15
episode: 380/1001, score: 30, e: 0.15


episode: 381/1001, score: 18, e: 0.15
episode: 382/1001, score: 20, e: 0.15
episode: 383/1001, score: 40, e: 0.15
episode: 384/1001, score: 19, e: 0.15


episode: 385/1001, score: 17, e: 0.15
episode: 386/1001, score: 40, e: 0.14
episode: 387/1001, score: 26, e: 0.14


episode: 388/1001, score: 16, e: 0.14
episode: 389/1001, score: 20, e: 0.14
episode: 390/1001, score: 15, e: 0.14
episode: 391/1001, score: 18, e: 0.14


episode: 392/1001, score: 19, e: 0.14
episode: 393/1001, score: 21, e: 0.14
episode: 394/1001, score: 11, e: 0.14
episode: 395/1001, score: 18, e: 0.14


episode: 396/1001, score: 15, e: 0.14
episode: 397/1001, score: 11, e: 0.14
episode: 398/1001, score: 11, e: 0.14
episode: 399/1001, score: 17, e: 0.14


episode: 400/1001, score: 26, e: 0.13
episode: 401/1001, score: 52, e: 0.13
episode: 402/1001, score: 39, e: 0.13
episode: 403/1001, score: 29, e: 0.13


episode: 404/1001, score: 17, e: 0.13
episode: 405/1001, score: 13, e: 0.13
episode: 406/1001, score: 21, e: 0.13


episode: 407/1001, score: 33, e: 0.13
episode: 408/1001, score: 19, e: 0.13
episode: 409/1001, score: 14, e: 0.13
episode: 410/1001, score: 12, e: 0.13


episode: 411/1001, score: 65, e: 0.13


episode: 412/1001, score: 82, e: 0.13
episode: 413/1001, score: 59, e: 0.13
episode: 414/1001, score: 77, e: 0.13


episode: 415/1001, score: 78, e: 0.12


episode: 416/1001, score: 43, e: 0.12
episode: 417/1001, score: 57, e: 0.12
episode: 418/1001, score: 58, e: 0.12


episode: 419/1001, score: 28, e: 0.12
episode: 420/1001, score: 18, e: 0.12
episode: 421/1001, score: 20, e: 0.12
episode: 422/1001, score: 23, e: 0.12


episode: 423/1001, score: 17, e: 0.12
episode: 424/1001, score: 15, e: 0.12
episode: 425/1001, score: 34, e: 0.12


episode: 426/1001, score: 21, e: 0.12
episode: 427/1001, score: 30, e: 0.12
episode: 428/1001, score: 34, e: 0.12
episode: 429/1001, score: 55, e: 0.12


episode: 430/1001, score: 94, e: 0.12


episode: 431/1001, score: 120, e: 0.12
episode: 432/1001, score: 23, e: 0.11


episode: 433/1001, score: 36, e: 0.11
episode: 434/1001, score: 59, e: 0.11
episode: 435/1001, score: 81, e: 0.11


episode: 436/1001, score: 109, e: 0.11


episode: 437/1001, score: 63, e: 0.11
episode: 438/1001, score: 25, e: 0.11
episode: 439/1001, score: 32, e: 0.11


episode: 440/1001, score: 44, e: 0.11
episode: 441/1001, score: 86, e: 0.11


episode: 442/1001, score: 61, e: 0.11
episode: 443/1001, score: 40, e: 0.11
episode: 444/1001, score: 44, e: 0.11


episode: 445/1001, score: 199, e: 0.11


episode: 446/1001, score: 144, e: 0.11
episode: 447/1001, score: 82, e: 0.11
episode: 448/1001, score: 56, e: 0.11


episode: 449/1001, score: 199, e: 0.11
episode: 450/1001, score: 29, e: 0.1


episode: 451/1001, score: 104, e: 0.1
episode: 452/1001, score: 73, e: 0.1


episode: 453/1001, score: 68, e: 0.1


episode: 454/1001, score: 39, e: 0.1
episode: 455/1001, score: 152, e: 0.1


episode: 456/1001, score: 46, e: 0.1
episode: 457/1001, score: 55, e: 0.1
episode: 458/1001, score: 62, e: 0.1


episode: 459/1001, score: 58, e: 0.1
episode: 460/1001, score: 54, e: 0.1
episode: 461/1001, score: 57, e: 0.099


episode: 462/1001, score: 38, e: 0.099


episode: 463/1001, score: 44, e: 0.098
episode: 464/1001, score: 104, e: 0.098
episode: 465/1001, score: 37, e: 0.097


episode: 466/1001, score: 28, e: 0.097


episode: 467/1001, score: 57, e: 0.096
episode: 468/1001, score: 46, e: 0.096
episode: 469/1001, score: 50, e: 0.095


episode: 470/1001, score: 45, e: 0.095
episode: 471/1001, score: 34, e: 0.094
episode: 472/1001, score: 28, e: 0.094


episode: 473/1001, score: 36, e: 0.093


episode: 474/1001, score: 181, e: 0.093
episode: 475/1001, score: 28, e: 0.092
episode: 476/1001, score: 35, e: 0.092


episode: 477/1001, score: 49, e: 0.092
episode: 478/1001, score: 42, e: 0.091
episode: 479/1001, score: 40, e: 0.091


episode: 480/1001, score: 57, e: 0.09


episode: 481/1001, score: 60, e: 0.09
episode: 482/1001, score: 41, e: 0.089
episode: 483/1001, score: 34, e: 0.089


episode: 484/1001, score: 39, e: 0.088
episode: 485/1001, score: 112, e: 0.088
episode: 486/1001, score: 34, e: 0.088


episode: 487/1001, score: 22, e: 0.087
episode: 488/1001, score: 20, e: 0.087
episode: 489/1001, score: 33, e: 0.086


episode: 490/1001, score: 119, e: 0.086


episode: 491/1001, score: 199, e: 0.085
episode: 492/1001, score: 94, e: 0.085
episode: 493/1001, score: 42, e: 0.084


episode: 494/1001, score: 76, e: 0.084
episode: 495/1001, score: 43, e: 0.084
episode: 496/1001, score: 44, e: 0.083


episode: 497/1001, score: 66, e: 0.083


episode: 498/1001, score: 24, e: 0.082
episode: 499/1001, score: 57, e: 0.082
episode: 500/1001, score: 32, e: 0.082


episode: 501/1001, score: 40, e: 0.081
episode: 502/1001, score: 48, e: 0.081
episode: 503/1001, score: 34, e: 0.08


episode: 504/1001, score: 33, e: 0.08
episode: 505/1001, score: 199, e: 0.08


episode: 506/1001, score: 57, e: 0.079
episode: 507/1001, score: 57, e: 0.079


episode: 508/1001, score: 58, e: 0.078
episode: 509/1001, score: 55, e: 0.078
episode: 510/1001, score: 80, e: 0.078


episode: 511/1001, score: 39, e: 0.077
episode: 512/1001, score: 27, e: 0.077
episode: 513/1001, score: 40, e: 0.076


episode: 514/1001, score: 62, e: 0.076
episode: 515/1001, score: 96, e: 0.076
episode: 516/1001, score: 37, e: 0.075


episode: 517/1001, score: 30, e: 0.075
episode: 518/1001, score: 71, e: 0.075


episode: 519/1001, score: 57, e: 0.074
episode: 520/1001, score: 42, e: 0.074
episode: 521/1001, score: 35, e: 0.073


episode: 522/1001, score: 46, e: 0.073
episode: 523/1001, score: 31, e: 0.073


episode: 524/1001, score: 199, e: 0.072
episode: 525/1001, score: 199, e: 0.072


episode: 526/1001, score: 51, e: 0.072


episode: 527/1001, score: 117, e: 0.071
episode: 528/1001, score: 30, e: 0.071
episode: 529/1001, score: 41, e: 0.071


episode: 530/1001, score: 59, e: 0.07
episode: 531/1001, score: 66, e: 0.07


episode: 532/1001, score: 43, e: 0.069


KeyboardInterrupt: 