In [1]:
class DQN:
    """Small fully connected Q-network built with the TensorFlow 1.x graph API.

    The graph maps a state vector of length ``input_size`` to ``output_size``
    Q-values through a single tanh hidden layer (weights only, no biases).
    The weight variables are created inside the variable scope ``name``.

    Args:
        session: TensorFlow session used to run prediction and training ops.
        input_size: Length of one state vector.
        output_size: Number of Q-values produced per state.
        name: Variable scope under which the weights ``W1`` and ``W2`` live.
        h_size: Width of the hidden layer.
        l_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, session, input_size, output_size, name="main",
                 h_size=10, l_rate=1e-1):
        self.session = session
        self.input_size = input_size
        self.output_size = output_size
        self.net_name = name

        self._build_network(h_size=h_size, l_rate=l_rate)

    def _build_network(self, h_size=10, l_rate=1e-1):
        """Create placeholders, weights, the Q-value output, loss and train op."""
        with tf.variable_scope(self.net_name):
            self._X = tf.placeholder(tf.float32, [None, self.input_size], name="input_x")
            W1 = tf.get_variable("W1", shape=[self.input_size, h_size],
                                initializer=tf.contrib.layers.xavier_initializer())
            layer1 = tf.nn.tanh(tf.matmul(self._X, W1))
            W2 = tf.get_variable("W2", shape=[h_size, self.output_size],
                                initializer=tf.contrib.layers.xavier_initializer())
            # Linear output layer: one Q-value per output unit.
            self._Qpred = tf.matmul(layer1, W2)

        # Target Q-values fed in by update(); created outside the variable scope.
        self._Y = tf.placeholder(
            shape=[None, self.output_size], dtype=tf.float32)
        # Mean squared error between target and predicted Q-values.
        self._loss = tf.reduce_mean(tf.square(self._Y - self._Qpred))
        self._train = tf.train.AdamOptimizer(
            learning_rate=l_rate).minimize(self._loss)

    def predict(self, state):
        """Return the predicted Q-values for one state or a batch of states.

        ``state`` is reshaped to ``(-1, input_size)``, so a single state
        yields one row (as before) and a stack of states yields one row each.
        """
        X = np.reshape(state, [-1, self.input_size])
        return self.session.run(self._Qpred, feed_dict={self._X: X})

    def update(self, x_stack, y_stack):
        """Run one optimizer step on the given states and target Q-values.

        Returns:
            The result of ``session.run`` for ``[loss, train_op]``.
        """
        return self.session.run([self._loss, self._train], feed_dict={
            self._X: x_stack, self._Y: y_stack
        })

In [2]:
import numpy as np
import tensorflow as tf
import random
from collections import deque
from gym.envs.registration import register

In [3]:
import gym
# Environment instance used by the cells below (bot_play and main read this global).
env = gym.make('CartPole-v0')
# Register a CartPole variant whose tags request a 10000-step episode limit.
# NOTE(review): 'CartPole-v2' is registered after `env` is created and is never
# passed to gym.make in the visible cells, so the notebook appears to train on
# CartPole-v0 — confirm whether gym.make('CartPole-v2') was intended.
register(
    id='CartPole-v2',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    tags={'wrapper_config.TimeLimit.max_episode_steps': 10000},
    reward_threshold=10000.0,
)

[2017-07-28 18:10:26,722] Making new env: CartPole-v0


In [4]:
# Network dimensions are read from the environment's observation and action spaces.
input_size = env.observation_space.shape[0]  # size of the first axis of the observation space
output_size = env.action_space.n  # `n` of the action space; used as the number of Q-value outputs

In [5]:
dis = 0.9  # discount factor multiplied into the next-state max Q-value in the replay-train functions
REPLAY_MEMORY = 50000  # upper bound on the number of transitions main() keeps in its replay buffer

In [6]:
def simple_replay_train(DQN, train_batch):
    """Train a single network on a batch of transitions, bootstrapping from itself.

    For every ``(state, action, reward, next_state, done)`` transition the
    network's own prediction for ``state`` is used as the target, except for
    the entry of the taken action, which becomes ``reward`` when ``done`` is
    true and ``reward + dis * max(Q(next_state))`` otherwise.

    Args:
        DQN: Network object exposing ``input_size``, ``output_size``,
            ``predict(state)`` and ``update(x_stack, y_stack)``.
        train_batch: Iterable of transition tuples.

    Returns:
        Whatever ``DQN.update`` returns for the stacked states and targets.
    """
    states = []
    targets = []

    for state, action, reward, next_state, done in train_batch:
        Q = DQN.predict(state)
        if done:
            # Terminal transition: there is no future value to bootstrap from.
            Q[0, action] = reward
        else:
            Q[0, action] = reward + dis * np.max(DQN.predict(next_state))
        targets.append(Q)
        states.append(state)

    # Stack once at the end instead of re-copying a growing array on every
    # iteration; the leading zero-row arrays keep the (0, size) shape for an
    # empty batch.
    x_stack = np.vstack([np.empty((0, DQN.input_size))] + states)
    y_stack = np.vstack([np.empty((0, DQN.output_size))] + targets)
    return DQN.update(x_stack, y_stack)

In [7]:
def replay_train(mainDQN, targetDQN, train_batch):
    """Train ``mainDQN`` on a batch of transitions using ``targetDQN`` for bootstrapping.

    For every ``(state, action, reward, next_state, done)`` transition the
    main network's prediction for ``state`` is used as the target, except for
    the entry of the taken action, which becomes ``reward`` when ``done`` is
    true and ``reward + dis * max(targetDQN(next_state))`` otherwise.

    Args:
        mainDQN: Network being trained; must expose ``input_size``,
            ``output_size``, ``predict(state)`` and ``update(x, y)``.
        targetDQN: Network whose ``predict`` supplies the next-state values.
        train_batch: Iterable of transition tuples.

    Returns:
        Whatever ``mainDQN.update`` returns for the stacked states and targets.
    """
    states = []
    targets = []

    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)
        if done:
            # Terminal transition: there is no future value to bootstrap from.
            Q[0, action] = reward
        else:
            Q[0, action] = reward + dis * np.max(targetDQN.predict(next_state))
        targets.append(Q)
        states.append(state)

    # Array widths come from the network itself rather than from module-level
    # globals, matching simple_replay_train. Stacking once at the end avoids
    # re-copying a growing array on every iteration; the leading zero-row
    # arrays keep the (0, size) shape for an empty batch.
    x_stack = np.vstack([np.empty((0, mainDQN.input_size))] + states)
    y_stack = np.vstack([np.empty((0, mainDQN.output_size))] + targets)
    return mainDQN.update(x_stack, y_stack)

In [8]:
def bot_play(mainDQN):
    """Play one rendered episode on the global ``env`` using greedy actions.

    At every step the action with the largest predicted Q-value from
    ``mainDQN`` is taken; once the environment reports ``done`` the summed
    reward is printed.
    """
    observation = env.reset()
    total_reward = 0
    finished = False
    while not finished:
        env.render()
        q_values = mainDQN.predict(observation)
        observation, step_reward, finished, _ = env.step(np.argmax(q_values))
        total_reward += step_reward
    print("Total score: {}".format(total_reward))

In [9]:
def get_copy_var_ops(*, dest_scope_name="target", src_scope_name="main"):
    """Build assign ops that copy trainable variables from one scope to another.

    The trainable variables collected under ``src_scope_name`` and
    ``dest_scope_name`` are paired in collection order, and each destination
    variable is assigned the value of its source counterpart.

    Returns:
        A list of assign ops, one per paired variable.
    """
    trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
    sources = tf.get_collection(trainable_key, scope=src_scope_name)
    targets = tf.get_collection(trainable_key, scope=dest_scope_name)
    return [target.assign(source.value())
            for source, target in zip(sources, targets)]

In [10]:
def main():
    """Train a main/target DQN pair on the global ``env``, then play one episode.

    Each episode collects transitions with an epsilon-greedy policy into a
    bounded replay buffer. On every episode whose index satisfies
    ``episode % 10 == 1`` the main network is trained on random minibatches
    drawn from the buffer and its weights are then copied into the target
    network. After the last episode ``bot_play`` runs the main network once.
    """
    max_episodes = 5000
    max_steps = 10000        # safety cap on the number of steps in one episode
    terminal_reward = -100   # reward stored for the step on which env reports done
    train_interval = 10      # train when episode % train_interval == 1
    train_iterations = 50    # minibatch updates per training round
    batch_size = 10          # transitions per minibatch

    # A bounded deque discards its oldest transition automatically once full.
    replay_buffer = deque(maxlen=REPLAY_MEMORY)

    with tf.Session() as sess:
        mainDQN = DQN(sess, input_size, output_size, name="main")
        targetDQN = DQN(sess, input_size, output_size, name="target")
        tf.global_variables_initializer().run()

        # Start with both networks holding identical weights.
        copy_ops = get_copy_var_ops(dest_scope_name="target", src_scope_name="main")
        sess.run(copy_ops)

        for episode in range(max_episodes):
            # Exploration rate decays from 1 as the episode index grows.
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0

            state = env.reset()
            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(mainDQN.predict(state))
                next_state, reward, done, _ = env.step(action)
                if done:
                    # NOTE(review): this also applies if the environment ends the
                    # episode for a reason other than failure (e.g. a step
                    # limit) — confirm that is intended.
                    reward = terminal_reward

                replay_buffer.append((state, action, reward, next_state, done))

                state = next_state
                step_count += 1
                if step_count > max_steps:
                    break
            print("Episode: {} steps: {}".format(episode, step_count))

            if episode % train_interval == 1:
                for _ in range(train_iterations):
                    # Never request more samples than the buffer currently holds.
                    sample_size = min(batch_size, len(replay_buffer))
                    minibatch = random.sample(replay_buffer, sample_size)
                    loss, _ = replay_train(mainDQN, targetDQN, minibatch)
                print("Loss: ", loss)
                # Sync the target network with the freshly trained main network.
                sess.run(copy_ops)

        bot_play(mainDQN)

In [11]:
# Run the full training loop followed by one rendered evaluation episode.
main()

Episode: 0 steps: 14
Episode: 1 steps: 9
Loss:  0.553538
Episode: 2 steps: 24
Episode: 3 steps: 8
Episode: 4 steps: 13
Episode: 5 steps: 13
Episode: 6 steps: 12
Episode: 7 steps: 11
Episode: 8 steps: 8
Episode: 9 steps: 12
Episode: 10 steps: 11
Episode: 11 steps: 9
Loss:  1.06086
Episode: 12 steps: 11
Episode: 13 steps: 10
Episode: 14 steps: 9
Episode: 15 steps: 11
Episode: 16 steps: 9
Episode: 17 steps: 11
Episode: 18 steps: 10
Episode: 19 steps: 13
Episode: 20 steps: 11
Episode: 21 steps: 12
Loss:  1.72997
Episode: 22 steps: 13
Episode: 23 steps: 15
Episode: 24 steps: 12
Episode: 25 steps: 11
Episode: 26 steps: 12
Episode: 27 steps: 8
Episode: 28 steps: 8
Episode: 29 steps: 9
Episode: 30 steps: 11
Episode: 31 steps: 9
Loss:  483.176
Episode: 32 steps: 47
Episode: 33 steps: 37
Episode: 34 steps: 35
Episode: 35 steps: 29
Episode: 36 steps: 30
Episode: 37 steps: 31
Episode: 38 steps: 33
Episode: 39 steps: 28
Episode: 40 steps: 39
Episode: 41 steps: 41
Loss:  949.632
Episode: 42 steps: 9

Loss:  5.25897
Episode: 342 steps: 19
Episode: 343 steps: 28
Episode: 344 steps: 30
Episode: 345 steps: 17
Episode: 346 steps: 54
Episode: 347 steps: 17
Episode: 348 steps: 25
Episode: 349 steps: 20
Episode: 350 steps: 24
Episode: 351 steps: 15
Loss:  489.259
Episode: 352 steps: 27
Episode: 353 steps: 34
Episode: 354 steps: 26
Episode: 355 steps: 22
Episode: 356 steps: 26
Episode: 357 steps: 31
Episode: 358 steps: 27
Episode: 359 steps: 30
Episode: 360 steps: 21
Episode: 361 steps: 32
Loss:  9.94427
Episode: 362 steps: 61
Episode: 363 steps: 66
Episode: 364 steps: 56
Episode: 365 steps: 116
Episode: 366 steps: 76
Episode: 367 steps: 62
Episode: 368 steps: 60
Episode: 369 steps: 53
Episode: 370 steps: 60
Episode: 371 steps: 54
Loss:  2.10324
Episode: 372 steps: 60
Episode: 373 steps: 72
Episode: 374 steps: 28
Episode: 375 steps: 28
Episode: 376 steps: 25
Episode: 377 steps: 35
Episode: 378 steps: 61
Episode: 379 steps: 32
Episode: 380 steps: 76
Episode: 381 steps: 37
Loss:  5.25196
Epis

Loss:  457.399
Episode: 682 steps: 31
Episode: 683 steps: 25
Episode: 684 steps: 27
Episode: 685 steps: 25
Episode: 686 steps: 30
Episode: 687 steps: 28
Episode: 688 steps: 21
Episode: 689 steps: 31
Episode: 690 steps: 30
Episode: 691 steps: 22
Loss:  2.54269
Episode: 692 steps: 19
Episode: 693 steps: 18
Episode: 694 steps: 14
Episode: 695 steps: 18
Episode: 696 steps: 16
Episode: 697 steps: 14
Episode: 698 steps: 16
Episode: 699 steps: 21
Episode: 700 steps: 14
Episode: 701 steps: 23
Loss:  2.2121
Episode: 702 steps: 150
Episode: 703 steps: 181
Episode: 704 steps: 126
Episode: 705 steps: 120
Episode: 706 steps: 93
Episode: 707 steps: 159
Episode: 708 steps: 171
Episode: 709 steps: 173
Episode: 710 steps: 146
Episode: 711 steps: 169
Loss:  2.76384
Episode: 712 steps: 9
Episode: 713 steps: 9
Episode: 714 steps: 9
Episode: 715 steps: 8
Episode: 716 steps: 12
Episode: 717 steps: 10
Episode: 718 steps: 9
Episode: 719 steps: 9
Episode: 720 steps: 11
Episode: 721 steps: 10
Loss:  4.2025
Epis

Loss:  2.93285
Episode: 1022 steps: 56
Episode: 1023 steps: 51
Episode: 1024 steps: 23
Episode: 1025 steps: 61
Episode: 1026 steps: 36
Episode: 1027 steps: 26
Episode: 1028 steps: 66
Episode: 1029 steps: 69
Episode: 1030 steps: 27
Episode: 1031 steps: 44
Loss:  6.88726
Episode: 1032 steps: 20
Episode: 1033 steps: 20
Episode: 1034 steps: 19
Episode: 1035 steps: 16
Episode: 1036 steps: 26
Episode: 1037 steps: 18
Episode: 1038 steps: 14
Episode: 1039 steps: 24
Episode: 1040 steps: 14
Episode: 1041 steps: 13
Loss:  418.33
Episode: 1042 steps: 22
Episode: 1043 steps: 22
Episode: 1044 steps: 25
Episode: 1045 steps: 22
Episode: 1046 steps: 27
Episode: 1047 steps: 28
Episode: 1048 steps: 32
Episode: 1049 steps: 19
Episode: 1050 steps: 26
Episode: 1051 steps: 17
Loss:  3.18332
Episode: 1052 steps: 22
Episode: 1053 steps: 16
Episode: 1054 steps: 16
Episode: 1055 steps: 24
Episode: 1056 steps: 18
Episode: 1057 steps: 20
Episode: 1058 steps: 22
Episode: 1059 steps: 23
Episode: 1060 steps: 21
Episo

Loss:  11.7294
Episode: 1352 steps: 81
Episode: 1353 steps: 21
Episode: 1354 steps: 26
Episode: 1355 steps: 75
Episode: 1356 steps: 37
Episode: 1357 steps: 37
Episode: 1358 steps: 66
Episode: 1359 steps: 47
Episode: 1360 steps: 21
Episode: 1361 steps: 34
Loss:  6.54986
Episode: 1362 steps: 147
Episode: 1363 steps: 42
Episode: 1364 steps: 182
Episode: 1365 steps: 58
Episode: 1366 steps: 44
Episode: 1367 steps: 83
Episode: 1368 steps: 120
Episode: 1369 steps: 85
Episode: 1370 steps: 70
Episode: 1371 steps: 43
Loss:  7.00854
Episode: 1372 steps: 31
Episode: 1373 steps: 35
Episode: 1374 steps: 24
Episode: 1375 steps: 17
Episode: 1376 steps: 22
Episode: 1377 steps: 16
Episode: 1378 steps: 18
Episode: 1379 steps: 18
Episode: 1380 steps: 16
Episode: 1381 steps: 13
Loss:  5.91479
Episode: 1382 steps: 112
Episode: 1383 steps: 86
Episode: 1384 steps: 51
Episode: 1385 steps: 79
Episode: 1386 steps: 63
Episode: 1387 steps: 57
Episode: 1388 steps: 89
Episode: 1389 steps: 101
Episode: 1390 steps: 52

Loss:  8.45056
Episode: 1672 steps: 10
Episode: 1673 steps: 9
Episode: 1674 steps: 11
Episode: 1675 steps: 9
Episode: 1676 steps: 10
Episode: 1677 steps: 9
Episode: 1678 steps: 9
Episode: 1679 steps: 9
Episode: 1680 steps: 9
Episode: 1681 steps: 10
Loss:  3.33809
Episode: 1682 steps: 26
Episode: 1683 steps: 23
Episode: 1684 steps: 24
Episode: 1685 steps: 31
Episode: 1686 steps: 27
Episode: 1687 steps: 31
Episode: 1688 steps: 24
Episode: 1689 steps: 33
Episode: 1690 steps: 29
Episode: 1691 steps: 30
Loss:  7.40006
Episode: 1692 steps: 88
Episode: 1693 steps: 71
Episode: 1694 steps: 69
Episode: 1695 steps: 72
Episode: 1696 steps: 47
Episode: 1697 steps: 46
Episode: 1698 steps: 81
Episode: 1699 steps: 48
Episode: 1700 steps: 89
Episode: 1701 steps: 76
Loss:  560.644
Episode: 1702 steps: 29
Episode: 1703 steps: 10
Episode: 1704 steps: 48
Episode: 1705 steps: 25
Episode: 1706 steps: 30
Episode: 1707 steps: 22
Episode: 1708 steps: 8
Episode: 1709 steps: 38
Episode: 1710 steps: 28
Episode: 17

Loss:  4.87621
Episode: 2002 steps: 61
Episode: 2003 steps: 21
Episode: 2004 steps: 96
Episode: 2005 steps: 70
Episode: 2006 steps: 22
Episode: 2007 steps: 26
Episode: 2008 steps: 90
Episode: 2009 steps: 74
Episode: 2010 steps: 65
Episode: 2011 steps: 29
Loss:  13.4551
Episode: 2012 steps: 64
Episode: 2013 steps: 36
Episode: 2014 steps: 27
Episode: 2015 steps: 28
Episode: 2016 steps: 23
Episode: 2017 steps: 22
Episode: 2018 steps: 30
Episode: 2019 steps: 21
Episode: 2020 steps: 23
Episode: 2021 steps: 25
Loss:  8.99056
Episode: 2022 steps: 20
Episode: 2023 steps: 11
Episode: 2024 steps: 14
Episode: 2025 steps: 15
Episode: 2026 steps: 20
Episode: 2027 steps: 17
Episode: 2028 steps: 13
Episode: 2029 steps: 15
Episode: 2030 steps: 13
Episode: 2031 steps: 9
Loss:  1.72615
Episode: 2032 steps: 27
Episode: 2033 steps: 32
Episode: 2034 steps: 25
Episode: 2035 steps: 40
Episode: 2036 steps: 26
Episode: 2037 steps: 31
Episode: 2038 steps: 21
Episode: 2039 steps: 23
Episode: 2040 steps: 21
Episo

Loss:  10.0539
Episode: 2332 steps: 38
Episode: 2333 steps: 36
Episode: 2334 steps: 46
Episode: 2335 steps: 39
Episode: 2336 steps: 29
Episode: 2337 steps: 19
Episode: 2338 steps: 23
Episode: 2339 steps: 22
Episode: 2340 steps: 35
Episode: 2341 steps: 20
Loss:  523.77
Episode: 2342 steps: 17
Episode: 2343 steps: 16
Episode: 2344 steps: 30
Episode: 2345 steps: 14
Episode: 2346 steps: 19
Episode: 2347 steps: 22
Episode: 2348 steps: 23
Episode: 2349 steps: 14
Episode: 2350 steps: 15
Episode: 2351 steps: 15
Loss:  3.46999
Episode: 2352 steps: 30
Episode: 2353 steps: 25
Episode: 2354 steps: 26
Episode: 2355 steps: 29
Episode: 2356 steps: 26
Episode: 2357 steps: 32
Episode: 2358 steps: 22
Episode: 2359 steps: 22
Episode: 2360 steps: 27
Episode: 2361 steps: 30
Loss:  5.93357
Episode: 2362 steps: 23
Episode: 2363 steps: 10
Episode: 2364 steps: 33
Episode: 2365 steps: 58
Episode: 2366 steps: 22
Episode: 2367 steps: 14
Episode: 2368 steps: 28
Episode: 2369 steps: 21
Episode: 2370 steps: 13
Episo

Loss:  483.612
Episode: 2662 steps: 10
Episode: 2663 steps: 10
Episode: 2664 steps: 9
Episode: 2665 steps: 9
Episode: 2666 steps: 8
Episode: 2667 steps: 10
Episode: 2668 steps: 9
Episode: 2669 steps: 10
Episode: 2670 steps: 8
Episode: 2671 steps: 9
Loss:  3.18542
Episode: 2672 steps: 31
Episode: 2673 steps: 24
Episode: 2674 steps: 24
Episode: 2675 steps: 31
Episode: 2676 steps: 25
Episode: 2677 steps: 44
Episode: 2678 steps: 37
Episode: 2679 steps: 22
Episode: 2680 steps: 44
Episode: 2681 steps: 38
Loss:  7.21791
Episode: 2682 steps: 30
Episode: 2683 steps: 37
Episode: 2684 steps: 24
Episode: 2685 steps: 71
Episode: 2686 steps: 27
Episode: 2687 steps: 36
Episode: 2688 steps: 23
Episode: 2689 steps: 30
Episode: 2690 steps: 31
Episode: 2691 steps: 37
Loss:  1.64383
Episode: 2692 steps: 24
Episode: 2693 steps: 30
Episode: 2694 steps: 27
Episode: 2695 steps: 39
Episode: 2696 steps: 29
Episode: 2697 steps: 31
Episode: 2698 steps: 27
Episode: 2699 steps: 33
Episode: 2700 steps: 25
Episode: 2

Loss:  507.888
Episode: 2992 steps: 27
Episode: 2993 steps: 31
Episode: 2994 steps: 24
Episode: 2995 steps: 31
Episode: 2996 steps: 31
Episode: 2997 steps: 45
Episode: 2998 steps: 23
Episode: 2999 steps: 25
Episode: 3000 steps: 27
Episode: 3001 steps: 32
Loss:  500.668
Episode: 3002 steps: 24
Episode: 3003 steps: 25
Episode: 3004 steps: 27
Episode: 3005 steps: 25
Episode: 3006 steps: 51
Episode: 3007 steps: 24
Episode: 3008 steps: 32
Episode: 3009 steps: 21
Episode: 3010 steps: 28
Episode: 3011 steps: 24
Loss:  532.555
Episode: 3012 steps: 9
Episode: 3013 steps: 9
Episode: 3014 steps: 9
Episode: 3015 steps: 9
Episode: 3016 steps: 11
Episode: 3017 steps: 9
Episode: 3018 steps: 9
Episode: 3019 steps: 9
Episode: 3020 steps: 11
Episode: 3021 steps: 10
Loss:  4.5566
Episode: 3022 steps: 22
Episode: 3023 steps: 44
Episode: 3024 steps: 27
Episode: 3025 steps: 42
Episode: 3026 steps: 33
Episode: 3027 steps: 28
Episode: 3028 steps: 28
Episode: 3029 steps: 54
Episode: 3030 steps: 22
Episode: 303

Loss:  510.386
Episode: 3322 steps: 26
Episode: 3323 steps: 15
Episode: 3324 steps: 21
Episode: 3325 steps: 29
Episode: 3326 steps: 20
Episode: 3327 steps: 15
Episode: 3328 steps: 24
Episode: 3329 steps: 18
Episode: 3330 steps: 26
Episode: 3331 steps: 14
Loss:  5.70208
Episode: 3332 steps: 25
Episode: 3333 steps: 24
Episode: 3334 steps: 22
Episode: 3335 steps: 26
Episode: 3336 steps: 21
Episode: 3337 steps: 23
Episode: 3338 steps: 22
Episode: 3339 steps: 20
Episode: 3340 steps: 21
Episode: 3341 steps: 19
Loss:  2.93841
Episode: 3342 steps: 11
Episode: 3343 steps: 21
Episode: 3344 steps: 10
Episode: 3345 steps: 19
Episode: 3346 steps: 14
Episode: 3347 steps: 22
Episode: 3348 steps: 16
Episode: 3349 steps: 17
Episode: 3350 steps: 16
Episode: 3351 steps: 13
Loss:  483.436
Episode: 3352 steps: 26
Episode: 3353 steps: 40
Episode: 3354 steps: 27
Episode: 3355 steps: 63
Episode: 3356 steps: 52
Episode: 3357 steps: 23
Episode: 3358 steps: 48
Episode: 3359 steps: 51
Episode: 3360 steps: 22
Epis

Loss:  3.44482
Episode: 3652 steps: 60
Episode: 3653 steps: 61
Episode: 3654 steps: 57
Episode: 3655 steps: 36
Episode: 3656 steps: 35
Episode: 3657 steps: 59
Episode: 3658 steps: 69
Episode: 3659 steps: 33
Episode: 3660 steps: 56
Episode: 3661 steps: 72
Loss:  2.86468
Episode: 3662 steps: 73
Episode: 3663 steps: 43
Episode: 3664 steps: 31
Episode: 3665 steps: 19
Episode: 3666 steps: 19
Episode: 3667 steps: 78
Episode: 3668 steps: 20
Episode: 3669 steps: 18
Episode: 3670 steps: 20
Episode: 3671 steps: 57
Loss:  4.24397
Episode: 3672 steps: 13
Episode: 3673 steps: 12
Episode: 3674 steps: 27
Episode: 3675 steps: 17
Episode: 3676 steps: 17
Episode: 3677 steps: 20
Episode: 3678 steps: 17
Episode: 3679 steps: 21
Episode: 3680 steps: 38
Episode: 3681 steps: 28
Loss:  540.008
Episode: 3682 steps: 12
Episode: 3683 steps: 17
Episode: 3684 steps: 20
Episode: 3685 steps: 12
Episode: 3686 steps: 19
Episode: 3687 steps: 18
Episode: 3688 steps: 15
Episode: 3689 steps: 15
Episode: 3690 steps: 24
Epis

Loss:  492.873
Episode: 3982 steps: 39
Episode: 3983 steps: 49
Episode: 3984 steps: 26
Episode: 3985 steps: 80
Episode: 3986 steps: 43
Episode: 3987 steps: 54
Episode: 3988 steps: 22
Episode: 3989 steps: 23
Episode: 3990 steps: 31
Episode: 3991 steps: 44
Loss:  3.56185
Episode: 3992 steps: 24
Episode: 3993 steps: 24
Episode: 3994 steps: 38
Episode: 3995 steps: 9
Episode: 3996 steps: 8
Episode: 3997 steps: 9
Episode: 3998 steps: 8
Episode: 3999 steps: 9
Episode: 4000 steps: 23
Episode: 4001 steps: 39
Loss:  4.06262
Episode: 4002 steps: 37
Episode: 4003 steps: 79
Episode: 4004 steps: 54
Episode: 4005 steps: 56
Episode: 4006 steps: 83
Episode: 4007 steps: 72
Episode: 4008 steps: 78
Episode: 4009 steps: 37
Episode: 4010 steps: 37
Episode: 4011 steps: 44
Loss:  4.84857
Episode: 4012 steps: 88
Episode: 4013 steps: 53
Episode: 4014 steps: 21
Episode: 4015 steps: 40
Episode: 4016 steps: 48
Episode: 4017 steps: 56
Episode: 4018 steps: 60
Episode: 4019 steps: 55
Episode: 4020 steps: 71
Episode: 

Loss:  4.10334
Episode: 4312 steps: 41
Episode: 4313 steps: 50
Episode: 4314 steps: 53
Episode: 4315 steps: 39
Episode: 4316 steps: 35
Episode: 4317 steps: 45
Episode: 4318 steps: 37
Episode: 4319 steps: 33
Episode: 4320 steps: 32
Episode: 4321 steps: 34
Loss:  9.72638
Episode: 4322 steps: 36
Episode: 4323 steps: 45
Episode: 4324 steps: 35
Episode: 4325 steps: 52
Episode: 4326 steps: 38
Episode: 4327 steps: 56
Episode: 4328 steps: 115
Episode: 4329 steps: 52
Episode: 4330 steps: 86
Episode: 4331 steps: 43
Loss:  468.993
Episode: 4332 steps: 26
Episode: 4333 steps: 34
Episode: 4334 steps: 102
Episode: 4335 steps: 69
Episode: 4336 steps: 25
Episode: 4337 steps: 24
Episode: 4338 steps: 21
Episode: 4339 steps: 51
Episode: 4340 steps: 21
Episode: 4341 steps: 31
Loss:  4.93125
Episode: 4342 steps: 32
Episode: 4343 steps: 44
Episode: 4344 steps: 50
Episode: 4345 steps: 41
Episode: 4346 steps: 45
Episode: 4347 steps: 74
Episode: 4348 steps: 42
Episode: 4349 steps: 34
Episode: 4350 steps: 82
Ep

Loss:  5.33666
Episode: 4642 steps: 30
Episode: 4643 steps: 30
Episode: 4644 steps: 24
Episode: 4645 steps: 23
Episode: 4646 steps: 36
Episode: 4647 steps: 22
Episode: 4648 steps: 27
Episode: 4649 steps: 23
Episode: 4650 steps: 28
Episode: 4651 steps: 40
Loss:  5.27296
Episode: 4652 steps: 24
Episode: 4653 steps: 23
Episode: 4654 steps: 23
Episode: 4655 steps: 20
Episode: 4656 steps: 26
Episode: 4657 steps: 31
Episode: 4658 steps: 21
Episode: 4659 steps: 27
Episode: 4660 steps: 19
Episode: 4661 steps: 21
Loss:  3.93372
Episode: 4662 steps: 18
Episode: 4663 steps: 25
Episode: 4664 steps: 25
Episode: 4665 steps: 24
Episode: 4666 steps: 22
Episode: 4667 steps: 29
Episode: 4668 steps: 27
Episode: 4669 steps: 28
Episode: 4670 steps: 22
Episode: 4671 steps: 43
Loss:  15.601
Episode: 4672 steps: 22
Episode: 4673 steps: 29
Episode: 4674 steps: 21
Episode: 4675 steps: 26
Episode: 4676 steps: 50
Episode: 4677 steps: 36
Episode: 4678 steps: 21
Episode: 4679 steps: 23
Episode: 4680 steps: 32
Episo

Loss:  551.002
Episode: 4972 steps: 33
Episode: 4973 steps: 83
Episode: 4974 steps: 84
Episode: 4975 steps: 98
Episode: 4976 steps: 92
Episode: 4977 steps: 75
Episode: 4978 steps: 41
Episode: 4979 steps: 83
Episode: 4980 steps: 82
Episode: 4981 steps: 62
Loss:  568.522
Episode: 4982 steps: 26
Episode: 4983 steps: 22
Episode: 4984 steps: 28
Episode: 4985 steps: 26
Episode: 4986 steps: 24
Episode: 4987 steps: 31
Episode: 4988 steps: 24
Episode: 4989 steps: 21
Episode: 4990 steps: 44
Episode: 4991 steps: 32
Loss:  548.8
Episode: 4992 steps: 36
Episode: 4993 steps: 22
Episode: 4994 steps: 20
Episode: 4995 steps: 50
Episode: 4996 steps: 22
Episode: 4997 steps: 30
Episode: 4998 steps: 27
Episode: 4999 steps: 24
Total score: 26.0
