In [1]:
# This code is based on
# https://github.com/hunkim/DeepRL-Agents
# CF https://github.com/golbin/TensorFlow-Tutorials
# https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/dqn.py
# conda install -c conda-forge ffmpeg

import numpy as np
import random
from collections import deque

#import tensorflow as tf

# 예전 소스 실행을 위한 설정
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import gym
from gym import wrappers

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Discount factor applied to the bootstrapped next-state Q-value
dis = 0.9
# Upper bound on the number of transitions kept in the replay buffer
REPLAY_MEMORY = 50000

# Environment shared by training and playback
env = gym.make('CartPole-v0')

# Network dimensions are read from the environment's spaces
input_size = env.observation_space.shape[0]
output_size = env.action_space.n

In [3]:
class DQN:
    """Small fully connected Q-network built with the TF1 graph API.

    The network is ``X -> tanh(X @ W1) -> @ W2`` (no bias terms) and is
    trained with a mean-squared-error loss minimised by Adam.

    Args:
        session: TensorFlow session used to run prediction and training ops.
        input_size: Number of features in one state vector.
        output_size: Number of Q-values produced (one per action).
        name: Variable scope under which the weights are created.
        h_size: Width of the hidden layer.
        l_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, session, input_size, output_size, name="main",
                 h_size=10, l_rate=1e-1):
        self.session = session
        self.input_size = input_size
        self.output_size = output_size
        self.net_name = name
        # Kept on the instance so the hyperparameters can be inspected later.
        self.h_size = h_size
        self.l_rate = l_rate

        self._build_network(h_size=h_size, l_rate=l_rate)

    def _build_network(self, h_size=10, l_rate=1e-1):
        """Create the placeholders, weights, loss and training op.

        Args:
            h_size: Width of the hidden layer.
            l_rate: Learning rate for the Adam optimizer.
        """
        with tf.variable_scope(self.net_name):
            self._X = tf.placeholder(tf.float32, [None, self.input_size], name="input_x")

            # First layer of weights.
            # The upstream code used tf.contrib.layers.xavier_initializer();
            # it was replaced with tf.random_normal_initializer() here.
            W1 = tf.get_variable("W1", shape=[self.input_size, h_size],
                                 initializer=tf.random_normal_initializer())
            layer1 = tf.nn.tanh(tf.matmul(self._X, W1))

            # Second layer of weights (same initializer substitution as W1).
            W2 = tf.get_variable("W2", shape=[h_size, self.output_size],
                                 initializer=tf.random_normal_initializer())

            # Q prediction: one value per action for every input row.
            self._Qpred = tf.matmul(layer1, W2)

        # Target Q-values fed in during training.
        self._Y = tf.placeholder(shape=[None, self.output_size], dtype=tf.float32)

        # Mean squared error between target and predicted Q-values.
        self._loss = tf.reduce_mean(tf.square(self._Y - self._Qpred))
        # One Adam step on that loss.
        self._train = tf.train.AdamOptimizer(learning_rate=l_rate).minimize(self._loss)

    def predict(self, state):
        """Return the predicted Q-values for a single state.

        Args:
            state: Array-like with ``input_size`` elements.

        Returns:
            The result of running the Q-prediction op on the state reshaped
            to ``[1, input_size]``.
        """
        x = np.reshape(state, [1, self.input_size])
        return self.session.run(self._Qpred, feed_dict={self._X: x})

    def update(self, x_stack, y_stack):
        """Run one training step on a batch.

        Args:
            x_stack: Batch of states, one row per sample.
            y_stack: Batch of target Q-values, one row per sample.

        Returns:
            The list produced by running ``[loss, train_op]``; the first
            element is the loss value.
        """
        return self.session.run([self._loss, self._train], feed_dict={self._X: x_stack, self._Y: y_stack})

In [4]:
def replay_train(mainDQN, targetDQN, train_batch):
    """Train ``mainDQN`` on a minibatch using standard DQN targets.

    For every transition the main network's current Q-values are used as the
    label, except for the taken action, whose entry is replaced by ``reward``
    (terminal transition) or ``reward + dis * max(targetDQN(next_state))``,
    where ``dis`` is the module-level discount factor.

    Args:
        mainDQN: Network being trained; must provide ``predict``, ``update``,
            ``input_size`` and ``output_size``.
        targetDQN: Network used to evaluate the next state.
        train_batch: Iterable of ``(state, action, reward, next_state, done)``.

    Returns:
        Whatever ``mainDQN.update`` returns for the assembled batch.
    """
    states = []
    targets = []

    # Get stored information from the buffer
    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)

        # terminal?
        if done:
            Q[0, action] = reward
        else:
            # get target from target DQN (Q')
            Q[0, action] = reward + dis * np.max(targetDQN.predict(next_state))

        targets.append(Q)
        states.append(state)

    # Stack once at the end; the empty leading block keeps the (0, n) shape
    # and float64 dtype for an empty batch.
    x_stack = np.vstack([np.empty((0, mainDQN.input_size))] + states)
    y_stack = np.vstack([np.empty((0, mainDQN.output_size))] + targets)

    # Train our network using target and predicted Q values on each episode
    return mainDQN.update(x_stack, y_stack)

def ddqn_replay_train(mainDQN, targetDQN, train_batch):
    """Train ``mainDQN`` on a minibatch using Double DQN targets.

    The label for each transition is the main network's own prediction with
    the taken action's entry overwritten by ``reward`` (terminal) or by
    ``reward + dis * targetDQN(s')[argmax(mainDQN(s'))]`` (non-terminal),
    where ``dis`` is the module-level discount factor.

    :param mainDQN: main DQN (selects the next action, receives the update)
    :param targetDQN: target DQN (evaluates the selected next action)
    :param train_batch: minibatch of (state, action, reward, next_state, done)
    :return: result of ``mainDQN.update`` on the assembled batch
    """
    # Start each stack with an empty (0, n) block so an empty minibatch still
    # produces correctly shaped arrays.
    inputs = [np.empty((0, mainDQN.input_size))]
    labels = [np.empty((0, mainDQN.output_size))]

    for obs, act, rew, nxt_obs, terminal in train_batch:
        q_row = mainDQN.predict(obs)

        if not terminal:
            # Double DQN: the main network picks the action, the target
            # network supplies its value.
            target_q = targetDQN.predict(nxt_obs)
            best_next = np.argmax(mainDQN.predict(nxt_obs))
            q_row[0, act] = rew + dis * target_q[0, best_next]
        else:
            q_row[0, act] = rew

        labels.append(q_row)
        inputs.append(obs)

    x_stack = np.vstack(inputs)
    y_stack = np.vstack(labels)

    # One training step on the whole minibatch
    return mainDQN.update(x_stack, y_stack)

def get_copy_var_ops(*, dest_scope_name="target", src_scope_name="main"):
    """Build assign ops that copy trainable variables between two scopes.

    Variables are paired in the order the two collections return them.

    Args:
        dest_scope_name: Scope whose trainable variables are overwritten.
        src_scope_name: Scope whose trainable variables are read.

    Returns:
        A list of assign ops, one per (source, destination) variable pair.
    """
    trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
    sources = tf.get_collection(trainable_key, scope=src_scope_name)
    destinations = tf.get_collection(trainable_key, scope=dest_scope_name)

    return [dst.assign(src.value()) for src, dst in zip(sources, destinations)]

def bot_play(mainDQN, env=env):
    """Play one rendered episode greedily with ``mainDQN`` and print the score.

    Args:
        mainDQN: Network whose ``predict`` output selects the action (argmax).
        env: Environment to play in; defaults to the module-level ``env``
            as bound when this function was defined.
    """
    observation = env.reset()
    total_reward = 0
    finished = False

    while not finished:
        env.render()
        greedy_action = np.argmax(mainDQN.predict(observation))
        observation, step_reward, finished, _ = env.step(greedy_action)
        total_reward += step_reward

    print("Total score: {}".format(total_reward))

In [5]:
def main():
    """Train a Double DQN agent on the module-level ``env``, then replay it.

    Experience is collected every episode with an epsilon-greedy policy whose
    epsilon decays as ``1 / (episode / 10 + 1)``. Every tenth episode
    (``episode % 10 == 1``) the main network is trained on random minibatches
    from the replay buffer and its weights are copied to the target network.
    After training, the greedy policy is played back through a recording
    ``Monitor`` wrapper that writes to ``gym-results``.
    """
    max_episodes = 5000
    max_steps_per_episode = 10000
    batch_size = 10
    batches_per_update = 50

    # store the previous observations in replay memory; maxlen makes the
    # deque discard its oldest entry automatically once it is full
    replay_buffer = deque(maxlen=REPLAY_MEMORY)

    with tf.Session() as sess:
        mainDQN = DQN(sess, input_size, output_size, name="main")
        targetDQN = DQN(sess, input_size, output_size, name="target")

        tf.global_variables_initializer().run()

        # initial copy q_net -> target_net
        copy_ops = get_copy_var_ops(dest_scope_name="target", src_scope_name="main")
        sess.run(copy_ops)

        for episode in range(max_episodes):
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0
            state = env.reset()

            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    # Choose an action by greedily from the Q-network
                    action = np.argmax(mainDQN.predict(state))

                # Get new state and reward from environment
                next_state, reward, done, _ = env.step(action)
                # NOTE(review): `done` may also be set when the environment's
                # own step limit ends an episode (the logged runs top out at
                # 200 steps), in which case this penalty hits successful
                # episodes too - confirm this is intended.
                if done:  # Penalty
                    reward = -100

                # Save the experience to our buffer
                replay_buffer.append((state, action, reward, next_state, done))

                state = next_state
                step_count += 1
                if step_count > max_steps_per_episode:   # Good enough. Let's move on
                    break

            print("Episode: {} steps: {}".format(episode, step_count))

            if episode % 10 == 1:  # train every 10 episode
                # Get a random batch of experiences; never ask for more
                # samples than the buffer holds (random.sample would raise)
                sample_size = min(batch_size, len(replay_buffer))
                for _ in range(batches_per_update):
                    minibatch = random.sample(replay_buffer, sample_size)
                    loss, _ = ddqn_replay_train(mainDQN, targetDQN, minibatch)

                print("Loss: ", loss)
                # copy q_net -> target_net
                sess.run(copy_ops)

        # See our trained bot in action
        env2 = wrappers.Monitor(env, 'gym-results', force=True)
        try:
            for i in range(200):
                bot_play(mainDQN, env=env2)
        finally:
            # Always close the monitor, even if playback fails part-way
            env2.close()
        # Uploading the results requires an API key; read it from the
        # environment or a secrets store instead of hard-coding it here.

# Entry point: start training and playback only when this code runs as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()

Episode: 0 steps: 22
Episode: 1 steps: 35
Loss:  4.207395
Episode: 2 steps: 22
Episode: 3 steps: 48
Episode: 4 steps: 34
Episode: 5 steps: 25
Episode: 6 steps: 74
Episode: 7 steps: 50
Episode: 8 steps: 91
Episode: 9 steps: 28
Episode: 10 steps: 105
Episode: 11 steps: 116
Loss:  8.471715
Episode: 12 steps: 62
Episode: 13 steps: 45
Episode: 14 steps: 60
Episode: 15 steps: 99
Episode: 16 steps: 27
Episode: 17 steps: 100
Episode: 18 steps: 82
Episode: 19 steps: 91
Episode: 20 steps: 56
Episode: 21 steps: 47
Loss:  3.4491024
Episode: 22 steps: 92
Episode: 23 steps: 150
Episode: 24 steps: 128
Episode: 25 steps: 98
Episode: 26 steps: 100
Episode: 27 steps: 110
Episode: 28 steps: 144
Episode: 29 steps: 82
Episode: 30 steps: 161
Episode: 31 steps: 200
Loss:  1.5147042
Episode: 32 steps: 55
Episode: 33 steps: 197
Episode: 34 steps: 200
Episode: 35 steps: 51
Episode: 36 steps: 200
Episode: 37 steps: 200
Episode: 38 steps: 177
Episode: 39 steps: 200
Episode: 40 steps: 200
Episode: 41 steps: 200
Lo

Loss:  3.2174602
Episode: 342 steps: 24
Episode: 343 steps: 31
Episode: 344 steps: 28
Episode: 345 steps: 27
Episode: 346 steps: 32
Episode: 347 steps: 38
Episode: 348 steps: 41
Episode: 349 steps: 37
Episode: 350 steps: 30
Episode: 351 steps: 40
Loss:  3.1098247
Episode: 352 steps: 25
Episode: 353 steps: 25
Episode: 354 steps: 29
Episode: 355 steps: 19
Episode: 356 steps: 23
Episode: 357 steps: 28
Episode: 358 steps: 23
Episode: 359 steps: 25
Episode: 360 steps: 24
Episode: 361 steps: 31
Loss:  2.5679972
Episode: 362 steps: 46
Episode: 363 steps: 35
Episode: 364 steps: 48
Episode: 365 steps: 35
Episode: 366 steps: 30
Episode: 367 steps: 35
Episode: 368 steps: 54
Episode: 369 steps: 24
Episode: 370 steps: 21
Episode: 371 steps: 29
Loss:  2.6109133
Episode: 372 steps: 20
Episode: 373 steps: 30
Episode: 374 steps: 21
Episode: 375 steps: 20
Episode: 376 steps: 45
Episode: 377 steps: 30
Episode: 378 steps: 25
Episode: 379 steps: 21
Episode: 380 steps: 19
Episode: 381 steps: 23
Loss:  4.915

Loss:  0.99073887
Episode: 682 steps: 20
Episode: 683 steps: 38
Episode: 684 steps: 21
Episode: 685 steps: 26
Episode: 686 steps: 20
Episode: 687 steps: 49
Episode: 688 steps: 25
Episode: 689 steps: 22
Episode: 690 steps: 21
Episode: 691 steps: 15
Loss:  4.757894
Episode: 692 steps: 10
Episode: 693 steps: 10
Episode: 694 steps: 10
Episode: 695 steps: 8
Episode: 696 steps: 9
Episode: 697 steps: 8
Episode: 698 steps: 10
Episode: 699 steps: 9
Episode: 700 steps: 9
Episode: 701 steps: 9
Loss:  539.8501
Episode: 702 steps: 60
Episode: 703 steps: 50
Episode: 704 steps: 40
Episode: 705 steps: 43
Episode: 706 steps: 57
Episode: 707 steps: 75
Episode: 708 steps: 66
Episode: 709 steps: 51
Episode: 710 steps: 45
Episode: 711 steps: 42
Loss:  4.1010146
Episode: 712 steps: 28
Episode: 713 steps: 40
Episode: 714 steps: 24
Episode: 715 steps: 28
Episode: 716 steps: 40
Episode: 717 steps: 28
Episode: 718 steps: 20
Episode: 719 steps: 26
Episode: 720 steps: 25
Episode: 721 steps: 24
Loss:  3.5021389
Ep

Loss:  1.7820209
Episode: 1022 steps: 9
Episode: 1023 steps: 9
Episode: 1024 steps: 11
Episode: 1025 steps: 10
Episode: 1026 steps: 9
Episode: 1027 steps: 10
Episode: 1028 steps: 10
Episode: 1029 steps: 10
Episode: 1030 steps: 8
Episode: 1031 steps: 9
Loss:  1.0436037
Episode: 1032 steps: 60
Episode: 1033 steps: 37
Episode: 1034 steps: 32
Episode: 1035 steps: 84
Episode: 1036 steps: 30
Episode: 1037 steps: 72
Episode: 1038 steps: 36
Episode: 1039 steps: 31
Episode: 1040 steps: 81
Episode: 1041 steps: 56
Loss:  3.3209186
Episode: 1042 steps: 89
Episode: 1043 steps: 200
Episode: 1044 steps: 157
Episode: 1045 steps: 76
Episode: 1046 steps: 159
Episode: 1047 steps: 79
Episode: 1048 steps: 193
Episode: 1049 steps: 127
Episode: 1050 steps: 200
Episode: 1051 steps: 153
Loss:  8.937176
Episode: 1052 steps: 29
Episode: 1053 steps: 38
Episode: 1054 steps: 63
Episode: 1055 steps: 74
Episode: 1056 steps: 44
Episode: 1057 steps: 28
Episode: 1058 steps: 70
Episode: 1059 steps: 85
Episode: 1060 steps

Loss:  11.078581
Episode: 1342 steps: 28
Episode: 1343 steps: 24
Episode: 1344 steps: 22
Episode: 1345 steps: 64
Episode: 1346 steps: 27
Episode: 1347 steps: 25
Episode: 1348 steps: 39
Episode: 1349 steps: 29
Episode: 1350 steps: 54
Episode: 1351 steps: 24
Loss:  2.78109
Episode: 1352 steps: 41
Episode: 1353 steps: 26
Episode: 1354 steps: 36
Episode: 1355 steps: 43
Episode: 1356 steps: 52
Episode: 1357 steps: 29
Episode: 1358 steps: 26
Episode: 1359 steps: 45
Episode: 1360 steps: 27
Episode: 1361 steps: 68
Loss:  2.2087426
Episode: 1362 steps: 30
Episode: 1363 steps: 47
Episode: 1364 steps: 55
Episode: 1365 steps: 101
Episode: 1366 steps: 25
Episode: 1367 steps: 42
Episode: 1368 steps: 36
Episode: 1369 steps: 34
Episode: 1370 steps: 20
Episode: 1371 steps: 32
Loss:  2.8663785
Episode: 1372 steps: 29
Episode: 1373 steps: 39
Episode: 1374 steps: 46
Episode: 1375 steps: 27
Episode: 1376 steps: 20
Episode: 1377 steps: 27
Episode: 1378 steps: 36
Episode: 1379 steps: 23
Episode: 1380 steps: 

Loss:  6.0428033
Episode: 1662 steps: 11
Episode: 1663 steps: 13
Episode: 1664 steps: 16
Episode: 1665 steps: 15
Episode: 1666 steps: 13
Episode: 1667 steps: 24
Episode: 1668 steps: 18
Episode: 1669 steps: 17
Episode: 1670 steps: 12
Episode: 1671 steps: 17
Loss:  10.57073
Episode: 1672 steps: 42
Episode: 1673 steps: 20
Episode: 1674 steps: 24
Episode: 1675 steps: 18
Episode: 1676 steps: 24
Episode: 1677 steps: 19
Episode: 1678 steps: 19
Episode: 1679 steps: 27
Episode: 1680 steps: 20
Episode: 1681 steps: 24
Loss:  10.1320095
Episode: 1682 steps: 90
Episode: 1683 steps: 47
Episode: 1684 steps: 49
Episode: 1685 steps: 49
Episode: 1686 steps: 45
Episode: 1687 steps: 58
Episode: 1688 steps: 49
Episode: 1689 steps: 41
Episode: 1690 steps: 72
Episode: 1691 steps: 32
Loss:  1011.65674
Episode: 1692 steps: 12
Episode: 1693 steps: 12
Episode: 1694 steps: 10
Episode: 1695 steps: 14
Episode: 1696 steps: 13
Episode: 1697 steps: 13
Episode: 1698 steps: 10
Episode: 1699 steps: 10
Episode: 1700 steps

Loss:  503.46402
Episode: 1982 steps: 16
Episode: 1983 steps: 16
Episode: 1984 steps: 18
Episode: 1985 steps: 24
Episode: 1986 steps: 52
Episode: 1987 steps: 16
Episode: 1988 steps: 13
Episode: 1989 steps: 26
Episode: 1990 steps: 53
Episode: 1991 steps: 15
Loss:  1.3355895
Episode: 1992 steps: 70
Episode: 1993 steps: 128
Episode: 1994 steps: 96
Episode: 1995 steps: 78
Episode: 1996 steps: 62
Episode: 1997 steps: 90
Episode: 1998 steps: 89
Episode: 1999 steps: 85
Episode: 2000 steps: 69
Episode: 2001 steps: 71
Loss:  3.9241834
Episode: 2002 steps: 27
Episode: 2003 steps: 30
Episode: 2004 steps: 30
Episode: 2005 steps: 27
Episode: 2006 steps: 29
Episode: 2007 steps: 45
Episode: 2008 steps: 56
Episode: 2009 steps: 31
Episode: 2010 steps: 60
Episode: 2011 steps: 37
Loss:  4.6212053
Episode: 2012 steps: 21
Episode: 2013 steps: 25
Episode: 2014 steps: 20
Episode: 2015 steps: 12
Episode: 2016 steps: 19
Episode: 2017 steps: 14
Episode: 2018 steps: 21
Episode: 2019 steps: 39
Episode: 2020 steps

Loss:  453.09918
Episode: 2302 steps: 12
Episode: 2303 steps: 10
Episode: 2304 steps: 10
Episode: 2305 steps: 9
Episode: 2306 steps: 8
Episode: 2307 steps: 8
Episode: 2308 steps: 8
Episode: 2309 steps: 11
Episode: 2310 steps: 10
Episode: 2311 steps: 11
Loss:  3.3248436
Episode: 2312 steps: 18
Episode: 2313 steps: 24
Episode: 2314 steps: 26
Episode: 2315 steps: 30
Episode: 2316 steps: 21
Episode: 2317 steps: 23
Episode: 2318 steps: 18
Episode: 2319 steps: 26
Episode: 2320 steps: 26
Episode: 2321 steps: 18
Loss:  4.3892198
Episode: 2322 steps: 40
Episode: 2323 steps: 44
Episode: 2324 steps: 64
Episode: 2325 steps: 53
Episode: 2326 steps: 80
Episode: 2327 steps: 43
Episode: 2328 steps: 42
Episode: 2329 steps: 44
Episode: 2330 steps: 52
Episode: 2331 steps: 40
Loss:  4.7136984
Episode: 2332 steps: 49
Episode: 2333 steps: 81
Episode: 2334 steps: 47
Episode: 2335 steps: 82
Episode: 2336 steps: 56
Episode: 2337 steps: 48
Episode: 2338 steps: 56
Episode: 2339 steps: 54
Episode: 2340 steps: 55


Loss:  7.0821724
Episode: 2622 steps: 77
Episode: 2623 steps: 56
Episode: 2624 steps: 41
Episode: 2625 steps: 39
Episode: 2626 steps: 33
Episode: 2627 steps: 52
Episode: 2628 steps: 37
Episode: 2629 steps: 45
Episode: 2630 steps: 38
Episode: 2631 steps: 37
Loss:  26.492563
Episode: 2632 steps: 73
Episode: 2633 steps: 46
Episode: 2634 steps: 47
Episode: 2635 steps: 33
Episode: 2636 steps: 46
Episode: 2637 steps: 57
Episode: 2638 steps: 50
Episode: 2639 steps: 53
Episode: 2640 steps: 38
Episode: 2641 steps: 31
Loss:  464.92587
Episode: 2642 steps: 73
Episode: 2643 steps: 32
Episode: 2644 steps: 36
Episode: 2645 steps: 36
Episode: 2646 steps: 22
Episode: 2647 steps: 41
Episode: 2648 steps: 26
Episode: 2649 steps: 48
Episode: 2650 steps: 54
Episode: 2651 steps: 73
Loss:  489.84882
Episode: 2652 steps: 24
Episode: 2653 steps: 23
Episode: 2654 steps: 25
Episode: 2655 steps: 20
Episode: 2656 steps: 25
Episode: 2657 steps: 21
Episode: 2658 steps: 30
Episode: 2659 steps: 22
Episode: 2660 steps:

Loss:  3.613364
Episode: 2952 steps: 24
Episode: 2953 steps: 57
Episode: 2954 steps: 22
Episode: 2955 steps: 32
Episode: 2956 steps: 37
Episode: 2957 steps: 34
Episode: 2958 steps: 29
Episode: 2959 steps: 26
Episode: 2960 steps: 22
Episode: 2961 steps: 25
Loss:  2.329383
Episode: 2962 steps: 45
Episode: 2963 steps: 51
Episode: 2964 steps: 102
Episode: 2965 steps: 86
Episode: 2966 steps: 61
Episode: 2967 steps: 56
Episode: 2968 steps: 61
Episode: 2969 steps: 77
Episode: 2970 steps: 56
Episode: 2971 steps: 64
Loss:  9.464853
Episode: 2972 steps: 37
Episode: 2973 steps: 45
Episode: 2974 steps: 47
Episode: 2975 steps: 44
Episode: 2976 steps: 82
Episode: 2977 steps: 42
Episode: 2978 steps: 44
Episode: 2979 steps: 47
Episode: 2980 steps: 33
Episode: 2981 steps: 62
Loss:  8.668169
Episode: 2982 steps: 25
Episode: 2983 steps: 22
Episode: 2984 steps: 56
Episode: 2985 steps: 38
Episode: 2986 steps: 24
Episode: 2987 steps: 28
Episode: 2988 steps: 24
Episode: 2989 steps: 25
Episode: 2990 steps: 28

Episode: 3278 steps: 64
Episode: 3279 steps: 47
Episode: 3280 steps: 55
Episode: 3281 steps: 37
Loss:  2.8366973
Episode: 3282 steps: 78
Episode: 3283 steps: 38
Episode: 3284 steps: 67
Episode: 3285 steps: 26
Episode: 3286 steps: 46
Episode: 3287 steps: 57
Episode: 3288 steps: 44
Episode: 3289 steps: 62
Episode: 3290 steps: 27
Episode: 3291 steps: 37
Loss:  4.297942
Episode: 3292 steps: 21
Episode: 3293 steps: 19
Episode: 3294 steps: 20
Episode: 3295 steps: 17
Episode: 3296 steps: 19
Episode: 3297 steps: 57
Episode: 3298 steps: 22
Episode: 3299 steps: 24
Episode: 3300 steps: 26
Episode: 3301 steps: 32
Loss:  6.7134924
Episode: 3302 steps: 22
Episode: 3303 steps: 24
Episode: 3304 steps: 25
Episode: 3305 steps: 58
Episode: 3306 steps: 57
Episode: 3307 steps: 32
Episode: 3308 steps: 51
Episode: 3309 steps: 25
Episode: 3310 steps: 53
Episode: 3311 steps: 25
Loss:  13.413366
Episode: 3312 steps: 35
Episode: 3313 steps: 26
Episode: 3314 steps: 30
Episode: 3315 steps: 25
Episode: 3316 steps: 

Loss:  5.8615127
Episode: 3602 steps: 22
Episode: 3603 steps: 23
Episode: 3604 steps: 19
Episode: 3605 steps: 27
Episode: 3606 steps: 33
Episode: 3607 steps: 22
Episode: 3608 steps: 21
Episode: 3609 steps: 33
Episode: 3610 steps: 28
Episode: 3611 steps: 24
Loss:  1.1804198
Episode: 3612 steps: 22
Episode: 3613 steps: 20
Episode: 3614 steps: 19
Episode: 3615 steps: 22
Episode: 3616 steps: 19
Episode: 3617 steps: 16
Episode: 3618 steps: 21
Episode: 3619 steps: 20
Episode: 3620 steps: 15
Episode: 3621 steps: 21
Loss:  4.6633224
Episode: 3622 steps: 36
Episode: 3623 steps: 22
Episode: 3624 steps: 33
Episode: 3625 steps: 43
Episode: 3626 steps: 23
Episode: 3627 steps: 23
Episode: 3628 steps: 23
Episode: 3629 steps: 30
Episode: 3630 steps: 33
Episode: 3631 steps: 33
Loss:  6.0743685
Episode: 3632 steps: 24
Episode: 3633 steps: 22
Episode: 3634 steps: 35
Episode: 3635 steps: 25
Episode: 3636 steps: 24
Episode: 3637 steps: 30
Episode: 3638 steps: 22
Episode: 3639 steps: 27
Episode: 3640 steps:

Loss:  9.020705
Episode: 3922 steps: 31
Episode: 3923 steps: 30
Episode: 3924 steps: 27
Episode: 3925 steps: 38
Episode: 3926 steps: 34
Episode: 3927 steps: 22
Episode: 3928 steps: 30
Episode: 3929 steps: 23
Episode: 3930 steps: 31
Episode: 3931 steps: 28
Loss:  429.31607
Episode: 3932 steps: 23
Episode: 3933 steps: 24
Episode: 3934 steps: 20
Episode: 3935 steps: 28
Episode: 3936 steps: 27
Episode: 3937 steps: 26
Episode: 3938 steps: 25
Episode: 3939 steps: 22
Episode: 3940 steps: 23
Episode: 3941 steps: 19
Loss:  543.099
Episode: 3942 steps: 38
Episode: 3943 steps: 36
Episode: 3944 steps: 47
Episode: 3945 steps: 51
Episode: 3946 steps: 30
Episode: 3947 steps: 41
Episode: 3948 steps: 55
Episode: 3949 steps: 42
Episode: 3950 steps: 33
Episode: 3951 steps: 38
Loss:  467.27667
Episode: 3952 steps: 18
Episode: 3953 steps: 19
Episode: 3954 steps: 15
Episode: 3955 steps: 15
Episode: 3956 steps: 15
Episode: 3957 steps: 11
Episode: 3958 steps: 18
Episode: 3959 steps: 14
Episode: 3960 steps: 15

Loss:  12.968226
Episode: 4242 steps: 22
Episode: 4243 steps: 33
Episode: 4244 steps: 44
Episode: 4245 steps: 67
Episode: 4246 steps: 25
Episode: 4247 steps: 34
Episode: 4248 steps: 56
Episode: 4249 steps: 55
Episode: 4250 steps: 51
Episode: 4251 steps: 43
Loss:  6.082249
Episode: 4252 steps: 62
Episode: 4253 steps: 33
Episode: 4254 steps: 35
Episode: 4255 steps: 29
Episode: 4256 steps: 41
Episode: 4257 steps: 39
Episode: 4258 steps: 27
Episode: 4259 steps: 31
Episode: 4260 steps: 50
Episode: 4261 steps: 61
Loss:  430.05206
Episode: 4262 steps: 26
Episode: 4263 steps: 25
Episode: 4264 steps: 22
Episode: 4265 steps: 28
Episode: 4266 steps: 28
Episode: 4267 steps: 43
Episode: 4268 steps: 25
Episode: 4269 steps: 37
Episode: 4270 steps: 35
Episode: 4271 steps: 27
Loss:  514.08093
Episode: 4272 steps: 31
Episode: 4273 steps: 31
Episode: 4274 steps: 48
Episode: 4275 steps: 43
Episode: 4276 steps: 26
Episode: 4277 steps: 29
Episode: 4278 steps: 38
Episode: 4279 steps: 85
Episode: 4280 steps: 

Loss:  6.878885
Episode: 4562 steps: 42
Episode: 4563 steps: 23
Episode: 4564 steps: 30
Episode: 4565 steps: 30
Episode: 4566 steps: 28
Episode: 4567 steps: 42
Episode: 4568 steps: 21
Episode: 4569 steps: 25
Episode: 4570 steps: 23
Episode: 4571 steps: 55
Loss:  4.6223702
Episode: 4572 steps: 24
Episode: 4573 steps: 24
Episode: 4574 steps: 24
Episode: 4575 steps: 37
Episode: 4576 steps: 28
Episode: 4577 steps: 21
Episode: 4578 steps: 29
Episode: 4579 steps: 25
Episode: 4580 steps: 43
Episode: 4581 steps: 23
Loss:  3.5985076
Episode: 4582 steps: 28
Episode: 4583 steps: 23
Episode: 4584 steps: 28
Episode: 4585 steps: 26
Episode: 4586 steps: 40
Episode: 4587 steps: 37
Episode: 4588 steps: 27
Episode: 4589 steps: 67
Episode: 4590 steps: 26
Episode: 4591 steps: 32
Loss:  3.013647
Episode: 4592 steps: 45
Episode: 4593 steps: 24
Episode: 4594 steps: 36
Episode: 4595 steps: 46
Episode: 4596 steps: 23
Episode: 4597 steps: 24
Episode: 4598 steps: 24
Episode: 4599 steps: 24
Episode: 4600 steps: 2

Loss:  3.3329062
Episode: 4892 steps: 32
Episode: 4893 steps: 31
Episode: 4894 steps: 20
Episode: 4895 steps: 32
Episode: 4896 steps: 36
Episode: 4897 steps: 55
Episode: 4898 steps: 29
Episode: 4899 steps: 39
Episode: 4900 steps: 34
Episode: 4901 steps: 22
Loss:  1.1774976
Episode: 4902 steps: 21
Episode: 4903 steps: 36
Episode: 4904 steps: 48
Episode: 4905 steps: 39
Episode: 4906 steps: 49
Episode: 4907 steps: 23
Episode: 4908 steps: 24
Episode: 4909 steps: 22
Episode: 4910 steps: 31
Episode: 4911 steps: 24
Loss:  1.1727265
Episode: 4912 steps: 25
Episode: 4913 steps: 27
Episode: 4914 steps: 31
Episode: 4915 steps: 27
Episode: 4916 steps: 31
Episode: 4917 steps: 37
Episode: 4918 steps: 40
Episode: 4919 steps: 35
Episode: 4920 steps: 46
Episode: 4921 steps: 30
Loss:  4.871104
Episode: 4922 steps: 57
Episode: 4923 steps: 28
Episode: 4924 steps: 30
Episode: 4925 steps: 43
Episode: 4926 steps: 30
Episode: 4927 steps: 39
Episode: 4928 steps: 39
Episode: 4929 steps: 29
Episode: 4930 steps: 