In [1]:
%matplotlib inline
import math
import matplotlib.pyplot as plt
import os
os.putenv('SDL_VIDEODRIVER', 'fbcon')
os.environ["SDL_VIDEODRIVER"] = "dummy"  # this line make pop-out window not appear
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import tensorflow as tf
import numpy as np
import skimage.color
import skimage.transform
from tqdm import tqdm

In [2]:
!rm -rf ./movie && mkdir ./movie

In [3]:
from ple import PLE
from ple.games.flappybird import FlappyBird

game = FlappyBird()
env = PLE(game, fps=30, display_screen=False)  # environment interface to game

screen_width = 80
screen_height = 80
num_stack = 4

pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html
couldn't import doomish
Couldn't import doom


In [4]:
class Actor_critic:
    def __init__(self, name, num_action, discount_factor=0.99):
        self.discount_factor = discount_factor
        self.num_action = num_action
        self.name = name
        with tf.variable_scope(name):
            self.build_model()

    def build_model(self):
        # input: current screen, selected action and reward
        self.input_screen = tf.placeholder(
            tf.float32, shape=[None, num_stack, 5])
        self.action = tf.placeholder(tf.int32, [None])
        self.reward = tf.placeholder(tf.float32, [None])
        self.is_training = tf.placeholder(tf.bool, shape=[])

        def value_net(screen, reuse=False):
            with tf.variable_scope(
                    "value_net",
                    reuse=reuse,
                    initializer=tf.truncated_normal_initializer(stddev=1e-2)):
                x = tf.contrib.layers.flatten(screen)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=1, activation=None)
                return x

        def policy_net(screen, reuse=False):
            with tf.variable_scope("policy_net", reuse=reuse):
                x = tf.contrib.layers.flatten(screen)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=512, activation=tf.nn.relu)
                x = tf.layers.dense(x, units=self.num_action, activation=None)
                return x

        # value
        self.v_output = value_net(
            self.input_screen
        )  # Q(s,a,theta) for all a, shape (batch_size, num_action)
        self.tar_V = tf.placeholder(tf.float32, [None])
        self.V_loss = tf.reduce_mean(
            tf.square(self.reward + self.discount_factor * self.tar_V -
                      self.v_output))
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-6)
        g_gvs = optimizer.compute_gradients(
            self.V_loss,
            var_list=[
                v for v in tf.global_variables() if 'value_net' in v.name
            ])
        self.V_train_op = optimizer.apply_gradients(g_gvs)

        # policy
        self.policy_logit = policy_net(
            self.input_screen
        )  # logit of probility(P(s,a,theta)) for all a, shape (batch_size, num_action)
        index = tf.stack([tf.range(tf.shape(self.action)[0]), self.action],
                         axis=1)
        self.prob = tf.gather_nd(
            tf.nn.softmax(self.policy_logit),
            index)  # P(s,a,theta) for selected action, shape (batch_size, 1)

        # loss = E[log(p(s,a))*r]
        self.policy_loss = -tf.reduce_mean(
            tf.log(self.prob + 0.00000001) * self.reward)
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-6)
        g_gvs = optimizer.compute_gradients(
            self.policy_loss,
            var_list=[
                v for v in tf.global_variables() if 'policy_net' in v.name
            ])
        self.train_op = optimizer.apply_gradients(g_gvs)
        self.pred = tf.multinomial(self.policy_logit,
                                   1)  # sample action from distribution

    def select_action(self, input_screen, sess):
        input_screen = np.array(input_screen)
        feed_dict = {
            self.input_screen: input_screen[None, :],
        }
        action = sess.run(
            self.pred,
            feed_dict=feed_dict)[0][0]  # sameple action from distribution
        return action

    def update_policy(self, input_screens, actions, rewards,
                      input_screens_plum):
        feed_dict = {
            self.input_screen:
            np.array(input_screens_plum)
        }
        esti_V = sess.run(self.v_output, feed_dict=feed_dict).flatten()
        td_target = rewards + self.discount_factor * esti_V

        feed_dict = {
            self.input_screen: np.array(input_screens),
        }
        esti_V = sess.run(self.v_output, feed_dict=feed_dict).flatten()
        td_error = td_target - esti_V
        feed_dict = {
            self.input_screen:
            np.array(input_screens_plum),
        }
        feed_dict = {
            self.input_screen: np.array(input_screens),
            self.tar_V: td_target,
            self.reward: rewards,
        }

        V_loss, _ = sess.run([self.V_loss, self.V_train_op],
                             feed_dict=feed_dict)

        feed_dict = {
            self.input_screen: np.array(input_screens),
            self.action: actions,
            self.reward: td_error,
        }
        policy_loss, _ = sess.run([self.policy_loss, self.train_op],
                                  feed_dict=feed_dict)
        return V_loss, policy_loss


In [5]:
# init agent
tf.reset_default_graph()
num_action = len(env.getActionSet())
# agent for frequently updating
ac_agent = Actor_critic('PG_Agent', num_action)
# init all
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)
sess.run(tf.global_variables_initializer())

In [6]:
def preprocess(state):
    data = [
        state['player_y'],
        state['player_vel'],
        state['next_pipe_top_y'],
        state['next_pipe_bottom_y'],
        state['next_pipe_dist_to_player'],
#         state['next_next_pipe_top_y'],
#         state['next_next_pipe_bottom_y'],
#         state['next_next_pipe_dist_to_player'],
    ]
    
    return data

def make_anim(images, fps=60, true_image=False):
    duration = len(images) / fps
    import moviepy.editor as mpy

    def make_frame(t):
        try:
            x = images[int(len(images) / duration * t)]
        except:
            x = images[-1]

        if true_image:
            return x.astype(np.uint8)
        else:
            return ((x + 1) / 2 * 255).astype(np.uint8)

    clip = mpy.VideoClip(make_frame, duration=duration)
    clip.fps = fps
    return clip

In [7]:
from IPython.display import Image, display

update_every_episode = 1
print_every_episode = 100
save_video_every_episode = 500
NUM_EPISODE = 30_000
NUM_EXPLORE = 0
reward_values = {
    "positive": 1,
    "tick": 0.2,  # reward per timestamp
    "loss": -1,
}
for episode in range(0, NUM_EPISODE + 1):

    # Reset the environment
    game = FlappyBird()
    env = PLE(
        game,
        fps=30,
        display_screen=False,
        reward_values=reward_values,
        rng=np.random.RandomState(1))
    env.reset_game()
    env.act(0)  # dummy input to make sure input screen is correct

    # record frame
    if episode % save_video_every_episode == 0:
        frames = [env.getScreenRGB()]

    # grayscale input screen for this episode
#     input_screens = [preprocess(env.getScreenGrayscale())] * 4
    input_screens = [preprocess(game.getGameState())] * 4

    # cumulate reward for this episode
    cum_reward = 0

    experiences = []
    t = 0
    while not env.game_over():
        # feed four previous screen, select an action
        action = ac_agent.select_action(input_screens[-4:], sess)

        # execute the action and get reward
        reward = env.act(env.getActionSet()[action])

        # record frame
        if episode % save_video_every_episode == 0:
            frames.append(env.getScreenRGB())

        # cumulate reward
        cum_reward += reward

        # append grayscale screen for this episode
        input_screens.append(preprocess(game.getGameState()))

        # append experience for this episode
        experiences.append(
            [input_screens[-5:-1], action, reward, input_screens[-4:]])

        t += 1

    def discount_reward(x, discount_rate):
        discounted_r = np.zeros(len(x))
        num_r = len(x)
        for i in range(num_r):
            discounted_r[i] = x[i] * math.pow(discount_rate, i)
        discounted_r = np.cumsum(discounted_r[::-1])
        return discounted_r[::-1]

    rewards = [e[2] for e in experiences]
    discounted_reward = discount_reward(rewards, ac_agent.discount_factor)

    # normalize
    discounted_reward -= np.mean(discounted_reward)
    discounted_reward /= np.std(discounted_reward)
    train_screens = []
    train_actions = []
    train_rewards = []
    train_input_screens_plum = []
    for i in range(len(experiences)):
        experiences[i][2] = discounted_reward[i]
        train_screens.append(experiences[i][0])
        train_actions.append(experiences[i][1])
        train_rewards.append(experiences[i][2])
        train_input_screens_plum.append(experiences[i][3])
    loss = ac_agent.update_policy(train_screens, train_actions, train_rewards,
                                  train_input_screens_plum)

    if episode % print_every_episode == 0 and episode > NUM_EXPLORE:
        print("[{}] time live:{}, cumulated reward: {:.5f}, loss: {}".format(
            episode, t, cum_reward, loss))

    if episode % save_video_every_episode == 0 and episode > NUM_EXPLORE:  # for every 5000 episode, record an animation
        clip = make_anim(frames, fps=60, true_image=True).rotate(-90)
        clip.write_videofile("movie/ac_{}_{}.webm".format(episode, t), fps=60)
#         display(clip.ipython_display(fps=60, autoplay=1, loop=1))


[100] time live:61, cumulated reward: 11.20000, loss: (3.9600792, -0.0011545287)
[200] time live:61, cumulated reward: 11.20000, loss: (3.9600897, -0.0005060394)
[300] time live:61, cumulated reward: 11.20000, loss: (3.9600933, -0.00046241612)
[400] time live:61, cumulated reward: 11.20000, loss: (3.9600933, -0.00079019374)
[500] time live:61, cumulated reward: 11.20000, loss: (3.9600577, -0.07273477)
[MoviePy] >>>> Building video movie/ac_500_61.webm
[MoviePy] Writing video movie/ac_500_61.webm


100%|██████████| 63/63 [00:00<00:00, 117.52it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_500_61.webm 






[600] time live:61, cumulated reward: 11.20000, loss: (3.9600933, -0.0017635616)
[700] time live:61, cumulated reward: 11.20000, loss: (3.960096, -0.00078133814)
[800] time live:61, cumulated reward: 11.20000, loss: (3.9600937, -0.0014968382)
[900] time live:61, cumulated reward: 11.20000, loss: (3.9600973, -0.0011133823)
[1000] time live:61, cumulated reward: 11.20000, loss: (3.9600897, -0.003660325)
[MoviePy] >>>> Building video movie/ac_1000_61.webm
[MoviePy] Writing video movie/ac_1000_61.webm


100%|██████████| 63/63 [00:00<00:00, 120.33it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_1000_61.webm 






[1100] time live:61, cumulated reward: 11.20000, loss: (3.959814, -0.11076362)
[1200] time live:73, cumulated reward: 14.60000, loss: (3.9600396, -0.20363174)
[1300] time live:71, cumulated reward: 14.20000, loss: (3.9601297, -0.10364502)
[1400] time live:103, cumulated reward: 21.60000, loss: (3.9601452, 0.009175224)
[1500] time live:138, cumulated reward: 29.60000, loss: (3.9601302, -0.04545612)
[MoviePy] >>>> Building video movie/ac_1500_138.webm
[MoviePy] Writing video movie/ac_1500_138.webm


100%|██████████| 140/140 [00:01<00:00, 121.17it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_1500_138.webm 

[1600] time live:66, cumulated reward: 13.20000, loss: (3.96007, -0.06512543)
[1700] time live:97, cumulated reward: 19.40000, loss: (3.9601367, 0.04584992)
[1800] time live:63, cumulated reward: 11.60000, loss: (3.9601047, -0.13015752)
[1900] time live:104, cumulated reward: 21.80000, loss: (3.9600945, -0.052454956)
[2000] time live:99, cumulated reward: 19.80000, loss: (3.9601192, -0.08061392)
[MoviePy] >>>> Building video movie/ac_2000_99.webm
[MoviePy] Writing video movie/ac_2000_99.webm


 99%|█████████▉| 100/101 [00:00<00:00, 136.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_2000_99.webm 






[2100] time live:104, cumulated reward: 21.80000, loss: (3.96012, -0.020032912)
[2200] time live:65, cumulated reward: 13.00000, loss: (3.9600873, -0.07470222)
[2300] time live:182, cumulated reward: 39.40000, loss: (3.9601204, -0.021376995)
[2400] time live:133, cumulated reward: 27.60000, loss: (3.96009, 0.007375322)
[2500] time live:222, cumulated reward: 48.40000, loss: (3.9600885, 0.04134185)
[MoviePy] >>>> Building video movie/ac_2500_222.webm
[MoviePy] Writing video movie/ac_2500_222.webm


100%|█████████▉| 223/224 [00:01<00:00, 123.31it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_2500_222.webm 






[2600] time live:133, cumulated reward: 27.60000, loss: (3.96011, -0.0028202266)
[2700] time live:133, cumulated reward: 27.60000, loss: (3.960104, 0.027935028)
[2800] time live:139, cumulated reward: 29.80000, loss: (3.9601002, -0.04236185)
[2900] time live:133, cumulated reward: 27.60000, loss: (3.9601195, 0.0067000613)
[3000] time live:108, cumulated reward: 22.60000, loss: (3.9600928, -0.064845406)
[MoviePy] >>>> Building video movie/ac_3000_108.webm
[MoviePy] Writing video movie/ac_3000_108.webm


 99%|█████████▉| 109/110 [00:00<00:00, 134.53it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_3000_108.webm 






[3100] time live:143, cumulated reward: 30.60000, loss: (3.960106, -0.007265129)
[3200] time live:133, cumulated reward: 27.60000, loss: (3.9600961, 0.0158366)
[3300] time live:585, cumulated reward: 130.00000, loss: (3.960106, 0.008644723)
[3400] time live:214, cumulated reward: 46.80000, loss: (3.9601007, -0.02323255)
[3500] time live:216, cumulated reward: 47.20000, loss: (3.9600775, 0.017409716)
[MoviePy] >>>> Building video movie/ac_3500_216.webm
[MoviePy] Writing video movie/ac_3500_216.webm


100%|█████████▉| 217/218 [00:01<00:00, 125.42it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_3500_216.webm 

[3600] time live:101, cumulated reward: 21.20000, loss: (3.9601028, -0.013091878)
[3700] time live:777, cumulated reward: 173.40000, loss: (3.9601038, 0.010201674)
[3800] time live:585, cumulated reward: 130.00000, loss: (3.9600978, -0.010839438)
[3900] time live:337, cumulated reward: 74.40000, loss: (3.9600978, -0.023101522)
[4000] time live:133, cumulated reward: 27.60000, loss: (3.960105, 0.00021223022)
[MoviePy] >>>> Building video movie/ac_4000_133.webm
[MoviePy] Writing video movie/ac_4000_133.webm


 99%|█████████▉| 134/135 [00:01<00:00, 133.58it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_4000_133.webm 






[4100] time live:187, cumulated reward: 40.40000, loss: (3.960108, -0.010669732)
[4200] time live:250, cumulated reward: 55.00000, loss: (3.9600985, -0.008076143)
[4300] time live:99, cumulated reward: 19.80000, loss: (3.9601057, -0.030551834)
[4400] time live:585, cumulated reward: 130.00000, loss: (3.9601038, -0.0049237562)
[4500] time live:330, cumulated reward: 73.00000, loss: (3.9600935, 0.023383975)
[MoviePy] >>>> Building video movie/ac_4500_330.webm
[MoviePy] Writing video movie/ac_4500_330.webm


100%|█████████▉| 331/332 [00:02<00:00, 124.78it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_4500_330.webm 






[4600] time live:138, cumulated reward: 29.60000, loss: (3.9601068, -0.06072843)
[4700] time live:333, cumulated reward: 73.60000, loss: (3.9600923, -0.0032795155)
[4800] time live:183, cumulated reward: 39.60000, loss: (3.9600997, 0.0037133915)
[4900] time live:369, cumulated reward: 81.80000, loss: (3.9601088, -0.018532937)
[5000] time live:554, cumulated reward: 123.80000, loss: (3.960103, 0.004835833)
[MoviePy] >>>> Building video movie/ac_5000_554.webm
[MoviePy] Writing video movie/ac_5000_554.webm


100%|█████████▉| 555/556 [00:05<00:00, 110.76it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_5000_554.webm 






[5100] time live:1154, cumulated reward: 259.80000, loss: (3.9601011, -0.0118988445)
[5200] time live:253, cumulated reward: 55.60000, loss: (3.9600904, -0.033943705)
[5300] time live:100, cumulated reward: 20.00000, loss: (3.960103, -0.0490186)
[5400] time live:375, cumulated reward: 83.00000, loss: (3.9600947, 0.013973096)
[5500] time live:585, cumulated reward: 130.00000, loss: (3.960103, -0.0261591)
[MoviePy] >>>> Building video movie/ac_5500_585.webm
[MoviePy] Writing video movie/ac_5500_585.webm


100%|██████████| 587/587 [00:05<00:00, 109.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_5500_585.webm 






[5600] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.01785021)
[5700] time live:585, cumulated reward: 130.00000, loss: (3.960103, -0.014625734)
[5800] time live:445, cumulated reward: 99.00000, loss: (3.9601028, -0.029296765)
[5900] time live:777, cumulated reward: 173.40000, loss: (3.9601042, -0.012986797)
[6000] time live:775, cumulated reward: 173.00000, loss: (3.9601007, 0.0160599)
[MoviePy] >>>> Building video movie/ac_6000_775.webm
[MoviePy] Writing video movie/ac_6000_775.webm


100%|█████████▉| 776/777 [00:07<00:00, 109.50it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_6000_775.webm 






[6100] time live:372, cumulated reward: 82.40000, loss: (3.9600966, -0.019910222)
[6200] time live:482, cumulated reward: 107.40000, loss: (3.9600985, -0.025792906)
[6300] time live:438, cumulated reward: 96.60000, loss: (3.960096, -0.010681462)
[6400] time live:585, cumulated reward: 130.00000, loss: (3.9600997, -0.014404219)
[6500] time live:585, cumulated reward: 130.00000, loss: (3.9601016, -0.022224033)
[MoviePy] >>>> Building video movie/ac_6500_585.webm
[MoviePy] Writing video movie/ac_6500_585.webm


100%|██████████| 587/587 [00:05<00:00, 109.43it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_6500_585.webm 






[6600] time live:585, cumulated reward: 130.00000, loss: (3.9601023, -0.0064743427)
[6700] time live:711, cumulated reward: 159.20000, loss: (3.960106, -0.014316394)
[6800] time live:924, cumulated reward: 206.80000, loss: (3.9601002, -0.0049277074)
[6900] time live:133, cumulated reward: 27.60000, loss: (3.9600997, -0.018570961)
[7000] time live:445, cumulated reward: 99.00000, loss: (3.9600935, -0.002490235)
[MoviePy] >>>> Building video movie/ac_7000_445.webm
[MoviePy] Writing video movie/ac_7000_445.webm


100%|█████████▉| 446/447 [00:03<00:00, 115.28it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_7000_445.webm 






[7100] time live:450, cumulated reward: 100.00000, loss: (3.9600997, -0.00021890402)
[7200] time live:585, cumulated reward: 130.00000, loss: (3.9600964, 0.0029733616)
[7300] time live:133, cumulated reward: 27.60000, loss: (3.9601023, 0.07982336)
[7400] time live:1264, cumulated reward: 283.80000, loss: (3.9601026, -0.00781442)
[7500] time live:585, cumulated reward: 130.00000, loss: (3.960099, 0.0016002243)
[MoviePy] >>>> Building video movie/ac_7500_585.webm
[MoviePy] Writing video movie/ac_7500_585.webm


100%|██████████| 587/587 [00:05<00:00, 110.11it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_7500_585.webm 






[7600] time live:924, cumulated reward: 206.80000, loss: (3.960101, -0.018514035)
[7700] time live:1127, cumulated reward: 253.40000, loss: (3.9601016, 0.00096892973)
[7800] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.009508275)
[7900] time live:775, cumulated reward: 173.00000, loss: (3.960102, -0.0025962892)
[8000] time live:585, cumulated reward: 130.00000, loss: (3.9600968, -0.0012809125)
[MoviePy] >>>> Building video movie/ac_8000_585.webm
[MoviePy] Writing video movie/ac_8000_585.webm


100%|██████████| 587/587 [00:05<00:00, 115.18it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_8000_585.webm 






[8100] time live:585, cumulated reward: 130.00000, loss: (3.9601038, -0.00048147552)
[8200] time live:337, cumulated reward: 74.40000, loss: (3.960105, 0.027818196)
[8300] time live:2054, cumulated reward: 462.80000, loss: (3.960101, -0.0028350933)
[8400] time live:218, cumulated reward: 47.60000, loss: (3.960105, -0.059647683)
[8500] time live:585, cumulated reward: 130.00000, loss: (3.9601026, -0.010245848)
[MoviePy] >>>> Building video movie/ac_8500_585.webm
[MoviePy] Writing video movie/ac_8500_585.webm


100%|██████████| 587/587 [00:05<00:00, 110.66it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_8500_585.webm 






[8600] time live:478, cumulated reward: 106.60000, loss: (3.9601, 0.02651581)
[8700] time live:585, cumulated reward: 130.00000, loss: (3.9601004, -0.008154066)
[8800] time live:585, cumulated reward: 130.00000, loss: (3.960101, 3.1041895e-05)
[8900] time live:585, cumulated reward: 130.00000, loss: (3.9601023, 0.013205078)
[9000] time live:585, cumulated reward: 130.00000, loss: (3.9601016, 0.019484619)
[MoviePy] >>>> Building video movie/ac_9000_585.webm
[MoviePy] Writing video movie/ac_9000_585.webm


100%|██████████| 587/587 [00:05<00:00, 108.93it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_9000_585.webm 






[9100] time live:585, cumulated reward: 130.00000, loss: (3.9600985, 0.011320065)
[9200] time live:585, cumulated reward: 130.00000, loss: (3.9600983, -0.007179742)
[9300] time live:412, cumulated reward: 91.40000, loss: (3.960101, 0.017613377)
[9400] time live:133, cumulated reward: 27.60000, loss: (3.9601023, -0.024864301)
[9500] time live:889, cumulated reward: 198.80000, loss: (3.9601038, 0.00056815497)
[MoviePy] >>>> Building video movie/ac_9500_889.webm
[MoviePy] Writing video movie/ac_9500_889.webm


100%|█████████▉| 890/891 [00:08<00:00, 109.39it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_9500_889.webm 






[9600] time live:585, cumulated reward: 130.00000, loss: (3.960097, -0.008561715)
[9700] time live:1869, cumulated reward: 420.80000, loss: (3.9601018, -0.0036799598)
[9800] time live:585, cumulated reward: 130.00000, loss: (3.9601011, -0.0021369245)
[9900] time live:133, cumulated reward: 27.60000, loss: (3.9601038, -0.030558884)
[10000] time live:585, cumulated reward: 130.00000, loss: (3.9601016, -0.0185965)
[MoviePy] >>>> Building video movie/ac_10000_585.webm
[MoviePy] Writing video movie/ac_10000_585.webm


100%|██████████| 587/587 [00:05<00:00, 113.95it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_10000_585.webm 






[10100] time live:1046, cumulated reward: 235.20000, loss: (3.9601016, 0.0011410686)
[10200] time live:1340, cumulated reward: 301.00000, loss: (3.9601004, -0.004786907)
[10300] time live:585, cumulated reward: 130.00000, loss: (3.9600978, 0.012821517)
[10400] time live:585, cumulated reward: 130.00000, loss: (3.9600968, 0.024039153)
[10500] time live:585, cumulated reward: 130.00000, loss: (3.9600995, 0.048892997)
[MoviePy] >>>> Building video movie/ac_10500_585.webm
[MoviePy] Writing video movie/ac_10500_585.webm


100%|██████████| 587/587 [00:04<00:00, 117.94it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_10500_585.webm 






[10600] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.015457069)
[10700] time live:226, cumulated reward: 49.20000, loss: (3.9601033, -0.023471814)
[10800] time live:223, cumulated reward: 48.60000, loss: (3.9600978, 0.006204789)
[10900] time live:585, cumulated reward: 130.00000, loss: (3.9601045, -0.009142279)
[11000] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.026431726)
[MoviePy] >>>> Building video movie/ac_11000_585.webm
[MoviePy] Writing video movie/ac_11000_585.webm


100%|██████████| 587/587 [00:05<00:00, 113.70it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_11000_585.webm 






[11100] time live:449, cumulated reward: 99.80000, loss: (3.9600968, 0.0028329697)
[11200] time live:639, cumulated reward: 142.80000, loss: (3.9601038, -0.019312674)
[11300] time live:1694, cumulated reward: 381.80000, loss: (3.9601004, -0.0013195275)
[11400] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.007926473)
[11500] time live:585, cumulated reward: 130.00000, loss: (3.960099, -0.0008637119)
[MoviePy] >>>> Building video movie/ac_11500_585.webm
[MoviePy] Writing video movie/ac_11500_585.webm


100%|██████████| 587/587 [00:04<00:00, 119.77it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_11500_585.webm 






[11600] time live:1150, cumulated reward: 258.00000, loss: (3.9601014, -0.00066080695)
[11700] time live:672, cumulated reward: 150.40000, loss: (3.9600992, -0.008598864)
[11800] time live:2244, cumulated reward: 505.80000, loss: (3.9601004, -0.012967007)
[11900] time live:776, cumulated reward: 173.20000, loss: (3.960099, 0.017770244)
[12000] time live:179, cumulated reward: 38.80000, loss: (3.9600935, -0.028456848)
[MoviePy] >>>> Building video movie/ac_12000_179.webm
[MoviePy] Writing video movie/ac_12000_179.webm


 99%|█████████▉| 180/181 [00:01<00:00, 127.00it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_12000_179.webm 






[12100] time live:256, cumulated reward: 56.20000, loss: (3.9600997, 0.01463462)
[12200] time live:149, cumulated reward: 31.80000, loss: (3.9600964, -0.023287725)
[12300] time live:924, cumulated reward: 206.80000, loss: (3.9601016, -0.013032681)
[12400] time live:218, cumulated reward: 47.60000, loss: (3.9601004, -0.0111005595)
[12500] time live:626, cumulated reward: 139.20000, loss: (3.9600983, -0.02430172)
[MoviePy] >>>> Building video movie/ac_12500_626.webm
[MoviePy] Writing video movie/ac_12500_626.webm


100%|█████████▉| 627/628 [00:05<00:00, 113.55it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_12500_626.webm 






[12600] time live:1828, cumulated reward: 411.60000, loss: (3.9601004, -0.0141713675)
[12700] time live:449, cumulated reward: 99.80000, loss: (3.9601007, -0.022521943)
[12800] time live:1679, cumulated reward: 377.80000, loss: (3.9601007, -0.0111396825)
[12900] time live:215, cumulated reward: 47.00000, loss: (3.9601054, -0.028989924)
[13000] time live:585, cumulated reward: 130.00000, loss: (3.9601016, -0.00048377167)
[MoviePy] >>>> Building video movie/ac_13000_585.webm
[MoviePy] Writing video movie/ac_13000_585.webm


100%|██████████| 587/587 [00:05<00:00, 109.12it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_13000_585.webm 






[13100] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.016316114)
[13200] time live:555, cumulated reward: 124.00000, loss: (3.9600995, -0.013841443)
[13300] time live:97, cumulated reward: 19.40000, loss: (3.960086, -0.05557076)
[13400] time live:1828, cumulated reward: 411.60000, loss: (3.960101, -0.015718468)
[13500] time live:97, cumulated reward: 19.40000, loss: (3.9600856, -0.034977768)
[MoviePy] >>>> Building video movie/ac_13500_97.webm
[MoviePy] Writing video movie/ac_13500_97.webm


 99%|█████████▉| 98/99 [00:00<00:00, 136.23it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_13500_97.webm 






[13600] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.01517633)
[13700] time live:2054, cumulated reward: 462.80000, loss: (3.9601004, -0.0068624653)
[13800] time live:290, cumulated reward: 63.00000, loss: (3.9601011, -0.023699874)
[13900] time live:1453, cumulated reward: 326.60000, loss: (3.9601018, 0.014944432)
[14000] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.0005513024)
[MoviePy] >>>> Building video movie/ac_14000_585.webm
[MoviePy] Writing video movie/ac_14000_585.webm


100%|██████████| 587/587 [00:05<00:00, 113.80it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_14000_585.webm 






[14100] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.012770939)
[14200] time live:585, cumulated reward: 130.00000, loss: (3.9600997, 0.001748949)
[14300] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.0066332887)
[14400] time live:924, cumulated reward: 206.80000, loss: (3.9601004, 0.0013351755)
[14500] time live:1267, cumulated reward: 285.40000, loss: (3.9601004, -0.0056971577)
[MoviePy] >>>> Building video movie/ac_14500_1267.webm
[MoviePy] Writing video movie/ac_14500_1267.webm


100%|█████████▉| 1268/1269 [00:11<00:00, 107.35it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_14500_1267.webm 






[14600] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.015917588)
[14700] time live:585, cumulated reward: 130.00000, loss: (3.9600978, -0.011751463)
[14800] time live:1278, cumulated reward: 287.60000, loss: (3.9601018, -0.0175274)
[14900] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.009595188)
[15000] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.019237144)
[MoviePy] >>>> Building video movie/ac_15000_585.webm
[MoviePy] Writing video movie/ac_15000_585.webm


100%|██████████| 587/587 [00:04<00:00, 118.27it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_15000_585.webm 






[15100] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.0035254343)
[15200] time live:585, cumulated reward: 130.00000, loss: (3.9601011, 0.021175595)
[15300] time live:1340, cumulated reward: 301.00000, loss: (3.9601004, -0.017765334)
[15400] time live:811, cumulated reward: 181.20000, loss: (3.9601011, -0.015482751)
[15500] time live:811, cumulated reward: 181.20000, loss: (3.9601023, 0.0047033806)
[MoviePy] >>>> Building video movie/ac_15500_811.webm
[MoviePy] Writing video movie/ac_15500_811.webm


100%|█████████▉| 812/813 [00:07<00:00, 108.45it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_15500_811.webm 

[15600] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.017436206)
[15700] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.0072721737)
[15800] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.0030663637)
[15900] time live:585, cumulated reward: 130.00000, loss: (3.9601011, -0.002482417)
[16000] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.012273854)
[MoviePy] >>>> Building video movie/ac_16000_585.webm
[MoviePy] Writing video movie/ac_16000_585.webm


100%|██████████| 587/587 [00:05<00:00, 111.63it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_16000_585.webm 






[16100] time live:585, cumulated reward: 130.00000, loss: (3.960099, -0.0044009886)
[16200] time live:217, cumulated reward: 47.40000, loss: (3.9601004, -0.03603913)
[16300] time live:225, cumulated reward: 49.00000, loss: (3.9601007, -0.03909194)
[16400] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.018346492)
[16500] time live:924, cumulated reward: 206.80000, loss: (3.960101, -0.018896053)
[MoviePy] >>>> Building video movie/ac_16500_924.webm
[MoviePy] Writing video movie/ac_16500_924.webm


100%|█████████▉| 925/926 [00:08<00:00, 109.67it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_16500_924.webm 






[16600] time live:698, cumulated reward: 155.60000, loss: (3.9601, -0.010477762)
[16700] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.01866663)
[16800] time live:2054, cumulated reward: 462.80000, loss: (3.960101, -0.009478978)
[16900] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.025188623)
[17000] time live:1340, cumulated reward: 301.00000, loss: (3.9601004, 0.01642867)
[MoviePy] >>>> Building video movie/ac_17000_1340.webm
[MoviePy] Writing video movie/ac_17000_1340.webm


100%|█████████▉| 1341/1342 [00:11<00:00, 111.77it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_17000_1340.webm 






[17100] time live:924, cumulated reward: 206.80000, loss: (3.960101, -0.0016978336)
[17200] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.00067154446)
[17300] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.011954938)
[17400] time live:133, cumulated reward: 27.60000, loss: (3.9600987, -0.07355144)
[17500] time live:585, cumulated reward: 130.00000, loss: (3.9601018, -0.01359577)
[MoviePy] >>>> Building video movie/ac_17500_585.webm
[MoviePy] Writing video movie/ac_17500_585.webm


100%|██████████| 587/587 [00:05<00:00, 112.35it/s]

[MoviePy] Done.





[MoviePy] >>>> Video ready: movie/ac_17500_585.webm 

[17600] time live:1969, cumulated reward: 443.80000, loss: (3.9601, -0.0046341955)
[17700] time live:1114, cumulated reward: 249.80000, loss: (3.9601007, 0.0012482068)
[17800] time live:585, cumulated reward: 130.00000, loss: (3.9601016, 0.023652282)
[17900] time live:924, cumulated reward: 206.80000, loss: (3.9601002, 0.0015613202)
[18000] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.0052289)
[MoviePy] >>>> Building video movie/ac_18000_2470.webm
[MoviePy] Writing video movie/ac_18000_2470.webm


100%|█████████▉| 2471/2472 [00:21<00:00, 115.39it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_18000_2470.webm 






[18100] time live:811, cumulated reward: 181.20000, loss: (3.960101, -0.0054104356)
[18200] time live:585, cumulated reward: 130.00000, loss: (3.9601016, 0.008303066)
[18300] time live:2223, cumulated reward: 501.60000, loss: (3.9600995, -0.0030704609)
[18400] time live:437, cumulated reward: 96.40000, loss: (3.9600976, -0.02137592)
[18500] time live:899, cumulated reward: 201.80000, loss: (3.9601016, -0.006260173)
[MoviePy] >>>> Building video movie/ac_18500_899.webm
[MoviePy] Writing video movie/ac_18500_899.webm


100%|█████████▉| 900/901 [00:07<00:00, 114.26it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_18500_899.webm 






[18600] time live:2470, cumulated reward: 557.00000, loss: (3.9601007, -0.0048158257)
[18700] time live:2470, cumulated reward: 557.00000, loss: (3.9601, -0.0047637806)
[18800] time live:811, cumulated reward: 181.20000, loss: (3.9600997, -0.008390557)
[18900] time live:924, cumulated reward: 206.80000, loss: (3.9600985, -0.017170446)
[19000] time live:585, cumulated reward: 130.00000, loss: (3.9601004, 0.0053377002)
[MoviePy] >>>> Building video movie/ac_19000_585.webm
[MoviePy] Writing video movie/ac_19000_585.webm


100%|██████████| 587/587 [00:05<00:00, 114.11it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_19000_585.webm 






[19100] time live:788, cumulated reward: 176.60000, loss: (3.9601007, -0.007344268)
[19200] time live:628, cumulated reward: 139.60000, loss: (3.9600997, -0.0084621515)
[19300] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.011532348)
[19400] time live:2244, cumulated reward: 505.80000, loss: (3.9601011, -0.005546744)
[19500] time live:811, cumulated reward: 181.20000, loss: (3.9601002, -0.016525574)
[MoviePy] >>>> Building video movie/ac_19500_811.webm
[MoviePy] Writing video movie/ac_19500_811.webm


100%|█████████▉| 812/813 [00:07<00:00, 113.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_19500_811.webm 






[19600] time live:2470, cumulated reward: 557.00000, loss: (3.9601, -0.011344834)
[19700] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.01176291)
[19800] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.010378312)
[19900] time live:924, cumulated reward: 206.80000, loss: (3.960101, -0.027292268)
[20000] time live:187, cumulated reward: 40.40000, loss: (3.9601026, -0.05461006)
[MoviePy] >>>> Building video movie/ac_20000_187.webm
[MoviePy] Writing video movie/ac_20000_187.webm


 99%|█████████▉| 188/189 [00:01<00:00, 127.62it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_20000_187.webm 

[20100] time live:924, cumulated reward: 206.80000, loss: (3.9601002, -0.0056354296)
[20200] time live:585, cumulated reward: 130.00000, loss: (3.9600985, 0.0027351251)
[20300] time live:585, cumulated reward: 130.00000, loss: (3.9600997, 0.006064931)
[20400] time live:811, cumulated reward: 181.20000, loss: (3.9601002, 0.00067443826)
[20500] time live:585, cumulated reward: 130.00000, loss: (3.9600997, -0.015038892)
[MoviePy] >>>> Building video movie/ac_20500_585.webm
[MoviePy] Writing video movie/ac_20500_585.webm


100%|██████████| 587/587 [00:05<00:00, 111.31it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_20500_585.webm 






[20600] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.012166785)
[20700] time live:1828, cumulated reward: 411.60000, loss: (3.9601007, -0.0019683966)
[20800] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.0120761655)
[20900] time live:2434, cumulated reward: 548.80000, loss: (3.9601, -0.0056829085)
[21000] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.011945135)
[MoviePy] >>>> Building video movie/ac_21000_2470.webm
[MoviePy] Writing video movie/ac_21000_2470.webm


100%|█████████▉| 2471/2472 [00:21<00:00, 114.39it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_21000_2470.webm 






[21100] time live:440, cumulated reward: 98.00000, loss: (3.9601014, -0.039402388)
[21200] time live:585, cumulated reward: 130.00000, loss: (3.960101, 0.008918759)
[21300] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.008436899)
[21400] time live:585, cumulated reward: 130.00000, loss: (3.9600995, 0.002262532)
[21500] time live:749, cumulated reward: 167.80000, loss: (3.9601011, -0.005258939)
[MoviePy] >>>> Building video movie/ac_21500_749.webm
[MoviePy] Writing video movie/ac_21500_749.webm


100%|█████████▉| 750/751 [00:06<00:00, 112.24it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_21500_749.webm 






[21600] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.019595731)
[21700] time live:1828, cumulated reward: 411.60000, loss: (3.9601007, -0.0018443635)
[21800] time live:924, cumulated reward: 206.80000, loss: (3.9601002, -0.0049444274)
[21900] time live:585, cumulated reward: 130.00000, loss: (3.9601018, -0.0016854751)
[22000] time live:777, cumulated reward: 173.40000, loss: (3.9600985, -0.01616166)
[MoviePy] >>>> Building video movie/ac_22000_777.webm
[MoviePy] Writing video movie/ac_22000_777.webm


100%|█████████▉| 778/779 [00:06<00:00, 111.65it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_22000_777.webm 






[22100] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.017310636)
[22200] time live:1798, cumulated reward: 405.60000, loss: (3.9600985, -0.01085139)
[22300] time live:1453, cumulated reward: 326.60000, loss: (3.960102, -0.005185653)
[22400] time live:585, cumulated reward: 130.00000, loss: (3.960099, -0.0017993947)
[22500] time live:632, cumulated reward: 141.40000, loss: (3.9601004, -0.012606781)
[MoviePy] >>>> Building video movie/ac_22500_632.webm
[MoviePy] Writing video movie/ac_22500_632.webm


100%|█████████▉| 633/634 [00:05<00:00, 109.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_22500_632.webm 






[22600] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, 0.0011432893)
[22700] time live:585, cumulated reward: 130.00000, loss: (3.9600978, -0.0050306832)
[22800] time live:2470, cumulated reward: 557.00000, loss: (3.9601, 0.0041891434)
[22900] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.004546643)
[23000] time live:585, cumulated reward: 130.00000, loss: (3.9601023, -0.030133897)
[MoviePy] >>>> Building video movie/ac_23000_585.webm
[MoviePy] Writing video movie/ac_23000_585.webm


100%|██████████| 587/587 [00:05<00:00, 105.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_23000_585.webm 






[23100] time live:585, cumulated reward: 130.00000, loss: (3.9600997, -0.017484011)
[23200] time live:924, cumulated reward: 206.80000, loss: (3.9600997, 0.0155414995)
[23300] time live:2470, cumulated reward: 557.00000, loss: (3.9600997, -0.010738457)
[23400] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.0027843195)
[23500] time live:585, cumulated reward: 130.00000, loss: (3.9600997, -0.031288788)
[MoviePy] >>>> Building video movie/ac_23500_585.webm
[MoviePy] Writing video movie/ac_23500_585.webm


100%|██████████| 587/587 [00:05<00:00, 111.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_23500_585.webm 






[23600] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.020074375)
[23700] time live:1679, cumulated reward: 377.80000, loss: (3.9601007, -0.014814526)
[23800] time live:1227, cumulated reward: 275.40000, loss: (3.9601007, -0.0025545105)
[23900] time live:410, cumulated reward: 91.00000, loss: (3.9601016, -0.023614157)
[24000] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -0.010620005)
[MoviePy] >>>> Building video movie/ac_24000_585.webm
[MoviePy] Writing video movie/ac_24000_585.webm


100%|██████████| 587/587 [00:05<00:00, 116.65it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_24000_585.webm 






[24100] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.016516883)
[24200] time live:775, cumulated reward: 173.00000, loss: (3.9601, -0.022410834)
[24300] time live:1390, cumulated reward: 313.00000, loss: (3.960101, 0.004607364)
[24400] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.006390537)
[24500] time live:1264, cumulated reward: 283.80000, loss: (3.9601, 0.0009040327)
[MoviePy] >>>> Building video movie/ac_24500_1264.webm
[MoviePy] Writing video movie/ac_24500_1264.webm


100%|█████████▉| 1265/1266 [00:12<00:00, 102.53it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_24500_1264.webm 






[24600] time live:777, cumulated reward: 173.40000, loss: (3.9601004, -0.006152709)
[24700] time live:2434, cumulated reward: 548.80000, loss: (3.9601004, -0.009558238)
[24800] time live:585, cumulated reward: 130.00000, loss: (3.960099, -0.013928379)
[24900] time live:585, cumulated reward: 130.00000, loss: (3.9601011, -0.008235067)
[25000] time live:585, cumulated reward: 130.00000, loss: (3.9600983, 0.0059904065)
[MoviePy] >>>> Building video movie/ac_25000_585.webm
[MoviePy] Writing video movie/ac_25000_585.webm


100%|██████████| 587/587 [00:05<00:00, 113.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_25000_585.webm 






[25100] time live:1828, cumulated reward: 411.60000, loss: (3.960099, -0.0014503599)
[25200] time live:1869, cumulated reward: 420.80000, loss: (3.9601004, 0.00552051)
[25300] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.00415042)
[25400] time live:2018, cumulated reward: 454.60000, loss: (3.9601002, -0.0050974004)
[25500] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.016314419)
[MoviePy] >>>> Building video movie/ac_25500_585.webm
[MoviePy] Writing video movie/ac_25500_585.webm


100%|██████████| 587/587 [00:05<00:00, 110.45it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_25500_585.webm 






[25600] time live:811, cumulated reward: 181.20000, loss: (3.9601004, -0.0035116472)
[25700] time live:776, cumulated reward: 173.20000, loss: (3.9600997, -0.01187116)
[25800] time live:2434, cumulated reward: 548.80000, loss: (3.9601, -0.0019853136)
[25900] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.006954911)
[26000] time live:699, cumulated reward: 155.80000, loss: (3.9600997, -0.013119658)
[MoviePy] >>>> Building video movie/ac_26000_699.webm
[MoviePy] Writing video movie/ac_26000_699.webm


100%|█████████▉| 700/701 [00:06<00:00, 116.21it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_26000_699.webm 






[26100] time live:585, cumulated reward: 130.00000, loss: (3.9601002, 0.0041480265)
[26200] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.024270369)
[26300] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.00010900498)
[26400] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.0044454667)
[26500] time live:477, cumulated reward: 106.40000, loss: (3.9601011, 0.0071935933)
[MoviePy] >>>> Building video movie/ac_26500_477.webm
[MoviePy] Writing video movie/ac_26500_477.webm


100%|█████████▉| 478/479 [00:04<00:00, 114.23it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_26500_477.webm 






[26600] time live:585, cumulated reward: 130.00000, loss: (3.9600997, -0.00021426739)
[26700] time live:1869, cumulated reward: 420.80000, loss: (3.9601004, -0.0071312827)
[26800] time live:76, cumulated reward: 15.20000, loss: (3.960106, -0.11318357)
[26900] time live:1648, cumulated reward: 371.60000, loss: (3.9601002, -0.008740429)
[27000] time live:811, cumulated reward: 181.20000, loss: (3.9601016, -0.01818573)
[MoviePy] >>>> Building video movie/ac_27000_811.webm
[MoviePy] Writing video movie/ac_27000_811.webm


100%|█████████▉| 812/813 [00:07<00:00, 108.86it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_27000_811.webm 






[27100] time live:2054, cumulated reward: 462.80000, loss: (3.9601004, -0.010001015)
[27200] time live:1227, cumulated reward: 275.40000, loss: (3.9600995, -0.009698645)
[27300] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, 0.00439627)
[27400] time live:1269, cumulated reward: 285.80000, loss: (3.9601002, -0.013994195)
[27500] time live:2470, cumulated reward: 557.00000, loss: (3.9601007, 0.0013729637)
[MoviePy] >>>> Building video movie/ac_27500_2470.webm
[MoviePy] Writing video movie/ac_27500_2470.webm


100%|█████████▉| 2471/2472 [00:21<00:00, 114.41it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_27500_2470.webm 






[27600] time live:1453, cumulated reward: 326.60000, loss: (3.9601, -0.008816071)
[27700] time live:924, cumulated reward: 206.80000, loss: (3.9600997, -0.002673211)
[27800] time live:213, cumulated reward: 45.60000, loss: (3.9601014, -0.0017162747)
[27900] time live:585, cumulated reward: 130.00000, loss: (3.9600983, 0.005540336)
[28000] time live:811, cumulated reward: 181.20000, loss: (3.9600997, 0.0035750063)
[MoviePy] >>>> Building video movie/ac_28000_811.webm
[MoviePy] Writing video movie/ac_28000_811.webm


100%|█████████▉| 812/813 [00:07<00:00, 110.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_28000_811.webm 






[28100] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.0089870095)
[28200] time live:2470, cumulated reward: 557.00000, loss: (3.9600997, 0.009208405)
[28300] time live:2244, cumulated reward: 505.80000, loss: (3.9601004, 0.00034777835)
[28400] time live:585, cumulated reward: 130.00000, loss: (3.9600978, -0.018924594)
[28500] time live:811, cumulated reward: 181.20000, loss: (3.9601002, 0.011162569)
[MoviePy] >>>> Building video movie/ac_28500_811.webm
[MoviePy] Writing video movie/ac_28500_811.webm


100%|█████████▉| 812/813 [00:07<00:00, 110.67it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_28500_811.webm 






[28600] time live:585, cumulated reward: 130.00000, loss: (3.9601004, -0.014705191)
[28700] time live:2470, cumulated reward: 557.00000, loss: (3.9601, -0.005077964)
[28800] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.0063100443)
[28900] time live:585, cumulated reward: 130.00000, loss: (3.9600985, -0.0121510625)
[29000] time live:2470, cumulated reward: 557.00000, loss: (3.9601004, -0.0059301485)
[MoviePy] >>>> Building video movie/ac_29000_2470.webm
[MoviePy] Writing video movie/ac_29000_2470.webm


100%|█████████▉| 2471/2472 [00:21<00:00, 116.12it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_29000_2470.webm 






[29100] time live:585, cumulated reward: 130.00000, loss: (3.9601004, -0.014816881)
[29200] time live:585, cumulated reward: 130.00000, loss: (3.960101, -0.018328182)
[29300] time live:585, cumulated reward: 130.00000, loss: (3.960101, 0.0032681122)
[29400] time live:585, cumulated reward: 130.00000, loss: (3.9601004, 0.0021408733)
[29500] time live:585, cumulated reward: 130.00000, loss: (3.9601002, -0.027400954)
[MoviePy] >>>> Building video movie/ac_29500_585.webm
[MoviePy] Writing video movie/ac_29500_585.webm


100%|██████████| 587/587 [00:04<00:00, 118.89it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_29500_585.webm 






[29600] time live:2434, cumulated reward: 548.80000, loss: (3.9601, 0.0024035084)
[29700] time live:2434, cumulated reward: 548.80000, loss: (3.9601, -0.003987331)
[29800] time live:2470, cumulated reward: 557.00000, loss: (3.9601007, -0.008694058)
[29900] time live:811, cumulated reward: 181.20000, loss: (3.960101, -0.022795625)
[30000] time live:585, cumulated reward: 130.00000, loss: (3.9600995, -8.446966e-05)
[MoviePy] >>>> Building video movie/ac_30000_585.webm
[MoviePy] Writing video movie/ac_30000_585.webm


100%|██████████| 587/587 [00:05<00:00, 117.06it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/ac_30000_585.webm 






# Report 

I found it difficult to train. Tried a lot of different hyper parameters and didn't find a stable combination. Maybe it's the nature of RL: unable to reproduce the result. I've get a time live 2470!!! I think it's the upp

I have spent 2 days entirely for tweaking the hyper parameters...I have question about the exploration part. The code has `exploration ratio` but not using it at all. This seems weired...

Comparing with the CNN one, IMO, it's more diffult to make the model converge. I use only the info of next-pipe, no next-next pipe. According to the visualization, I think I've achieve the upper bound of using such data. The place where the bird fails is a sudden change of pipe position.

In [17]:
from IPython.display import HTML
from base64 import b64encode

with open('./movie/ac_29000_2470.webm', 'rb') as f:
    b64 = b64encode(f.read()).decode()
HTML(data='''<video controls> 
<source type="video/webm" src="data:video/webm;base64,{}">
</video>
'''.format(b64))