In [1]:
%matplotlib inline

import gym
from gym.wrappers import Monitor
import itertools
import numpy as np
import os
import random
import sys
import tensorflow as tf

if "../" not in sys.path:
    sys.path.append("../")

from lib import plotting
from collections import deque, namedtuple

  return f(*args, **kwds)


In [2]:
env = gym.envs.make("Breakout-v0")

[2018-01-12 14:03:03,961] Making new env: Breakout-v0


In [3]:
# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions
VALID_ACTIONS = [0, 1, 2, 3]

In [4]:
class StateProcessor():
    """
    Processes a raw Atari images. Resizes it and converts it to grayscale.
    """
    def __init__(self):
        # Build the Tensorflow graph
        with tf.variable_scope("state_processor"):
            self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
            self.output = tf.image.rgb_to_grayscale(self.input_state)
            self.output = tf.image.crop_to_bounding_box(self.output, 34, 0, 160, 160)
            self.output = tf.image.resize_images(
                self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.output = tf.squeeze(self.output)

    def process(self, sess, state):
        """
        Args:
            sess: A Tensorflow session object
            state: A [210, 160, 3] Atari RGB State

        Returns:
            A processed [84, 84, 1] state representing grayscale values.
        """
        return sess.run(self.output, { self.input_state: state })

In [5]:
class Estimator():
    """Q-Value Estimator neural network.

    This network is used for both the Q-Network and the Target Network.
    """

    def __init__(self, scope="estimator", summaries_dir=None):
        self.scope = scope
        # Writes Tensorboard summaries to disk
        self.summary_writer = None
        with tf.variable_scope(scope):
            # Build the graph
            self._build_model()
            if summaries_dir:
                summary_dir = os.path.join(summaries_dir, "summaries_{}".format(scope))
                if not os.path.exists(summary_dir):
                    os.makedirs(summary_dir)
                self.summary_writer = tf.summary.FileWriter(summary_dir)

    def _build_model(self):
        """
        Builds the Tensorflow graph.
        """

        # Placeholders for our input
        # Our input are 4 RGB frames of shape 160, 160 each
        self.X_pl = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name="X")
        # The TD target value
        self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
        # Integer id of which action was selected
        self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name="actions")

        X = tf.to_float(self.X_pl) / 255.0
        batch_size = tf.shape(self.X_pl)[0]

        # Three convolutional layers
        conv1 = tf.contrib.layers.conv2d(
            X, 32, 8, 4, activation_fn=tf.nn.relu)
        conv2 = tf.contrib.layers.conv2d(
            conv1, 64, 4, 2, activation_fn=tf.nn.relu)
        conv3 = tf.contrib.layers.conv2d(
            conv2, 64, 3, 1, activation_fn=tf.nn.relu)

        # Fully connected layers
        flattened = tf.contrib.layers.flatten(conv3)
        fc1 = tf.contrib.layers.fully_connected(flattened, 512)
        self.predictions = tf.contrib.layers.fully_connected(fc1, len(VALID_ACTIONS))

        # Get the predictions for the chosen actions only
        gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1] + self.actions_pl
        self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices)

        # Calcualte the loss
        self.losses = tf.squared_difference(self.y_pl, self.action_predictions)
        self.loss = tf.reduce_mean(self.losses)

        # Optimizer Parameters from original paper
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
        self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step())

        # Summaries for Tensorboard
        self.summaries = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.histogram("loss_hist", self.losses),
            tf.summary.histogram("q_values_hist", self.predictions),
            tf.summary.scalar("max_q_value", tf.reduce_max(self.predictions))
        ])


    def predict(self, sess, s):
        """
        Predicts action values.

        Args:
          sess: Tensorflow session
          s: State input of shape [batch_size, 4, 160, 160, 3]

        Returns:
          Tensor of shape [batch_size, NUM_VALID_ACTIONS] containing the estimated 
          action values.
        """
        return sess.run(self.predictions, { self.X_pl: s })

    def update(self, sess, s, a, y):
        """
        Updates the estimator towards the given targets.

        Args:
          sess: Tensorflow session object
          s: State input of shape [batch_size, 4, 160, 160, 3]
          a: Chosen actions of shape [batch_size]
          y: Targets of shape [batch_size]

        Returns:
          The calculated loss on the batch.
        """
        feed_dict = { self.X_pl: s, self.y_pl: y, self.actions_pl: a }
        summaries, global_step, _, loss = sess.run(
            [self.summaries, tf.train.get_global_step(), self.train_op, self.loss],
            feed_dict)
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss

In [6]:
# For Testing....

tf.reset_default_graph()
global_step = tf.Variable(0, name="global_step", trainable=False)

e = Estimator(scope="test")
sp = StateProcessor()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Example observation batch
    observation = env.reset()
    
    observation_p = sp.process(sess, observation)
    observation = np.stack([observation_p] * 4, axis=2)
    observations = np.array([observation] * 2)
    
    # Test Prediction
    print(e.predict(sess, observations))

    # Test training step
    y = np.array([10.0, 10.0])
    a = np.array([1, 3])
    print(e.update(sess, observations, a, y))

Instructions for updating:
Please switch to tf.train.get_global_step


[2018-01-12 14:03:09,356] From <ipython-input-5-d6e4720ffac5>:59: get_global_step (from tensorflow.contrib.framework.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.get_global_step
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


[[ 0.02933731  0.02895204  0.          0.        ]
 [ 0.02933731  0.02895204  0.          0.        ]]
99.7109


In [7]:
def copy_model_parameters(sess, estimator1, estimator2):
    """
    Copies the model parameters of one estimator to another.

    Args:
      sess: Tensorflow session instance
      estimator1: Estimator to copy the paramters from
      estimator2: Estimator to copy the parameters to
    """
    e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]
    e1_params = sorted(e1_params, key=lambda v: v.name)
    e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]
    e2_params = sorted(e2_params, key=lambda v: v.name)

    update_ops = []
    for e1_v, e2_v in zip(e1_params, e2_params):
        op = e2_v.assign(e1_v)
        update_ops.append(op)

    sess.run(update_ops)

In [8]:
def make_epsilon_greedy_policy(estimator, nA):
    """
    Creates an epsilon-greedy policy based on a given Q-function approximator and epsilon.

    Args:
        estimator: An estimator that returns q values for a given state
        nA: Number of actions in the environment.

    Returns:
        A function that takes the (sess, observation, epsilon) as an argument and returns
        the probabilities for each action in the form of a numpy array of length nA.

    """
    def policy_fn(sess, observation, epsilon):
        A = np.ones(nA, dtype=float) * epsilon / nA
        q_values = estimator.predict(sess, np.expand_dims(observation, 0))[0]
        best_action = np.argmax(q_values)
        A[best_action] += (1.0 - epsilon)
        return A
    return policy_fn

In [9]:
def deep_q_learning(sess,
                    env,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=500000,
                    batch_size=32,
                    record_video_every=50):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.

    Args:
        sess: Tensorflow Session object
        env: OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sampel when initializing 
          the reply memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the 
          target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes

    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.
    """

    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)
    
    # Get the current time step
    total_t = sess.run(tf.train.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(
        q_estimator,
        len(VALID_ACTIONS))

    # Populate the replay memory with initial experience
    print("Populating replay memory...")
    state = env.reset()
    state = state_processor.process(sess, state)
    for i in range(replay_memory_init_size):
        print("\r@ Step {}/{}".format(
                    i + 1, replay_memory_init_size), end="")
        sys.stdout.flush()
        prob = policy(sess, np.stack([state] * 4, axis=2), epsilon_start)
        action = np.random.choice(range(len(VALID_ACTIONS)), p=prob)
        next_state, reward, done, _ = env.step(action)
        next_state = state_processor.process(sess, next_state)
        replay_memory.append(Transition(state, action, reward, next_state, done))
        if done:
            state = env.reset()
            state = state_processor.process(sess, state)
        else:
            state = next_state

    # Record videos
    env= Monitor(env,
                 directory=monitor_path,
                 resume=True,
                 video_callable=lambda count: count % record_video_every == 0)

    for i_episode in range(num_episodes):

        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)

        # Reset the environment
        state = env.reset()
        state = state_processor.process(sess, state)
#         state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # TODO: Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                    t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step in the environment
            prob = policy(sess, np.stack([state]*4, axis=2), epsilon)
            action = np.random.choice(range(len(VALID_ACTIONS)), p=prob)
            next_state, reward, done, _ = env.step(action)
            next_state = state_processor.process(sess, next_state)
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # TODO: Sample a minibatch from the replay memory
            # TODO: Calculate q values and targets
            # TODO Perform gradient descent update

            sample_transitions = random.sample(replay_memory, batch_size)

            Q_ns = target_estimator.predict(sess,
                                            np.array([np.stack([s.next_state]*4, axis=2)
                                                      for s in sample_transitions]))
            rewards = np.array([s.reward for s in sample_transitions])
            dones = np.array([s.done for s in sample_transitions])
            targets = rewards + discount_factor*np.max(Q_ns, axis=1)
            targets[dones] = rewards[dones]
            loss = q_estimator.update(sess,
                               np.array([np.stack([s.state]*4, axis=2) for s in sample_transitions]),
                               np.array([s.action for s in sample_transitions]),
                               targets
                              )

            if done:
                break

            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])

    env.close()
    return stats

In [10]:
tf.reset_default_graph()

# Where we save our checkpoints and graphs
experiment_dir = os.path.abspath("./experiments/{}".format(env.spec.id))

# Create a glboal step variable
global_step = tf.Variable(0, name='global_step', trainable=False)

# Create estimators
q_estimator = Estimator(scope="q", summaries_dir=experiment_dir)
target_estimator = Estimator(scope="target_q")

# State processor
state_processor = StateProcessor()

# Run it!
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for t, stats in deep_q_learning(sess,
                                    env,
                                    q_estimator=q_estimator,
                                    target_estimator=target_estimator,
                                    state_processor=state_processor,
                                    experiment_dir=experiment_dir,
                                    num_episodes=2500,
                                    replay_memory_size=500000,
                                    replay_memory_init_size=50000,
                                    update_target_estimator_every=10000,
                                    epsilon_start=1.0,
                                    epsilon_end=0.1,
                                    epsilon_decay_steps=500000,
                                    discount_factor=0.99,
                                    batch_size=32):
#     for t, stats in deep_q_learning(sess,
#                                     env,
#                                     q_estimator=q_estimator,
#                                     target_estimator=target_estimator,
#                                     state_processor=state_processor,
#                                     experiment_dir=experiment_dir,
#                                     num_episodes=0,
#                                     replay_memory_size=500,
#                                     replay_memory_init_size=50,
#                                     update_target_estimator_every=10,
#                                     epsilon_start=1.0,
#                                     epsilon_end=0.1,
#                                     epsilon_decay_steps=5,
#                                     discount_factor=0.99,
#                                     batch_size=32):

        print("\nEpisode Reward: {}".format(stats.episode_rewards[-1]))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Instructions for updating:
Use `tf.global_variables_initializer` instead.


[2018-01-12 14:03:12,781] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/tf_should_use.py:107: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.


Loading model checkpoint /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/checkpoints/model...

INFO:tensorflow:Restoring parameters from /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/checkpoints/model


[2018-01-12 14:03:13,355] Restoring parameters from /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/checkpoints/model


Populating replay memory...
@ Step 50000/50000

[2018-01-12 14:12:29,139] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000000.mp4


Step 482 (1411403) @ Episode 1/2500, loss: 0.0103228325024247175
Episode Reward: 3.0
Step 352 (1411755) @ Episode 2/2500, loss: 0.0083486642688512845
Episode Reward: 3.0
Step 429 (1412184) @ Episode 3/2500, loss: 0.0101576531305909162
Episode Reward: 4.0
Step 569 (1412753) @ Episode 4/2500, loss: 0.0190174505114555365
Episode Reward: 4.0
Step 495 (1413248) @ Episode 5/2500, loss: 0.0130521524697542197
Episode Reward: 4.0
Step 495 (1413743) @ Episode 6/2500, loss: 0.0175666287541389473
Episode Reward: 3.0
Step 426 (1414169) @ Episode 7/2500, loss: 0.0121017359197139745
Episode Reward: 4.0
Step 336 (1414505) @ Episode 8/2500, loss: 0.0151020158082246784
Episode Reward: 2.0
Step 227 (1414732) @ Episode 9/2500, loss: 0.0043418207205832005
Episode Reward: 0.0
Step 242 (1414974) @ Episode 10/2500, loss: 0.0042044222354888922
Episode Reward: 1.0
Step 471 (1415445) @ Episode 11/2500, loss: 0.0097195189446210865
Episode Reward: 4.0
Step 546 (1415991) @ Episode 12/2500, loss: 0.27448084950447084

[2018-01-12 14:50:41,943] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000050.mp4


Step 375 (1432501) @ Episode 51/2500, loss: 0.0037570721469819546
Episode Reward: 3.0
Step 383 (1432884) @ Episode 52/2500, loss: 0.0075995679944753656
Episode Reward: 3.0
Step 452 (1433336) @ Episode 53/2500, loss: 0.0120565481483936315
Episode Reward: 5.0
Step 376 (1433712) @ Episode 54/2500, loss: 0.0065718628466129395
Episode Reward: 3.0
Step 342 (1434054) @ Episode 55/2500, loss: 0.0044920328073203565
Episode Reward: 3.0
Step 278 (1434332) @ Episode 56/2500, loss: 0.0054495329968631275
Episode Reward: 2.0
Step 488 (1434820) @ Episode 57/2500, loss: 0.0032777022570371628
Episode Reward: 5.0
Step 314 (1435134) @ Episode 58/2500, loss: 0.2480518072843551635
Episode Reward: 3.0
Step 221 (1435355) @ Episode 59/2500, loss: 0.0042437664233148123
Episode Reward: 1.0
Step 449 (1435804) @ Episode 60/2500, loss: 0.0037320833653211594
Episode Reward: 5.0
Step 539 (1436343) @ Episode 61/2500, loss: 0.1221672147512435958
Episode Reward: 3.0
Step 641 (1436984) @ Episode 62/2500, loss: 0.00619023

[2018-01-12 15:27:52,310] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000100.mp4


Step 367 (1453037) @ Episode 101/2500, loss: 0.2611456215381622395
Episode Reward: 3.0
Step 352 (1453389) @ Episode 102/2500, loss: 0.0070146834477782254
Episode Reward: 3.0
Step 405 (1453794) @ Episode 103/2500, loss: 0.0072993570938706465
Episode Reward: 4.0
Step 323 (1454117) @ Episode 104/2500, loss: 0.0254225321114063267
Episode Reward: 2.0
Step 261 (1454378) @ Episode 105/2500, loss: 0.0050727976486086845
Episode Reward: 1.0
Step 376 (1454754) @ Episode 106/2500, loss: 0.0041808560490608215
Episode Reward: 4.0
Step 317 (1455071) @ Episode 107/2500, loss: 0.0102879013866186144
Episode Reward: 2.0
Step 355 (1455426) @ Episode 108/2500, loss: 0.0164159592241048875
Episode Reward: 3.0
Step 320 (1455746) @ Episode 109/2500, loss: 0.1023698523640632686
Episode Reward: 3.0
Step 442 (1456188) @ Episode 110/2500, loss: 0.0226458273828029636
Episode Reward: 4.0
Step 393 (1456581) @ Episode 111/2500, loss: 0.0175512246787548075
Episode Reward: 3.0
Step 341 (1456922) @ Episode 112/2500, loss

[2018-01-12 16:01:16,352] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000150.mp4


Step 349 (1471089) @ Episode 151/2500, loss: 0.0077546313405036935
Episode Reward: 4.0
Step 380 (1471469) @ Episode 152/2500, loss: 0.0064271991141140465
Episode Reward: 4.0
Step 291 (1471760) @ Episode 153/2500, loss: 0.0096812639385461876
Episode Reward: 2.0
Step 509 (1472269) @ Episode 154/2500, loss: 0.0028244014829397235
Episode Reward: 4.0
Step 294 (1472563) @ Episode 155/2500, loss: 0.0116036292165517815
Episode Reward: 2.0
Step 236 (1472799) @ Episode 156/2500, loss: 0.0040653413161635465
Episode Reward: 1.0
Step 316 (1473115) @ Episode 157/2500, loss: 0.0073161786422133455
Episode Reward: 2.0
Step 292 (1473407) @ Episode 158/2500, loss: 0.0059024705551564697
Episode Reward: 3.0
Step 479 (1473886) @ Episode 159/2500, loss: 0.0575007572770118725
Episode Reward: 6.0
Step 428 (1474314) @ Episode 160/2500, loss: 0.0106258448213338856
Episode Reward: 4.0
Step 316 (1474630) @ Episode 161/2500, loss: 0.0141163580119609834
Episode Reward: 4.0
Step 431 (1475061) @ Episode 162/2500, loss

[2018-01-12 16:36:36,089] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000200.mp4


Step 263 (1489510) @ Episode 201/2500, loss: 0.0147779723629355435
Episode Reward: 1.0
Step 400 (1489910) @ Episode 202/2500, loss: 0.0099155846983194355
Episode Reward: 4.0
Step 398 (1490308) @ Episode 203/2500, loss: 0.0132350008934736255
Episode Reward: 4.0
Step 420 (1490728) @ Episode 204/2500, loss: 0.0314476527273654946
Episode Reward: 4.0
Step 308 (1491036) @ Episode 205/2500, loss: 0.0216068513691425328
Episode Reward: 3.0
Step 279 (1491315) @ Episode 206/2500, loss: 0.0105506442487239846
Episode Reward: 2.0
Step 333 (1491648) @ Episode 207/2500, loss: 0.0080774445086717625
Episode Reward: 3.0
Step 223 (1491871) @ Episode 208/2500, loss: 0.0473830327391624455
Episode Reward: 1.0
Step 388 (1492259) @ Episode 209/2500, loss: 0.0102135110646486286
Episode Reward: 4.0
Step 311 (1492570) @ Episode 210/2500, loss: 0.0063206925988197335
Episode Reward: 2.0
Step 310 (1492880) @ Episode 211/2500, loss: 0.0057586329057812694
Episode Reward: 2.0
Step 272 (1493152) @ Episode 212/2500, loss

[2018-01-12 17:09:19,197] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000250.mp4


Step 315 (1507018) @ Episode 251/2500, loss: 0.0063853939063847065
Episode Reward: 2.0
Step 630 (1507648) @ Episode 252/2500, loss: 0.0172880087047815325
Episode Reward: 8.0
Step 486 (1508134) @ Episode 253/2500, loss: 0.0192913636565208445
Episode Reward: 6.0
Step 427 (1508561) @ Episode 254/2500, loss: 0.0114806154742836955
Episode Reward: 7.0
Step 373 (1508934) @ Episode 255/2500, loss: 0.2569680511951446574
Episode Reward: 3.0
Step 336 (1509270) @ Episode 256/2500, loss: 0.0365872867405414665
Episode Reward: 3.0
Step 358 (1509628) @ Episode 257/2500, loss: 0.0209040585905313595
Episode Reward: 3.0
Step 321 (1509949) @ Episode 258/2500, loss: 0.0091123767197132114
Episode Reward: 3.0
Step 290 (1510239) @ Episode 259/2500, loss: 0.0456504300236702354
Episode Reward: 2.0
Step 456 (1510695) @ Episode 260/2500, loss: 0.0082195512950420384
Episode Reward: 3.0
Step 299 (1510994) @ Episode 261/2500, loss: 0.0991817414760589604
Episode Reward: 2.0
Step 483 (1511477) @ Episode 262/2500, loss

[2018-01-12 17:42:26,655] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000300.mp4


Step 202 (1524848) @ Episode 301/2500, loss: 0.0120208328589797025
Episode Reward: 1.0
Step 266 (1525114) @ Episode 302/2500, loss: 0.0179473366588354155
Episode Reward: 1.0
Step 277 (1525391) @ Episode 303/2500, loss: 0.0096497172489762338
Episode Reward: 1.0
Step 435 (1525826) @ Episode 304/2500, loss: 0.0285458136349916465
Episode Reward: 4.0
Step 218 (1526044) @ Episode 305/2500, loss: 0.0126711549237370495
Episode Reward: 1.0
Step 389 (1526433) @ Episode 306/2500, loss: 0.0057538449764251715
Episode Reward: 4.0
Step 328 (1526761) @ Episode 307/2500, loss: 0.0130428243428468746
Episode Reward: 3.0
Step 348 (1527109) @ Episode 308/2500, loss: 0.0086313709616661076
Episode Reward: 3.0
Step 305 (1527414) @ Episode 309/2500, loss: 0.0041708424687385566
Episode Reward: 3.0
Step 373 (1527787) @ Episode 310/2500, loss: 0.0367941185832023645
Episode Reward: 3.0
Step 250 (1528037) @ Episode 311/2500, loss: 0.0325993001461029053
Episode Reward: 1.0
Step 466 (1528503) @ Episode 312/2500, loss

[2018-01-12 18:14:22,632] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000350.mp4


Step 560 (1543056) @ Episode 351/2500, loss: 0.0712928995490074287
Episode Reward: 2.0
Step 267 (1543323) @ Episode 352/2500, loss: 0.0413957759737968444
Episode Reward: 2.0
Step 352 (1543675) @ Episode 353/2500, loss: 0.0099451690912246784
Episode Reward: 4.0
Step 336 (1544011) @ Episode 354/2500, loss: 0.0069322525523602966
Episode Reward: 3.0
Step 221 (1544232) @ Episode 355/2500, loss: 0.0187873542308807376
Episode Reward: 1.0
Step 385 (1544617) @ Episode 356/2500, loss: 0.0077747898176312454
Episode Reward: 3.0
Step 432 (1545049) @ Episode 357/2500, loss: 0.0084628053009510045
Episode Reward: 5.0
Step 440 (1545489) @ Episode 358/2500, loss: 0.0048957392573356635
Episode Reward: 3.0
Step 379 (1545868) @ Episode 359/2500, loss: 0.0112022999674081855
Episode Reward: 4.0
Step 451 (1546319) @ Episode 360/2500, loss: 0.1056980118155479414
Episode Reward: 4.0
Step 269 (1546588) @ Episode 361/2500, loss: 0.0098931975662708286
Episode Reward: 2.0
Step 363 (1546951) @ Episode 362/2500, loss

[2018-01-12 18:47:33,055] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000400.mp4


Step 743 (1561566) @ Episode 401/2500, loss: 0.0021206224337220195
Episode Reward: 8.0
Step 331 (1561897) @ Episode 402/2500, loss: 0.0143137313425540925
Episode Reward: 2.0
Step 288 (1562185) @ Episode 403/2500, loss: 0.0223927199840545655
Episode Reward: 2.0
Step 307 (1562492) @ Episode 404/2500, loss: 0.0036357925273478035
Episode Reward: 2.0
Step 349 (1562841) @ Episode 405/2500, loss: 0.0835951417684555514
Episode Reward: 1.0
Step 277 (1563118) @ Episode 406/2500, loss: 0.0361878983676433565
Episode Reward: 2.0
Step 385 (1563503) @ Episode 407/2500, loss: 0.0063402922824025155
Episode Reward: 3.0
Step 237 (1563740) @ Episode 408/2500, loss: 0.0102023798972368245
Episode Reward: 1.0
Step 342 (1564082) @ Episode 409/2500, loss: 0.0230842493474483516
Episode Reward: 3.0
Step 416 (1564498) @ Episode 410/2500, loss: 0.1070183664560318765
Episode Reward: 4.0
Step 232 (1564730) @ Episode 411/2500, loss: 0.0052954088896512985
Episode Reward: 1.0
Step 396 (1565126) @ Episode 412/2500, loss

[2018-01-12 19:22:40,709] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000450.mp4


Step 310 (1580145) @ Episode 451/2500, loss: 0.0087531963363289835
Episode Reward: 3.0
Step 336 (1580481) @ Episode 452/2500, loss: 0.0176361761987209324
Episode Reward: 3.0
Step 328 (1580809) @ Episode 453/2500, loss: 0.0173858702182769785
Episode Reward: 2.0
Step 312 (1581121) @ Episode 454/2500, loss: 0.0072699645534157754
Episode Reward: 2.0
Step 502 (1581623) @ Episode 455/2500, loss: 0.0078440466895699545
Episode Reward: 6.0
Step 278 (1581901) @ Episode 456/2500, loss: 0.0094484947621822365
Episode Reward: 2.0
Step 361 (1582262) @ Episode 457/2500, loss: 0.0151684926822781565
Episode Reward: 4.0
Step 354 (1582616) @ Episode 458/2500, loss: 0.0096692591905593877
Episode Reward: 3.0
Step 288 (1582904) @ Episode 459/2500, loss: 0.0111835338175296785
Episode Reward: 2.0
Step 360 (1583264) @ Episode 460/2500, loss: 0.2091653347015380965
Episode Reward: 3.0
Step 372 (1583636) @ Episode 461/2500, loss: 0.0143605563789606136
Episode Reward: 2.0
Step 537 (1584173) @ Episode 462/2500, loss

[2018-01-12 19:58:35,641] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000500.mp4


Step 291 (1599419) @ Episode 501/2500, loss: 0.0070132981054484846
Episode Reward: 1.0
Step 440 (1599859) @ Episode 502/2500, loss: 0.0191306360065937045
Episode Reward: 4.0
Step 370 (1600229) @ Episode 503/2500, loss: 0.0084934420883655557
Episode Reward: 3.0
Step 314 (1600543) @ Episode 504/2500, loss: 0.0149475485086441045
Episode Reward: 3.0
Step 503 (1601046) @ Episode 505/2500, loss: 0.0077398358844220645
Episode Reward: 5.0
Step 229 (1601275) @ Episode 506/2500, loss: 0.2310506552457809436
Episode Reward: 1.0
Step 337 (1601612) @ Episode 507/2500, loss: 0.0060793580487370495
Episode Reward: 3.0
Step 404 (1602016) @ Episode 508/2500, loss: 0.0399171784520149256
Episode Reward: 5.0
Step 397 (1602413) @ Episode 509/2500, loss: 0.0064097400754690175
Episode Reward: 4.0
Step 384 (1602797) @ Episode 510/2500, loss: 0.0123771652579307564
Episode Reward: 4.0
Step 398 (1603195) @ Episode 511/2500, loss: 0.1732791066169738855
Episode Reward: 3.0
Step 335 (1603530) @ Episode 512/2500, loss

[2018-01-12 20:36:21,255] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000550.mp4


Step 370 (1619522) @ Episode 551/2500, loss: 0.0068280166015028955
Episode Reward: 3.0
Step 475 (1619997) @ Episode 552/2500, loss: 0.0394440181553363835
Episode Reward: 3.0
Step 351 (1620348) @ Episode 553/2500, loss: 0.0049247900024056435
Episode Reward: 3.0
Step 483 (1620831) @ Episode 554/2500, loss: 0.0082861324772238735
Episode Reward: 5.0
Step 349 (1621180) @ Episode 555/2500, loss: 0.0175481513142585755
Episode Reward: 3.0
Step 628 (1621808) @ Episode 556/2500, loss: 0.0310445576906204224
Episode Reward: 7.0
Step 471 (1622279) @ Episode 557/2500, loss: 0.0088342595845460985
Episode Reward: 4.0
Step 321 (1622600) @ Episode 558/2500, loss: 0.0109408963471651085
Episode Reward: 2.0
Step 311 (1622911) @ Episode 559/2500, loss: 0.0157030392438173335
Episode Reward: 2.0
Step 405 (1623316) @ Episode 560/2500, loss: 0.0073946746997535235
Episode Reward: 2.0
Step 281 (1623597) @ Episode 561/2500, loss: 0.0102373603731393815
Episode Reward: 2.0
Step 416 (1624013) @ Episode 562/2500, loss

[2018-01-12 21:18:05,740] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000600.mp4


Step 484 (1640951) @ Episode 601/2500, loss: 0.0188989564776420665
Episode Reward: 8.0
Step 426 (1641377) @ Episode 602/2500, loss: 0.0049057458527386195
Episode Reward: 5.0
Step 649 (1642026) @ Episode 603/2500, loss: 0.0082448525354266176
Episode Reward: 6.0
Step 534 (1642560) @ Episode 604/2500, loss: 0.0112693635746836665
Episode Reward: 4.0
Step 331 (1642891) @ Episode 605/2500, loss: 0.0092056449502706534
Episode Reward: 3.0
Step 427 (1643318) @ Episode 606/2500, loss: 0.0102420002222061166
Episode Reward: 4.0
Step 521 (1643839) @ Episode 607/2500, loss: 0.0077132480219006546
Episode Reward: 7.0
Step 379 (1644218) @ Episode 608/2500, loss: 0.0075261127203702934
Episode Reward: 3.0
Step 448 (1644666) @ Episode 609/2500, loss: 0.0087871458381414414
Episode Reward: 4.0
Step 405 (1645071) @ Episode 610/2500, loss: 0.0094790784642100334
Episode Reward: 5.0
Step 545 (1645616) @ Episode 611/2500, loss: 0.0232382994145154955
Episode Reward: 6.0
Step 488 (1646104) @ Episode 612/2500, loss

[2018-01-12 21:56:26,432] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000650.mp4


Step 446 (1661905) @ Episode 651/2500, loss: 0.0141666308045387275
Episode Reward: 4.0
Step 403 (1662308) @ Episode 652/2500, loss: 0.0125000085681676865
Episode Reward: 3.0
Step 459 (1662767) @ Episode 653/2500, loss: 0.0236716270446777343
Episode Reward: 4.0
Step 430 (1663197) @ Episode 654/2500, loss: 0.0096370773389935525
Episode Reward: 3.0
Step 459 (1663656) @ Episode 655/2500, loss: 0.0136089958250522615
Episode Reward: 4.0
Step 561 (1664217) @ Episode 656/2500, loss: 0.0163661129772663125
Episode Reward: 5.0
Step 862 (1665079) @ Episode 657/2500, loss: 0.0160566009581089028
Episode Reward: 10.0
Step 251 (1665330) @ Episode 658/2500, loss: 0.0087602231651544575
Episode Reward: 1.0
Step 367 (1665697) @ Episode 659/2500, loss: 0.0098861241713166246
Episode Reward: 2.0
Step 489 (1666186) @ Episode 660/2500, loss: 0.0125851538032293325
Episode Reward: 4.0
Step 523 (1666709) @ Episode 661/2500, loss: 0.0104979295283555985
Episode Reward: 6.0
Step 346 (1667055) @ Episode 662/2500, los

[2018-01-12 22:41:30,957] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000700.mp4


Step 273 (1684560) @ Episode 701/2500, loss: 0.0150768235325813365
Episode Reward: 2.0
Step 291 (1684851) @ Episode 702/2500, loss: 0.0086164977401494985
Episode Reward: 2.0
Step 388 (1685239) @ Episode 703/2500, loss: 0.0030858335085213184
Episode Reward: 4.0
Step 307 (1685546) @ Episode 704/2500, loss: 0.0125677147880196575
Episode Reward: 2.0
Step 602 (1686148) @ Episode 705/2500, loss: 0.0136638432741165165
Episode Reward: 4.0
Step 467 (1686615) @ Episode 706/2500, loss: 0.0078895557671785355
Episode Reward: 8.0
Step 486 (1687101) @ Episode 707/2500, loss: 0.0047595528885722165
Episode Reward: 4.0
Step 480 (1687581) @ Episode 708/2500, loss: 0.0176772642880678185
Episode Reward: 4.0
Step 384 (1687965) @ Episode 709/2500, loss: 0.0062147872522473335
Episode Reward: 3.0
Step 367 (1688332) @ Episode 710/2500, loss: 0.2606035470962524435
Episode Reward: 4.0
Step 602 (1688934) @ Episode 711/2500, loss: 0.0072170156054198745
Episode Reward: 5.0
Step 292 (1689226) @ Episode 712/2500, loss

[2018-01-12 23:21:39,116] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000750.mp4


Step 487 (1705820) @ Episode 751/2500, loss: 0.0079486528411507665
Episode Reward: 6.0
Step 342 (1706162) @ Episode 752/2500, loss: 0.0073199104517698295
Episode Reward: 3.0
Step 487 (1706649) @ Episode 753/2500, loss: 0.0116798169910907755
Episode Reward: 5.0
Step 308 (1706957) @ Episode 754/2500, loss: 0.0069802328944206245
Episode Reward: 3.0
Step 420 (1707377) @ Episode 755/2500, loss: 0.0095345191657543185
Episode Reward: 3.0
Step 385 (1707762) @ Episode 756/2500, loss: 0.0121160019189119345
Episode Reward: 4.0
Step 304 (1708066) @ Episode 757/2500, loss: 0.0145576503127813344
Episode Reward: 3.0
Step 254 (1708320) @ Episode 758/2500, loss: 0.0108723193407058725
Episode Reward: 2.0
Step 596 (1708916) @ Episode 759/2500, loss: 0.0074675632640719415
Episode Reward: 6.0
Step 400 (1709316) @ Episode 760/2500, loss: 0.0119972387328743935
Episode Reward: 4.0
Step 665 (1709981) @ Episode 761/2500, loss: 0.0105310445651412015
Episode Reward: 7.0
Step 455 (1710436) @ Episode 762/2500, loss

[2018-01-13 00:00:08,935] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000800.mp4


Step 211 (1726115) @ Episode 801/2500, loss: 0.0479013323783874564
Episode Reward: 1.0
Step 388 (1726503) @ Episode 802/2500, loss: 0.0137938354164361955
Episode Reward: 3.0
Step 451 (1726954) @ Episode 803/2500, loss: 0.0135669726878404626
Episode Reward: 4.0
Step 373 (1727327) @ Episode 804/2500, loss: 0.0480395779013633775
Episode Reward: 3.0
Step 583 (1727910) @ Episode 805/2500, loss: 0.0115700922906398775
Episode Reward: 11.0
Step 415 (1728325) @ Episode 806/2500, loss: 0.0507163517177104954
Episode Reward: 5.0
Step 387 (1728712) @ Episode 807/2500, loss: 0.0211307965219020844
Episode Reward: 4.0
Step 427 (1729139) @ Episode 808/2500, loss: 0.0257041472941637045
Episode Reward: 4.0
Step 413 (1729552) @ Episode 809/2500, loss: 0.0114728538319468515
Episode Reward: 2.0
Step 406 (1729958) @ Episode 810/2500, loss: 0.0046446914784610273
Episode Reward: 4.0
Step 380 (1730338) @ Episode 811/2500, loss: 0.0069884806871414185
Episode Reward: 3.0
Step 451 (1730789) @ Episode 812/2500, los

[2018-01-13 00:39:42,940] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000850.mp4


Step 289 (1747055) @ Episode 851/2500, loss: 0.0132409911602735525
Episode Reward: 3.0
Step 449 (1747504) @ Episode 852/2500, loss: 0.0168018415570259195
Episode Reward: 4.0
Step 521 (1748025) @ Episode 853/2500, loss: 0.0132175404578447344
Episode Reward: 4.0
Step 275 (1748300) @ Episode 854/2500, loss: 0.0281932894140481956
Episode Reward: 2.0
Step 371 (1748671) @ Episode 855/2500, loss: 0.0188174638897180565
Episode Reward: 2.0
Step 354 (1749025) @ Episode 856/2500, loss: 0.0097053805366158495
Episode Reward: 3.0
Step 396 (1749421) @ Episode 857/2500, loss: 0.0257789473980665296
Episode Reward: 4.0
Step 351 (1749772) @ Episode 858/2500, loss: 0.0159740876406431225
Episode Reward: 3.0
Step 241 (1750013) @ Episode 859/2500, loss: 0.0314069651067256944
Episode Reward: 1.0
Step 379 (1750392) @ Episode 860/2500, loss: 0.0120507813990116123
Episode Reward: 4.0
Step 205 (1750597) @ Episode 861/2500, loss: 0.0722492784261703534
Episode Reward: 1.0
Step 407 (1751004) @ Episode 862/2500, loss

[2018-01-13 01:20:49,289] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000900.mp4


Step 288 (1768482) @ Episode 901/2500, loss: 0.0127215357497334485
Episode Reward: 3.0
Step 521 (1769003) @ Episode 902/2500, loss: 0.0071050380356609825
Episode Reward: 6.0
Step 691 (1769694) @ Episode 903/2500, loss: 0.0167134944349527366
Episode Reward: 7.0
Step 426 (1770120) @ Episode 904/2500, loss: 0.0104438550770282754
Episode Reward: 5.0
Step 391 (1770511) @ Episode 905/2500, loss: 0.0168935917317867285
Episode Reward: 4.0
Step 330 (1770841) @ Episode 906/2500, loss: 0.0324244275689125066
Episode Reward: 3.0
Step 485 (1771326) @ Episode 907/2500, loss: 0.0040041124448180263
Episode Reward: 5.0
Step 467 (1771793) @ Episode 908/2500, loss: 0.0056236577220261115
Episode Reward: 6.0
Step 497 (1772290) @ Episode 909/2500, loss: 0.2870000004768371615
Episode Reward: 4.0
Step 302 (1772592) @ Episode 910/2500, loss: 0.0101984599605202672
Episode Reward: 2.0
Step 266 (1772858) @ Episode 911/2500, loss: 0.0154464123770594695
Episode Reward: 1.0
Step 439 (1773297) @ Episode 912/2500, loss

[2018-01-13 02:04:08,973] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video000950.mp4


Step 448 (1790856) @ Episode 951/2500, loss: 0.0084615983068943026
Episode Reward: 3.0
Step 412 (1791268) @ Episode 952/2500, loss: 0.0126124816015362745
Episode Reward: 8.0
Step 529 (1791797) @ Episode 953/2500, loss: 0.0221200063824653636
Episode Reward: 6.0
Step 354 (1792151) @ Episode 954/2500, loss: 0.0085283787921071054
Episode Reward: 3.0
Step 506 (1792657) @ Episode 955/2500, loss: 0.0084952265024185187
Episode Reward: 5.0
Step 467 (1793124) @ Episode 956/2500, loss: 0.0356588289141655835
Episode Reward: 3.0
Step 396 (1793520) @ Episode 957/2500, loss: 0.0059344992041587835
Episode Reward: 3.0
Step 277 (1793797) @ Episode 958/2500, loss: 0.0097067849710583695
Episode Reward: 2.0
Step 474 (1794271) @ Episode 959/2500, loss: 0.0611265189945697855
Episode Reward: 5.0
Step 249 (1794520) @ Episode 960/2500, loss: 0.0077809565700590615
Episode Reward: 1.0
Step 737 (1795257) @ Episode 961/2500, loss: 0.0074505475349724297
Episode Reward: 8.0
Step 607 (1795864) @ Episode 962/2500, loss

[2018-01-13 02:45:40,425] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001000.mp4


Step 511 (1812543) @ Episode 1001/2500, loss: 0.0137297101318836215
Episode Reward: 5.0
Step 490 (1813033) @ Episode 1002/2500, loss: 0.0166382957249879846
Episode Reward: 4.0
Step 588 (1813621) @ Episode 1003/2500, loss: 0.0534296743571758345
Episode Reward: 5.0
Step 458 (1814079) @ Episode 1004/2500, loss: 0.0083469171077013025
Episode Reward: 4.0
Step 366 (1814445) @ Episode 1005/2500, loss: 0.0164407100528478624
Episode Reward: 4.0
Step 491 (1814936) @ Episode 1006/2500, loss: 0.0132138673216104554
Episode Reward: 5.0
Step 566 (1815502) @ Episode 1007/2500, loss: 0.0084052057936787615
Episode Reward: 3.0
Step 455 (1815957) @ Episode 1008/2500, loss: 0.0324347317218780525
Episode Reward: 5.0
Step 467 (1816424) @ Episode 1009/2500, loss: 0.0070532262325286865
Episode Reward: 5.0
Step 420 (1816844) @ Episode 1010/2500, loss: 0.0052824812009930617
Episode Reward: 2.0
Step 462 (1817306) @ Episode 1011/2500, loss: 0.0193665437400341036
Episode Reward: 4.0
Step 218 (1817524) @ Episode 101

[2018-01-13 03:26:14,567] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001050.mp4


Step 509 (1834123) @ Episode 1051/2500, loss: 0.2685453593730926565
Episode Reward: 6.0
Step 525 (1834648) @ Episode 1052/2500, loss: 0.0116945747286081316
Episode Reward: 4.0
Step 698 (1835346) @ Episode 1053/2500, loss: 0.0106176361441612245
Episode Reward: 6.0
Step 627 (1835973) @ Episode 1054/2500, loss: 0.0117264240980148325
Episode Reward: 9.0
Step 497 (1836470) @ Episode 1055/2500, loss: 0.0086977956816554075
Episode Reward: 5.0
Step 496 (1836966) @ Episode 1056/2500, loss: 0.0103915333747863775
Episode Reward: 5.0
Step 277 (1837243) @ Episode 1057/2500, loss: 0.0176645684987306665
Episode Reward: 1.0
Step 603 (1837846) @ Episode 1058/2500, loss: 0.0075585371814668185
Episode Reward: 6.0
Step 509 (1838355) @ Episode 1059/2500, loss: 0.0080699874088168145
Episode Reward: 6.0
Step 515 (1838870) @ Episode 1060/2500, loss: 0.0126265818253159525
Episode Reward: 4.0
Step 300 (1839170) @ Episode 1061/2500, loss: 0.0052575757727026944
Episode Reward: 2.0
Step 415 (1839585) @ Episode 106

[2018-01-13 04:07:35,008] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001100.mp4


Step 421 (1856179) @ Episode 1101/2500, loss: 0.0661471411585807825
Episode Reward: 4.0
Step 329 (1856508) @ Episode 1102/2500, loss: 0.0324151143431663545
Episode Reward: 3.0
Step 368 (1856876) @ Episode 1103/2500, loss: 0.0477697998285293695
Episode Reward: 3.0
Step 362 (1857238) @ Episode 1104/2500, loss: 0.0111701861023902984
Episode Reward: 4.0
Step 480 (1857718) @ Episode 1105/2500, loss: 0.0051494501531124115
Episode Reward: 6.0
Step 612 (1858330) @ Episode 1106/2500, loss: 0.0465282127261161875
Episode Reward: 7.0
Step 394 (1858724) @ Episode 1107/2500, loss: 0.0057194242253899576
Episode Reward: 3.0
Step 391 (1859115) @ Episode 1108/2500, loss: 0.0110898427665233615
Episode Reward: 3.0
Step 481 (1859596) @ Episode 1109/2500, loss: 0.0072591356001794345
Episode Reward: 5.0
Step 653 (1860249) @ Episode 1110/2500, loss: 0.0080109164118766785
Episode Reward: 5.0
Step 284 (1860533) @ Episode 1111/2500, loss: 0.0132607994601130496
Episode Reward: 1.0
Step 253 (1860786) @ Episode 111

[2018-01-13 04:49:47,234] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001150.mp4


Step 586 (1878627) @ Episode 1151/2500, loss: 0.0108698513358831428
Episode Reward: 6.0
Step 377 (1879004) @ Episode 1152/2500, loss: 0.0069035710766911515
Episode Reward: 3.0
Step 617 (1879621) @ Episode 1153/2500, loss: 0.0048620691522955894
Episode Reward: 7.0
Step 560 (1880181) @ Episode 1154/2500, loss: 0.0084491474553942685
Episode Reward: 5.0
Step 468 (1880649) @ Episode 1155/2500, loss: 0.0083530712872743643
Episode Reward: 2.0
Step 703 (1881352) @ Episode 1156/2500, loss: 0.0092098806053400045
Episode Reward: 6.0
Step 285 (1881637) @ Episode 1157/2500, loss: 0.0125695094466209415
Episode Reward: 2.0
Step 286 (1881923) @ Episode 1158/2500, loss: 0.0306557938456535345
Episode Reward: 2.0
Step 440 (1882363) @ Episode 1159/2500, loss: 0.0133660361170768745
Episode Reward: 4.0
Step 278 (1882641) @ Episode 1160/2500, loss: 0.0392016395926475535
Episode Reward: 2.0
Step 267 (1882908) @ Episode 1161/2500, loss: 0.0074690319597721145
Episode Reward: 1.0
Step 311 (1883219) @ Episode 116

[2018-01-13 05:35:00,293] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001200.mp4


Step 435 (1901962) @ Episode 1201/2500, loss: 0.0093409270048141484
Episode Reward: 5.0
Step 351 (1902313) @ Episode 1202/2500, loss: 0.0042794998735189445
Episode Reward: 4.0
Step 608 (1902921) @ Episode 1203/2500, loss: 0.0683597698807716495
Episode Reward: 6.0
Step 472 (1903393) @ Episode 1204/2500, loss: 0.0073599601164460185
Episode Reward: 5.0
Step 269 (1903662) @ Episode 1205/2500, loss: 0.0207064319401979456
Episode Reward: 2.0
Step 599 (1904261) @ Episode 1206/2500, loss: 0.0417982973158359545
Episode Reward: 6.0
Step 523 (1904784) @ Episode 1207/2500, loss: 0.0080466819927096376
Episode Reward: 4.0
Step 452 (1905236) @ Episode 1208/2500, loss: 0.0057834740728139886
Episode Reward: 4.0
Step 588 (1905824) @ Episode 1209/2500, loss: 0.0126179233193397525
Episode Reward: 5.0
Step 325 (1906149) @ Episode 1210/2500, loss: 0.0401010923087596994
Episode Reward: 3.0
Step 448 (1906597) @ Episode 1211/2500, loss: 0.0109738726168870936
Episode Reward: 5.0
Step 321 (1906918) @ Episode 121

[2018-01-13 06:17:17,211] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001250.mp4


Step 472 (1923738) @ Episode 1251/2500, loss: 0.0030960266012698415
Episode Reward: 6.0
Step 372 (1924110) @ Episode 1252/2500, loss: 0.0094426274299621585
Episode Reward: 4.0
Step 336 (1924446) @ Episode 1253/2500, loss: 0.0090099684894084935
Episode Reward: 3.0
Step 426 (1924872) @ Episode 1254/2500, loss: 0.0081196362152695665
Episode Reward: 5.0
Step 307 (1925179) @ Episode 1255/2500, loss: 0.0163475498557090766
Episode Reward: 3.0
Step 282 (1925461) @ Episode 1256/2500, loss: 0.0085385255515575416
Episode Reward: 2.0
Step 541 (1926002) @ Episode 1257/2500, loss: 0.0138831334188580515
Episode Reward: 7.0
Step 580 (1926582) @ Episode 1258/2500, loss: 0.0134306102991104135
Episode Reward: 6.0
Step 285 (1926867) @ Episode 1259/2500, loss: 0.0039305575191974643
Episode Reward: 2.0
Step 606 (1927473) @ Episode 1260/2500, loss: 0.0079089840874075895
Episode Reward: 8.0
Step 403 (1927876) @ Episode 1261/2500, loss: 0.0181829147040843965
Episode Reward: 5.0
Step 449 (1928325) @ Episode 126

[2018-01-13 06:55:36,287] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001300.mp4


Step 438 (1943569) @ Episode 1301/2500, loss: 0.0104446820914745332
Episode Reward: 5.0
Step 415 (1943984) @ Episode 1302/2500, loss: 0.0139489807188510925
Episode Reward: 5.0
Step 389 (1944373) @ Episode 1303/2500, loss: 0.0154719930142164235
Episode Reward: 5.0
Step 545 (1944918) @ Episode 1304/2500, loss: 0.0074298344552516945
Episode Reward: 7.0
Step 445 (1945363) @ Episode 1305/2500, loss: 0.0065840105526149274
Episode Reward: 5.0
Step 531 (1945894) @ Episode 1306/2500, loss: 0.0050734551623463635
Episode Reward: 7.0
Step 488 (1946382) @ Episode 1307/2500, loss: 0.0068763028830289845
Episode Reward: 6.0
Step 329 (1946711) @ Episode 1308/2500, loss: 0.0071266959421336655
Episode Reward: 2.0
Step 552 (1947263) @ Episode 1309/2500, loss: 0.0112655982375144965
Episode Reward: 4.0
Step 271 (1947534) @ Episode 1310/2500, loss: 0.0437657907605171276
Episode Reward: 2.0
Step 494 (1948028) @ Episode 1311/2500, loss: 0.0057212277315557625
Episode Reward: 7.0
Step 459 (1948487) @ Episode 131

[2018-01-13 07:35:21,395] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001350.mp4


Step 336 (1964234) @ Episode 1351/2500, loss: 0.0125368591398000723
Episode Reward: 3.0
Step 483 (1964717) @ Episode 1352/2500, loss: 0.0062138927169144156
Episode Reward: 6.0
Step 508 (1965225) @ Episode 1353/2500, loss: 0.0125611443072557457
Episode Reward: 6.0
Step 481 (1965706) @ Episode 1354/2500, loss: 0.0103078829124569934
Episode Reward: 6.0
Step 474 (1966180) @ Episode 1355/2500, loss: 0.0094542102888226515
Episode Reward: 6.0
Step 504 (1966684) @ Episode 1356/2500, loss: 0.0182497613131999974
Episode Reward: 7.0
Step 445 (1967129) @ Episode 1357/2500, loss: 0.0057582454755902296
Episode Reward: 5.0
Step 436 (1967565) @ Episode 1358/2500, loss: 0.0086115859448909765
Episode Reward: 5.0
Step 516 (1968081) @ Episode 1359/2500, loss: 0.0141129232943058015
Episode Reward: 6.0
Step 655 (1968736) @ Episode 1360/2500, loss: 0.0063836015760898595
Episode Reward: 9.0
Step 303 (1969039) @ Episode 1361/2500, loss: 0.0058889822103083135
Episode Reward: 3.0
Step 284 (1969323) @ Episode 136

[2018-01-13 08:14:47,754] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001400.mp4


Step 333 (1984751) @ Episode 1401/2500, loss: 0.1279128938913345346
Episode Reward: 4.0
Step 614 (1985365) @ Episode 1402/2500, loss: 0.0207343660295009635
Episode Reward: 7.0
Step 329 (1985694) @ Episode 1403/2500, loss: 0.0089115342125296685
Episode Reward: 3.0
Step 461 (1986155) @ Episode 1404/2500, loss: 0.0206845551729202274
Episode Reward: 6.0
Step 422 (1986577) @ Episode 1405/2500, loss: 0.0089012458920478824
Episode Reward: 4.0
Step 374 (1986951) @ Episode 1406/2500, loss: 0.0221487209200859075
Episode Reward: 4.0
Step 363 (1987314) @ Episode 1407/2500, loss: 0.0140751488506793985
Episode Reward: 3.0
Step 158 (1987472) @ Episode 1408/2500, loss: 0.0183463096618652345
Episode Reward: 0.0
Step 428 (1987900) @ Episode 1409/2500, loss: 0.0070365760475397115
Episode Reward: 5.0
Step 376 (1988276) @ Episode 1410/2500, loss: 0.0069151581265032295
Episode Reward: 4.0
Step 445 (1988721) @ Episode 1411/2500, loss: 0.0073304381221532826
Episode Reward: 4.0
Step 243 (1988964) @ Episode 141

[2018-01-13 08:53:39,562] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001450.mp4


Step 474 (2005349) @ Episode 1451/2500, loss: 0.1967827826738357584
Episode Reward: 7.0
Step 357 (2005706) @ Episode 1452/2500, loss: 0.0025430205278098583
Episode Reward: 4.0
Step 606 (2006312) @ Episode 1453/2500, loss: 0.0136544592678546965
Episode Reward: 3.0
Step 703 (2007015) @ Episode 1454/2500, loss: 0.0071528386324644094
Episode Reward: 8.0
Step 489 (2007504) @ Episode 1455/2500, loss: 0.0126610789448022845
Episode Reward: 6.0
Step 441 (2007945) @ Episode 1456/2500, loss: 0.0096893887966871265
Episode Reward: 6.0
Step 597 (2008542) @ Episode 1457/2500, loss: 0.0081329401582479485
Episode Reward: 9.0
Step 617 (2009159) @ Episode 1458/2500, loss: 0.0087370788678526883
Episode Reward: 8.0
Step 300 (2009459) @ Episode 1459/2500, loss: 0.0049390513449907354
Episode Reward: 3.0
Step 564 (2010023) @ Episode 1460/2500, loss: 0.0052282353863120085
Episode Reward: 5.0
Step 269 (2010292) @ Episode 1461/2500, loss: 0.0351530089974403443
Episode Reward: 2.0
Step 228 (2010520) @ Episode 146

[2018-01-13 09:37:13,602] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001500.mp4


Step 438 (2028053) @ Episode 1501/2500, loss: 0.0096332821995019914
Episode Reward: 5.0
Step 443 (2028496) @ Episode 1502/2500, loss: 0.0053360732272267343
Episode Reward: 6.0
Step 458 (2028954) @ Episode 1503/2500, loss: 0.0047627948224544525
Episode Reward: 6.0
Step 448 (2029402) @ Episode 1504/2500, loss: 0.0054695331491529948
Episode Reward: 5.0
Step 594 (2029996) @ Episode 1505/2500, loss: 0.0156287252902984625
Episode Reward: 12.0
Step 249 (2030245) @ Episode 1506/2500, loss: 0.0040607936680316925
Episode Reward: 2.0
Step 444 (2030689) @ Episode 1507/2500, loss: 0.0125881507992744457
Episode Reward: 6.0
Step 589 (2031278) @ Episode 1508/2500, loss: 0.0122718941420316785
Episode Reward: 7.0
Step 430 (2031708) @ Episode 1509/2500, loss: 0.0050064520910382274
Episode Reward: 4.0
Step 401 (2032109) @ Episode 1510/2500, loss: 0.0074809072539210322
Episode Reward: 4.0
Step 269 (2032378) @ Episode 1511/2500, loss: 0.0119973132386803636
Episode Reward: 2.0
Step 290 (2032668) @ Episode 15

[2018-01-13 10:18:58,980] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001550.mp4


Step 389 (2050167) @ Episode 1551/2500, loss: 0.0061094155535101894
Episode Reward: 4.0
Step 347 (2050514) @ Episode 1552/2500, loss: 0.0522419326007366275
Episode Reward: 3.0
Step 356 (2050870) @ Episode 1553/2500, loss: 0.0048052528873085976
Episode Reward: 3.0
Step 376 (2051246) @ Episode 1554/2500, loss: 0.0140055073425173765
Episode Reward: 4.0
Step 394 (2051640) @ Episode 1555/2500, loss: 0.0074644973501563075
Episode Reward: 4.0
Step 415 (2052055) @ Episode 1556/2500, loss: 0.0037995188031345606
Episode Reward: 5.0
Step 671 (2052726) @ Episode 1557/2500, loss: 0.0126656387001276025
Episode Reward: 9.0
Step 649 (2053375) @ Episode 1558/2500, loss: 0.0144689492881298075
Episode Reward: 7.0
Step 435 (2053810) @ Episode 1559/2500, loss: 0.0171862002462148674
Episode Reward: 5.0
Step 425 (2054235) @ Episode 1560/2500, loss: 0.0060940971598029145
Episode Reward: 4.0
Step 323 (2054558) @ Episode 1561/2500, loss: 0.0194088444113731414
Episode Reward: 3.0
Step 533 (2055091) @ Episode 156

[2018-01-13 10:59:24,447] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001600.mp4


Step 332 (2071036) @ Episode 1601/2500, loss: 0.0037530905101448298
Episode Reward: 3.0
Step 587 (2071623) @ Episode 1602/2500, loss: 0.0056184465065598495
Episode Reward: 8.0
Step 465 (2072088) @ Episode 1603/2500, loss: 0.0816657692193985025
Episode Reward: 5.0
Step 708 (2072796) @ Episode 1604/2500, loss: 0.0063740410842001445
Episode Reward: 14.0
Step 332 (2073128) @ Episode 1605/2500, loss: 0.0073426575399935246
Episode Reward: 3.0
Step 404 (2073532) @ Episode 1606/2500, loss: 0.0171864405274391175
Episode Reward: 5.0
Step 411 (2073943) @ Episode 1607/2500, loss: 0.0102032292634248735
Episode Reward: 4.0
Step 490 (2074433) @ Episode 1608/2500, loss: 0.2726681530475616514
Episode Reward: 7.0
Step 536 (2074969) @ Episode 1609/2500, loss: 0.0136036826297640865
Episode Reward: 8.0
Step 498 (2075467) @ Episode 1610/2500, loss: 0.0086755212396383295
Episode Reward: 5.0
Step 271 (2075738) @ Episode 1611/2500, loss: 0.0752328038215637237
Episode Reward: 2.0
Step 381 (2076119) @ Episode 16

[2018-01-13 11:42:38,155] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001650.mp4


Step 361 (2093498) @ Episode 1651/2500, loss: 0.0134387342259287834
Episode Reward: 4.0
Step 396 (2093894) @ Episode 1652/2500, loss: 0.0591756477952003587
Episode Reward: 4.0
Step 417 (2094311) @ Episode 1653/2500, loss: 0.0086455317214131365
Episode Reward: 4.0
Step 467 (2094778) @ Episode 1654/2500, loss: 0.0147455008700491995
Episode Reward: 6.0
Step 604 (2095382) @ Episode 1655/2500, loss: 0.0069547742605209355
Episode Reward: 5.0
Step 499 (2095881) @ Episode 1656/2500, loss: 0.0092813856899738315
Episode Reward: 3.0
Step 326 (2096207) @ Episode 1657/2500, loss: 0.0080213975161314015
Episode Reward: 1.0
Step 391 (2096598) @ Episode 1658/2500, loss: 0.0129167232662439357
Episode Reward: 4.0
Step 242 (2096840) @ Episode 1659/2500, loss: 0.0065993443131446845
Episode Reward: 1.0
Step 504 (2097344) @ Episode 1660/2500, loss: 0.0114176161587238317
Episode Reward: 6.0
Step 343 (2097687) @ Episode 1661/2500, loss: 0.0061218515038490295
Episode Reward: 3.0
Step 677 (2098364) @ Episode 166

[2018-01-13 12:22:08,831] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001700.mp4


Step 328 (2114292) @ Episode 1701/2500, loss: 0.0092379916459321985
Episode Reward: 3.0
Step 353 (2114645) @ Episode 1702/2500, loss: 0.0069830538704991345
Episode Reward: 3.0
Step 519 (2115164) @ Episode 1703/2500, loss: 0.0041491077281534676
Episode Reward: 6.0
Step 429 (2115593) @ Episode 1704/2500, loss: 0.0113976262509822854
Episode Reward: 5.0
Step 369 (2115962) @ Episode 1705/2500, loss: 0.0074056424200534827
Episode Reward: 4.0
Step 416 (2116378) @ Episode 1706/2500, loss: 0.0088967401534318923
Episode Reward: 5.0
Step 400 (2116778) @ Episode 1707/2500, loss: 0.0056168586015701295
Episode Reward: 4.0
Step 381 (2117159) @ Episode 1708/2500, loss: 0.0130192711949348458
Episode Reward: 4.0
Step 660 (2117819) @ Episode 1709/2500, loss: 0.0049012685194611556
Episode Reward: 8.0
Step 321 (2118140) @ Episode 1710/2500, loss: 0.0039945989847183236
Episode Reward: 1.0
Step 492 (2118632) @ Episode 1711/2500, loss: 0.0100264307111501773
Episode Reward: 6.0
Step 604 (2119236) @ Episode 171

[2018-01-13 13:00:09,815] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001750.mp4


Step 351 (2135100) @ Episode 1751/2500, loss: 0.0061526228673756125
Episode Reward: 3.0
Step 441 (2135541) @ Episode 1752/2500, loss: 0.0156829189509153378
Episode Reward: 5.0
Step 542 (2136083) @ Episode 1753/2500, loss: 0.0084838056936860085
Episode Reward: 4.0
Step 352 (2136435) @ Episode 1754/2500, loss: 0.0062515400350093845
Episode Reward: 3.0
Step 343 (2136778) @ Episode 1755/2500, loss: 0.0086747938767075547
Episode Reward: 3.0
Step 415 (2137193) @ Episode 1756/2500, loss: 0.0058142622001469135
Episode Reward: 4.0
Step 440 (2137633) @ Episode 1757/2500, loss: 0.0127521958202123647
Episode Reward: 5.0
Step 375 (2138008) @ Episode 1758/2500, loss: 0.0073568006046116355
Episode Reward: 4.0
Step 452 (2138460) @ Episode 1759/2500, loss: 0.0157028883695602475
Episode Reward: 2.0
Step 342 (2138802) @ Episode 1760/2500, loss: 0.0078231152147054675
Episode Reward: 3.0
Step 280 (2139082) @ Episode 1761/2500, loss: 0.0062458752654492855
Episode Reward: 1.0
Step 432 (2139514) @ Episode 176

[2018-01-13 13:44:04,349] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001800.mp4


Step 338 (2158421) @ Episode 1801/2500, loss: 0.0216167960315942765
Episode Reward: 2.0
Step 387 (2158808) @ Episode 1802/2500, loss: 0.0061243614181876185
Episode Reward: 4.0
Step 424 (2159232) @ Episode 1803/2500, loss: 0.0087853930890560154
Episode Reward: 2.0
Step 395 (2159627) @ Episode 1804/2500, loss: 0.0251735113561153427
Episode Reward: 3.0
Step 265 (2159892) @ Episode 1805/2500, loss: 0.0118779214099049575
Episode Reward: 1.0
Step 393 (2160285) @ Episode 1806/2500, loss: 0.0067676100879907616
Episode Reward: 3.0
Step 384 (2160669) @ Episode 1807/2500, loss: 0.0063040843233466155
Episode Reward: 4.0
Step 371 (2161040) @ Episode 1808/2500, loss: 0.0147111872211098675
Episode Reward: 2.0
Step 489 (2161529) @ Episode 1809/2500, loss: 0.0146638043224811555
Episode Reward: 5.0
Step 576 (2162105) @ Episode 1810/2500, loss: 0.0030647078529000282
Episode Reward: 5.0
Step 878 (2162983) @ Episode 1811/2500, loss: 0.0312513411045074465
Episode Reward: 10.0
Step 539 (2163522) @ Episode 18

[2018-01-13 14:27:22,091] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001850.mp4


Step 478 (2181620) @ Episode 1851/2500, loss: 0.0091157369315624243
Episode Reward: 4.0
Step 295 (2181915) @ Episode 1852/2500, loss: 0.0064338976517319685
Episode Reward: 2.0
Step 424 (2182339) @ Episode 1853/2500, loss: 0.0091622881591320045
Episode Reward: 4.0
Step 393 (2182732) @ Episode 1854/2500, loss: 0.0212063305079936985
Episode Reward: 4.0
Step 249 (2182981) @ Episode 1855/2500, loss: 0.0113874450325965886
Episode Reward: 1.0
Step 482 (2183463) @ Episode 1856/2500, loss: 0.0127577465027570725
Episode Reward: 6.0
Step 645 (2184108) @ Episode 1857/2500, loss: 0.0124128283932805065
Episode Reward: 7.0
Step 394 (2184502) @ Episode 1858/2500, loss: 0.0049145962111651955
Episode Reward: 4.0
Step 375 (2184877) @ Episode 1859/2500, loss: 0.0062196892686188225
Episode Reward: 3.0
Step 536 (2185413) @ Episode 1860/2500, loss: 0.0068000410683453085
Episode Reward: 4.0
Step 385 (2185798) @ Episode 1861/2500, loss: 0.0086654378101229675
Episode Reward: 4.0
Step 479 (2186277) @ Episode 186

[2018-01-13 15:10:59,317] Starting new video recorder writing to /playpen2/chaonan99/course/David_RL/code/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.0.10190.video001900.mp4


Step 449 (2204420) @ Episode 1901/2500, loss: 0.0085255876183509837
Episode Reward: 5.0
Step 553 (2204973) @ Episode 1902/2500, loss: 0.0099837556481361395
Episode Reward: 7.0
Step 450 (2205423) @ Episode 1903/2500, loss: 0.0140505228191614155
Episode Reward: 5.0
Step 304 (2205727) @ Episode 1904/2500, loss: 0.0083643347024917655
Episode Reward: 2.0
Step 424 (2206151) @ Episode 1905/2500, loss: 0.1376144289970398576
Episode Reward: 6.0
Step 333 (2206484) @ Episode 1906/2500, loss: 0.0225930884480476385
Episode Reward: 3.0
Step 495 (2206979) @ Episode 1907/2500, loss: 0.0104683227837085725
Episode Reward: 5.0
Step 248 (2207227) @ Episode 1908/2500, loss: 0.0095704067498445516
Episode Reward: 1.0
Step 282 (2207509) @ Episode 1909/2500, loss: 0.0036712510045617825
Episode Reward: 3.0
Step 215 (2207724) @ Episode 1910/2500, loss: 0.0223136395215988165
Episode Reward: 1.0
Step 266 (2207990) @ Episode 1911/2500, loss: 0.0154102873057127755
Episode Reward: 2.0
Step 406 (2208396) @ Episode 191

KeyboardInterrupt: 