In [1]:
# Environment setup: install Gymnasium with its Atari and ROM-license extras,
# plus moviepy, pysdl2 and pyvirtualdisplay.
# NOTE(review): versions are not pinned, so a fresh run may resolve to
# different releases than the ones this notebook was executed with.
!pip install gymnasium[atari]
!pip install gymnasium[accept-rom-license]
!pip install moviepy
!pip install pysdl2
!pip install pyvirtualdisplay



In [2]:
# OpenCV provides the `cv2` module imported by the frame-preprocessing cell.
!pip install opencv-python



In [3]:
import gymnasium as gym
import random 
import torch
import numpy as np
from torch import nn
from torch.distributions import Bernoulli

from gymnasium import wrappers
import matplotlib.pyplot as plt
from IPython import display
from tqdm.notebook import tqdm

# Create the Pong environment; "rgb_array" rendering makes env.render() return image frames.
env = gym.make("ALE/Pong-v5", render_mode="rgb_array")
# Record an mp4 under video/pong-dqn for every episode whose index is a multiple of 200.
env = wrappers.RecordVideo(
    env,
    'video/pong-dqn',
    episode_trigger=lambda episode_idx: episode_idx % 200 == 0,
)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


In [4]:
# Show the human-readable name of each discrete action index of the underlying env.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
from collections import deque
import cv2
cv2.ocl.setUseOpenCL(False)


class LazyFrames(object):
    """Lazily concatenated stack of frames.

    The object keeps a reference to the list of individual frames and only
    concatenates them (along axis 2) the first time the data is actually
    needed. Observations that share frames can therefore share the underlying
    arrays until they are materialised, which keeps replay buffers small.

    Convert to a numpy array (``np.array(obj)`` / ``np.asarray(obj)``) right
    before passing the observation to the model.
    """

    def __init__(self, frames):
        """Store `frames`, a sequence of arrays that can be concatenated on axis 2."""
        self._frames = frames
        self._out = None

    def _force(self):
        """Concatenate the frames once, cache the result and return it."""
        if self._out is None:
            self._out = np.concatenate(self._frames, axis=2)
            # The frame list is no longer needed once the stacked array exists.
            self._frames = None
        return self._out

    def __array__(self, dtype=None, copy=None):
        """Implement the numpy array protocol.

        Args:
            dtype: optional target dtype; when given, a converted array is returned.
            copy: numpy 2 style copy flag. ``None`` keeps the historical
                behaviour (cached array when `dtype` is None, converted copy
                otherwise), ``True`` always returns a fresh array and ``False``
                forbids copying.

        Raises:
            ValueError: if ``copy=False`` but a dtype conversion would require a copy.
        """
        out = self._force()
        if dtype is not None:
            if copy is False and out.dtype != np.dtype(dtype):
                raise ValueError(
                    "Unable to avoid copy while converting LazyFrames to the requested dtype.")
            # astype copies unless the caller explicitly asked for no copy.
            return out.astype(dtype, copy=copy is not False)
        return out.copy() if copy else out

    def __len__(self):
        """Length of the first axis of the stacked array."""
        return len(self._force())

    def __getitem__(self, i):
        """Index into the stacked array."""
        return self._force()[i]

class FrameStack(gym.Wrapper):
    """Observation wrapper that returns the last `k` frames stacked on the channel axis.

    Observations are returned as `LazyFrames`, so the stacked array is only
    built when it is actually needed.
    """

    def __init__(self, env, k):
        """Wrap `env` and widen its observation space to hold `k` stacked frames."""
        super().__init__(env)
        self.k = k
        self.frames = deque(maxlen=k)
        base_shape = env.observation_space.shape
        stacked_shape = (base_shape[0], base_shape[1], base_shape[2] * k)
        self.observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=stacked_shape,
            dtype=env.observation_space.dtype,
        )

    def reset(self, **kwargs):
        """Reset the inner env and fill the whole stack with the first observation."""
        first_ob, info = self.env.reset(**kwargs)
        self.frames.extend([first_ob] * self.k)
        return self._get_ob(), info

    def step(self, action):
        """Step the inner env and push the new observation onto the stack."""
        ob, reward, done, truncated, info = self.env.step(action)
        self.frames.append(ob)
        stacked = self._get_ob()
        return stacked, reward, done, truncated, info

    def _get_ob(self):
        """Snapshot the current stack as a LazyFrames object."""
        assert len(self.frames) == self.k
        snapshot = list(self.frames)
        return LazyFrames(snapshot)


class WarpFrame(gym.ObservationWrapper):
    """Convert RGB frames to 84x84 single-channel grayscale images."""

    def __init__(self, env):
        """Wrap `env` and advertise an (84, 84, 1) uint8 observation space."""
        super().__init__(env)
        self.width = 84
        self.height = 84
        target_shape = (self.height, self.width, 1)
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=target_shape, dtype=np.uint8)

    def observation(self, frame):
        """Grayscale and resize `frame`, then add a trailing channel axis."""
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        resized = cv2.resize(
            gray, (self.width, self.height), interpolation=cv2.INTER_AREA)
        return np.expand_dims(resized, axis=-1)

class EpisodicLifeEnv(gym.Wrapper):
    """Report the loss of a life as the end of an episode.

    The inner environment is only truly reset once it has really finished
    (terminated or truncated); after a mere life loss, `reset` advances the
    game with a single no-op step instead.
    """

    def __init__(self, env=None):
        """Wrap `env`; the first `reset` call always performs a real reset."""
        super(EpisodicLifeEnv, self).__init__(env)
        self.lives = 0
        self.was_real_done = True
        self.was_real_reset = False

    def step(self, action):
        """Step the inner env and flag `done` when a life was lost.

        Returns the usual ``(obs, reward, done, truncated, info)`` tuple.
        """
        obs, reward, done, truncated, info = self.env.step(action)
        # A truncated episode (e.g. a step/frame limit) is finished just like
        # a terminated one: the inner env must be reset for real before it is
        # stepped again, so remember both cases.
        self.was_real_done = done or truncated
        # Compare against the life count seen on the previous step; the count
        # is refreshed below so bonus lives are tracked too.
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # Requiring lives > 0 avoids signalling repeatedly while a game
            # lingers at zero lives for a few frames; in that situation the
            # episode only ends once the environment itself reports done.
            done = True
        self.lives = lives
        return obs, reward, done, truncated, info

    def reset(self, **kwargs):
        """Reset the inner env only if its episode really ended.

        Otherwise take one no-op step (action 0) to move on from the
        life-loss state. `was_real_reset` records which of the two happened.
        """
        if self.was_real_done:
            obs, info = self.env.reset(**kwargs)
            self.was_real_reset = True
        else:
            # no-op step to advance from terminal/lost life state
            obs, _, _, _, info = self.env.step(0)
            self.was_real_reset = False
        self.lives = self.env.unwrapped.ale.lives()
        return obs, info
    
class MaxAndSkipEnv(gym.Wrapper):
    """Repeat each action for `skip` frames and return the pixel-wise max of the last two."""

    def __init__(self, env=None, skip=4):
        """Return only every `skip`-th frame."""
        super(MaxAndSkipEnv, self).__init__(env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = deque(maxlen=2)
        self._skip = skip

    def step(self, action):
        """Apply `action` up to `skip` times, summing the rewards.

        The repetition stops early as soon as the inner env reports that the
        episode terminated or was truncated, so a finished env is never
        stepped again.

        Returns:
            ``(max_frame, total_reward, done, truncated, info)`` where
            `max_frame` is the element-wise maximum over the buffered raw
            observations and the remaining values come from the last inner step.
        """
        total_reward = 0.0
        done = False
        truncated = False
        info = {}
        for _ in range(self._skip):
            obs, reward, done, truncated, info = self.env.step(action)
            self._obs_buffer.append(obs)
            total_reward += reward
            if done or truncated:
                break

        max_frame = np.max(np.stack(self._obs_buffer), axis=0)

        return max_frame, total_reward, done, truncated, info

    def reset(self, **kwargs):
        """Clear past frame buffer and init. to first obs. from inner env."""
        self._obs_buffer.clear()
        obs, info = self.env.reset(**kwargs)
        self._obs_buffer.append(obs)
        return obs, info

    
class NoopResetEnv(gym.Wrapper):
    """Randomise the initial state by playing a random number of no-ops after reset."""

    def __init__(self, env=None, noop_max=30):
        """Sample initial states by taking random number of no-ops on reset.
        No-op is assumed to be action 0.
        """
        super(NoopResetEnv, self).__init__(env)
        self.noop_max = noop_max
        # When set, this fixed count is used instead of a random draw.
        self.override_num_noops = None
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def step(self, action):
        """Forward the step to the inner env unchanged."""
        return self.env.step(action)

    def reset(self, **kwargs):
        """Reset, then do the no-op action for a number of steps in [1, noop_max].

        If the episode ends (terminated or truncated) while the no-ops are
        being played, the inner env is reset again before continuing.

        Returns:
            ``(obs, info)`` from the last no-op step or from the last reset.
        """
        self.env.reset(**kwargs)
        if self.override_num_noops is not None:
            noops = self.override_num_noops
        else:
            noops = np.random.randint(1, self.noop_max + 1)
        assert noops > 0
        obs = None
        info = None
        for _ in range(noops):
            obs, _, done, truncated, info = self.env.step(0)
            if done or truncated:
                obs, info = self.env.reset(**kwargs)
        return obs, info

In [7]:
def make_env(env, stack_frames=True, episodic_life=True):
    """Apply the preprocessing wrappers to `env` and return the wrapped env.

    Order (innermost first): optional EpisodicLifeEnv, NoopResetEnv (up to 30
    no-ops), MaxAndSkipEnv (skip 4), WarpFrame, optional FrameStack of 4.
    No FireResetEnv wrapper is applied.
    """
    wrapped = EpisodicLifeEnv(env) if episodic_life else env
    wrapped = NoopResetEnv(wrapped, noop_max=30)
    wrapped = MaxAndSkipEnv(wrapped, skip=4)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack_frames else wrapped


In [8]:
# Replace `env` with its preprocessed version.
# NOTE: this cell is not idempotent; running it again wraps the already-wrapped env once more.
env = make_env(env)

In [9]:
def render(env):
    """Draw the current frame of `env` into the notebook.

    Uses the module-level image artist `img`, which must exist before this
    function is called.
    """
    frame = env.render()
    img.set_data(frame)
    figure = plt.gcf()
    display.display(figure)
    # wait=True postpones the clearing until the next output arrives.
    display.clear_output(wait=True)

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

class DQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by two linear layers."""

    def __init__(self, in_channels=4, n_actions=14):
        """
        Build the network layers.
        Args:
            in_channels (int): number of channels of the input images
            n_actions (int): number of Q-values produced per input
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        # The linear layer expects 7 * 7 * 64 flattened features per sample.
        self.fc4 = nn.Linear(7 * 7 * 64, 512)
        self.head = nn.Linear(512, n_actions)

    def forward(self, x):
        """Scale `x` by 1/255 and return one Q-value per action for each batch row."""
        features = x.float() / 255
        for conv in (self.conv1, self.conv2, self.conv3):
            features = F.relu(conv(features))
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc4(flat))
        return self.head(hidden)

In [11]:
from collections import namedtuple
import random

# One replay-buffer entry; the field order is the positional order expected by push().
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward'],
)


class ReplayMemory:
    """Fixed-capacity ring buffer of Transition tuples with uniform random sampling."""

    def __init__(self, capacity):
        """Create an empty buffer that holds at most `capacity` transitions."""
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Store Transition(*args), overwriting the oldest entry once the buffer is full."""
        entry = Transition(*args)
        if len(self.memory) < self.capacity:
            # Still growing: reserve the slot that is written just below.
            self.memory.append(None)
        self.memory[self.position] = entry
        next_slot = self.position + 1
        self.position = next_slot % self.capacity

    def sample(self, batch_size):
        """Return `batch_size` distinct stored transitions chosen at random."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [13]:
import math

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class DQNPolicy:
    """Epsilon-greedy DQN agent.

    Bundles the online (`policy_net`) and target (`target_net`) Q-networks,
    the replay memory and the Adam optimizer, and exposes the three calls the
    training loop needs: `policy(observation)` to pick an action,
    `init_game(observation)` at the start of an episode and `update(...)`
    after every environment step.
    """

    # NOTE(review): 16636 is an unusually large minibatch for DQN - confirm it is intended.
    BATCH_SIZE = 16636
    GAMMA = 0.99                # discount factor used in the TD target
    EPS_START = 1               # exploration rate at step 0
    EPS_END = 0.02              # asymptotic exploration rate
    EPS_DECAY = 1000000         # time constant (in steps) of the exponential decay
    TARGET_UPDATE = 1000        # copy policy_net -> target_net every this many steps
    RENDER = False
    INITIAL_MEMORY = 10000      # steps to collect before the first optimisation
    MEMORY_SIZE = 10 * INITIAL_MEMORY

    def __init__(self, lr=1e-4):
        """Create the networks, replay memory and optimizer.

        Args:
            lr (float): learning rate of the Adam optimizer.
        """
        self.n_actions = 4
        self.steps_done = 0
        self.mean_reward = None
        # Action returned by the most recent __call__; consumed by update().
        self.last_action = None
        self.policy_net = DQN(n_actions=self.n_actions).to(device)
        self.target_net = DQN(n_actions=self.n_actions).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())

        self.memory = ReplayMemory(DQNPolicy.MEMORY_SIZE)

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)

    def get_state(self, obs):
        """Convert an observation to a tensor with the last axis moved first and a batch axis added."""
        state = np.array(obs)
        state = state.transpose((2, 0, 1))
        state = torch.from_numpy(state)
        return state.unsqueeze(0)

    def __call__(self, observation):
        """Choose an action for `observation` with an epsilon-greedy rule.

        Epsilon decays exponentially from EPS_START towards EPS_END as
        `steps_done` grows. The chosen action is also remembered in
        `last_action` so that `update` can store it.

        Returns:
            torch.Tensor: a (1, 1) long tensor holding the action index.
        """
        state = self.get_state(observation)
        sample = random.random()
        eps_threshold = DQNPolicy.EPS_END + (DQNPolicy.EPS_START - DQNPolicy.EPS_END) * math.exp(
            -1. * self.steps_done / DQNPolicy.EPS_DECAY)
        self.steps_done += 1
        if sample > eps_threshold:
            with torch.no_grad():
                action = self.policy_net(state.to(device)).max(1)[1].view(1, 1)
        else:
            action = torch.tensor([[random.randrange(self.n_actions)]], device=device, dtype=torch.long)
        self.last_action = action
        return action

    def init_game(self, observation):
        """Start a new episode from `observation` and reset the episode reward."""
        self.state = self.get_state(observation)
        self.total_reward = 0.0

    def update(self, obs, reward, terminated, truncated, info, pbar, action=None):
        """Store the latest transition and run the periodic training housekeeping.

        Args:
            obs: observation returned by the environment step.
            reward: reward returned by the environment step.
            terminated (bool): whether the episode terminated on this step.
            truncated (bool): accepted for API symmetry; not used here.
            info: accepted for API symmetry; not used here.
            pbar: progress bar whose postfix is refreshed at episode end.
            action: action that produced this transition. Defaults to the
                action returned by the most recent `__call__`, so the method
                no longer depends on a notebook-global `action` variable.

        Raises:
            RuntimeError: if no action was given and none has been chosen yet.
        """
        if action is None:
            action = self.last_action
        if action is None:
            raise RuntimeError("update() called before any action was selected")

        self.total_reward += reward
        if not terminated:
            self.next_state = self.get_state(obs)
        else:
            # Exponential moving average of the episode returns.
            if self.mean_reward is None:
                self.mean_reward = self.total_reward
            else:
                self.mean_reward = self.mean_reward * 0.95 + self.total_reward * (1.0 - 0.95)
            pbar.set_postfix({'total_reward': self.total_reward, 'mean_reward': self.mean_reward, 'steps': self.steps_done})
            # A terminal transition has no successor state.
            self.next_state = None

        # Keep the replay memory on the CPU with fixed shapes:
        # action is (1, 1) long and reward is (1,) float32.
        action = torch.as_tensor(action, dtype=torch.long).reshape(1, 1).to('cpu')
        reward = torch.tensor([reward], dtype=torch.float32)
        self.memory.push(self.state, action, self.next_state, reward)
        self.state = self.next_state

        if terminated and self.steps_done > DQNPolicy.INITIAL_MEMORY:
            self.optimize_model()

        if self.steps_done % DQNPolicy.TARGET_UPDATE == 0:
            self.target_net.load_state_dict(self.policy_net.state_dict())

        if self.steps_done % 100_000 == 0:
            self.save(f'dqn_{self.steps_done}.pt')

    def optimize_model(self):
        """Run one gradient step on a random minibatch from the replay memory.

        Does nothing until the memory holds at least BATCH_SIZE transitions.
        """
        if len(self.memory) < DQNPolicy.BATCH_SIZE:
            return
        transitions = self.memory.sample(DQNPolicy.BATCH_SIZE)
        # zip(*transitions) turns the list of Transition tuples into one
        # Transition whose fields are tuples over the whole batch.
        batch = Transition(*zip(*transitions))

        # The stored entries are already tensors (see update), so they are
        # concatenated directly: states -> (B, ...), actions -> (B, 1),
        # rewards -> (B,). Re-wrapping them in torch.tensor would add
        # dimensions and break gather / the TD-target broadcast below.
        state_batch = torch.cat(batch.state).to(device)
        action_batch = torch.cat(batch.action).to(device)
        reward_batch = torch.cat(batch.reward).to(device)

        non_final_mask = torch.tensor(
            [s is not None for s in batch.next_state],
            device=device, dtype=torch.bool)

        # Q(s, a) of the actions that were actually taken.
        state_action_values = self.policy_net(state_batch).gather(1, action_batch)

        # max_a' Q_target(s', a') for non-terminal successors, 0 for terminal ones.
        next_state_values = torch.zeros(DQNPolicy.BATCH_SIZE, device=device)
        if non_final_mask.any():
            non_final_next_states = torch.cat(
                [s for s in batch.next_state if s is not None]).to(device)
            with torch.no_grad():
                next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0]
        expected_state_action_values = (next_state_values * DQNPolicy.GAMMA) + reward_batch

        loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))

        self.optimizer.zero_grad()
        loss.backward()
        # Clamp every gradient element to [-1, 1] before the optimizer step.
        for param in self.policy_net.parameters():
            if param.grad is not None:
                param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

    def load(self, PATH):
        """Restore networks, optimizer state and counters from a checkpoint written by `save`.

        The checkpoint is mapped onto the current `device`, so a file saved on
        a GPU machine can be loaded on a CPU-only one.
        """
        # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
        checkpoint = torch.load(PATH, map_location=device)
        self.policy_net.load_state_dict(checkpoint['model_state_dict'])
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.steps_done = checkpoint['steps_done']
        if "mean_reward" in checkpoint:
            self.mean_reward = checkpoint['mean_reward']

    def save(self, PATH):
        """Write step counter, policy network, optimizer state and mean reward to `PATH`."""
        torch.save({
                    'steps_done': self.steps_done,
                    'model_state_dict': self.policy_net.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'mean_reward': self.mean_reward}, PATH)

In [14]:
policy = DQNPolicy()
#policy.load("model_8500000.pt")

In [None]:
# Start the first episode and hand the initial observation to the policy.
observation, info = env.reset()
policy.init_game(observation)

# Image artist used by render(); it is only refreshed if the render(env) call below is enabled.
plt.ion()
plt.axis('off')
img = plt.imshow(env.render())

try:
    # The loop runs until it is interrupted; `total` only scales the progress bar,
    # which advances once per finished episode.
    with tqdm(total=10000) as pbar:
        while True:
            try:
                action = policy(observation)
                # env.step expects a plain integer action, so unwrap the (1, 1) tensor.
                observation, reward, terminated, truncated, info = env.step(action.item())
                #render(env)
                policy.update(observation, reward, terminated, truncated, info, pbar)

                if terminated or truncated:
                    pbar.update()
                    observation, info = env.reset()
                    policy.init_game(observation)

            except KeyboardInterrupt:
                break
finally:
    # Always close the env, even if training stops on an unexpected error.
    env.close()

  0%|          | 0/10000 [00:00<?, ?it/s]

Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  39%|███▉      | 297/766 [00:00<00:00, 2958.08it/s, now=None][A
t:  87%|████████▋ | 665/766 [00:00<00:00, 3380.22it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-400.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 253/766 [00:00<00:00, 2525.68it/s, now=None][A
t:  78%|███████▊  | 601/766 [00:00<00:00, 3082.61it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-600.mp4




t:   0%|          | 0/923 [00:00<?, ?it/s, now=None][A
t:  24%|██▍       | 220/923 [00:00<00:00, 2194.51it/s, now=None][A
t:  59%|█████▉    | 546/923 [00:00<00:00, 2817.45it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-800.mp4




t:   0%|          | 0/845 [00:00<?, ?it/s, now=None][A
t:  27%|██▋       | 226/845 [00:00<00:00, 2255.05it/s, now=None][A
t:  72%|███████▏  | 611/845 [00:00<00:00, 3192.16it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1000.mp4




t:   0%|          | 0/794 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 254/794 [00:00<00:00, 2539.11it/s, now=None][A
t:  83%|████████▎ | 657/794 [00:00<00:00, 3415.29it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1200.mp4




t:   0%|          | 0/1072 [00:00<?, ?it/s, now=None][A
t:  22%|██▏       | 234/1072 [00:00<00:00, 2337.13it/s, now=None][A
t:  55%|█████▌    | 591/1072 [00:00<00:00, 3061.66it/s, now=None][A
t:  96%|█████████▌| 1031/1072 [00:00<00:00, 3669.31it/s, now=None][A
                                                                  [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1400.mp4




t:   0%|          | 0/948 [00:00<?, ?it/s, now=None][A
t:  36%|███▌      | 341/948 [00:00<00:00, 3404.06it/s, now=None][A
t:  87%|████████▋ | 829/948 [00:00<00:00, 4270.80it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1600.mp4




t:   0%|          | 0/967 [00:00<?, ?it/s, now=None][A
t:  24%|██▍       | 234/967 [00:00<00:00, 2339.19it/s, now=None][A
t:  57%|█████▋    | 548/967 [00:00<00:00, 2805.37it/s, now=None][A
t:  98%|█████████▊| 947/967 [00:00<00:00, 3344.39it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  46%|████▌     | 354/766 [00:00<00:00, 3537.86it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-1800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2000.mp4




t:   0%|          | 0/1072 [00:00<?, ?it/s, now=None][A
t:  26%|██▌       | 276/1072 [00:00<00:00, 2754.55it/s, now=None][A
t:  60%|█████▉    | 640/1072 [00:00<00:00, 3273.24it/s, now=None][A
t:  94%|█████████▍| 1005/1072 [00:00<00:00, 3444.35it/s, now=None][A
                                                                  [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2200.mp4




t:   0%|          | 0/1069 [00:00<?, ?it/s, now=None][A
t:  21%|██        | 226/1069 [00:00<00:00, 2257.47it/s, now=None][A
t:  54%|█████▎    | 573/1069 [00:00<00:00, 2969.07it/s, now=None][A
t:  87%|████████▋ | 934/1069 [00:00<00:00, 3258.95it/s, now=None][A
                                                                 [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2400.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  39%|███▉      | 302/766 [00:00<00:00, 3019.00it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 250/766 [00:00<00:00, 2492.92it/s, now=None][A
t:  78%|███████▊  | 601/766 [00:00<00:00, 3082.56it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2800.mp4




t:   0%|          | 0/887 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 156/887 [00:00<00:00, 1553.13it/s, now=None][A
t:  62%|██████▏   | 546/887 [00:00<00:00, 2930.39it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-2800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3000.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  41%|████      | 314/766 [00:00<00:00, 3136.48it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3200.mp4




t:   0%|          | 0/939 [00:00<?, ?it/s, now=None][A
t:  30%|██▉       | 279/939 [00:00<00:00, 2785.22it/s, now=None][A
t:  68%|██████▊   | 634/939 [00:00<00:00, 3231.64it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3400.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  34%|███▍      | 280/826 [00:00<00:00, 2799.40it/s, now=None][A
t:  78%|███████▊  | 642/826 [00:00<00:00, 3279.43it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-3400.mp4


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5400.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  41%|████      | 335/826 [00:00<00:00, 3346.92it/s, now=None][A
t:  98%|█████████▊| 806/826 [00:00<00:00, 4147.52it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5600.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  42%|████▏     | 348/826 [00:00<00:00, 3479.41it/s, now=None][A
t:  98%|█████████▊| 810/826 [00:00<00:00, 4150.14it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5800.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  24%|██▍       | 201/826 [00:00<00:00, 2004.70it/s, now=None][A
t:  65%|██████▍   | 535/826 [00:00<00:00, 2785.99it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-5800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6000.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  48%|████▊     | 371/766 [00:00<00:00, 3709.25it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6200.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  39%|███▉      | 325/826 [00:00<00:00, 3243.91it/s, now=None][A
t:  94%|█████████▍| 779/826 [00:00<00:00, 4001.46it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6400.mp4




t:   0%|          | 0/1191 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 220/1191 [00:00<00:00, 2196.10it/s, now=None][A
t:  45%|████▍     | 535/1191 [00:00<00:00, 2752.01it/s, now=None][A
t:  80%|████████  | 957/1191 [00:00<00:00, 3419.96it/s, now=None][A
                                                                 [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6600.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  31%|███       | 255/826 [00:00<00:00, 2546.81it/s, now=None][A
t:  87%|████████▋ | 718/826 [00:00<00:00, 3769.46it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6800.mp4




t:   0%|          | 0/844 [00:00<?, ?it/s, now=None][A
t:  43%|████▎     | 365/844 [00:00<00:00, 3642.86it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-6800.mp4


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9000.mp4




t:   0%|          | 0/904 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 290/904 [00:00<00:00, 2881.09it/s, now=None][A
t:  74%|███████▎  | 665/904 [00:00<00:00, 3386.11it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  49%|████▊     | 372/766 [00:00<00:00, 3714.07it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9400.mp4




t:   0%|          | 0/828 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 268/828 [00:00<00:00, 2678.69it/s, now=None][A
t:  81%|████████  | 668/828 [00:00<00:00, 3455.76it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  35%|███▍      | 265/766 [00:00<00:00, 2646.19it/s, now=None][A
t:  74%|███████▍  | 570/766 [00:00<00:00, 2882.95it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  35%|███▌      | 269/766 [00:00<00:00, 2684.04it/s, now=None][A
t:  84%|████████▍ | 642/766 [00:00<00:00, 3294.83it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-9800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10000.mp4




t:   0%|          | 0/827 [00:00<?, ?it/s, now=None][A
t:  28%|██▊       | 231/827 [00:00<00:00, 2309.15it/s, now=None][A
t:  70%|███████   | 583/827 [00:00<00:00, 3021.02it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 252/766 [00:00<00:00, 2517.01it/s, now=None][A
t:  87%|████████▋ | 670/766 [00:00<00:00, 3494.08it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10400.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  34%|███▍      | 263/766 [00:00<00:00, 2624.79it/s, now=None][A
t:  78%|███████▊  | 596/766 [00:00<00:00, 3033.96it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10600.mp4




t:   0%|          | 0/846 [00:00<?, ?it/s, now=None][A
t:  39%|███▊      | 327/846 [00:00<00:00, 3269.56it/s, now=None][A
t:  94%|█████████▎| 793/846 [00:00<00:00, 4086.91it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10800.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  30%|██▉       | 244/826 [00:00<00:00, 2437.64it/s, now=None][A
t:  71%|███████   | 588/826 [00:00<00:00, 3024.11it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-10800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11000.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  42%|████▏     | 350/826 [00:00<00:00, 3498.84it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  23%|██▎       | 179/766 [00:00<00:00, 1786.92it/s, now=None][A
t:  73%|███████▎  | 556/766 [00:00<00:00, 2950.09it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11400.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  31%|███       | 255/826 [00:00<00:00, 2547.25it/s, now=None][A
t:  75%|███████▌  | 620/826 [00:00<00:00, 3190.33it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 243/766 [00:00<00:00, 2429.86it/s, now=None][A
t:  80%|███████▉  | 612/766 [00:00<00:00, 3165.47it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11800.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  30%|██▉       | 247/826 [00:00<00:00, 2467.26it/s, now=None][A
t:  72%|███████▏  | 597/826 [00:00<00:00, 3074.19it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-11800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12000.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 245/766 [00:00<00:00, 2427.42it/s, now=None][A
t:  81%|████████  | 620/766 [00:00<00:00, 3197.04it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  35%|███▍      | 265/766 [00:00<00:00, 2646.81it/s, now=None][A
t:  78%|███████▊  | 596/766 [00:00<00:00, 3034.17it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12400.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  38%|███▊      | 288/766 [00:00<00:00, 2878.51it/s, now=None][A
t:  80%|███████▉  | 609/766 [00:00<00:00, 3072.95it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-12400.mp4


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14000.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 262/826 [00:00<00:00, 2615.56it/s, now=None][A
t:  79%|███████▉  | 652/826 [00:00<00:00, 3369.50it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14200.mp4




t:   0%|          | 0/827 [00:00<?, ?it/s, now=None][A
t:  36%|███▌      | 298/827 [00:00<00:00, 2977.24it/s, now=None][A
t:  97%|█████████▋| 802/827 [00:00<00:00, 4186.91it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14400.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 268/826 [00:00<00:00, 2675.82it/s, now=None][A
t:  73%|███████▎  | 602/826 [00:00<00:00, 3062.72it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14600.mp4




t:   0%|          | 0/827 [00:00<?, ?it/s, now=None][A
t:  30%|██▉       | 247/827 [00:00<00:00, 2465.12it/s, now=None][A
t:  72%|███████▏  | 598/827 [00:00<00:00, 3076.67it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  36%|███▌      | 273/766 [00:00<00:00, 2723.08it/s, now=None][A
t:  83%|████████▎ | 637/766 [00:00<00:00, 3258.25it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-14800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15000.mp4




t:   0%|          | 0/888 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 295/888 [00:00<00:00, 2949.64it/s, now=None][A
t:  84%|████████▎ | 742/888 [00:00<00:00, 3841.93it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15200.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  37%|███▋      | 309/826 [00:00<00:00, 3085.49it/s, now=None][A
t:  83%|████████▎ | 688/826 [00:00<00:00, 3498.32it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  34%|███▍      | 259/766 [00:00<00:00, 2586.73it/s, now=None][A
t:  82%|████████▏ | 625/766 [00:00<00:00, 3208.85it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-15800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16000.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  35%|███▍      | 268/766 [00:00<00:00, 2679.93it/s, now=None][A
t:  84%|████████▍ | 645/766 [00:00<00:00, 3315.99it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16200.mp4




t:   0%|          | 0/844 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 269/844 [00:00<00:00, 2683.25it/s, now=None][A
t:  72%|███████▏  | 611/844 [00:00<00:00, 3116.15it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16400.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  49%|████▊     | 372/766 [00:00<00:00, 3716.04it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 253/766 [00:00<00:00, 2526.30it/s, now=None][A
t:  78%|███████▊  | 600/766 [00:00<00:00, 3080.69it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16800.mp4




t:   0%|          | 0/887 [00:00<?, ?it/s, now=None][A
t:  31%|███       | 275/887 [00:00<00:00, 2747.87it/s, now=None][A
t:  77%|███████▋  | 686/887 [00:00<00:00, 3546.82it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-16800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17000.mp4




t:   0%|          | 0/828 [00:00<?, ?it/s, now=None][A
t:  41%|████▏     | 343/828 [00:00<00:00, 3424.49it/s, now=None][A
t:  90%|████████▉ | 745/828 [00:00<00:00, 3771.53it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17200.mp4




t:   0%|          | 0/861 [00:00<?, ?it/s, now=None][A
t:  27%|██▋       | 231/861 [00:00<00:00, 2307.57it/s, now=None][A
t:  63%|██████▎   | 546/861 [00:00<00:00, 2798.36it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17400.mp4




t:   0%|          | 0/845 [00:00<?, ?it/s, now=None][A
t:  34%|███▎      | 284/845 [00:00<00:00, 2838.16it/s, now=None][A
t:  80%|███████▉  | 674/845 [00:00<00:00, 3457.76it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  30%|██▉       | 227/766 [00:00<00:00, 2269.95it/s, now=None][A
t:  79%|███████▉  | 606/766 [00:00<00:00, 3158.57it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17800.mp4




t:   0%|          | 0/827 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 274/827 [00:00<00:00, 2737.30it/s, now=None][A
t:  80%|████████  | 663/827 [00:00<00:00, 3411.80it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-17800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18000.mp4




t:   0%|          | 0/889 [00:00<?, ?it/s, now=None][A
t:  29%|██▉       | 259/889 [00:00<00:00, 2586.01it/s, now=None][A
t:  71%|███████   | 628/889 [00:00<00:00, 3233.31it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  36%|███▋      | 278/766 [00:00<00:00, 2774.95it/s, now=None][A
t:  90%|█████████ | 691/766 [00:00<00:00, 3566.81it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18400.mp4




t:   0%|          | 0/906 [00:00<?, ?it/s, now=None][A
t:  41%|████      | 372/906 [00:00<00:00, 3717.26it/s, now=None][A
t:  93%|█████████▎| 845/906 [00:00<00:00, 4312.23it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 247/766 [00:00<00:00, 2461.75it/s, now=None][A
t:  78%|███████▊  | 598/766 [00:00<00:00, 3074.79it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18800.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  28%|██▊       | 232/826 [00:00<00:00, 2315.71it/s, now=None][A
t:  67%|██████▋   | 550/826 [00:00<00:00, 2821.17it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-18800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19000.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 253/766 [00:00<00:00, 2525.54it/s, now=None][A
t:  81%|████████  | 620/766 [00:00<00:00, 3196.02it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19200.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  36%|███▌      | 276/766 [00:00<00:00, 2754.88it/s, now=None][A
t:  85%|████████▌ | 654/766 [00:00<00:00, 3357.16it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19400.mp4




t:   0%|          | 0/863 [00:00<?, ?it/s, now=None][A
t:  29%|██▉       | 253/863 [00:00<00:00, 2525.46it/s, now=None][A
t:  70%|██████▉   | 603/863 [00:00<00:00, 3095.03it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19600.mp4




t:   0%|          | 0/949 [00:00<?, ?it/s, now=None][A
t:  33%|███▎      | 311/949 [00:00<00:00, 3104.30it/s, now=None][A
t:  78%|███████▊  | 737/949 [00:00<00:00, 3782.95it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  41%|████      | 311/766 [00:00<00:00, 3109.43it/s, now=None][A
t:  96%|█████████▋| 738/766 [00:00<00:00, 3789.46it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-19800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20000.mp4




t:   0%|          | 0/827 [00:00<?, ?it/s, now=None][A
t:  28%|██▊       | 229/827 [00:00<00:00, 2283.89it/s, now=None][A
t:  72%|███████▏  | 597/827 [00:00<00:00, 3102.01it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20200.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  27%|██▋       | 220/826 [00:00<00:00, 2197.72it/s, now=None][A
t:  66%|██████▌   | 547/826 [00:00<00:00, 2826.09it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20400.mp4




t:   0%|          | 0/828 [00:00<?, ?it/s, now=None][A
t:  39%|███▉      | 325/828 [00:00<00:00, 3247.14it/s, now=None][A
t:  81%|████████  | 670/828 [00:00<00:00, 3363.56it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  32%|███▏      | 243/766 [00:00<00:00, 2420.42it/s, now=None][A
t:  79%|███████▉  | 604/766 [00:00<00:00, 3114.21it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  45%|████▌     | 347/766 [00:00<00:00, 3463.55it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-20800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21000.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  31%|███       | 252/826 [00:00<00:00, 2514.62it/s, now=None][A
t:  74%|███████▍  | 613/826 [00:00<00:00, 3153.78it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21200.mp4




t:   0%|          | 0/795 [00:00<?, ?it/s, now=None][A
t:  37%|███▋      | 291/795 [00:00<00:00, 2909.91it/s, now=None][A
t:  84%|████████▍ | 670/795 [00:00<00:00, 3424.85it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21400.mp4




t:   0%|          | 0/921 [00:00<?, ?it/s, now=None][A
t:  26%|██▌       | 240/921 [00:00<00:00, 2398.74it/s, now=None][A
t:  62%|██████▏   | 568/921 [00:00<00:00, 2914.38it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21600.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  37%|███▋      | 284/766 [00:00<00:00, 2837.53it/s, now=None][A
t:  83%|████████▎ | 633/766 [00:00<00:00, 3218.83it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21600.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21800.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21800.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  35%|███▌      | 271/766 [00:00<00:00, 2704.79it/s, now=None][A
t:  81%|████████  | 619/766 [00:00<00:00, 3156.50it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-21800.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22000.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22000.mp4




t:   0%|          | 0/766 [00:00<?, ?it/s, now=None][A
t:  35%|███▍      | 267/766 [00:00<00:00, 2665.79it/s, now=None][A
t:  84%|████████▍ | 642/766 [00:00<00:00, 3302.36it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22000.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22200.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22200.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  31%|███       | 253/826 [00:00<00:00, 2525.97it/s, now=None][A
t:  75%|███████▌  | 622/826 [00:00<00:00, 3206.95it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22200.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22400.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22400.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  26%|██▋       | 218/826 [00:00<00:00, 2178.58it/s, now=None][A
t:  72%|███████▏  | 593/826 [00:00<00:00, 3100.89it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22400.mp4
Moviepy - Building video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22600.mp4.
Moviepy - Writing video /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22600.mp4




t:   0%|          | 0/826 [00:00<?, ?it/s, now=None][A
t:  23%|██▎       | 193/826 [00:00<00:00, 1925.24it/s, now=None][A
t:  67%|██████▋   | 555/826 [00:00<00:00, 2917.51it/s, now=None][A
                                                                [A

Moviepy - Done !
Moviepy - video ready /home/chgaw002/DeepLearning/video/pong-dqn/rl-video-episode-22600.mp4


In [154]:
# Persist the policy object to "model.pt" (a relative path, so it resolves
# against the kernel's current working directory).
# NOTE(review): `policy` and its `save` method are defined in earlier cells
# outside this view — presumably this writes the trained network's weights;
# confirm the on-disk format there before relying on it for reloading.
policy.save("model.pt")