In [46]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from stable_baselines3.common.atari_wrappers import (  
    ClipRewardEnv,
    EpisodicLifeEnv,
    FireResetEnv,
    MaxAndSkipEnv,
    NoopResetEnv,
)

In [47]:
#CONSTANTS
total_timesteps = 1000000
num_envs = 4
num_steps = 128
num_epochs = 4
batch_size = 128
num_updates = total_timesteps // batch_size


In [57]:
def make_env(gym_id, seed, idx, capture_video, run_name):
    def env_create():
        env = gym.make(gym_id, render_mode="rgb_array")
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env = gym.wrappers.RecordVideo(env, f"videos/{run_name}", lambda episode_id: episode_id % 100 == 0)
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)
        env = EpisodicLifeEnv(env)
        if "FIRE" in env.unwrapped.get_action_meanings():
            env = FireResetEnv(env)
        env = ClipRewardEnv(env)
        env = gym.wrappers.ResizeObservation(env, (84, 84))
        env = gym.wrappers.GrayScaleObservation(env)
        env = gym.wrappers.FrameStack(env, 4)
        env.unwrapped.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env

    return env_create

In [58]:
def layer_initilization(layer, std=np.sqrt(2), bias_const=0.0):
    nn.init.orthogonal_(layer.weight, std)
    nn.init.constant_(layer.bias.data, bias_const)
    return layer

class PPOAgent(nn.Module):
    
    def __init__(self, n_actions, n_frames = 4 ):
        super(PPOAgent, self).__init__()
        
        self.conv = nn.Sequential(
            layer_initilization(nn.Conv2d(n_frames, 32, kernel_size=8, stride=4)),
            nn.ReLU(),
            layer_initilization(nn.Conv2d(32, 64, kernel_size=4, stride=2)),
            nn.ReLU(),
            layer_initilization(nn.Conv2d(64, 64, kernel_size=3, stride=1)),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(3136, 512)
        )
        
        self.actor = nn.Sequential(
            layer_initilization(nn.Linear(512, n_actions))
        )
        
        self.critic = nn.Sequential(
            layer_initilization(nn.Linear(512, 1))
        )
        
        
    def get_value(self, x):
        return self.critic(self.conv(x))
    
    def get_action(self, x, action = None):
            logits = self.actor(self.conv(x))
            dist = torch.distributions.Categorical(logits=logits)
            if action == None:
                action = dist.sample()
            value = self.critic(self.conv(x))
            return action, dist.log_prob(action), dist.entropy(), value
            

In [59]:
def rollout(envs, agent, num_steps, gamma = 0.99, gae_lambda = 0.95):
    
    
    observations = torch.zeros((num_steps , num_envs) + envs.single_observation_space.shape, dtype=torch.float32)
    actions = torch.zeros((num_steps, num_envs) + envs.single_action_space.shape, dtype=torch.int32)
    rewards = torch.zeros((num_steps, num_envs), dtype=torch.float32)
    values = torch.zeros((num_steps, num_envs), dtype=torch.float32)
    logprobs = torch.zeros((num_steps, num_envs), dtype=torch.float32)
    dones = torch.zeros((num_steps, num_envs), dtype=torch.float32)
    #truncs = torch.zeros((num_steps, num_envs), dtype=torch.float32)

    next_obs = torch.Tensor(envs.reset()[0])
    next_done = torch.zeros(num_envs)
    # next_trunc = torch.zeros(num_envs)
    
    for step in range(num_steps):
        observations[step] = next_obs
        dones[step] = torch.Tensor(next_done)
        #truncs[step] = next_trunc
        
        with torch.no_grad():
            action, logprob, _, value = agent.get_action(next_obs)
            
        actions[step] = action
        values[step] = value.view(-1)
        logprobs[step] = logprob
        
        next_obs, reward, next_done, _, _ = envs.step(action.cpu().numpy())
        next_obs = torch.Tensor(next_obs)
        rewards[step] = torch.Tensor(reward)
        
    with torch.no_grad():
            next_value = agent.get_value(next_obs).reshape(1, -1)
            advantages = torch.zeros_like(rewards)
            lastgaelam = 0
            for t in reversed(range(num_steps)):
                if t == num_steps - 1:
                    nextnonterminal = 1.0 - next_done
                    nextvalues = next_value
                else:
                    nextnonterminal = 1.0 - dones[t + 1]
                    nextvalues = values[t + 1]
                delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
                advantages[t] = lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
            returns = advantages + values
    
    return observations, actions, returns, values, advantages, logprobs, dones
        
    

In [60]:
def update_agent(agent, optimizer, observations, actions, returns, values, advantages, logprobs, clip_param = 0.2, vf_coef = 0.5, ent_coef = 0.01):
    
    observations = observations.view(-1, *observations.shape[2:])
    actions = actions.view(-1, *actions.shape[2:])
    returns = returns.view(-1)
    values = values.view(-1)
    advantages = advantages.view(-1)
    logprobs = logprobs.view(-1)
    indices = torch.randperm(observations.size(0))
    observations = observations[indices]
    actions = actions[indices]
    returns = returns[indices]
    advantages = advantages[indices]
    logprobs = logprobs[indices]
    advantages = advantages.view(-1)
    logprobs = logprobs.view(-1)
    num_batches = observations.size(0) // 128
    for _ in range(num_epochs):
        for batch in range(num_batches):
            batch_indices = slice(batch * 128, (batch + 1) * 128)
            
            obs_batch = observations[batch_indices]
            act_batch = actions[batch_indices]
            ret_batch = returns[batch_indices]
            adv_batch = advantages[batch_indices]
            norm_adv_batch = (adv_batch - adv_batch.mean()) / (adv_batch.std() + 1e-8)
            logprob_batch = logprobs[batch_indices]
            
            _, logprob, entropy, value = agent.get_action(obs_batch, act_batch)
            ratio = (logprob - logprob_batch).exp()
            policy_loss = -torch.min(ratio * norm_adv_batch, torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * norm_adv_batch).mean()
            value_loss = 0.5 * (value - ret_batch).pow(2).mean()
            entropy_loss = entropy.mean()
            loss = policy_loss + value_loss * vf_coef - entropy_loss * ent_coef
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(agent.parameters(), 0.5)
            optimizer.step()
            
    return loss.item(), policy_loss.item(), value_loss.item(), entropy_loss.item()
    
    
    

In [62]:
#FINAL TRAINING LOOP
envs = gym.vector.SyncVectorEnv([make_env("ALE/Pong-v5", 9, i, False, "Pong") for i in range(num_envs)])
agent = PPOAgent(envs.single_action_space.n)
optimizer = torch.optim.Adam(agent.parameters(), lr=2.5e-4, eps=1e-5)
for update in range(num_updates):
    observations, actions, returns, values, advantages, logprobs, dones = rollout(envs, agent, num_steps)
    loss, policy_loss, value_loss, entropy_loss = update_agent(agent, optimizer, observations, actions, returns, values, advantages, logprobs)
    print(f"Update: {update}, Loss: {loss}, Policy Loss: {policy_loss}, Value Loss: {value_loss}, Entropy Loss: {entropy_loss}")

Update: 463, Loss: 0.1704680174589157, Policy Loss: -0.055769290775060654, Value Loss: 0.4720878005027771, Entropy Loss: 0.9806581735610962
Update: 464, Loss: 0.14615431427955627, Policy Loss: -0.04496391862630844, Value Loss: 0.40233680605888367, Entropy Loss: 1.0050170421600342
Update: 465, Loss: -0.009928026236593723, Policy Loss: -0.08135127276182175, Value Loss: 0.163709357380867, Entropy Loss: 1.0431432723999023
Update: 466, Loss: 0.19607031345367432, Policy Loss: -0.03554758429527283, Value Loss: 0.48416972160339355, Entropy Loss: 1.0466958284378052
Update: 467, Loss: 0.011014502495527267, Policy Loss: -0.07798893004655838, Value Loss: 0.19915689527988434, Entropy Loss: 1.0575015544891357
Update: 468, Loss: 0.0933627113699913, Policy Loss: -0.04766623675823212, Value Loss: 0.29999932646751404, Entropy Loss: 0.8970717191696167
Update: 469, Loss: 0.1386347860097885, Policy Loss: -0.03892343118786812, Value Loss: 0.3759137690067291, Entropy Loss: 1.039867639541626
Update: 470, Loss

                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4




Update: 486, Loss: 0.010589079931378365, Policy Loss: -0.07597603648900986, Value Loss: 0.19308465719223022, Entropy Loss: 0.997721254825592
Update: 487, Loss: 0.08747320622205734, Policy Loss: -0.043640896677970886, Value Loss: 0.28317826986312866, Entropy Loss: 1.0475029945373535
Update: 488, Loss: 0.01122464518994093, Policy Loss: -0.05000896751880646, Value Loss: 0.14172163605690002, Entropy Loss: 0.962720513343811
Update: 489, Loss: 0.14002427458763123, Policy Loss: -0.04726022854447365, Value Loss: 0.3901309669017792, Entropy Loss: 0.778099000453949
Update: 490, Loss: 0.031720589846372604, Policy Loss: -0.0785830095410347, Value Loss: 0.24182459712028503, Entropy Loss: 1.0608699321746826
Update: 491, Loss: 0.035679928958415985, Policy Loss: -0.06236526742577553, Value Loss: 0.21460109949111938, Entropy Loss: 0.9255355596542358
Update: 492, Loss: 0.16065163910388947, Policy Loss: -0.06532537937164307, Value Loss: 0.4694317579269409, Entropy Loss: 0.873885452747345
Moviepy - Buildi

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4
Update: 493, Loss: -0.00861695408821106, Policy Loss: -0.09749908745288849, Value Loss: 0.1957366168498993, Entropy Loss: 0.8986175060272217
Update: 494, Loss: 0.054064568132162094, Policy Loss: -0.06832731515169144, Value Loss: 0.26380789279937744, Entropy Loss: 0.9512062072753906
Update: 495, Loss: 0.03707762435078621, Policy Loss: -0.0625455304980278, Value Loss: 0.21971380710601807, Entropy Loss: 1.0233749151229858
Update: 496, Loss: 0.042475830763578415, Policy Loss: -0.053720925003290176, Value Loss: 0.21110792458057404, Entropy Loss: 0.9357208013534546
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4




Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-300.mp4




Update: 497, Loss: 0.13460397720336914, Policy Loss: -0.02380060963332653, Value Loss: 0.3356449604034424, Entropy Loss: 0.9417899250984192
Update: 498, Loss: 0.033962175250053406, Policy Loss: -0.061195582151412964, Value Loss: 0.21073821187019348, Entropy Loss: 1.0211350917816162
Update: 499, Loss: 0.05563916265964508, Policy Loss: -0.051709093153476715, Value Loss: 0.23280854523181915, Entropy Loss: 0.9056017398834229
Update: 500, Loss: 0.0650472640991211, Policy Loss: -0.07159626483917236, Value Loss: 0.29168614745140076, Entropy Loss: 0.9199545383453369
Update: 501, Loss: 0.04661082476377487, Policy Loss: -0.05754489079117775, Value Loss: 0.2270359992980957, Entropy Loss: 0.936228334903717
Update: 502, Loss: 0.0486128143966198, Policy Loss: -0.03089360147714615, Value Loss: 0.17558762431144714, Entropy Loss: 0.8287396430969238
Update: 503, Loss: 0.051711343228816986, Policy Loss: -0.05223289504647255, Value Loss: 0.22686581313610077, Entropy Loss: 0.9488670229911804
Update: 504, L

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4
Update: 786, Loss: 0.0752369835972786, Policy Loss: -0.052737921476364136, Value Loss: 0.2706286311149597, Entropy Loss: 0.7339409589767456
Update: 787, Loss: 0.06341055035591125, Policy Loss: -0.05831415206193924, Value Loss: 0.25781765580177307, Entropy Loss: 0.7184128165245056
Update: 788, Loss: 0.018204234540462494, Policy Loss: -0.03014976903796196, Value Loss: 0.1124303862452507, Entropy Loss: 0.7861188650131226
Update: 789, Loss: 0.026188641786575317, Policy Loss: -0.03065701574087143, Value Loss: 0.1299077868461609, Entropy Loss: 0.8108235597610474
Update: 790, Loss: 0.031247572973370552, Policy Loss: -0.056557394564151764, Value Loss: 0.19048427045345306, Entropy Loss: 0.7437167167663574
Update: 791, Loss: 0.02903391420841217, Policy Loss: -0.059692200273275375, Value Loss: 0.19419406354427338, Entropy Loss: 0.8370916843414307
Update: 792, Loss: -0.033873982727527

                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4
Update: 800, Loss: 0.06710430979728699, Policy Loss: -0.04992540180683136, Value Loss: 0.25124725699424744, Entropy Loss: 0.8593918681144714
Update: 801, Loss: 0.041739776730537415, Policy Loss: -0.04715967923402786, Value Loss: 0.1968585103750229, Entropy Loss: 0.9529799222946167
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4




Update: 802, Loss: 0.0567011795938015, Policy Loss: -0.03941110521554947, Value Loss: 0.20846439898014069, Entropy Loss: 0.8119915723800659
Update: 803, Loss: -0.03117687627673149, Policy Loss: -0.06802453100681305, Value Loss: 0.08906073123216629, Entropy Loss: 0.7682711482048035
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-400.mp4
Update: 804, Loss: 0.002397715114057064, Policy Loss: -0.06587793678045273, Value Loss: 0.15310722589492798, Entropy Loss: 0.8277961611747742
Update: 805, Loss: 0.060475703328847885, Policy Loss: -0.060845885425806046, Value Loss: 0.260013222694397, Entropy Loss: 0.8685025572776794
Update: 806, Loss: 0.02935612201690674, Policy Loss: -0.046135250478982925, Value Loss: 0.16952937841415405, Entropy Loss: 0.9273316264152527
Update: 807, Loss: -0.01787099987268448, Policy Loss: -0.08111713826656342, Value Loss: 0.14519190788269043, Entropy Loss: 0.9349817037582397
Update: 808, Loss: 0.08937278389930725, Policy Loss: -0.056398212909698486, Value Loss: 0.3095092177391052, Entropy Loss: 0.8983610272407532
Update: 809, Loss: -0.0029400279745459557, Policy Loss: -0.06426707655191422, Value Loss: 0.13874879479408264, Entropy Loss: 0.8047349452972412
Update: 810, Loss: 0.050459712743

                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4




Update: 1108, Loss: 0.01338792685419321, Policy Loss: -0.05044576898217201, Value Loss: 0.145193412899971, Entropy Loss: 0.8763010501861572
Update: 1109, Loss: 0.03022255003452301, Policy Loss: -0.05410041660070419, Value Loss: 0.18404413759708405, Entropy Loss: 0.7699101567268372
Update: 1110, Loss: 0.03274264186620712, Policy Loss: -0.06527187675237656, Value Loss: 0.21314433217048645, Entropy Loss: 0.8557648658752441
Update: 1111, Loss: 0.05061674490571022, Policy Loss: -0.045545514672994614, Value Loss: 0.20968954265117645, Entropy Loss: 0.8682512044906616
Update: 1112, Loss: 0.06056862697005272, Policy Loss: -0.033927008509635925, Value Loss: 0.20173677802085876, Entropy Loss: 0.6372754573822021
Update: 1113, Loss: 0.027618613094091415, Policy Loss: -0.0598493292927742, Value Loss: 0.19332008063793182, Entropy Loss: 0.9192097187042236
Update: 1114, Loss: -0.009932562708854675, Policy Loss: -0.08110977709293365, Value Loss: 0.1578812152147293, Entropy Loss: 0.7763393521308899
Updat

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4
Update: 1136, Loss: 0.01922471821308136, Policy Loss: -0.06272009015083313, Value Loss: 0.17815713584423065, Entropy Loss: 0.7133759260177612
Update: 1137, Loss: 0.007122846785932779, Policy Loss: -0.06781173497438431, Value Loss: 0.1642673909664154, Entropy Loss: 0.7199113965034485
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4




Update: 1138, Loss: 0.04445093125104904, Policy Loss: -0.056315016001462936, Value Loss: 0.21347497403621674, Entropy Loss: 0.597153902053833
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-500.mp4
Update: 1139, Loss: 0.017666637897491455, Policy Loss: -0.05892478674650192, Value Loss: 0.16839537024497986, Entropy Loss: 0.7606261372566223
Update: 1140, Loss: 0.027572087943553925, Policy Loss: -0.05813390761613846, Value Loss: 0.18538206815719604, Entropy Loss: 0.6985039114952087
Update: 1141, Loss: 0.03284141421318054, Policy Loss: -0.04991224780678749, Value Loss: 0.180679589509964, Entropy Loss: 0.7586132884025574
Update: 1142, Loss: 0.05652294680476189, Policy Loss: -0.005580287426710129, Value Loss: 0.139812171459198, Entropy Loss: 0.7802849411964417
Update: 1143, Loss: 0.019892975687980652, Policy Loss: -0.0654866173863411, Value Loss: 0.18585386872291565, Entropy Loss: 0.7547340989112854
Update: 1144, Loss: 0.037127021700143814, Policy Loss: -0.025021716952323914, Value Loss: 0.13915497064590454, Entropy Loss: 0.7428745627403259
Update: 1145, Loss: 0.0298790819

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-600.mp4
Update: 1503, Loss: 0.010682931169867516, Policy Loss: -0.06218395009636879, Value Loss: 0.1599186807870865, Entropy Loss: 0.7092459797859192
Update: 1504, Loss: 0.04637942090630531, Policy Loss: -0.04790470749139786, Value Loss: 0.2027411311864853, Entropy Loss: 0.7086438536643982
Update: 1505, Loss: 0.017968889325857162, Policy Loss: -0.0598650686442852, Value Loss: 0.17213265597820282, Entropy Loss: 0.8232369422912598
Update: 1506, Loss: 0.030514143407344818, Policy Loss: -0.06552258878946304, Value Loss: 0.20854061841964722, Entropy Loss: 0.8233577013015747
Update: 1507, Loss: 0.012935053557157516, Policy Loss: -0.06182848662137985, Value Loss: 0.16408559679985046, Entropy Loss: 0.7279257774353027
Update: 1508, Loss: -0.005034047178924084, Policy Loss: -0.06505049020051956, Value Loss: 0.136583611369133, Entropy Loss: 0.8275362849235535
Update: 1509, Loss: 0.0378766171

                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-600.mp4




Update: 1531, Loss: 0.009213689714670181, Policy Loss: -0.06462036073207855, Value Loss: 0.1612270176410675, Entropy Loss: 0.6779459118843079
Update: 1532, Loss: -0.001588253304362297, Policy Loss: -0.05373448505997658, Value Loss: 0.11887341737747192, Entropy Loss: 0.7290477156639099
Update: 1533, Loss: 0.02136528678238392, Policy Loss: -0.04874572157859802, Value Loss: 0.1551705300807953, Entropy Loss: 0.7474257349967957
Update: 1534, Loss: 0.04650631546974182, Policy Loss: -0.04266153275966644, Value Loss: 0.19424636662006378, Entropy Loss: 0.7955333590507507
Update: 1535, Loss: 0.015417124144732952, Policy Loss: -0.05784669145941734, Value Loss: 0.16227340698242188, Entropy Loss: 0.7872887849807739
Update: 1536, Loss: -0.0056520551443099976, Policy Loss: -0.05973081290721893, Value Loss: 0.12084560096263885, Entropy Loss: 0.6344043016433716
Update: 1537, Loss: 0.051197152584791183, Policy Loss: -0.041829466819763184, Value Loss: 0.1988677978515625, Entropy Loss: 0.6407279968261719


                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-600.mp4
Update: 1545, Loss: 0.031749241054058075, Policy Loss: -0.05339476093649864, Value Loss: 0.18349719047546387, Entropy Loss: 0.6604592800140381
Update: 1546, Loss: 0.02983837015926838, Policy Loss: -0.04575379937887192, Value Loss: 0.1633863002061844, Entropy Loss: 0.6100980043411255
Update: 1547, Loss: 0.008083516731858253, Policy Loss: -0.06693942844867706, Value Loss: 0.16845515370368958, Entropy Loss: 0.9204631447792053
Update: 1548, Loss: 0.04815542697906494, Policy Loss: -0.06389733403921127, Value Loss: 0.23923301696777344, Entropy Loss: 0.7563748359680176
Update: 1549, Loss: 0.021463755518198013, Policy Loss: -0.058924250304698944, Value Loss: 0.17742928862571716, Entropy Loss: 0.832663893699646
Update: 1550, Loss: 0.06876826286315918, Policy Loss: -0.06164061278104782, Value Loss: 0.2759077250957489, Entropy Loss: 0.7544984817504883
Update: 1551, Loss: 0.0772729367

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-600.mp4
Update: 1556, Loss: 0.03724789619445801, Policy Loss: -0.0630849301815033, Value Loss: 0.21710461378097534, Entropy Loss: 0.8219478726387024
Update: 1557, Loss: 0.03923529013991356, Policy Loss: -0.062252454459667206, Value Loss: 0.22048796713352203, Entropy Loss: 0.875623881816864
Update: 1558, Loss: 0.07819747924804688, Policy Loss: -0.05325259640812874, Value Loss: 0.27795323729515076, Entropy Loss: 0.7526543736457825
Update: 1559, Loss: 0.054324738681316376, Policy Loss: -0.040622107684612274, Value Loss: 0.20625421404838562, Entropy Loss: 0.8180261850357056
Update: 1560, Loss: 0.04041304439306259, Policy Loss: -0.04592091217637062, Value Loss: 0.1867561638355255, Entropy Loss: 0.704412579536438
Update: 1561, Loss: 0.007900227792561054, Policy Loss: -0.06571492552757263, Value Loss: 0.16325229406356812, Entropy Loss: 0.801099419593811
Update: 1562, Loss: 0.030140357092

                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-700.mp4
Update: 1960, Loss: 0.07345007359981537, Policy Loss: -0.04357075318694115, Value Loss: 0.25086793303489685, Entropy Loss: 0.8413134813308716
Update: 1961, Loss: 0.038691453635692596, Policy Loss: -0.043218038976192474, Value Loss: 0.17657604813575745, Entropy Loss: 0.6378533244132996
Update: 1962, Loss: 0.04372693970799446, Policy Loss: -0.019813066348433495, Value Loss: 0.14305993914604187, Entropy Loss: 0.798996090888977
Update: 1963, Loss: -0.010661743581295013, Policy Loss: -0.06941118836402893, Value Loss: 0.13598541915416718, Entropy Loss: 0.924326479434967
Update: 1964, Loss: 0.06473011523485184, Policy Loss: -0.057914599776268005, Value Loss: 0.26136988401412964, Entropy Loss: 0.804023027420044
Update: 1965, Loss: 0.012243110686540604, Policy Loss: -0.062339067459106445, Value Loss: 0.16354840993881226, Entropy Loss: 0.7192026376724243
Update: 1966, Loss: -0.00243

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-700.mp4
Update: 1968, Loss: 0.03557153791189194, Policy Loss: -0.043163709342479706, Value Loss: 0.17340968549251556, Entropy Loss: 0.796959400177002
Update: 1969, Loss: 0.06020074710249901, Policy Loss: -0.05018353462219238, Value Loss: 0.23450249433517456, Entropy Loss: 0.686696469783783
Update: 1970, Loss: 0.012493496760725975, Policy Loss: -0.05441233515739441, Value Loss: 0.15180440247058868, Entropy Loss: 0.89963698387146
Update: 1971, Loss: 0.02832116186618805, Policy Loss: -0.059069789946079254, Value Loss: 0.19402822852134705, Entropy Loss: 0.9623162746429443
Update: 1972, Loss: 0.03641330450773239, Policy Loss: -0.0572446808218956, Value Loss: 0.20333242416381836, Entropy Loss: 0.8008227348327637
Update: 1973, Loss: 0.023002494126558304, Policy Loss: -0.06867194920778275, Value Loss: 0.19773361086845398, Entropy Loss: 0.7192361354827881
Update: 1974, Loss: 0.030146826058

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-700.mp4
Update: 1981, Loss: 0.04978681728243828, Policy Loss: -0.015293335542082787, Value Loss: 0.14596709609031677, Entropy Loss: 0.7903392910957336
Update: 1982, Loss: 0.0016378848813474178, Policy Loss: -0.07053443789482117, Value Loss: 0.15934528410434723, Entropy Loss: 0.7500319480895996
Update: 1983, Loss: 0.11459054797887802, Policy Loss: -0.025183044373989105, Value Loss: 0.2942214608192444, Entropy Loss: 0.7337138652801514
Update: 1984, Loss: 0.08453567326068878, Policy Loss: -0.06084435060620308, Value Loss: 0.3065117597579956, Entropy Loss: 0.7875860929489136
Update: 1985, Loss: -0.007736491970717907, Policy Loss: -0.061937782913446426, Value Loss: 0.12146611511707306, Entropy Loss: 0.6531766653060913
Update: 1986, Loss: 0.05509502440690994, Policy Loss: -0.022572815418243408, Value Loss: 0.17284907400608063, Entropy Loss: 0.8756698966026306
Update: 1987, Loss: 0.03132

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-700.mp4
Update: 2007, Loss: 0.005718031898140907, Policy Loss: -0.059375982731580734, Value Loss: 0.14631669223308563, Entropy Loss: 0.8064331412315369
Update: 2008, Loss: 0.050390906631946564, Policy Loss: -0.02874027192592621, Value Loss: 0.172654390335083, Entropy Loss: 0.7196016311645508
Update: 2009, Loss: 0.06423132866621017, Policy Loss: -0.017050759866833687, Value Loss: 0.17857806384563446, Entropy Loss: 0.8006943464279175
Update: 2010, Loss: 0.04314255714416504, Policy Loss: -0.019349779933691025, Value Loss: 0.1394546777009964, Entropy Loss: 0.7235001921653748
Update: 2011, Loss: 0.05271132290363312, Policy Loss: -0.021321002393960953, Value Loss: 0.16677077114582062, Entropy Loss: 0.9353062510490417
Update: 2012, Loss: 0.06723637133836746, Policy Loss: 0.009555894881486893, Value Loss: 0.13467799127101898, Entropy Loss: 0.9658520817756653
Update: 2013, Loss: 0.00893910

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-800.mp4
Update: 2413, Loss: 0.10245665907859802, Policy Loss: 0.027013111859560013, Value Loss: 0.1725122481584549, Entropy Loss: 1.081257939338684
Update: 2414, Loss: 0.03942533954977989, Policy Loss: -0.051411788910627365, Value Loss: 0.19816233217716217, Entropy Loss: 0.8244037628173828
Update: 2415, Loss: 0.07037428021430969, Policy Loss: -0.04492472857236862, Value Loss: 0.24516594409942627, Entropy Loss: 0.7283961176872253
Update: 2416, Loss: 0.013030446134507656, Policy Loss: -0.06215071305632591, Value Loss: 0.16790905594825745, Entropy Loss: 0.8773369193077087
Update: 2417, Loss: 0.02138209342956543, Policy Loss: -0.049931108951568604, Value Loss: 0.15707969665527344, Entropy Loss: 0.7226646542549133
Update: 2418, Loss: 0.08897050470113754, Policy Loss: -0.03293732553720474, Value Loss: 0.2601736783981323, Entropy Loss: 0.817901074886322
Update: 2419, Loss: 0.021489448845

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-800.mp4
Update: 2423, Loss: 0.05833900347352028, Policy Loss: -0.05398239567875862, Value Loss: 0.24396184086799622, Entropy Loss: 0.965952455997467
Update: 2424, Loss: 0.007287538610398769, Policy Loss: -0.0786801427602768, Value Loss: 0.18974941968917847, Entropy Loss: 0.8907028436660767
Update: 2425, Loss: 0.06207853555679321, Policy Loss: -0.03102164715528488, Value Loss: 0.20061218738555908, Entropy Loss: 0.7205910682678223
Update: 2426, Loss: 0.06501317769289017, Policy Loss: -0.038072310388088226, Value Loss: 0.21837680041790009, Entropy Loss: 0.6102914214134216
Update: 2427, Loss: 0.021624770015478134, Policy Loss: -0.05124820023775101, Value Loss: 0.1586061269044876, Entropy Loss: 0.6430093050003052
Update: 2428, Loss: 0.0615132637321949, Policy Loss: -0.019805196672677994, Value Loss: 0.18014177680015564, Entropy Loss: 0.875243067741394
Update: 2429, Loss: -0.00484601594

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-800.mp4
Update: 2446, Loss: 0.029005680233240128, Policy Loss: -0.04728272557258606, Value Loss: 0.17009221017360687, Entropy Loss: 0.875769853591919
Update: 2447, Loss: 0.03411177545785904, Policy Loss: -0.05834750086069107, Value Loss: 0.2040046751499176, Entropy Loss: 0.9543060660362244
Update: 2448, Loss: 0.0008293036371469498, Policy Loss: -0.06830590963363647, Value Loss: 0.15706968307495117, Entropy Loss: 0.9399628043174744
Update: 2449, Loss: 0.025777261704206467, Policy Loss: -0.02905777096748352, Value Loss: 0.12762822210788727, Entropy Loss: 0.8979078531265259
Update: 2450, Loss: -0.0034840423613786697, Policy Loss: -0.06033385917544365, Value Loss: 0.13358622789382935, Entropy Loss: 0.9943296909332275
Update: 2451, Loss: 0.0250588096678257, Policy Loss: -0.04167831689119339, Value Loss: 0.1494605839252472, Entropy Loss: 0.7993165254592896
Update: 2452, Loss: 0.02777867

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-800.mp4
Update: 2461, Loss: 0.08842015266418457, Policy Loss: -0.041992295533418655, Value Loss: 0.27727842330932617, Entropy Loss: 0.8226770758628845
Update: 2462, Loss: 0.02397814765572548, Policy Loss: -0.05617036670446396, Value Loss: 0.17739343643188477, Entropy Loss: 0.8548203110694885
Update: 2463, Loss: 0.04994358867406845, Policy Loss: -0.01794211007654667, Value Loss: 0.15434575080871582, Entropy Loss: 0.928718090057373
Update: 2464, Loss: 0.04357113689184189, Policy Loss: -0.04653787612915039, Value Loss: 0.19852440059185028, Entropy Loss: 0.9153189659118652
Update: 2465, Loss: 0.042269185185432434, Policy Loss: -0.039137355983257294, Value Loss: 0.17708627879619598, Entropy Loss: 0.7136596441268921
Update: 2466, Loss: 0.046210143715143204, Policy Loss: -0.05415092781186104, Value Loss: 0.22033986449241638, Entropy Loss: 0.9808862209320068
Update: 2467, Loss: 0.11609031

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-900.mp4
Update: 2873, Loss: 0.07572091370820999, Policy Loss: -0.055132731795310974, Value Loss: 0.28133270144462585, Entropy Loss: 0.981270432472229
Update: 2874, Loss: 0.05870401859283447, Policy Loss: -0.04737941920757294, Value Loss: 0.22798973321914673, Entropy Loss: 0.7911427021026611
Update: 2875, Loss: 0.035205285996198654, Policy Loss: -0.05921567231416702, Value Loss: 0.2033272087574005, Entropy Loss: 0.7242645025253296
Update: 2876, Loss: 0.03341486304998398, Policy Loss: -0.041110776364803314, Value Loss: 0.16107243299484253, Entropy Loss: 0.6010576486587524
Update: 2877, Loss: 0.05070137232542038, Policy Loss: -0.03990557789802551, Value Loss: 0.19453229010105133, Entropy Loss: 0.6659196615219116
Update: 2878, Loss: 0.0608566477894783, Policy Loss: -0.032194964587688446, Value Loss: 0.2020118534564972, Entropy Loss: 0.79543137550354
Update: 2879, Loss: 0.0322851948440

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-900.mp4
Update: 2886, Loss: 0.01788311079144478, Policy Loss: -0.06584864854812622, Value Loss: 0.18599534034729004, Entropy Loss: 0.9265910387039185
Update: 2887, Loss: 0.06391245126724243, Policy Loss: -0.023615097627043724, Value Loss: 0.19028663635253906, Entropy Loss: 0.7615769505500793
Update: 2888, Loss: 0.024329448118805885, Policy Loss: -0.0418267585337162, Value Loss: 0.1500357687473297, Entropy Loss: 0.8861678242683411
Update: 2889, Loss: 0.06597036123275757, Policy Loss: -0.06473919749259949, Value Loss: 0.2798559069633484, Entropy Loss: 0.9218393564224243
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-900.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-900.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-900.mp4




Update: 2890, Loss: 0.0643688440322876, Policy Loss: -0.037886619567871094, Value Loss: 0.2220626026391983, Entropy Loss: 0.8775836825370789
Update: 2891, Loss: 0.036582209169864655, Policy Loss: -0.03421686217188835, Value Loss: 0.15984858572483063, Entropy Loss: 0.9125220775604248
Update: 2892, Loss: 0.0941460132598877, Policy Loss: -0.04665542393922806, Value Loss: 0.2971177101135254, Entropy Loss: 0.7757418751716614
Update: 2893, Loss: 0.08996206521987915, Policy Loss: -0.0010772459208965302, Value Loss: 0.19659020006656647, Entropy Loss: 0.7255796194076538
Update: 2894, Loss: 0.050243277102708817, Policy Loss: -0.020131384953856468, Value Loss: 0.15912245213985443, Entropy Loss: 0.9186561107635498
Update: 2895, Loss: 0.03709176182746887, Policy Loss: -0.05236419290304184, Value Loss: 0.19429466128349304, Entropy Loss: 0.7691377401351929
Update: 2896, Loss: 0.048326727002859116, Policy Loss: -0.06599431484937668, Value Loss: 0.24343115091323853, Entropy Loss: 0.7394533157348633
Upd

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-900.mp4
Update: 2911, Loss: 0.021791351959109306, Policy Loss: -0.04105474799871445, Value Loss: 0.1436120718717575, Entropy Loss: 0.8959935903549194
Update: 2912, Loss: 0.03647629916667938, Policy Loss: -0.028940163552761078, Value Loss: 0.14758619666099548, Entropy Loss: 0.8376636505126953
Update: 2913, Loss: 0.035777829587459564, Policy Loss: -0.05460197478532791, Value Loss: 0.19588178396224976, Entropy Loss: 0.7561087012290955
Update: 2914, Loss: 0.055138811469078064, Policy Loss: -0.036270175129175186, Value Loss: 0.19857531785964966, Entropy Loss: 0.7878677845001221
Update: 2915, Loss: 0.008686796762049198, Policy Loss: -0.06251056492328644, Value Loss: 0.15709471702575684, Entropy Loss: 0.7349997162818909
Update: 2916, Loss: 0.06860930472612381, Policy Loss: -0.03935312479734421, Value Loss: 0.23020406067371368, Entropy Loss: 0.7139604091644287
Update: 2917, Loss: 0.037957

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1000.mp4
Update: 3332, Loss: 0.02877640910446644, Policy Loss: -0.06202008202672005, Value Loss: 0.1970064342021942, Entropy Loss: 0.7706726789474487
Update: 3333, Loss: 0.03390757739543915, Policy Loss: -0.0577111579477787, Value Loss: 0.199492946267128, Entropy Loss: 0.8127740025520325
Update: 3334, Loss: 0.04195944964885712, Policy Loss: -0.03780505806207657, Value Loss: 0.17797452211380005, Entropy Loss: 0.9222752451896667
Update: 3335, Loss: 0.05183958634734154, Policy Loss: -0.0355827622115612, Value Loss: 0.19379910826683044, Entropy Loss: 0.9477205872535706
Update: 3336, Loss: 0.03940363973379135, Policy Loss: -0.05415362864732742, Value Loss: 0.204585999250412, Entropy Loss: 0.8735730051994324
Update: 3337, Loss: 0.0830099880695343, Policy Loss: -0.04377608373761177, Value Loss: 0.27180740237236023, Entropy Loss: 0.9117633700370789
Update: 3338, Loss: 0.04681805893778801,

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1000.mp4
Update: 3344, Loss: 0.0010917922481894493, Policy Loss: -0.05304710566997528, Value Loss: 0.12472107261419296, Entropy Loss: 0.8221638798713684
Update: 3345, Loss: 0.023906292393803596, Policy Loss: -0.06345000118017197, Value Loss: 0.19112004339694977, Entropy Loss: 0.8203728199005127
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1000.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1000.mp4



                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1000.mp4
Update: 3346, Loss: 0.017191288992762566, Policy Loss: -0.043335288763046265, Value Loss: 0.1368977129459381, Entropy Loss: 0.7922278642654419
Update: 3347, Loss: 0.05917096138000488, Policy Loss: -0.04559815675020218, Value Loss: 0.22335240244865417, Entropy Loss: 0.6907083988189697
Update: 3348, Loss: 0.027344781905412674, Policy Loss: -0.04931804910302162, Value Loss: 0.16393868625164032, Entropy Loss: 0.5306511521339417
Update: 3349, Loss: 0.04162844270467758, Policy Loss: -0.0430326946079731, Value Loss: 0.18147820234298706, Entropy Loss: 0.6077964305877686
Update: 3350, Loss: 0.06702884286642075, Policy Loss: -0.05602969601750374, Value Loss: 0.26331961154937744, Entropy Loss: 0.860126793384552
Update: 3351, Loss: 0.08714190125465393, Policy Loss: -0.03540544956922531, Value Loss: 0.26254981756210327, Entropy Loss: 0.8727558851242065
Update: 3352, Loss: 0.0718870908

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1000.mp4
Update: 3380, Loss: 0.0825878381729126, Policy Loss: -0.025288065895438194, Value Loss: 0.22961506247520447, Entropy Loss: 0.6931625604629517
Update: 3381, Loss: 0.05405833572149277, Policy Loss: -0.05981262028217316, Value Loss: 0.24601316452026367, Entropy Loss: 0.9135627746582031
Update: 3382, Loss: 0.0837879627943039, Policy Loss: -0.015017250552773476, Value Loss: 0.21340365707874298, Entropy Loss: 0.7896615266799927
Update: 3383, Loss: 0.005436856299638748, Policy Loss: -0.058721303939819336, Value Loss: 0.1444135308265686, Entropy Loss: 0.804860532283783
Update: 3384, Loss: 0.05302119255065918, Policy Loss: -0.04679795354604721, Value Loss: 0.2167055606842041, Entropy Loss: 0.8533632159233093
Update: 3385, Loss: 0.02222614735364914, Policy Loss: -0.05731843411922455, Value Loss: 0.17554493248462677, Entropy Loss: 0.8227885961532593
Update: 3386, Loss: 0.04912798479

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1100.mp4
Update: 3780, Loss: 0.03351005166769028, Policy Loss: -0.059042468667030334, Value Loss: 0.20096659660339355, Entropy Loss: 0.7930778861045837
Update: 3781, Loss: 0.05871758982539177, Policy Loss: -0.004156671464443207, Value Loss: 0.14298002421855927, Entropy Loss: 0.8615751266479492
Update: 3782, Loss: 0.014454180374741554, Policy Loss: -0.050378672778606415, Value Loss: 0.14210297167301178, Entropy Loss: 0.6218633055686951
Update: 3783, Loss: 0.02435469441115856, Policy Loss: -0.04640953987836838, Value Loss: 0.15850041806697845, Entropy Loss: 0.848597526550293
Update: 3784, Loss: 0.0609159991145134, Policy Loss: -0.04613012447953224, Value Loss: 0.22905901074409485, Entropy Loss: 0.7483385801315308
Update: 3785, Loss: 0.06195339933037758, Policy Loss: -0.03797069936990738, Value Loss: 0.2189105600118637, Entropy Loss: 0.9531180262565613
Update: 3786, Loss: 0.023531761

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1100.mp4
Update: 3787, Loss: 0.018417848274111748, Policy Loss: -0.050238724797964096, Value Loss: 0.15761299431324005, Entropy Loss: 1.0149924755096436
Update: 3788, Loss: 0.018347736448049545, Policy Loss: -0.06366205215454102, Value Loss: 0.18066181242465973, Entropy Loss: 0.832111656665802
Update: 3789, Loss: 0.06239575147628784, Policy Loss: -0.0038946233689785004, Value Loss: 0.14719733595848083, Entropy Loss: 0.7308290004730225
Update: 3790, Loss: 0.08218851685523987, Policy Loss: -0.020793933421373367, Value Loss: 0.22517891228199005, Entropy Loss: 0.9607005715370178
Update: 3791, Loss: 0.005879757925868034, Policy Loss: -0.06034481152892113, Value Loss: 0.1518998146057129, Entropy Loss: 0.9725338220596313
Update: 3792, Loss: 0.014560464769601822, Policy Loss: -0.05809396132826805, Value Loss: 0.1617911458015442, Entropy Loss: 0.8241146802902222
Update: 3793, Loss: 0.07302

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1100.mp4
Update: 3807, Loss: 0.01161622442305088, Policy Loss: -0.06877510249614716, Value Loss: 0.1798524260520935, Entropy Loss: 0.9534885883331299
Update: 3808, Loss: 0.02561044506728649, Policy Loss: -0.06689152121543884, Value Loss: 0.20509839057922363, Entropy Loss: 1.0047229528427124
Update: 3809, Loss: 0.06784503906965256, Policy Loss: -0.04994497448205948, Value Loss: 0.25295841693878174, Entropy Loss: 0.8689193725585938
Update: 3810, Loss: 0.08133227378129959, Policy Loss: -0.03250022232532501, Value Loss: 0.2485591322183609, Entropy Loss: 1.0447072982788086
Update: 3811, Loss: 0.04630102962255478, Policy Loss: -0.03011872060596943, Value Loss: 0.1679818481206894, Entropy Loss: 0.7571173906326294
Update: 3812, Loss: 0.09735194593667984, Policy Loss: -0.020358700305223465, Value Loss: 0.24866418540477753, Entropy Loss: 0.6621445417404175
Update: 3813, Loss: 0.063665941357

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1100.mp4
Update: 3835, Loss: 0.042658790946006775, Policy Loss: -0.08628357946872711, Value Loss: 0.2757425010204315, Entropy Loss: 0.8928879499435425
Update: 3836, Loss: 0.045354172587394714, Policy Loss: -0.04304610192775726, Value Loss: 0.19500556588172913, Entropy Loss: 0.9102510213851929
Update: 3837, Loss: 0.05754078924655914, Policy Loss: -0.042303770780563354, Value Loss: 0.21537050604820251, Entropy Loss: 0.784069299697876
Update: 3838, Loss: 0.04512506723403931, Policy Loss: -0.05483344569802284, Value Loss: 0.2169801890850067, Entropy Loss: 0.8531581163406372
Update: 3839, Loss: 0.05184802785515785, Policy Loss: -0.038862444460392, Value Loss: 0.19977743923664093, Entropy Loss: 0.9178247451782227
Update: 3840, Loss: 0.02987678349018097, Policy Loss: -0.03428124636411667, Value Loss: 0.1478734165430069, Entropy Loss: 0.9778679609298706
Update: 3841, Loss: 0.0672514140605

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1200.mp4
Update: 4226, Loss: 0.04005726799368858, Policy Loss: -0.028606414794921875, Value Loss: 0.1580231934785843, Entropy Loss: 1.034791350364685
Update: 4227, Loss: 0.038614172488451004, Policy Loss: -0.05942352116107941, Value Loss: 0.21206103265285492, Entropy Loss: 0.7992823719978333
Update: 4228, Loss: 0.024885619059205055, Policy Loss: -0.0539531409740448, Value Loss: 0.17552191019058228, Entropy Loss: 0.8922194838523865
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1200.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1200.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1200.mp4
Update: 4229, Loss: 0.07001999020576477, Policy Loss: -0.006714651361107826, Value Loss: 0.17073670029640198, Entropy Loss: 0.8633706569671631
Update: 4230, Loss: 0.041289158165454865, Policy Loss: -0.05056571215391159, Value Loss: 0.2008061408996582, Entropy Loss: 0.8548200726509094
Update: 4231, Loss: 0.059048011898994446, Policy Loss: -0.03953801468014717, Value Loss: 0.21334466338157654, Entropy Loss: 0.8086307048797607
Update: 4232, Loss: 0.08056064695119858, Policy Loss: -0.021769287064671516, Value Loss: 0.22254490852355957, Entropy Loss: 0.8942524790763855
Update: 4233, Loss: 0.039297424256801605, Policy Loss: -0.03976863995194435, Value Loss: 0.17421838641166687, Entropy Loss: 0.8043129444122314
Update: 4234, Loss: 0.061208151280879974, Policy Loss: -0.015649527311325073, Value Loss: 0.16804637014865875, Entropy Loss: 0.7165504693984985
Update: 4235, Loss: 0.0929

                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1200.mp4
Update: 4258, Loss: 0.015088217332959175, Policy Loss: -0.06768600642681122, Value Loss: 0.18500278890132904, Entropy Loss: 0.9727170467376709
Update: 4259, Loss: 0.08154630661010742, Policy Loss: -0.057364314794540405, Value Loss: 0.29486140608787537, Entropy Loss: 0.8520078659057617
Update: 4260, Loss: 0.05035081133246422, Policy Loss: -0.028570078313350677, Value Loss: 0.17741455137729645, Entropy Loss: 0.9786385893821716
Update: 4261, Loss: 0.024010200053453445, Policy Loss: -0.04564964771270752, Value Loss: 0.15473723411560059, Entropy Loss: 0.770876944065094
Update: 4262, Loss: 0.07091672718524933, Policy Loss: -0.04214976355433464, Value Loss: 0.2417387068271637, Entropy Loss: 0.7802861928939819
Update: 4263, Loss: 0.02038872055709362, Policy Loss: -0.04486839845776558, Value Loss: 0.14719648659229279, Entropy Loss: 0.8341124057769775
Update: 4264, Loss: 0.02001470

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1200.mp4
Update: 4276, Loss: 0.056716736406087875, Policy Loss: -0.058585166931152344, Value Loss: 0.24614942073822021, Entropy Loss: 0.7772808074951172
Update: 4277, Loss: 0.0586233027279377, Policy Loss: -0.029642581939697266, Value Loss: 0.1935838758945465, Entropy Loss: 0.8526053428649902
Update: 4278, Loss: 0.015253441408276558, Policy Loss: -0.06824252009391785, Value Loss: 0.18314200639724731, Entropy Loss: 0.8075041770935059
Update: 4279, Loss: 0.0034835701808333397, Policy Loss: -0.05330833047628403, Value Loss: 0.13062173128128052, Entropy Loss: 0.8518965244293213
Update: 4280, Loss: 0.013974128291010857, Policy Loss: -0.06741610169410706, Value Loss: 0.1789340078830719, Entropy Loss: 0.8076773881912231
Update: 4281, Loss: 0.029918082058429718, Policy Loss: -0.05533779785037041, Value Loss: 0.18472135066986084, Entropy Loss: 0.7104794979095459
Update: 4282, Loss: 0.04933

                                                                 

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1300.mp4
Update: 4676, Loss: 0.06254150718450546, Policy Loss: -0.055461324751377106, Value Loss: 0.25618481636047363, Entropy Loss: 1.0089577436447144
Update: 4677, Loss: 0.11797260493040085, Policy Loss: 0.03023786097764969, Value Loss: 0.1874188482761383, Entropy Loss: 0.5974679589271545
Update: 4678, Loss: 0.0399114154279232, Policy Loss: -0.05064314603805542, Value Loss: 0.19444908201694489, Entropy Loss: 0.6669979095458984
Update: 4679, Loss: 0.09589971601963043, Policy Loss: -0.04360327124595642, Value Loss: 0.2919365465641022, Entropy Loss: 0.6465288996696472
Update: 4680, Loss: 0.07770780473947525, Policy Loss: -0.025498954579234123, Value Loss: 0.22430233657360077, Entropy Loss: 0.8944408297538757
Update: 4681, Loss: 0.03721575438976288, Policy Loss: -0.04032585024833679, Value Loss: 0.17164218425750732, Entropy Loss: 0.8279489874839783
Update: 4682, Loss: 0.040747441351

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1300.mp4
Update: 4690, Loss: 0.09168105572462082, Policy Loss: -0.04847182333469391, Value Loss: 0.29908740520477295, Entropy Loss: 0.9390825033187866
Update: 4691, Loss: 0.07038940489292145, Policy Loss: -0.04347151145339012, Value Loss: 0.24163037538528442, Entropy Loss: 0.6954270601272583
Update: 4692, Loss: 0.06507881730794907, Policy Loss: -0.020158719271421432, Value Loss: 0.18651986122131348, Entropy Loss: 0.8022388219833374
Update: 4693, Loss: 0.06981667131185532, Policy Loss: -0.045816875994205475, Value Loss: 0.24629828333854675, Entropy Loss: 0.7515592575073242
Update: 4694, Loss: 0.03742697462439537, Policy Loss: -0.04771607741713524, Value Loss: 0.18695877492427826, Entropy Loss: 0.833633542060852
Update: 4695, Loss: 0.031451765447854996, Policy Loss: -0.05833122879266739, Value Loss: 0.19669336080551147, Entropy Loss: 0.8563687205314636
Update: 4696, Loss: 0.05028571

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1300.mp4
Update: 4727, Loss: 0.04754618927836418, Policy Loss: -0.041219279170036316, Value Loss: 0.19308845698833466, Entropy Loss: 0.7778759002685547
Update: 4728, Loss: -0.0036926642060279846, Policy Loss: -0.059132277965545654, Value Loss: 0.12702322006225586, Entropy Loss: 0.8071996569633484
Update: 4729, Loss: 0.07816436886787415, Policy Loss: -0.03813651204109192, Value Loss: 0.24654021859169006, Entropy Loss: 0.6969228982925415
Update: 4730, Loss: 0.07808085530996323, Policy Loss: -0.024134643375873566, Value Loss: 0.21939748525619507, Entropy Loss: 0.748324453830719
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1300.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1300.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1300.mp4
Update: 4731, Loss: 0.04594184458255768, Policy Loss: -0.046512044966220856, Value Loss: 0.20408600568771362, Entropy Loss: 0.9589114189147949
Update: 4732, Loss: 0.05423365905880928, Policy Loss: -0.05377370864152908, Value Loss: 0.23509687185287476, Entropy Loss: 0.9541068077087402
Update: 4733, Loss: 0.0750911608338356, Policy Loss: -0.02413829229772091, Value Loss: 0.2130357176065445, Entropy Loss: 0.7288400530815125
Update: 4734, Loss: 0.03570561110973358, Policy Loss: -0.055859122425317764, Value Loss: 0.20189668238162994, Entropy Loss: 0.9383606314659119
Update: 4735, Loss: 0.06891603022813797, Policy Loss: -0.03156948834657669, Value Loss: 0.214863121509552, Entropy Loss: 0.6946039199829102
Update: 4736, Loss: 0.016269823536276817, Policy Loss: -0.057648129761219025, Value Loss: 0.16760492324829102, Entropy Loss: 0.9884508848190308
Update: 4737, Loss: 0.0294470936

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1400.mp4
Update: 5134, Loss: 0.05381878465414047, Policy Loss: -0.05173702910542488, Value Loss: 0.22742445766925812, Entropy Loss: 0.815641462802887
Update: 5135, Loss: 0.026507992297410965, Policy Loss: -0.046172358095645905, Value Loss: 0.15975253283977509, Entropy Loss: 0.7195916771888733
Update: 5136, Loss: 0.01504014153033495, Policy Loss: -0.05142078548669815, Value Loss: 0.14898550510406494, Entropy Loss: 0.8031825423240662
Update: 5137, Loss: -0.01755528151988983, Policy Loss: -0.058687686920166016, Value Loss: 0.1037343442440033, Entropy Loss: 1.0734766721725464
Update: 5138, Loss: 0.05149715393781662, Policy Loss: -0.03971027582883835, Value Loss: 0.19960254430770874, Entropy Loss: 0.8593843579292297
Update: 5139, Loss: 0.03962527960538864, Policy Loss: -0.03996436297893524, Value Loss: 0.17979556322097778, Entropy Loss: 1.0308139324188232
Update: 5140, Loss: 0.05256688

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1400.mp4
Update: 5152, Loss: 0.09372351318597794, Policy Loss: -0.013347802683711052, Value Loss: 0.23067057132720947, Entropy Loss: 0.8263970017433167
Update: 5153, Loss: 0.04521225392818451, Policy Loss: -0.05416685715317726, Value Loss: 0.21744734048843384, Entropy Loss: 0.9344557523727417
Update: 5154, Loss: 0.08102738112211227, Policy Loss: -0.019407406449317932, Value Loss: 0.21438896656036377, Entropy Loss: 0.6759697198867798
Update: 5155, Loss: 0.03475932031869888, Policy Loss: -0.06382173299789429, Value Loss: 0.21476015448570251, Entropy Loss: 0.879902184009552
Update: 5156, Loss: 0.06868075579404831, Policy Loss: -0.04383005574345589, Value Loss: 0.24437376856803894, Entropy Loss: 0.9676079750061035
Update: 5157, Loss: 0.05857153236865997, Policy Loss: -0.032595373690128326, Value Loss: 0.2022991180419922, Entropy Loss: 0.9982651472091675
Update: 5158, Loss: 0.044829834

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1400.mp4
Update: 5176, Loss: 0.05391022190451622, Policy Loss: -0.0347471721470356, Value Loss: 0.19626674056053162, Entropy Loss: 0.9475979804992676
Update: 5177, Loss: 0.033601365983486176, Policy Loss: -0.05898766592144966, Value Loss: 0.20053815841674805, Entropy Loss: 0.7680046558380127
Update: 5178, Loss: 0.06704079359769821, Policy Loss: -0.028399020433425903, Value Loss: 0.20869438350200653, Entropy Loss: 0.890737771987915
Update: 5179, Loss: 0.036415815353393555, Policy Loss: -0.060532402247190475, Value Loss: 0.21200667321681976, Entropy Loss: 0.9055118560791016
Moviepy - Building video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1400.mp4.
Moviepy - Writing video /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1400.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1400.mp4
Update: 5180, Loss: 0.042447663843631744, Policy Loss: -0.031326938420534134, Value Loss: 0.16371522843837738, Entropy Loss: 0.8083013892173767
Update: 5181, Loss: 0.0215986967086792, Policy Loss: -0.056581877171993256, Value Loss: 0.1741989701986313, Entropy Loss: 0.8918910622596741
Update: 5182, Loss: 0.060776859521865845, Policy Loss: -0.04732649400830269, Value Loss: 0.232987642288208, Entropy Loss: 0.8390473127365112
Update: 5183, Loss: 0.03252986818552017, Policy Loss: -0.033629823476076126, Value Loss: 0.1516207456588745, Entropy Loss: 0.9650683403015137
Update: 5184, Loss: 0.05070209503173828, Policy Loss: -0.03366190940141678, Value Loss: 0.18333713710308075, Entropy Loss: 0.7304562926292419
Update: 5185, Loss: -0.009888975881040096, Policy Loss: -0.07936571538448334, Value Loss: 0.15689650177955627, Entropy Loss: 0.8971511125564575
Update: 5186, Loss: 0.00597023

                                                                  

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1500.mp4
Update: 5563, Loss: 0.04626983776688576, Policy Loss: -0.04879593104124069, Value Loss: 0.20626376569271088, Entropy Loss: 0.8066115379333496
Update: 5564, Loss: 0.031110815703868866, Policy Loss: -0.04330543801188469, Value Loss: 0.1682109385728836, Entropy Loss: 0.9689215421676636
Update: 5565, Loss: -0.009725840762257576, Policy Loss: -0.06219106912612915, Value Loss: 0.12419205904006958, Entropy Loss: 0.9630801677703857
Update: 5566, Loss: 0.0430082343518734, Policy Loss: -0.04257245734333992, Value Loss: 0.18780872225761414, Entropy Loss: 0.8323668241500854
Update: 5567, Loss: 0.02970053255558014, Policy Loss: -0.06306294351816177, Value Loss: 0.20457734167575836, Entropy Loss: 0.9525195956230164
Update: 5568, Loss: 0.07001492381095886, Policy Loss: -0.042233556509017944, Value Loss: 0.24183893203735352, Entropy Loss: 0.8670985698699951
Update: 5569, Loss: 0.02508478

                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1500.mp4
Update: 5604, Loss: 0.04479938745498657, Policy Loss: -0.034528397023677826, Value Loss: 0.1779802292585373, Entropy Loss: 0.9662330746650696
Update: 5605, Loss: 0.057347144931554794, Policy Loss: -0.01525186374783516, Value Loss: 0.16352255642414093, Entropy Loss: 0.9162265062332153
Update: 5606, Loss: 0.03896605595946312, Policy Loss: -0.03842464089393616, Value Loss: 0.17496357858181, Entropy Loss: 1.009109377861023
Update: 5607, Loss: -0.00916019082069397, Policy Loss: -0.06871302425861359, Value Loss: 0.13966843485832214, Entropy Loss: 1.0281383991241455
Update: 5608, Loss: 0.040107157081365585, Policy Loss: -0.028781305998563766, Value Loss: 0.15484127402305603, Entropy Loss: 0.8532174825668335
Update: 5609, Loss: 0.024790074676275253, Policy Loss: -0.043670304119586945, Value Loss: 0.15423841774463654, Entropy Loss: 0.8658831119537354
Update: 5610, Loss: 0.06068956

                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1500.mp4
Update: 5611, Loss: -0.004174687899649143, Policy Loss: -0.0708690732717514, Value Loss: 0.15506738424301147, Entropy Loss: 1.0839307308197021
Update: 5612, Loss: 0.039464469999074936, Policy Loss: -0.03922436758875847, Value Loss: 0.17188149690628052, Entropy Loss: 0.7251911759376526
Update: 5613, Loss: 0.05092056468129158, Policy Loss: -0.021028678864240646, Value Loss: 0.1599859744310379, Entropy Loss: 0.8043743371963501
Update: 5614, Loss: 0.03616650775074959, Policy Loss: -0.027344856411218643, Value Loss: 0.1464405655860901, Entropy Loss: 0.9708918929100037
Update: 5615, Loss: 0.06650661677122116, Policy Loss: -0.03836720436811447, Value Loss: 0.22651927173137665, Entropy Loss: 0.838581383228302
Update: 5616, Loss: 0.0707274079322815, Policy Loss: -0.031029976904392242, Value Loss: 0.21791279315948486, Entropy Loss: 0.7199011445045471
Update: 5617, Loss: 0.073612362

                                                                   

Moviepy - Done !
Moviepy - video ready /home/ananya-acharya/Projects/rl_atari/videos/Pong/rl-video-episode-1500.mp4
Update: 5624, Loss: 0.022846907377243042, Policy Loss: -0.05159773677587509, Value Loss: 0.16705076396465302, Entropy Loss: 0.908073902130127
Update: 5625, Loss: -0.0008414657786488533, Policy Loss: -0.06595903635025024, Value Loss: 0.14827775955200195, Entropy Loss: 0.9021309018135071
Update: 5626, Loss: 0.033091865479946136, Policy Loss: -0.027885261923074722, Value Loss: 0.13998378813266754, Entropy Loss: 0.9014768600463867
Update: 5627, Loss: 0.06753508001565933, Policy Loss: -0.042685896158218384, Value Loss: 0.23839956521987915, Entropy Loss: 0.8978806734085083
Update: 5628, Loss: 0.029971487820148468, Policy Loss: -0.050362445414066315, Value Loss: 0.17559611797332764, Entropy Loss: 0.7464125752449036
Update: 5629, Loss: 0.07940559089183807, Policy Loss: 0.0023373886942863464, Value Loss: 0.17211896181106567, Entropy Loss: 0.8991276025772095
Update: 5630, Loss: 0.0

In [41]:
envs.close()