### ENVIRONMENT

In [7]:
"""
This is a gym environment for the drone_game.
The goal is to reach randomly positioned targets.
"""

import os
from math import sin, cos, pi, sqrt
from random import randrange

import numpy as np
import gym
from gym import spaces

import pygame
from pygame.locals import *


class droneEnv(gym.Env):
    """2D drone environment (classic 4-tuple gym API) rendered with pygame.

    The drone has two thrusters and must fly to targets that are placed at
    random positions; a new target is drawn every time one is reached.

    Args:
        render_every_frame: When truthy, ``step`` draws every physics frame
            (and is therefore throttled to ``FPS`` by the pygame clock).
        mouse_target: When truthy, the target follows the mouse pointer and
            the episode time limit is raised from 20 to 1000 seconds.
    """

    def __init__(self, render_every_frame, mouse_target):
        super(droneEnv, self).__init__()

        self.render_every_frame = render_every_frame
        # Makes the target follow the mouse
        self.mouse_target = mouse_target

        # Initialize Pygame, load sprites
        pygame.init()
        self.screen = pygame.display.set_mode((800, 800))
        self.FramePerSec = pygame.time.Clock()

        # Surface.convert*() returns a NEW surface, so the result has to be
        # kept. convert_alpha() is used so sprite transparency is preserved.
        self.player = pygame.image.load(
            os.path.join("assets/sprites/drone_old.png")
        ).convert_alpha()
        self.target = pygame.image.load(
            os.path.join("assets/sprites/target_old.png")
        ).convert_alpha()

        pygame.font.init()
        self.myfont = pygame.font.SysFont("Comic Sans MS", 20)

        # Physics constants
        self.FPS = 60
        self.gravity = 0.08
        self.thruster_amplitude = 0.04
        self.diff_amplitude = 0.003
        self.thruster_mean = 0.04
        self.mass = 1
        self.arm = 25

        # Drone state, target position and episode counters
        self._reset_state()

        # Episode time limit, in the same unit as self.time (seconds)
        self.time_limit = 1000 if self.mouse_target else 20

        # 2 action thrust amplitude and thrust difference in float values between -1 and 1
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,))
        # 6 observations: angle_to_up, velocity, angle_velocity, distance_to_target, angle_to_target, angle_target_and_velocity
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,))

    def _reset_state(self):
        """Put the drone at the screen centre, draw a target, zero the counters."""
        # (value, first derivative, second derivative) for angle, x and y
        (self.a, self.ad, self.add) = (0, 0, 0)
        (self.x, self.xd, self.xdd) = (400, 0, 0)
        (self.y, self.yd, self.ydd) = (400, 0, 0)
        self.xt = randrange(200, 600)
        self.yt = randrange(200, 600)

        self.target_counter = 0
        self.reward = 0
        self.time = 0

    def reset(self):
        """Start a new episode and return its first observation."""
        self._reset_state()
        return self.get_obs()

    def get_obs(self) -> np.ndarray:
        """
        Calculates the observations

        Returns:
            np.ndarray: float32 vector of length 6:
            - angle_to_up : drone angle ``self.a`` converted from degrees to radians
            - velocity : magnitude of the linear velocity (position units per frame)
            - angle_velocity : angular velocity ``self.ad`` (degrees per frame)
            - distance_to_target : distance to the target divided by 500
            - angle_to_target : direction of the target as seen from the drone
              (``arctan2`` in screen coordinates, radians)
            - angle_target_and_velocity : angle_to_target minus the direction of
              the velocity vector (radians)
        """
        # Offset from the drone to the target, shared by the three target terms
        dx = self.xt - self.x
        dy = self.yt - self.y

        angle_to_up = self.a / 180 * pi
        velocity = sqrt(self.xd**2 + self.yd**2)
        angle_velocity = self.ad
        distance_to_target = sqrt(dx**2 + dy**2) / 500
        angle_to_target = np.arctan2(dy, dx)
        # NOTE: the difference is deliberately left unwrapped (it can leave
        # [-pi, pi]) so observations stay identical for already-trained models.
        angle_target_and_velocity = angle_to_target - np.arctan2(self.yd, self.xd)

        return np.array(
            [
                angle_to_up,
                velocity,
                angle_velocity,
                distance_to_target,
                angle_to_target,
                angle_target_and_velocity,
            ]
        ).astype(np.float32)

    def step(self, action):
        """Apply ``action`` for 5 physics frames.

        Args:
            action: Sequence of two floats: ``action[0]`` scales the common
                thrust, ``action[1]`` the left/right thrust difference.

        Returns:
            tuple: ``(observation, reward, done, info)``. ``reward`` is the sum
            over the simulated frames. ``info["TimeLimit.truncated"]`` is set to
            ``True`` when the episode ended because the time limit was hit, so
            learners can tell a timeout from a real terminal state (the crash).
        """
        self.reward = 0.0
        (action0, action1) = (action[0], action[1])

        frame_time = 1 / self.FPS
        done = False
        info = {}

        # The action is constant over the sub-frames, so are the thrusts
        thruster_left = (
            self.thruster_mean
            + action0 * self.thruster_amplitude
            + action1 * self.diff_amplitude
        )
        thruster_right = (
            self.thruster_mean
            + action0 * self.thruster_amplitude
            - action1 * self.diff_amplitude
        )
        total_thrust = thruster_left + thruster_right

        # Act every 5 frames
        for _ in range(5):
            self.time += frame_time

            if self.mouse_target:
                self.xt, self.yt = pygame.mouse.get_pos()

            # Calculating accelerations with Newton's laws of motions
            # (self.a is in degrees; y grows downwards, hence +gravity)
            heading = self.a * pi / 180
            self.xdd = -total_thrust * sin(heading) / self.mass
            self.ydd = self.gravity + (-total_thrust * cos(heading) / self.mass)
            self.add = self.arm * (thruster_right - thruster_left) / self.mass

            # Integrate accelerations -> velocities -> positions
            self.xd += self.xdd
            self.yd += self.ydd
            self.ad += self.add
            self.x += self.xd
            self.y += self.yd
            self.a += self.ad

            dist = sqrt((self.x - self.xt) ** 2 + (self.y - self.yt) ** 2)

            # Reward per step survived
            self.reward += frame_time
            # Penalty according to the distance to target
            self.reward -= dist / (100 * self.FPS)

            if dist < 50:
                # Reward if close to target, then move the target elsewhere
                self.xt = randrange(200, 600)
                self.yt = randrange(200, 600)
                self.reward += 100
                self.target_counter += 1

            # If out of time
            if self.time > self.time_limit:
                done = True
                # Timeout, not a failure: flag it as a truncation
                info["TimeLimit.truncated"] = True
                break

            # If too far from target (crash)
            if dist > 1000:
                self.reward -= 1000
                done = True
                break

            if self.render_every_frame:
                self.render("human")

        return (
            self.get_obs(),
            self.reward,
            done,
            info,
        )

    def render(self, mode="human"):
        """Draw the target, the rotated drone and the HUD, then wait for the next frame.

        Args:
            mode: Accepted for gym API compatibility; the value is not used.
        """
        # Drain the event queue so the window stays responsive
        pygame.event.get()
        self.screen.fill(0)
        self.screen.blit(
            self.target,
            (
                self.xt - int(self.target.get_width() / 2),
                self.yt - int(self.target.get_height() / 2),
            ),
        )
        # Rotation changes the surface size, so centre using the rotated copy
        player_copy = pygame.transform.rotate(self.player, self.a)
        self.screen.blit(
            player_copy,
            (
                self.x - int(player_copy.get_width() / 2),
                self.y - int(player_copy.get_height() / 2),
            ),
        )

        textsurface = self.myfont.render(
            "Collected: " + str(self.target_counter), False, (255, 255, 255)
        )
        self.screen.blit(textsurface, (20, 20))
        textsurface3 = self.myfont.render(
            "Time: " + str(int(self.time)), False, (255, 255, 255)
        )
        self.screen.blit(textsurface3, (20, 50))

        pygame.display.update()
        # Cap rendering at self.FPS frames per second
        self.FramePerSec.tick(self.FPS)

    def close(self):
        """Shut pygame down and release the window opened in ``__init__``."""
        pygame.quit()


## TEST ENVIRONMENT WORKING

In [2]:
# Smoke test: build the environment with per-frame rendering enabled and the
# random (non-mouse) target, then draw the initial state.
env = droneEnv(render_every_frame=True, mouse_target=False)

obs = env.reset()
env.render("yes")

# Show the spaces the agent will work with, each followed by a blank line
print("Observation space:", env.observation_space, "", sep="\n")
print("Action space:", env.action_space, "", sep="\n")
print("Action space sample:", env.action_space.sample(), sep="\n")

# Repeat one fixed action (neutral thrust, full thrust difference) for at
# most n_steps environment steps, stopping early if the episode ends.
action = np.array([0.0, 1.0])
n_steps = 10
for step_number in range(1, n_steps + 1):
    print("Step {}".format(step_number))
    obs, reward, done, info = env.step(action)
    print("obs=", obs, "reward=", reward, "done=", done)
    env.render("yes")
    if not done:
        continue
    print("Done!", "reward=", reward)
    break


Observation space:
Box([-inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf], (6,), float32)

Action space:
Box([-1. -1.], [1. 1.], (2,), float32)

Action space sample:
[ 0.39743373 -0.84471196]
Step 1
obs= [-0.03926991  0.00418868 -0.75        0.12176462  2.4027543   2.3931987 ] reward= 0.03260112590763964 done= False
Step 2
obs= [-0.14398967  0.03454228 -1.5         0.12190311  2.4038532   2.3648481 ] reward= 0.03257146751646054 done= False
Step 3
obs= [-0.31415927  0.1170206  -2.25        0.12244841  2.4085126   2.3204532 ] reward= 0.03242365841406434 done= False
Step 4
obs= [-0.5497787   0.27655566 -3.          0.12379135  2.421157    2.2645454 ] reward= 0.032011134430318694 done= False
Step 5
obs= [-0.850848    0.53502405 -3.75        0.12635559  2.4480414   2.2036622 ] reward= 0.031160051441842983 done= False
Step 6
obs= [-1.2173672   0.907548   -4.5         0.13052404  2.4964764   2.1457124 ] reward= 0.029702167775983026 done= False
Step 7
obs= [-1.6493361  1.3969848 -5.25 

## TRAIN SAC AGENT

In [6]:
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback
import wandb
from wandb.integration.sb3 import WandbCallback


# Start a Weights & Biases run; sync_tensorboard mirrors the TensorBoard
# scalars that Stable-Baselines3 writes to log_dir.
run = wandb.init(
    project="CSP",
    sync_tensorboard=True,
    monitor_gym=True,
)

# Create log dir
log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)

# Create and wrap the environment (no rendering, random targets);
# Monitor records episode statistics into log_dir.
env = droneEnv(False, False)
env = Monitor(env, log_dir)

# Create SAC agent
model = SAC("MlpPolicy", env, verbose=1, tensorboard_log=log_dir)

training_steps = 1_000_000


# Create checkpoint callback
# NOTE(review): save_freq equals the total number of steps, so a checkpoint is
# only written at the very end of training; lower it if intermediate snapshots
# are wanted for a run that may be interrupted.
checkpoint_callback = CheckpointCallback(
    save_freq=training_steps, save_path=log_dir, name_prefix="sac_model_v5"
)

# Train the agent. The run is finished in `finally` so that wandb flushes and
# closes it even when training is interrupted or raises.
try:
    model.learn(
        total_timesteps=training_steps,
        callback=[
            checkpoint_callback,
            WandbCallback(
                gradient_save_freq=training_steps,
                model_save_path=f"models/{run.id}",
                model_save_freq=training_steps,
                verbose=2,
            ),
        ],
    )
finally:
    run.finish()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…



Using cpu device
Wrapping the env in a DummyVecEnv.
Logging to tmp/SAC_4
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 41.8      |
|    ep_rew_mean     | -1.01e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 145       |
|    time_elapsed    | 1         |
|    total_timesteps | 167       |
| train/             |           |
|    actor_loss      | 13.2      |
|    critic_loss     | 1.48e+04  |
|    ent_coef        | 0.981     |
|    ent_coef_loss   | -0.0654   |
|    learning_rate   | 0.0003    |
|    n_updates       | 66        |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 39.5      |
|    ep_rew_mean     | -1.01e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 90        |
|    time_elapsed    | 3         |
|    total_timesteps | 316       |
| train/         

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.7     |
|    ep_rew_mean     | -994     |
| time/              |          |
|    episodes        | 60       |
|    fps             | 59       |
|    time_elapsed    | 35       |
|    total_timesteps | 2140     |
| train/             |          |
|    actor_loss      | 233      |
|    critic_loss     | 2.52e+03 |
|    ent_coef        | 0.888    |
|    ent_coef_loss   | -0.00835 |
|    learning_rate   | 0.0003   |
|    n_updates       | 2039     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.3     |
|    ep_rew_mean     | -995     |
| time/              |          |
|    episodes        | 64       |
|    fps             | 59       |
|    time_elapsed    | 38       |
|    total_timesteps | 2260     |
| train/             |          |
|    actor_loss      | 257      |
|    critic_loss     | 1.32e+03 |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34       |
|    ep_rew_mean     | -996     |
| time/              |          |
|    episodes        | 120      |
|    fps             | 56       |
|    time_elapsed    | 73       |
|    total_timesteps | 4175     |
| train/             |          |
|    actor_loss      | 376      |
|    critic_loss     | 757      |
|    ent_coef        | 0.938    |
|    ent_coef_loss   | -0.0431  |
|    learning_rate   | 0.0003   |
|    n_updates       | 4074     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 33.9     |
|    ep_rew_mean     | -995     |
| time/              |          |
|    episodes        | 124      |
|    fps             | 56       |
|    time_elapsed    | 76       |
|    total_timesteps | 4313     |
| train/             |          |
|    actor_loss      | 421      |
|    critic_loss     | 1.66e+03 |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 42.5     |
|    ep_rew_mean     | -983     |
| time/              |          |
|    episodes        | 180      |
|    fps             | 55       |
|    time_elapsed    | 127      |
|    total_timesteps | 7043     |
| train/             |          |
|    actor_loss      | 561      |
|    critic_loss     | 511      |
|    ent_coef        | 0.51     |
|    ent_coef_loss   | 0.08     |
|    learning_rate   | 0.0003   |
|    n_updates       | 6942     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 44.4     |
|    ep_rew_mean     | -983     |
| time/              |          |
|    episodes        | 184      |
|    fps             | 55       |
|    time_elapsed    | 133      |
|    total_timesteps | 7376     |
| train/             |          |
|    actor_loss      | 575      |
|    critic_loss     | 904      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 69       |
|    ep_rew_mean     | -973     |
| time/              |          |
|    episodes        | 240      |
|    fps             | 54       |
|    time_elapsed    | 218      |
|    total_timesteps | 11849    |
| train/             |          |
|    actor_loss      | 651      |
|    critic_loss     | 675      |
|    ent_coef        | 0.397    |
|    ent_coef_loss   | -0.121   |
|    learning_rate   | 0.0003   |
|    n_updates       | 11748    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 70.4     |
|    ep_rew_mean     | -972     |
| time/              |          |
|    episodes        | 244      |
|    fps             | 54       |
|    time_elapsed    | 224      |
|    total_timesteps | 12173    |
| train/             |          |
|    actor_loss      | 617      |
|    critic_loss     | 292      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 139      |
|    ep_rew_mean     | -781     |
| time/              |          |
|    episodes        | 300      |
|    fps             | 54       |
|    time_elapsed    | 414      |
|    total_timesteps | 22398    |
| train/             |          |
|    actor_loss      | 513      |
|    critic_loss     | 105      |
|    ent_coef        | 0.424    |
|    ent_coef_loss   | -0.103   |
|    learning_rate   | 0.0003   |
|    n_updates       | 22297    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 145      |
|    ep_rew_mean     | -743     |
| time/              |          |
|    episodes        | 304      |
|    fps             | 54       |
|    time_elapsed    | 431      |
|    total_timesteps | 23358    |
| train/             |          |
|    actor_loss      | 506      |
|    critic_loss     | 273      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 227      |
|    ep_rew_mean     | -257     |
| time/              |          |
|    episodes        | 360      |
|    fps             | 54       |
|    time_elapsed    | 676      |
|    total_timesteps | 36640    |
| train/             |          |
|    actor_loss      | 334      |
|    critic_loss     | 2.38e+03 |
|    ent_coef        | 0.406    |
|    ent_coef_loss   | 0.146    |
|    learning_rate   | 0.0003   |
|    n_updates       | 36539    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 230      |
|    ep_rew_mean     | -217     |
| time/              |          |
|    episodes        | 364      |
|    fps             | 54       |
|    time_elapsed    | 693      |
|    total_timesteps | 37600    |
| train/             |          |
|    actor_loss      | 299      |
|    critic_loss     | 81.6     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 239      |
|    ep_rew_mean     | -57.7    |
| time/              |          |
|    episodes        | 420      |
|    fps             | 54       |
|    time_elapsed    | 942      |
|    total_timesteps | 51040    |
| train/             |          |
|    actor_loss      | 246      |
|    critic_loss     | 266      |
|    ent_coef        | 0.297    |
|    ent_coef_loss   | 0.239    |
|    learning_rate   | 0.0003   |
|    n_updates       | 50939    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | -41.3    |
| time/              |          |
|    episodes        | 424      |
|    fps             | 54       |
|    time_elapsed    | 959      |
|    total_timesteps | 52000    |
| train/             |          |
|    actor_loss      | 230      |
|    critic_loss     | 53.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | -37.8    |
| time/              |          |
|    episodes        | 480      |
|    fps             | 53       |
|    time_elapsed    | 1218     |
|    total_timesteps | 65440    |
| train/             |          |
|    actor_loss      | 150      |
|    critic_loss     | 1.4e+03  |
|    ent_coef        | 0.237    |
|    ent_coef_loss   | 0.0653   |
|    learning_rate   | 0.0003   |
|    n_updates       | 65339    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | -38.6    |
| time/              |          |
|    episodes        | 484      |
|    fps             | 53       |
|    time_elapsed    | 1237     |
|    total_timesteps | 66400    |
| train/             |          |
|    actor_loss      | 139      |
|    critic_loss     | 47.2     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | -20.8    |
| time/              |          |
|    episodes        | 540      |
|    fps             | 52       |
|    time_elapsed    | 1514     |
|    total_timesteps | 79840    |
| train/             |          |
|    actor_loss      | 95.3     |
|    critic_loss     | 271      |
|    ent_coef        | 0.197    |
|    ent_coef_loss   | -0.0415  |
|    learning_rate   | 0.0003   |
|    n_updates       | 79739    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | -18.4    |
| time/              |          |
|    episodes        | 544      |
|    fps             | 52       |
|    time_elapsed    | 1535     |
|    total_timesteps | 80800    |
| train/             |          |
|    actor_loss      | 112      |
|    critic_loss     | 95.4     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 239      |
|    ep_rew_mean     | 30.8     |
| time/              |          |
|    episodes        | 600      |
|    fps             | 51       |
|    time_elapsed    | 1823     |
|    total_timesteps | 94153    |
| train/             |          |
|    actor_loss      | 55.8     |
|    critic_loss     | 95.7     |
|    ent_coef        | 0.199    |
|    ent_coef_loss   | -0.285   |
|    learning_rate   | 0.0003   |
|    n_updates       | 94052    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 239      |
|    ep_rew_mean     | 29.1     |
| time/              |          |
|    episodes        | 604      |
|    fps             | 51       |
|    time_elapsed    | 1842     |
|    total_timesteps | 95111    |
| train/             |          |
|    actor_loss      | 108      |
|    critic_loss     | 204      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 239      |
|    ep_rew_mean     | 205      |
| time/              |          |
|    episodes        | 660      |
|    fps             | 51       |
|    time_elapsed    | 2113     |
|    total_timesteps | 108551   |
| train/             |          |
|    actor_loss      | 78       |
|    critic_loss     | 68.6     |
|    ent_coef        | 0.182    |
|    ent_coef_loss   | -0.091   |
|    learning_rate   | 0.0003   |
|    n_updates       | 108450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 239      |
|    ep_rew_mean     | 217      |
| time/              |          |
|    episodes        | 664      |
|    fps             | 51       |
|    time_elapsed    | 2131     |
|    total_timesteps | 109511   |
| train/             |          |
|    actor_loss      | 69.5     |
|    critic_loss     | 144      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 398      |
| time/              |          |
|    episodes        | 720      |
|    fps             | 51       |
|    time_elapsed    | 2384     |
|    total_timesteps | 122951   |
| train/             |          |
|    actor_loss      | 51.9     |
|    critic_loss     | 113      |
|    ent_coef        | 0.174    |
|    ent_coef_loss   | -0.0692  |
|    learning_rate   | 0.0003   |
|    n_updates       | 122850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 404      |
| time/              |          |
|    episodes        | 724      |
|    fps             | 51       |
|    time_elapsed    | 2403     |
|    total_timesteps | 123911   |
| train/             |          |
|    actor_loss      | 41.7     |
|    critic_loss     | 272      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 401      |
| time/              |          |
|    episodes        | 780      |
|    fps             | 51       |
|    time_elapsed    | 2657     |
|    total_timesteps | 137351   |
| train/             |          |
|    actor_loss      | 13.1     |
|    critic_loss     | 97.2     |
|    ent_coef        | 0.164    |
|    ent_coef_loss   | 0.0129   |
|    learning_rate   | 0.0003   |
|    n_updates       | 137250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 417      |
| time/              |          |
|    episodes        | 784      |
|    fps             | 51       |
|    time_elapsed    | 2675     |
|    total_timesteps | 138311   |
| train/             |          |
|    actor_loss      | -11.7    |
|    critic_loss     | 69.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 476      |
| time/              |          |
|    episodes        | 840      |
|    fps             | 51       |
|    time_elapsed    | 2928     |
|    total_timesteps | 151751   |
| train/             |          |
|    actor_loss      | -13.8    |
|    critic_loss     | 46.5     |
|    ent_coef        | 0.158    |
|    ent_coef_loss   | 0.23     |
|    learning_rate   | 0.0003   |
|    n_updates       | 151650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 487      |
| time/              |          |
|    episodes        | 844      |
|    fps             | 51       |
|    time_elapsed    | 2946     |
|    total_timesteps | 152711   |
| train/             |          |
|    actor_loss      | -34.7    |
|    critic_loss     | 74.6     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 538      |
| time/              |          |
|    episodes        | 900      |
|    fps             | 51       |
|    time_elapsed    | 3199     |
|    total_timesteps | 166151   |
| train/             |          |
|    actor_loss      | -35.6    |
|    critic_loss     | 78.4     |
|    ent_coef        | 0.153    |
|    ent_coef_loss   | 0.353    |
|    learning_rate   | 0.0003   |
|    n_updates       | 166050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 535      |
| time/              |          |
|    episodes        | 904      |
|    fps             | 51       |
|    time_elapsed    | 3217     |
|    total_timesteps | 167111   |
| train/             |          |
|    actor_loss      | -42.3    |
|    critic_loss     | 149      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 523      |
| time/              |          |
|    episodes        | 960      |
|    fps             | 51       |
|    time_elapsed    | 3490     |
|    total_timesteps | 180551   |
| train/             |          |
|    actor_loss      | -23.6    |
|    critic_loss     | 70.9     |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | -0.255   |
|    learning_rate   | 0.0003   |
|    n_updates       | 180450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 520      |
| time/              |          |
|    episodes        | 964      |
|    fps             | 51       |
|    time_elapsed    | 3510     |
|    total_timesteps | 181511   |
| train/             |          |
|    actor_loss      | -49.9    |
|    critic_loss     | 84.7     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 542      |
| time/              |          |
|    episodes        | 1020     |
|    fps             | 51       |
|    time_elapsed    | 3801     |
|    total_timesteps | 194951   |
| train/             |          |
|    actor_loss      | -55.7    |
|    critic_loss     | 139      |
|    ent_coef        | 0.16     |
|    ent_coef_loss   | -0.166   |
|    learning_rate   | 0.0003   |
|    n_updates       | 194850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 541      |
| time/              |          |
|    episodes        | 1024     |
|    fps             | 51       |
|    time_elapsed    | 3823     |
|    total_timesteps | 195911   |
| train/             |          |
|    actor_loss      | -62.9    |
|    critic_loss     | 87.6     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 576      |
| time/              |          |
|    episodes        | 1080     |
|    fps             | 50       |
|    time_elapsed    | 4112     |
|    total_timesteps | 209351   |
| train/             |          |
|    actor_loss      | -74.6    |
|    critic_loss     | 153      |
|    ent_coef        | 0.155    |
|    ent_coef_loss   | -0.197   |
|    learning_rate   | 0.0003   |
|    n_updates       | 209250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 576      |
| time/              |          |
|    episodes        | 1084     |
|    fps             | 50       |
|    time_elapsed    | 4132     |
|    total_timesteps | 210311   |
| train/             |          |
|    actor_loss      | -56.1    |
|    critic_loss     | 89.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 628      |
| time/              |          |
|    episodes        | 1140     |
|    fps             | 50       |
|    time_elapsed    | 4388     |
|    total_timesteps | 223751   |
| train/             |          |
|    actor_loss      | -68.8    |
|    critic_loss     | 186      |
|    ent_coef        | 0.158    |
|    ent_coef_loss   | 0.00742  |
|    learning_rate   | 0.0003   |
|    n_updates       | 223650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 641      |
| time/              |          |
|    episodes        | 1144     |
|    fps             | 50       |
|    time_elapsed    | 4406     |
|    total_timesteps | 224711   |
| train/             |          |
|    actor_loss      | -81      |
|    critic_loss     | 123      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 689      |
| time/              |          |
|    episodes        | 1200     |
|    fps             | 51       |
|    time_elapsed    | 4661     |
|    total_timesteps | 238151   |
| train/             |          |
|    actor_loss      | -76.7    |
|    critic_loss     | 237      |
|    ent_coef        | 0.155    |
|    ent_coef_loss   | 0.656    |
|    learning_rate   | 0.0003   |
|    n_updates       | 238050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 688      |
| time/              |          |
|    episodes        | 1204     |
|    fps             | 51       |
|    time_elapsed    | 4680     |
|    total_timesteps | 239111   |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 169      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 694      |
| time/              |          |
|    episodes        | 1260     |
|    fps             | 51       |
|    time_elapsed    | 4935     |
|    total_timesteps | 252551   |
| train/             |          |
|    actor_loss      | -92.7    |
|    critic_loss     | 280      |
|    ent_coef        | 0.158    |
|    ent_coef_loss   | 0.214    |
|    learning_rate   | 0.0003   |
|    n_updates       | 252450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 689      |
| time/              |          |
|    episodes        | 1264     |
|    fps             | 51       |
|    time_elapsed    | 4953     |
|    total_timesteps | 253511   |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 243      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 702      |
| time/              |          |
|    episodes        | 1320     |
|    fps             | 51       |
|    time_elapsed    | 5211     |
|    total_timesteps | 266951   |
| train/             |          |
|    actor_loss      | -110     |
|    critic_loss     | 63.5     |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | 0.348    |
|    learning_rate   | 0.0003   |
|    n_updates       | 266850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 701      |
| time/              |          |
|    episodes        | 1324     |
|    fps             | 51       |
|    time_elapsed    | 5229     |
|    total_timesteps | 267911   |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 78.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 754      |
| time/              |          |
|    episodes        | 1380     |
|    fps             | 51       |
|    time_elapsed    | 5491     |
|    total_timesteps | 281351   |
| train/             |          |
|    actor_loss      | -116     |
|    critic_loss     | 115      |
|    ent_coef        | 0.155    |
|    ent_coef_loss   | 0.0267   |
|    learning_rate   | 0.0003   |
|    n_updates       | 281250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 757      |
| time/              |          |
|    episodes        | 1384     |
|    fps             | 51       |
|    time_elapsed    | 5511     |
|    total_timesteps | 282311   |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 409      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 766      |
| time/              |          |
|    episodes        | 1440     |
|    fps             | 51       |
|    time_elapsed    | 5787     |
|    total_timesteps | 295751   |
| train/             |          |
|    actor_loss      | -127     |
|    critic_loss     | 44.3     |
|    ent_coef        | 0.152    |
|    ent_coef_loss   | -0.046   |
|    learning_rate   | 0.0003   |
|    n_updates       | 295650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 761      |
| time/              |          |
|    episodes        | 1444     |
|    fps             | 51       |
|    time_elapsed    | 5807     |
|    total_timesteps | 296711   |
| train/             |          |
|    actor_loss      | -122     |
|    critic_loss     | 140      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 782      |
| time/              |          |
|    episodes        | 1500     |
|    fps             | 50       |
|    time_elapsed    | 6106     |
|    total_timesteps | 310151   |
| train/             |          |
|    actor_loss      | -93.9    |
|    critic_loss     | 20       |
|    ent_coef        | 0.143    |
|    ent_coef_loss   | 0.0842   |
|    learning_rate   | 0.0003   |
|    n_updates       | 310050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 779      |
| time/              |          |
|    episodes        | 1504     |
|    fps             | 50       |
|    time_elapsed    | 6127     |
|    total_timesteps | 311111   |
| train/             |          |
|    actor_loss      | -131     |
|    critic_loss     | 232      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 788      |
| time/              |          |
|    episodes        | 1560     |
|    fps             | 50       |
|    time_elapsed    | 6419     |
|    total_timesteps | 324551   |
| train/             |          |
|    actor_loss      | -128     |
|    critic_loss     | 263      |
|    ent_coef        | 0.146    |
|    ent_coef_loss   | -0.0822  |
|    learning_rate   | 0.0003   |
|    n_updates       | 324450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 791      |
| time/              |          |
|    episodes        | 1564     |
|    fps             | 50       |
|    time_elapsed    | 6438     |
|    total_timesteps | 325511   |
| train/             |          |
|    actor_loss      | -150     |
|    critic_loss     | 22.8     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 787      |
| time/              |          |
|    episodes        | 1620     |
|    fps             | 50       |
|    time_elapsed    | 6703     |
|    total_timesteps | 338951   |
| train/             |          |
|    actor_loss      | -142     |
|    critic_loss     | 34.3     |
|    ent_coef        | 0.141    |
|    ent_coef_loss   | 0.166    |
|    learning_rate   | 0.0003   |
|    n_updates       | 338850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 795      |
| time/              |          |
|    episodes        | 1624     |
|    fps             | 50       |
|    time_elapsed    | 6722     |
|    total_timesteps | 339911   |
| train/             |          |
|    actor_loss      | -138     |
|    critic_loss     | 53.3     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 793      |
| time/              |          |
|    episodes        | 1680     |
|    fps             | 50       |
|    time_elapsed    | 6988     |
|    total_timesteps | 353351   |
| train/             |          |
|    actor_loss      | -140     |
|    critic_loss     | 28.6     |
|    ent_coef        | 0.141    |
|    ent_coef_loss   | 0.272    |
|    learning_rate   | 0.0003   |
|    n_updates       | 353250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 789      |
| time/              |          |
|    episodes        | 1684     |
|    fps             | 50       |
|    time_elapsed    | 7007     |
|    total_timesteps | 354311   |
| train/             |          |
|    actor_loss      | -151     |
|    critic_loss     | 54.5     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 800      |
| time/              |          |
|    episodes        | 1740     |
|    fps             | 50       |
|    time_elapsed    | 7273     |
|    total_timesteps | 367751   |
| train/             |          |
|    actor_loss      | -152     |
|    critic_loss     | 190      |
|    ent_coef        | 0.134    |
|    ent_coef_loss   | 0.0297   |
|    learning_rate   | 0.0003   |
|    n_updates       | 367650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 798      |
| time/              |          |
|    episodes        | 1744     |
|    fps             | 50       |
|    time_elapsed    | 7292     |
|    total_timesteps | 368711   |
| train/             |          |
|    actor_loss      | -129     |
|    critic_loss     | 205      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 796      |
| time/              |          |
|    episodes        | 1800     |
|    fps             | 50       |
|    time_elapsed    | 7559     |
|    total_timesteps | 382151   |
| train/             |          |
|    actor_loss      | -154     |
|    critic_loss     | 433      |
|    ent_coef        | 0.133    |
|    ent_coef_loss   | -0.143   |
|    learning_rate   | 0.0003   |
|    n_updates       | 382050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 800      |
| time/              |          |
|    episodes        | 1804     |
|    fps             | 50       |
|    time_elapsed    | 7577     |
|    total_timesteps | 383111   |
| train/             |          |
|    actor_loss      | -145     |
|    critic_loss     | 197      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 807      |
| time/              |          |
|    episodes        | 1860     |
|    fps             | 50       |
|    time_elapsed    | 7865     |
|    total_timesteps | 396551   |
| train/             |          |
|    actor_loss      | -147     |
|    critic_loss     | 207      |
|    ent_coef        | 0.135    |
|    ent_coef_loss   | -0.274   |
|    learning_rate   | 0.0003   |
|    n_updates       | 396450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 816      |
| time/              |          |
|    episodes        | 1864     |
|    fps             | 50       |
|    time_elapsed    | 7886     |
|    total_timesteps | 397511   |
| train/             |          |
|    actor_loss      | -146     |
|    critic_loss     | 182      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 836      |
| time/              |          |
|    episodes        | 1920     |
|    fps             | 50       |
|    time_elapsed    | 8198     |
|    total_timesteps | 410951   |
| train/             |          |
|    actor_loss      | -144     |
|    critic_loss     | 102      |
|    ent_coef        | 0.137    |
|    ent_coef_loss   | -0.00506 |
|    learning_rate   | 0.0003   |
|    n_updates       | 410850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 831      |
| time/              |          |
|    episodes        | 1924     |
|    fps             | 50       |
|    time_elapsed    | 8220     |
|    total_timesteps | 411911   |
| train/             |          |
|    actor_loss      | -139     |
|    critic_loss     | 156      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 841      |
| time/              |          |
|    episodes        | 1980     |
|    fps             | 49       |
|    time_elapsed    | 8525     |
|    total_timesteps | 425351   |
| train/             |          |
|    actor_loss      | -163     |
|    critic_loss     | 90.2     |
|    ent_coef        | 0.128    |
|    ent_coef_loss   | -0.0785  |
|    learning_rate   | 0.0003   |
|    n_updates       | 425250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 848      |
| time/              |          |
|    episodes        | 1984     |
|    fps             | 49       |
|    time_elapsed    | 8549     |
|    total_timesteps | 426311   |
| train/             |          |
|    actor_loss      | -151     |
|    critic_loss     | 207      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 866      |
| time/              |          |
|    episodes        | 2040     |
|    fps             | 49       |
|    time_elapsed    | 8827     |
|    total_timesteps | 439751   |
| train/             |          |
|    actor_loss      | -165     |
|    critic_loss     | 41.8     |
|    ent_coef        | 0.125    |
|    ent_coef_loss   | 0.374    |
|    learning_rate   | 0.0003   |
|    n_updates       | 439650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 875      |
| time/              |          |
|    episodes        | 2044     |
|    fps             | 49       |
|    time_elapsed    | 8846     |
|    total_timesteps | 440711   |
| train/             |          |
|    actor_loss      | -148     |
|    critic_loss     | 62.2     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 875      |
| time/              |          |
|    episodes        | 2100     |
|    fps             | 49       |
|    time_elapsed    | 9111     |
|    total_timesteps | 454151   |
| train/             |          |
|    actor_loss      | -160     |
|    critic_loss     | 46.6     |
|    ent_coef        | 0.134    |
|    ent_coef_loss   | -0.0304  |
|    learning_rate   | 0.0003   |
|    n_updates       | 454050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 869      |
| time/              |          |
|    episodes        | 2104     |
|    fps             | 49       |
|    time_elapsed    | 9130     |
|    total_timesteps | 455111   |
| train/             |          |
|    actor_loss      | -143     |
|    critic_loss     | 217      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 883      |
| time/              |          |
|    episodes        | 2160     |
|    fps             | 49       |
|    time_elapsed    | 9396     |
|    total_timesteps | 468551   |
| train/             |          |
|    actor_loss      | -155     |
|    critic_loss     | 194      |
|    ent_coef        | 0.129    |
|    ent_coef_loss   | 0.141    |
|    learning_rate   | 0.0003   |
|    n_updates       | 468450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 887      |
| time/              |          |
|    episodes        | 2164     |
|    fps             | 49       |
|    time_elapsed    | 9416     |
|    total_timesteps | 469511   |
| train/             |          |
|    actor_loss      | -164     |
|    critic_loss     | 455      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 872      |
| time/              |          |
|    episodes        | 2220     |
|    fps             | 49       |
|    time_elapsed    | 9683     |
|    total_timesteps | 482951   |
| train/             |          |
|    actor_loss      | -162     |
|    critic_loss     | 35.8     |
|    ent_coef        | 0.124    |
|    ent_coef_loss   | 0.319    |
|    learning_rate   | 0.0003   |
|    n_updates       | 482850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 874      |
| time/              |          |
|    episodes        | 2224     |
|    fps             | 49       |
|    time_elapsed    | 9702     |
|    total_timesteps | 483911   |
| train/             |          |
|    actor_loss      | -176     |
|    critic_loss     | 22.2     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 880      |
| time/              |          |
|    episodes        | 2280     |
|    fps             | 49       |
|    time_elapsed    | 9985     |
|    total_timesteps | 497351   |
| train/             |          |
|    actor_loss      | -172     |
|    critic_loss     | 42.4     |
|    ent_coef        | 0.126    |
|    ent_coef_loss   | -0.239   |
|    learning_rate   | 0.0003   |
|    n_updates       | 497250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 876      |
| time/              |          |
|    episodes        | 2284     |
|    fps             | 49       |
|    time_elapsed    | 10005    |
|    total_timesteps | 498311   |
| train/             |          |
|    actor_loss      | -178     |
|    critic_loss     | 379      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 889      |
| time/              |          |
|    episodes        | 2340     |
|    fps             | 49       |
|    time_elapsed    | 10303    |
|    total_timesteps | 511751   |
| train/             |          |
|    actor_loss      | -181     |
|    critic_loss     | 190      |
|    ent_coef        | 0.123    |
|    ent_coef_loss   | 0.0492   |
|    learning_rate   | 0.0003   |
|    n_updates       | 511650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 887      |
| time/              |          |
|    episodes        | 2344     |
|    fps             | 49       |
|    time_elapsed    | 10325    |
|    total_timesteps | 512711   |
| train/             |          |
|    actor_loss      | -174     |
|    critic_loss     | 276      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 889      |
| time/              |          |
|    episodes        | 2400     |
|    fps             | 49       |
|    time_elapsed    | 10628    |
|    total_timesteps | 526151   |
| train/             |          |
|    actor_loss      | -174     |
|    critic_loss     | 149      |
|    ent_coef        | 0.124    |
|    ent_coef_loss   | -0.161   |
|    learning_rate   | 0.0003   |
|    n_updates       | 526050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 888      |
| time/              |          |
|    episodes        | 2404     |
|    fps             | 49       |
|    time_elapsed    | 10648    |
|    total_timesteps | 527111   |
| train/             |          |
|    actor_loss      | -180     |
|    critic_loss     | 472      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 848      |
| time/              |          |
|    episodes        | 2460     |
|    fps             | 49       |
|    time_elapsed    | 10930    |
|    total_timesteps | 540551   |
| train/             |          |
|    actor_loss      | -176     |
|    critic_loss     | 61       |
|    ent_coef        | 0.122    |
|    ent_coef_loss   | 0.0188   |
|    learning_rate   | 0.0003   |
|    n_updates       | 540450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 854      |
| time/              |          |
|    episodes        | 2464     |
|    fps             | 49       |
|    time_elapsed    | 10948    |
|    total_timesteps | 541511   |
| train/             |          |
|    actor_loss      | -170     |
|    critic_loss     | 119      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 850      |
| time/              |          |
|    episodes        | 2520     |
|    fps             | 49       |
|    time_elapsed    | 11211    |
|    total_timesteps | 554951   |
| train/             |          |
|    actor_loss      | -195     |
|    critic_loss     | 90.2     |
|    ent_coef        | 0.119    |
|    ent_coef_loss   | 0.0605   |
|    learning_rate   | 0.0003   |
|    n_updates       | 554850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 857      |
| time/              |          |
|    episodes        | 2524     |
|    fps             | 49       |
|    time_elapsed    | 11230    |
|    total_timesteps | 555911   |
| train/             |          |
|    actor_loss      | -188     |
|    critic_loss     | 242      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 875      |
| time/              |          |
|    episodes        | 2580     |
|    fps             | 49       |
|    time_elapsed    | 11499    |
|    total_timesteps | 569351   |
| train/             |          |
|    actor_loss      | -193     |
|    critic_loss     | 480      |
|    ent_coef        | 0.12     |
|    ent_coef_loss   | 0.405    |
|    learning_rate   | 0.0003   |
|    n_updates       | 569250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 877      |
| time/              |          |
|    episodes        | 2584     |
|    fps             | 49       |
|    time_elapsed    | 11518    |
|    total_timesteps | 570311   |
| train/             |          |
|    actor_loss      | -174     |
|    critic_loss     | 96.2     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 898      |
| time/              |          |
|    episodes        | 2640     |
|    fps             | 49       |
|    time_elapsed    | 11786    |
|    total_timesteps | 583751   |
| train/             |          |
|    actor_loss      | -187     |
|    critic_loss     | 244      |
|    ent_coef        | 0.119    |
|    ent_coef_loss   | 0.817    |
|    learning_rate   | 0.0003   |
|    n_updates       | 583650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 893      |
| time/              |          |
|    episodes        | 2644     |
|    fps             | 49       |
|    time_elapsed    | 11805    |
|    total_timesteps | 584711   |
| train/             |          |
|    actor_loss      | -159     |
|    critic_loss     | 394      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 915      |
| time/              |          |
|    episodes        | 2700     |
|    fps             | 49       |
|    time_elapsed    | 12086    |
|    total_timesteps | 598151   |
| train/             |          |
|    actor_loss      | -197     |
|    critic_loss     | 312      |
|    ent_coef        | 0.117    |
|    ent_coef_loss   | -0.326   |
|    learning_rate   | 0.0003   |
|    n_updates       | 598050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 923      |
| time/              |          |
|    episodes        | 2704     |
|    fps             | 49       |
|    time_elapsed    | 12106    |
|    total_timesteps | 599111   |
| train/             |          |
|    actor_loss      | -195     |
|    critic_loss     | 50       |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 880      |
| time/              |          |
|    episodes        | 2760     |
|    fps             | 49       |
|    time_elapsed    | 12402    |
|    total_timesteps | 612551   |
| train/             |          |
|    actor_loss      | -192     |
|    critic_loss     | 295      |
|    ent_coef        | 0.119    |
|    ent_coef_loss   | -0.43    |
|    learning_rate   | 0.0003   |
|    n_updates       | 612450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 880      |
| time/              |          |
|    episodes        | 2764     |
|    fps             | 49       |
|    time_elapsed    | 12425    |
|    total_timesteps | 613511   |
| train/             |          |
|    actor_loss      | -190     |
|    critic_loss     | 208      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 883      |
| time/              |          |
|    episodes        | 2820     |
|    fps             | 49       |
|    time_elapsed    | 12721    |
|    total_timesteps | 626951   |
| train/             |          |
|    actor_loss      | -193     |
|    critic_loss     | 47.9     |
|    ent_coef        | 0.114    |
|    ent_coef_loss   | -0.436   |
|    learning_rate   | 0.0003   |
|    n_updates       | 626850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 884      |
| time/              |          |
|    episodes        | 2824     |
|    fps             | 49       |
|    time_elapsed    | 12742    |
|    total_timesteps | 627911   |
| train/             |          |
|    actor_loss      | -199     |
|    critic_loss     | 152      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 874      |
| time/              |          |
|    episodes        | 2880     |
|    fps             | 49       |
|    time_elapsed    | 13020    |
|    total_timesteps | 641351   |
| train/             |          |
|    actor_loss      | -188     |
|    critic_loss     | 144      |
|    ent_coef        | 0.112    |
|    ent_coef_loss   | -0.0903  |
|    learning_rate   | 0.0003   |
|    n_updates       | 641250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 872      |
| time/              |          |
|    episodes        | 2884     |
|    fps             | 49       |
|    time_elapsed    | 13039    |
|    total_timesteps | 642311   |
| train/             |          |
|    actor_loss      | -200     |
|    critic_loss     | 473      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 871      |
| time/              |          |
|    episodes        | 2940     |
|    fps             | 49       |
|    time_elapsed    | 13325    |
|    total_timesteps | 655751   |
| train/             |          |
|    actor_loss      | -197     |
|    critic_loss     | 102      |
|    ent_coef        | 0.114    |
|    ent_coef_loss   | -0.0736  |
|    learning_rate   | 0.0003   |
|    n_updates       | 655650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 875      |
| time/              |          |
|    episodes        | 2944     |
|    fps             | 49       |
|    time_elapsed    | 13348    |
|    total_timesteps | 656711   |
| train/             |          |
|    actor_loss      | -195     |
|    critic_loss     | 79.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 914      |
| time/              |          |
|    episodes        | 3000     |
|    fps             | 48       |
|    time_elapsed    | 13682    |
|    total_timesteps | 670151   |
| train/             |          |
|    actor_loss      | -202     |
|    critic_loss     | 20.8     |
|    ent_coef        | 0.115    |
|    ent_coef_loss   | 0.115    |
|    learning_rate   | 0.0003   |
|    n_updates       | 670050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 913      |
| time/              |          |
|    episodes        | 3004     |
|    fps             | 48       |
|    time_elapsed    | 13705    |
|    total_timesteps | 671111   |
| train/             |          |
|    actor_loss      | -202     |
|    critic_loss     | 81.8     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 906      |
| time/              |          |
|    episodes        | 3060     |
|    fps             | 48       |
|    time_elapsed    | 14032    |
|    total_timesteps | 684551   |
| train/             |          |
|    actor_loss      | -200     |
|    critic_loss     | 647      |
|    ent_coef        | 0.116    |
|    ent_coef_loss   | -0.111   |
|    learning_rate   | 0.0003   |
|    n_updates       | 684450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 905      |
| time/              |          |
|    episodes        | 3064     |
|    fps             | 48       |
|    time_elapsed    | 14055    |
|    total_timesteps | 685511   |
| train/             |          |
|    actor_loss      | -196     |
|    critic_loss     | 97.6     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 909      |
| time/              |          |
|    episodes        | 3120     |
|    fps             | 48       |
|    time_elapsed    | 14453    |
|    total_timesteps | 698951   |
| train/             |          |
|    actor_loss      | -202     |
|    critic_loss     | 273      |
|    ent_coef        | 0.114    |
|    ent_coef_loss   | -0.178   |
|    learning_rate   | 0.0003   |
|    n_updates       | 698850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 918      |
| time/              |          |
|    episodes        | 3124     |
|    fps             | 48       |
|    time_elapsed    | 14476    |
|    total_timesteps | 699911   |
| train/             |          |
|    actor_loss      | -195     |
|    critic_loss     | 139      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 922      |
| time/              |          |
|    episodes        | 3180     |
|    fps             | 48       |
|    time_elapsed    | 14794    |
|    total_timesteps | 713351   |
| train/             |          |
|    actor_loss      | -193     |
|    critic_loss     | 172      |
|    ent_coef        | 0.107    |
|    ent_coef_loss   | 0.0651   |
|    learning_rate   | 0.0003   |
|    n_updates       | 713250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 920      |
| time/              |          |
|    episodes        | 3184     |
|    fps             | 48       |
|    time_elapsed    | 14816    |
|    total_timesteps | 714311   |
| train/             |          |
|    actor_loss      | -201     |
|    critic_loss     | 614      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 894      |
| time/              |          |
|    episodes        | 3240     |
|    fps             | 48       |
|    time_elapsed    | 15124    |
|    total_timesteps | 727751   |
| train/             |          |
|    actor_loss      | -212     |
|    critic_loss     | 17.2     |
|    ent_coef        | 0.111    |
|    ent_coef_loss   | -0.29    |
|    learning_rate   | 0.0003   |
|    n_updates       | 727650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 900      |
| time/              |          |
|    episodes        | 3244     |
|    fps             | 48       |
|    time_elapsed    | 15146    |
|    total_timesteps | 728711   |
| train/             |          |
|    actor_loss      | -205     |
|    critic_loss     | 206      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 905      |
| time/              |          |
|    episodes        | 3300     |
|    fps             | 47       |
|    time_elapsed    | 15475    |
|    total_timesteps | 742151   |
| train/             |          |
|    actor_loss      | -199     |
|    critic_loss     | 107      |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | -0.411   |
|    learning_rate   | 0.0003   |
|    n_updates       | 742050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 901      |
| time/              |          |
|    episodes        | 3304     |
|    fps             | 47       |
|    time_elapsed    | 15497    |
|    total_timesteps | 743111   |
| train/             |          |
|    actor_loss      | -210     |
|    critic_loss     | 24.8     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 912      |
| time/              |          |
|    episodes        | 3360     |
|    fps             | 47       |
|    time_elapsed    | 15800    |
|    total_timesteps | 756551   |
| train/             |          |
|    actor_loss      | -208     |
|    critic_loss     | 29.5     |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | -0.209   |
|    learning_rate   | 0.0003   |
|    n_updates       | 756450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 913      |
| time/              |          |
|    episodes        | 3364     |
|    fps             | 47       |
|    time_elapsed    | 15820    |
|    total_timesteps | 757511   |
| train/             |          |
|    actor_loss      | -214     |
|    critic_loss     | 430      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 911      |
| time/              |          |
|    episodes        | 3420     |
|    fps             | 47       |
|    time_elapsed    | 16131    |
|    total_timesteps | 770951   |
| train/             |          |
|    actor_loss      | -197     |
|    critic_loss     | 330      |
|    ent_coef        | 0.112    |
|    ent_coef_loss   | -0.0408  |
|    learning_rate   | 0.0003   |
|    n_updates       | 770850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 914      |
| time/              |          |
|    episodes        | 3424     |
|    fps             | 47       |
|    time_elapsed    | 16153    |
|    total_timesteps | 771911   |
| train/             |          |
|    actor_loss      | -202     |
|    critic_loss     | 598      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 905      |
| time/              |          |
|    episodes        | 3480     |
|    fps             | 47       |
|    time_elapsed    | 16468    |
|    total_timesteps | 785351   |
| train/             |          |
|    actor_loss      | -223     |
|    critic_loss     | 495      |
|    ent_coef        | 0.107    |
|    ent_coef_loss   | -0.0296  |
|    learning_rate   | 0.0003   |
|    n_updates       | 785250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 908      |
| time/              |          |
|    episodes        | 3484     |
|    fps             | 47       |
|    time_elapsed    | 16489    |
|    total_timesteps | 786311   |
| train/             |          |
|    actor_loss      | -215     |
|    critic_loss     | 599      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 922      |
| time/              |          |
|    episodes        | 3540     |
|    fps             | 47       |
|    time_elapsed    | 16783    |
|    total_timesteps | 799751   |
| train/             |          |
|    actor_loss      | -221     |
|    critic_loss     | 781      |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | -0.135   |
|    learning_rate   | 0.0003   |
|    n_updates       | 799650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 930      |
| time/              |          |
|    episodes        | 3544     |
|    fps             | 47       |
|    time_elapsed    | 16806    |
|    total_timesteps | 800711   |
| train/             |          |
|    actor_loss      | -209     |
|    critic_loss     | 476      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 949      |
| time/              |          |
|    episodes        | 3600     |
|    fps             | 47       |
|    time_elapsed    | 17120    |
|    total_timesteps | 814151   |
| train/             |          |
|    actor_loss      | -217     |
|    critic_loss     | 519      |
|    ent_coef        | 0.118    |
|    ent_coef_loss   | -0.282   |
|    learning_rate   | 0.0003   |
|    n_updates       | 814050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 946      |
| time/              |          |
|    episodes        | 3604     |
|    fps             | 47       |
|    time_elapsed    | 17142    |
|    total_timesteps | 815111   |
| train/             |          |
|    actor_loss      | -211     |
|    critic_loss     | 393      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 939      |
| time/              |          |
|    episodes        | 3660     |
|    fps             | 47       |
|    time_elapsed    | 17474    |
|    total_timesteps | 828551   |
| train/             |          |
|    actor_loss      | -214     |
|    critic_loss     | 50.6     |
|    ent_coef        | 0.113    |
|    ent_coef_loss   | 0.194    |
|    learning_rate   | 0.0003   |
|    n_updates       | 828450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 941      |
| time/              |          |
|    episodes        | 3664     |
|    fps             | 47       |
|    time_elapsed    | 17503    |
|    total_timesteps | 829511   |
| train/             |          |
|    actor_loss      | -207     |
|    critic_loss     | 384      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 911      |
| time/              |          |
|    episodes        | 3720     |
|    fps             | 46       |
|    time_elapsed    | 17936    |
|    total_timesteps | 842951   |
| train/             |          |
|    actor_loss      | -217     |
|    critic_loss     | 195      |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | -0.42    |
|    learning_rate   | 0.0003   |
|    n_updates       | 842850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 914      |
| time/              |          |
|    episodes        | 3724     |
|    fps             | 46       |
|    time_elapsed    | 17959    |
|    total_timesteps | 843911   |
| train/             |          |
|    actor_loss      | -213     |
|    critic_loss     | 368      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 928      |
| time/              |          |
|    episodes        | 3780     |
|    fps             | 46       |
|    time_elapsed    | 18279    |
|    total_timesteps | 857351   |
| train/             |          |
|    actor_loss      | -203     |
|    critic_loss     | 177      |
|    ent_coef        | 0.106    |
|    ent_coef_loss   | 0.379    |
|    learning_rate   | 0.0003   |
|    n_updates       | 857250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 926      |
| time/              |          |
|    episodes        | 3784     |
|    fps             | 46       |
|    time_elapsed    | 18302    |
|    total_timesteps | 858311   |
| train/             |          |
|    actor_loss      | -209     |
|    critic_loss     | 79.3     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 941      |
| time/              |          |
|    episodes        | 3840     |
|    fps             | 46       |
|    time_elapsed    | 18644    |
|    total_timesteps | 871751   |
| train/             |          |
|    actor_loss      | -213     |
|    critic_loss     | 207      |
|    ent_coef        | 0.108    |
|    ent_coef_loss   | 0.191    |
|    learning_rate   | 0.0003   |
|    n_updates       | 871650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 938      |
| time/              |          |
|    episodes        | 3844     |
|    fps             | 46       |
|    time_elapsed    | 18667    |
|    total_timesteps | 872711   |
| train/             |          |
|    actor_loss      | -220     |
|    critic_loss     | 398      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 941      |
| time/              |          |
|    episodes        | 3900     |
|    fps             | 46       |
|    time_elapsed    | 18985    |
|    total_timesteps | 886151   |
| train/             |          |
|    actor_loss      | -220     |
|    critic_loss     | 408      |
|    ent_coef        | 0.111    |
|    ent_coef_loss   | 0.142    |
|    learning_rate   | 0.0003   |
|    n_updates       | 886050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 944      |
| time/              |          |
|    episodes        | 3904     |
|    fps             | 46       |
|    time_elapsed    | 19008    |
|    total_timesteps | 887111   |
| train/             |          |
|    actor_loss      | -223     |
|    critic_loss     | 342      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 958      |
| time/              |          |
|    episodes        | 3960     |
|    fps             | 46       |
|    time_elapsed    | 19319    |
|    total_timesteps | 900551   |
| train/             |          |
|    actor_loss      | -228     |
|    critic_loss     | 393      |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | -0.263   |
|    learning_rate   | 0.0003   |
|    n_updates       | 900450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 960      |
| time/              |          |
|    episodes        | 3964     |
|    fps             | 46       |
|    time_elapsed    | 19340    |
|    total_timesteps | 901511   |
| train/             |          |
|    actor_loss      | -218     |
|    critic_loss     | 467      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 932      |
| time/              |          |
|    episodes        | 4020     |
|    fps             | 46       |
|    time_elapsed    | 19631    |
|    total_timesteps | 914951   |
| train/             |          |
|    actor_loss      | -224     |
|    critic_loss     | 578      |
|    ent_coef        | 0.106    |
|    ent_coef_loss   | -0.235   |
|    learning_rate   | 0.0003   |
|    n_updates       | 914850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 932      |
| time/              |          |
|    episodes        | 4024     |
|    fps             | 46       |
|    time_elapsed    | 19650    |
|    total_timesteps | 915911   |
| train/             |          |
|    actor_loss      | -224     |
|    critic_loss     | 298      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 932      |
| time/              |          |
|    episodes        | 4080     |
|    fps             | 46       |
|    time_elapsed    | 19929    |
|    total_timesteps | 929351   |
| train/             |          |
|    actor_loss      | -222     |
|    critic_loss     | 209      |
|    ent_coef        | 0.11     |
|    ent_coef_loss   | -0.206   |
|    learning_rate   | 0.0003   |
|    n_updates       | 929250   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 934      |
| time/              |          |
|    episodes        | 4084     |
|    fps             | 46       |
|    time_elapsed    | 19949    |
|    total_timesteps | 930311   |
| train/             |          |
|    actor_loss      | -217     |
|    critic_loss     | 72.5     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 940      |
| time/              |          |
|    episodes        | 4140     |
|    fps             | 46       |
|    time_elapsed    | 20222    |
|    total_timesteps | 943751   |
| train/             |          |
|    actor_loss      | -220     |
|    critic_loss     | 130      |
|    ent_coef        | 0.108    |
|    ent_coef_loss   | -0.0362  |
|    learning_rate   | 0.0003   |
|    n_updates       | 943650   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 944      |
| time/              |          |
|    episodes        | 4144     |
|    fps             | 46       |
|    time_elapsed    | 20241    |
|    total_timesteps | 944711   |
| train/             |          |
|    actor_loss      | -221     |
|    critic_loss     | 120      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 960      |
| time/              |          |
|    episodes        | 4200     |
|    fps             | 46       |
|    time_elapsed    | 20514    |
|    total_timesteps | 958151   |
| train/             |          |
|    actor_loss      | -218     |
|    critic_loss     | 307      |
|    ent_coef        | 0.107    |
|    ent_coef_loss   | 0.255    |
|    learning_rate   | 0.0003   |
|    n_updates       | 958050   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 962      |
| time/              |          |
|    episodes        | 4204     |
|    fps             | 46       |
|    time_elapsed    | 20534    |
|    total_timesteps | 959111   |
| train/             |          |
|    actor_loss      | -234     |
|    critic_loss     | 30.5     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 947      |
| time/              |          |
|    episodes        | 4260     |
|    fps             | 46       |
|    time_elapsed    | 20801    |
|    total_timesteps | 972551   |
| train/             |          |
|    actor_loss      | -220     |
|    critic_loss     | 652      |
|    ent_coef        | 0.103    |
|    ent_coef_loss   | -0.0165  |
|    learning_rate   | 0.0003   |
|    n_updates       | 972450   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 945      |
| time/              |          |
|    episodes        | 4264     |
|    fps             | 46       |
|    time_elapsed    | 20819    |
|    total_timesteps | 973511   |
| train/             |          |
|    actor_loss      | -225     |
|    critic_loss     | 189      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 962      |
| time/              |          |
|    episodes        | 4320     |
|    fps             | 46       |
|    time_elapsed    | 21080    |
|    total_timesteps | 986951   |
| train/             |          |
|    actor_loss      | -231     |
|    critic_loss     | 508      |
|    ent_coef        | 0.104    |
|    ent_coef_loss   | -0.265   |
|    learning_rate   | 0.0003   |
|    n_updates       | 986850   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 240      |
|    ep_rew_mean     | 966      |
| time/              |          |
|    episodes        | 4324     |
|    fps             | 46       |
|    time_elapsed    | 21100    |
|    total_timesteps | 987911   |
| train/             |          |
|    actor_loss      | -233     |
|    critic_loss     | 347      |
|    ent_coef 

OSError: [WinError 1314] A required privilege is not held by the client: 'C:\\Users\\AVISH\\models\\nc0cqlss\\model.zip' -> 'C:\\Users\\AVISH\\wandb\\run-20231210_143604-nc0cqlss\\files\\model.zip'

## TEST TRAINED AGENT IN THE ENVIRONMENT

In [None]:
import torch as th
from stable_baselines3 import SAC
import pygame

# Evaluation cell: load a saved SAC checkpoint, roll it out for a few episodes
# in the drone environment and print the summed reward of each episode.

# Absolute, machine-specific path of the checkpoint to evaluate.
MODEL_PATH =r"C:\Users\AVISH\models\9ygbjh9i\model.zip"
# Number of evaluation episodes to run.
N_EVAL_EPISODES = 5

# Create the environment: render_every_frame=True, mouse_target=False
# (targets are placed randomly instead of following the mouse).
env = droneEnv(True, False)

try:
    # Load the trained agent and attach it to the environment
    model = SAC.load(MODEL_PATH, env=env)

    # Evaluate the agent
    for i in range(N_EVAL_EPISODES):
        obs = env.reset()
        done = False
        episode_reward = 0
        while not done:
            # deterministic=True: request the policy's deterministic action
            # rather than a sampled one.
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            episode_reward += reward
        print("Episode reward", episode_reward)
        # Explicit render once the episode has ended; render() is defined on
        # droneEnv outside this cell, the argument is passed through unchanged.
        env.render("yes")
finally:
    # droneEnv.__init__ calls pygame.init() and opens a display window.
    # Release it even when loading or the rollout fails, otherwise the window
    # is left open (and unresponsive) after the cell finishes.
    env.close()
    pygame.quit()

In [17]:
# Inspect the `tau` hyperparameter of the `model` object currently in scope.
# NOTE(review): Stable-Baselines3 documents `tau` as SAC's soft (Polyak) target-update
# coefficient — confirm which model instance this cell was run against.
model.tau

0.005