### ENVIRONMENT

In [2]:
"""
This is a gym environment for the drone game.
The goal is to reach randomly positioned targets.
"""

import os
from math import sin, cos, pi, sqrt
from random import randrange

import numpy as np
import gym
from gym import spaces

import pygame
from pygame.locals import *


class droneEnv(gym.Env):
    """
    2D drone environment rendered with Pygame.

    The drone starts at the centre of an 800x800 window and has to fly to a
    target. Each time the drone gets within 50 pixels of the target, a new
    target is drawn at a random position and the agent is rewarded.

    Args:
        render_every_frame: when ``True`` the window is redrawn after every
            simulated frame inside ``step``.
        mouse_target: when ``True`` the target follows the mouse cursor and the
            episode time limit is raised from 20 to 1000 seconds.
    """

    def __init__(self, render_every_frame, mouse_target):
        super(droneEnv, self).__init__()

        self.render_every_frame = render_every_frame
        # Makes the target follow the mouse
        self.mouse_target = mouse_target

        # Initialize Pygame, load sprites
        pygame.init()
        self.screen = pygame.display.set_mode((800, 800))
        self.FramePerSec = pygame.time.Clock()

        # Surface.convert*() returns a NEW surface, so the result has to be
        # kept. convert_alpha() preserves the per-pixel transparency of the PNGs.
        self.player = pygame.image.load(
            os.path.join("assets", "sprites", "drone_old.png")
        ).convert_alpha()
        self.target = pygame.image.load(
            os.path.join("assets", "sprites", "target_old.png")
        ).convert_alpha()

        pygame.font.init()
        self.myfont = pygame.font.SysFont("Comic Sans MS", 20)

        # Physics constants (positions in pixels, angles in degrees,
        # velocities/accelerations per simulated frame)
        self.FPS = 60
        self.gravity = 0.08
        self.thruster_amplitude = 0.04
        self.diff_amplitude = 0.003
        self.thruster_mean = 0.04
        self.mass = 1
        self.arm = 25

        # Drone state, target position and episode counters
        self._reset_state()

        # Episode length limit in simulated seconds
        self.time_limit = 20
        if self.mouse_target is True:
            self.time_limit = 1000

        # 2 actions: thrust amplitude and thrust difference, floats in [-1, 1]
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        # 6 observations: angle_to_up, velocity, angle_velocity,
        # distance_to_target, angle_to_target, angle_target_and_velocity
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32
        )

    def _reset_state(self):
        """Put the drone back at the centre, draw a new target, clear counters."""
        # (value, first derivative, second derivative) for angle, x and y
        (self.a, self.ad, self.add) = (0, 0, 0)
        (self.x, self.xd, self.xdd) = (400, 0, 0)
        (self.y, self.yd, self.ydd) = (400, 0, 0)
        self.xt = randrange(200, 600)
        self.yt = randrange(200, 600)

        self.target_counter = 0
        self.reward = 0
        self.time = 0

    def reset(self):
        """
        Start a new episode.

        Returns:
            np.ndarray: the first observation, see ``get_obs``.
        """
        self._reset_state()
        return self.get_obs()

    def get_obs(self) -> np.ndarray:
        """
        Calculates the observations

        Returns:
            np.ndarray: float32 vector of length 6:
            - angle_to_up : drone angle converted from degrees to radians
            - velocity : norm of the velocity vector (pixels per frame)
            - angle_velocity : angular velocity ``self.ad`` (degrees per frame)
            - distance_to_target : distance to the target divided by 500
            - angle_to_target : direction of the drone -> target vector (radians)
            - angle_target_and_velocity : angle_to_target minus the direction
              of the velocity vector (radians)
        """
        dx = self.xt - self.x
        dy = self.yt - self.y

        angle_to_up = self.a / 180 * pi
        velocity = sqrt(self.xd**2 + self.yd**2)
        angle_velocity = self.ad
        distance_to_target = sqrt(dx**2 + dy**2) / 500
        angle_to_target = np.arctan2(dy, dx)
        # NOTE: this difference is not wrapped, so it lies in (-2*pi, 2*pi).
        # It is kept as-is so observations match previously trained policies.
        angle_target_and_velocity = angle_to_target - np.arctan2(self.yd, self.xd)

        return np.array(
            [
                angle_to_up,
                velocity,
                angle_velocity,
                distance_to_target,
                angle_to_target,
                angle_target_and_velocity,
            ]
        ).astype(np.float32)

    def step(self, action):
        """
        Apply ``action`` for up to 5 simulated frames.

        Args:
            action: sequence of two floats. ``action[0]`` scales the common
                thrust of both thrusters, ``action[1]`` scales the thrust
                difference between them.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``reward`` is
            the sum of the per-frame rewards of this step and ``info`` is an
            empty dict.
        """
        self.reward = 0.0
        thrust_cmd, diff_cmd = action[0], action[1]
        # The loop below always runs at least once and sets ``done``; the
        # initial value only makes that explicit.
        done = False

        # Act every 5 frames
        for _ in range(5):
            self.time += 1 / 60

            if self.mouse_target is True:
                self.xt, self.yt = pygame.mouse.get_pos()

            # Initialize accelerations (y grows downwards, so gravity is positive)
            self.xdd = 0
            self.ydd = self.gravity
            self.add = 0
            thruster_left = self.thruster_mean
            thruster_right = self.thruster_mean

            thruster_left += thrust_cmd * self.thruster_amplitude
            thruster_right += thrust_cmd * self.thruster_amplitude
            thruster_left += diff_cmd * self.diff_amplitude
            thruster_right -= diff_cmd * self.diff_amplitude

            # Calculating accelerations with Newton's laws of motions
            total_thrust = thruster_left + thruster_right
            self.xdd += -total_thrust * sin(self.a * pi / 180) / self.mass
            self.ydd += -total_thrust * cos(self.a * pi / 180) / self.mass
            self.add += self.arm * (thruster_right - thruster_left) / self.mass

            # Integrate accelerations -> velocities -> positions, one frame
            self.xd += self.xdd
            self.yd += self.ydd
            self.ad += self.add
            self.x += self.xd
            self.y += self.yd
            self.a += self.ad

            dist = sqrt((self.x - self.xt) ** 2 + (self.y - self.yt) ** 2)

            # Reward per step survived
            self.reward += 1 / 60
            # Penalty according to the distance to target
            self.reward -= dist / (100 * 60)

            if dist < 50:
                # Target reached: reward, count it and draw a new target
                self.xt = randrange(200, 600)
                self.yt = randrange(200, 600)
                self.reward += 100
                self.target_counter += 1

            # If out of time
            if self.time > self.time_limit:
                done = True
                break

            # If too far from target (crash)
            elif dist > 1000:
                self.reward -= 1000
                done = True
                break

            else:
                done = False

            if self.render_every_frame is True:
                self.render()

        info = {}

        return (
            self.get_obs(),
            self.reward,
            done,
            info,
        )

    def render(self, mode="human"):
        """
        Draw the target, the rotated drone and the HUD, then wait for the
        frame clock.

        Args:
            mode: accepted for compatibility with the gym API; it is not used.
        """
        # Drain the event queue so the window stays responsive
        pygame.event.get()
        self.screen.fill(0)

        # Sprites are blitted so that their centre sits on the logical position
        self.screen.blit(
            self.target,
            (
                self.xt - int(self.target.get_width() / 2),
                self.yt - int(self.target.get_height() / 2),
            ),
        )
        player_copy = pygame.transform.rotate(self.player, self.a)
        self.screen.blit(
            player_copy,
            (
                self.x - int(player_copy.get_width() / 2),
                self.y - int(player_copy.get_height() / 2),
            ),
        )

        textsurface = self.myfont.render(
            "Collected: " + str(self.target_counter), False, (255, 255, 255)
        )
        self.screen.blit(textsurface, (20, 20))
        textsurface3 = self.myfont.render(
            "Time: " + str(int(self.time)), False, (255, 255, 255)
        )
        self.screen.blit(textsurface3, (20, 50))

        pygame.display.update()
        self.FramePerSec.tick(self.FPS)

    def close(self):
        """Shut Pygame down so the window created in ``__init__`` is released."""
        pygame.quit()


## TEST ENVIRONMENT WORKING

In [3]:
# Smoke test: build the environment, inspect its spaces and run a few steps
# with a fixed action while rendering every frame.
env = droneEnv(True, False)

try:
    obs = env.reset()
    env.render("yes")

    print("Observation space:")
    print(env.observation_space)
    print("")
    print("Action space:")
    print(env.action_space)
    print("")
    print("Action space sample:")
    print(env.action_space.sample())

    # Choose an action to execute n_steps times
    action = np.array([0.0, 1.0])
    n_steps = 10
    for step in range(n_steps):
        print("Step {}".format(step + 1))
        obs, reward, done, info = env.step(action)
        print("obs=", obs, "reward=", reward, "done=", done)
        env.render("yes")
        if done:
            print("Done!", "reward=", reward)
            break
finally:
    # `env` is rebound to a fresh environment in the training cell, so this
    # instance is released here even if a step raises.
    env.close()


Observation space:
Box([-inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf], (6,), float32)

Action space:
Box([-1. -1.], [1. 1.], (2,), float32)

Action space sample:
[0.69754815 0.47666866]
Step 1
obs= [-0.03926991  0.00418868 -0.75        0.5318017   0.8013739   0.79181826] reward= -0.13825362982355913 done= False
Step 2
obs= [-0.14398967  0.03454228 -1.5         0.5316636   0.80162644  0.7626214 ] reward= -0.13822441168881405 done= False
Step 3
obs= [-0.31415927  0.1170206  -2.25        0.53107655  0.8026165   0.7145571 ] reward= -0.13806922357633705 done= False
Step 4
obs= [-0.5497787   0.27655566 -3.          0.52946705  0.80502886  0.6484174 ] reward= -0.13759285149245498 done= False
Step 5
obs= [-0.850848    0.53502405 -3.75        0.5259651   0.8095198   0.56514055] reward= -0.13648331002542924 done= False
Step 6
obs= [-1.2173672   0.907548   -4.5         0.5194007   0.8164096   0.46564546] reward= -0.13430199893417313 done= False
Step 7
obs= [-1.6493361   1.3969848  -5.

## TRAIN PPO AGENT

In [8]:
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback
import wandb
from wandb.integration.sb3 import WandbCallback


# Start a Weights & Biases run that mirrors the TensorBoard logs
run = wandb.init(
    project="CSP",
    sync_tensorboard=True,
    monitor_gym=True,
)

# Create log dir
log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)

# Create and wrap the environment (no per-frame rendering, random targets)
env = droneEnv(False, False)
env = Monitor(env, log_dir)

# Create PPO agent
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_dir)

training_steps = 1_000_000

# Create checkpoint callback; save_freq equals the training length, so a
# single checkpoint is written at the end of training
checkpoint_callback = CheckpointCallback(
    save_freq=training_steps, save_path=log_dir, name_prefix="ppo_model_v6"
)

# Train the agent
try:
    model.learn(
        total_timesteps=training_steps,
        callback=[
            checkpoint_callback,
            WandbCallback(
                gradient_save_freq=100,
                model_save_path=f"models/{run.id}",
                model_save_freq=training_steps,
                verbose=2,
            ),
        ],
    )
finally:
    # Close the wandb run even when training is interrupted, so the run is
    # not left open in the notebook kernel. `env` is intentionally left
    # open in case later cells reuse it.
    run.finish()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…



Using cpu device
Wrapping the env in a DummyVecEnv.
Logging to tmp/PPO_5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 41.5     |
|    ep_rew_mean     | -991     |
| time/              |          |
|    fps             | 712      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 40.9         |
|    ep_rew_mean          | -985         |
| time/                   |              |
|    fps                  | 355          |
|    iterations           | 2            |
|    time_elapsed         | 11           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0040013473 |
|    clip_fraction        | 0.0205       |
|    clip_range           | 0.2          |
|    entropy_loss         | 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 43.9         |
|    ep_rew_mean          | -988         |
| time/                   |              |
|    fps                  | 358          |
|    iterations           | 11           |
|    time_elapsed         | 62           |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0044342554 |
|    clip_fraction        | 0.0216       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.94        |
|    explained_variance   | 0.000243     |
|    learning_rate        | 0.0003       |
|    loss                 | 8.07e+04     |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.00326     |
|    std                  | 1.05         |
|    value_loss           | 1.69e+05     |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 42.7        |
|    ep_rew_mean          | -985        |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 20          |
|    time_elapsed         | 112         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.005374747 |
|    clip_fraction        | 0.0362      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.05       |
|    explained_variance   | 3.53e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 5.6e+04     |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00339    |
|    std                  | 1.12        |
|    value_loss           | 1.44e+05    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 44.4        |
|    ep_rew_mean          | -994        |
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 29          |
|    time_elapsed         | 161         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.004978488 |
|    clip_fraction        | 0.0277      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.12       |
|    explained_variance   | 1.15e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 6.06e+04    |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.00352    |
|    std                  | 1.16        |
|    value_loss           | 1.21e+05    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 42.8         |
|    ep_rew_mean          | -989         |
| time/                   |              |
|    fps                  | 368          |
|    iterations           | 38           |
|    time_elapsed         | 210          |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0036581086 |
|    clip_fraction        | 0.019        |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.17        |
|    explained_variance   | 6.56e-06     |
|    learning_rate        | 0.0003       |
|    loss                 | 6.64e+04     |
|    n_updates            | 370          |
|    policy_gradient_loss | -0.0026      |
|    std                  | 1.18         |
|    value_loss           | 1.07e+05     |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 47.1         |
|    ep_rew_mean          | -974         |
| time/                   |              |
|    fps                  | 370          |
|    iterations           | 47           |
|    time_elapsed         | 259          |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 0.0049305884 |
|    clip_fraction        | 0.0377       |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.19        |
|    explained_variance   | 3.21e-05     |
|    learning_rate        | 0.0003       |
|    loss                 | 4.26e+04     |
|    n_updates            | 460          |
|    policy_gradient_loss | -0.00467     |
|    std                  | 1.19         |
|    value_loss           | 8.08e+04     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 65.5         |
|    ep_rew_mean          | -984         |
| time/                   |              |
|    fps                  | 373          |
|    iterations           | 56           |
|    time_elapsed         | 307          |
|    total_timesteps      | 114688       |
| train/                  |              |
|    approx_kl            | 0.0031696334 |
|    clip_fraction        | 0.0301       |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.13        |
|    explained_variance   | 0.111        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.79e+04     |
|    n_updates            | 550          |
|    policy_gradient_loss | -0.00334     |
|    std                  | 1.17         |
|    value_loss           | 4.65e+04     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 118         |
|    ep_rew_mean          | -865        |
| time/                   |             |
|    fps                  | 372         |
|    iterations           | 65          |
|    time_elapsed         | 357         |
|    total_timesteps      | 133120      |
| train/                  |             |
|    approx_kl            | 0.005692034 |
|    clip_fraction        | 0.0489      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.18       |
|    explained_variance   | 0.374       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.57e+03    |
|    n_updates            | 640         |
|    policy_gradient_loss | -0.00687    |
|    std                  | 1.19        |
|    value_loss           | 2.08e+04    |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 179         |
|    ep_rew_mean          | -580        |
| time/                   |             |
|    fps                  | 368         |
|    iterations           | 74          |
|    time_elapsed         | 411         |
|    total_timesteps      | 151552      |
| train/                  |             |
|    approx_kl            | 0.004136608 |
|    clip_fraction        | 0.0215      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.13       |
|    explained_variance   | 0.54        |
|    learning_rate        | 0.0003      |
|    loss                 | 4.06e+03    |
|    n_updates            | 730         |
|    policy_gradient_loss | -0.00409    |
|    std                  | 1.17        |
|    value_loss           | 9.39e+03    |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 223          |
|    ep_rew_mean          | -106         |
| time/                   |              |
|    fps                  | 368          |
|    iterations           | 83           |
|    time_elapsed         | 461          |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 0.0039419592 |
|    clip_fraction        | 0.048        |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.03        |
|    explained_variance   | 0.565        |
|    learning_rate        | 0.0003       |
|    loss                 | 813          |
|    n_updates            | 820          |
|    policy_gradient_loss | -0.0034      |
|    std                  | 1.12         |
|    value_loss           | 2.67e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 239         |
|    ep_rew_mean          | 288         |
| time/                   |             |
|    fps                  | 366         |
|    iterations           | 92          |
|    time_elapsed         | 513         |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.006668342 |
|    clip_fraction        | 0.0611      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.87       |
|    explained_variance   | 0.676       |
|    learning_rate        | 0.0003      |
|    loss                 | 403         |
|    n_updates            | 910         |
|    policy_gradient_loss | -0.00518    |
|    std                  | 1.03        |
|    value_loss           | 723         |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 237          |
|    ep_rew_mean          | 390          |
| time/                   |              |
|    fps                  | 366          |
|    iterations           | 101          |
|    time_elapsed         | 564          |
|    total_timesteps      | 206848       |
| train/                  |              |
|    approx_kl            | 0.0057046283 |
|    clip_fraction        | 0.0406       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.78        |
|    explained_variance   | 0.47         |
|    learning_rate        | 0.0003       |
|    loss                 | 485          |
|    n_updates            | 1000         |
|    policy_gradient_loss | -0.00177     |
|    std                  | 0.984        |
|    value_loss           | 1.16e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 237         |
|    ep_rew_mean          | 459         |
| time/                   |             |
|    fps                  | 365         |
|    iterations           | 110         |
|    time_elapsed         | 615         |
|    total_timesteps      | 225280      |
| train/                  |             |
|    approx_kl            | 0.003623146 |
|    clip_fraction        | 0.0242      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.7        |
|    explained_variance   | 0.656       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.9e+03     |
|    n_updates            | 1090        |
|    policy_gradient_loss | -0.00329    |
|    std                  | 0.947       |
|    value_loss           | 3.65e+03    |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 239          |
|    ep_rew_mean          | 565          |
| time/                   |              |
|    fps                  | 363          |
|    iterations           | 119          |
|    time_elapsed         | 670          |
|    total_timesteps      | 243712       |
| train/                  |              |
|    approx_kl            | 0.0053914413 |
|    clip_fraction        | 0.0495       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.57        |
|    explained_variance   | 0.544        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.42e+03     |
|    n_updates            | 1180         |
|    policy_gradient_loss | -0.00625     |
|    std                  | 0.889        |
|    value_loss           | 3.37e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 238         |
|    ep_rew_mean          | 616         |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 128         |
|    time_elapsed         | 724         |
|    total_timesteps      | 262144      |
| train/                  |             |
|    approx_kl            | 0.007251516 |
|    clip_fraction        | 0.0543      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.48       |
|    explained_variance   | 0.716       |
|    learning_rate        | 0.0003      |
|    loss                 | 951         |
|    n_updates            | 1270        |
|    policy_gradient_loss | -0.00403    |
|    std                  | 0.85        |
|    value_loss           | 3.79e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 236         |
|    ep_rew_mean          | 632         |
| time/                   |             |
|    fps                  | 360         |
|    iterations           | 137         |
|    time_elapsed         | 778         |
|    total_timesteps      | 280576      |
| train/                  |             |
|    approx_kl            | 0.007810317 |
|    clip_fraction        | 0.0678      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.42       |
|    explained_variance   | 0.377       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.4e+03     |
|    n_updates            | 1360        |
|    policy_gradient_loss | -0.00438    |
|    std                  | 0.826       |
|    value_loss           | 2.12e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 709         |
| time/                   |             |
|    fps                  | 358         |
|    iterations           | 146         |
|    time_elapsed         | 834         |
|    total_timesteps      | 299008      |
| train/                  |             |
|    approx_kl            | 0.007836312 |
|    clip_fraction        | 0.079       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.29       |
|    explained_variance   | 0.35        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.19e+03    |
|    n_updates            | 1450        |
|    policy_gradient_loss | -0.00258    |
|    std                  | 0.775       |
|    value_loss           | 1.93e+03    |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 736         |
| time/                   |             |
|    fps                  | 356         |
|    iterations           | 155         |
|    time_elapsed         | 890         |
|    total_timesteps      | 317440      |
| train/                  |             |
|    approx_kl            | 0.008420177 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.21       |
|    explained_variance   | 0.393       |
|    learning_rate        | 0.0003      |
|    loss                 | 555         |
|    n_updates            | 1540        |
|    policy_gradient_loss | -0.00299    |
|    std                  | 0.75        |
|    value_loss           | 1.87e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 747         |
| time/                   |             |
|    fps                  | 355         |
|    iterations           | 164         |
|    time_elapsed         | 943         |
|    total_timesteps      | 335872      |
| train/                  |             |
|    approx_kl            | 0.007289159 |
|    clip_fraction        | 0.0761      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.21       |
|    explained_variance   | 0.503       |
|    learning_rate        | 0.0003      |
|    loss                 | 822         |
|    n_updates            | 1630        |
|    policy_gradient_loss | -0.00402    |
|    std                  | 0.754       |
|    value_loss           | 1.96e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 711         |
| time/                   |             |
|    fps                  | 355         |
|    iterations           | 173         |
|    time_elapsed         | 996         |
|    total_timesteps      | 354304      |
| train/                  |             |
|    approx_kl            | 0.009283798 |
|    clip_fraction        | 0.0856      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.16       |
|    explained_variance   | 0.465       |
|    learning_rate        | 0.0003      |
|    loss                 | 784         |
|    n_updates            | 1720        |
|    policy_gradient_loss | -0.00469    |
|    std                  | 0.725       |
|    value_loss           | 1.76e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 775         |
| time/                   |             |
|    fps                  | 355         |
|    iterations           | 182         |
|    time_elapsed         | 1048        |
|    total_timesteps      | 372736      |
| train/                  |             |
|    approx_kl            | 0.007214131 |
|    clip_fraction        | 0.1         |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.07       |
|    explained_variance   | 0.477       |
|    learning_rate        | 0.0003      |
|    loss                 | 748         |
|    n_updates            | 1810        |
|    policy_gradient_loss | -0.00537    |
|    std                  | 0.694       |
|    value_loss           | 2.12e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 818         |
| time/                   |             |
|    fps                  | 355         |
|    iterations           | 191         |
|    time_elapsed         | 1100        |
|    total_timesteps      | 391168      |
| train/                  |             |
|    approx_kl            | 0.008790467 |
|    clip_fraction        | 0.09        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.98       |
|    explained_variance   | 0.357       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.71e+03    |
|    n_updates            | 1900        |
|    policy_gradient_loss | -0.00259    |
|    std                  | 0.664       |
|    value_loss           | 2.72e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 240          |
|    ep_rew_mean          | 818          |
| time/                   |              |
|    fps                  | 354          |
|    iterations           | 200          |
|    time_elapsed         | 1155         |
|    total_timesteps      | 409600       |
| train/                  |              |
|    approx_kl            | 0.0075951396 |
|    clip_fraction        | 0.126        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | 0.415        |
|    learning_rate        | 0.0003       |
|    loss                 | 500          |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.00563     |
|    std                  | 0.651        |
|    value_loss           | 2.04e+03     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 240          |
|    ep_rew_mean          | 816          |
| time/                   |              |
|    fps                  | 354          |
|    iterations           | 209          |
|    time_elapsed         | 1207         |
|    total_timesteps      | 428032       |
| train/                  |              |
|    approx_kl            | 0.0075134747 |
|    clip_fraction        | 0.0724       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.9         |
|    explained_variance   | 0.419        |
|    learning_rate        | 0.0003       |
|    loss                 | 589          |
|    n_updates            | 2080         |
|    policy_gradient_loss | -0.00237     |
|    std                  | 0.639        |
|    value_loss           | 2.41e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 844         |
| time/                   |             |
|    fps                  | 354         |
|    iterations           | 218         |
|    time_elapsed         | 1260        |
|    total_timesteps      | 446464      |
| train/                  |             |
|    approx_kl            | 0.007115772 |
|    clip_fraction        | 0.0706      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.84       |
|    explained_variance   | 0.4         |
|    learning_rate        | 0.0003      |
|    loss                 | 1.24e+03    |
|    n_updates            | 2170        |
|    policy_gradient_loss | -0.00223    |
|    std                  | 0.62        |
|    value_loss           | 2.49e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 853         |
| time/                   |             |
|    fps                  | 353         |
|    iterations           | 227         |
|    time_elapsed         | 1315        |
|    total_timesteps      | 464896      |
| train/                  |             |
|    approx_kl            | 0.010486128 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.398       |
|    learning_rate        | 0.0003      |
|    loss                 | 700         |
|    n_updates            | 2260        |
|    policy_gradient_loss | -0.00561    |
|    std                  | 0.597       |
|    value_loss           | 2.39e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 848         |
| time/                   |             |
|    fps                  | 352         |
|    iterations           | 236         |
|    time_elapsed         | 1370        |
|    total_timesteps      | 483328      |
| train/                  |             |
|    approx_kl            | 0.007465494 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.78       |
|    explained_variance   | 0.377       |
|    learning_rate        | 0.0003      |
|    loss                 | 993         |
|    n_updates            | 2350        |
|    policy_gradient_loss | -0.00571    |
|    std                  | 0.594       |
|    value_loss           | 2.64e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 857         |
| time/                   |             |
|    fps                  | 352         |
|    iterations           | 245         |
|    time_elapsed         | 1424        |
|    total_timesteps      | 501760      |
| train/                  |             |
|    approx_kl            | 0.008974627 |
|    clip_fraction        | 0.0921      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.389       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.15e+03    |
|    n_updates            | 2440        |
|    policy_gradient_loss | -0.00444    |
|    std                  | 0.591       |
|    value_loss           | 2.53e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 854         |
| time/                   |             |
|    fps                  | 351         |
|    iterations           | 254         |
|    time_elapsed         | 1480        |
|    total_timesteps      | 520192      |
| train/                  |             |
|    approx_kl            | 0.008830607 |
|    clip_fraction        | 0.13        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.7        |
|    explained_variance   | 0.395       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.52e+03    |
|    n_updates            | 2530        |
|    policy_gradient_loss | -0.00368    |
|    std                  | 0.569       |
|    value_loss           | 2.73e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 841         |
| time/                   |             |
|    fps                  | 349         |
|    iterations           | 263         |
|    time_elapsed         | 1542        |
|    total_timesteps      | 538624      |
| train/                  |             |
|    approx_kl            | 0.010252647 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.66       |
|    explained_variance   | 0.481       |
|    learning_rate        | 0.0003      |
|    loss                 | 618         |
|    n_updates            | 2620        |
|    policy_gradient_loss | -0.00402    |
|    std                  | 0.556       |
|    value_loss           | 1.86e+03    |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 851         |
| time/                   |             |
|    fps                  | 349         |
|    iterations           | 272         |
|    time_elapsed         | 1596        |
|    total_timesteps      | 557056      |
| train/                  |             |
|    approx_kl            | 0.009355132 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.65       |
|    explained_variance   | 0.319       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.45e+03    |
|    n_updates            | 2710        |
|    policy_gradient_loss | -0.00415    |
|    std                  | 0.552       |
|    value_loss           | 2.84e+03    |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 858         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 281         |
|    time_elapsed         | 1649        |
|    total_timesteps      | 575488      |
| train/                  |             |
|    approx_kl            | 0.009543221 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.59       |
|    explained_variance   | 0.26        |
|    learning_rate        | 0.0003      |
|    loss                 | 814         |
|    n_updates            | 2800        |
|    policy_gradient_loss | -0.00387    |
|    std                  | 0.537       |
|    value_loss           | 3.05e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 845         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 290         |
|    time_elapsed         | 1702        |
|    total_timesteps      | 593920      |
| train/                  |             |
|    approx_kl            | 0.010164628 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.59       |
|    explained_variance   | 0.365       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.73e+03    |
|    n_updates            | 2890        |
|    policy_gradient_loss | -0.00358    |
|    std                  | 0.535       |
|    value_loss           | 2.78e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 881         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 299         |
|    time_elapsed         | 1755        |
|    total_timesteps      | 612352      |
| train/                  |             |
|    approx_kl            | 0.010899505 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.63       |
|    explained_variance   | 0.362       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.28e+03    |
|    n_updates            | 2980        |
|    policy_gradient_loss | -0.00307    |
|    std                  | 0.551       |
|    value_loss           | 2.5e+03     |
-----------------------------------------
--------------------------------------
| rollout/                |          

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 908         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 308         |
|    time_elapsed         | 1809        |
|    total_timesteps      | 630784      |
| train/                  |             |
|    approx_kl            | 0.011762329 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.54       |
|    explained_variance   | 0.28        |
|    learning_rate        | 0.0003      |
|    loss                 | 428         |
|    n_updates            | 3070        |
|    policy_gradient_loss | 0.000162    |
|    std                  | 0.521       |
|    value_loss           | 2.57e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 880         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 317         |
|    time_elapsed         | 1863        |
|    total_timesteps      | 649216      |
| train/                  |             |
|    approx_kl            | 0.012602403 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.51       |
|    explained_variance   | 0.316       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.44e+03    |
|    n_updates            | 3160        |
|    policy_gradient_loss | -0.0039     |
|    std                  | 0.517       |
|    value_loss           | 3.6e+03     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 888         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 326         |
|    time_elapsed         | 1915        |
|    total_timesteps      | 667648      |
| train/                  |             |
|    approx_kl            | 0.011809302 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.47       |
|    explained_variance   | 0.31        |
|    learning_rate        | 0.0003      |
|    loss                 | 995         |
|    n_updates            | 3250        |
|    policy_gradient_loss | -0.00274    |
|    std                  | 0.507       |
|    value_loss           | 3.3e+03     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 240          |
|    ep_rew_mean          | 879          |
| time/                   |              |
|    fps                  | 348          |
|    iterations           | 335          |
|    time_elapsed         | 1967         |
|    total_timesteps      | 686080       |
| train/                  |              |
|    approx_kl            | 0.0075826594 |
|    clip_fraction        | 0.0993       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0.294        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.6e+03      |
|    n_updates            | 3340         |
|    policy_gradient_loss | -0.00146     |
|    std                  | 0.491        |
|    value_loss           | 2.83e+03     |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 848         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 344         |
|    time_elapsed         | 2022        |
|    total_timesteps      | 704512      |
| train/                  |             |
|    approx_kl            | 0.013499703 |
|    clip_fraction        | 0.153       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.32       |
|    explained_variance   | 0.323       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.49e+03    |
|    n_updates            | 3430        |
|    policy_gradient_loss | -0.00707    |
|    std                  | 0.474       |
|    value_loss           | 2.54e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 861         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 353         |
|    time_elapsed         | 2076        |
|    total_timesteps      | 722944      |
| train/                  |             |
|    approx_kl            | 0.011822827 |
|    clip_fraction        | 0.122       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.26       |
|    explained_variance   | 0.315       |
|    learning_rate        | 0.0003      |
|    loss                 | 765         |
|    n_updates            | 3520        |
|    policy_gradient_loss | 0.00067     |
|    std                  | 0.454       |
|    value_loss           | 3e+03       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 878         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 362         |
|    time_elapsed         | 2129        |
|    total_timesteps      | 741376      |
| train/                  |             |
|    approx_kl            | 0.008755861 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.19       |
|    explained_variance   | 0.29        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.1e+03     |
|    n_updates            | 3610        |
|    policy_gradient_loss | -0.00439    |
|    std                  | 0.444       |
|    value_loss           | 2.69e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 887         |
| time/                   |             |
|    fps                  | 347         |
|    iterations           | 371         |
|    time_elapsed         | 2183        |
|    total_timesteps      | 759808      |
| train/                  |             |
|    approx_kl            | 0.010446267 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.16       |
|    explained_variance   | 0.285       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.03e+03    |
|    n_updates            | 3700        |
|    policy_gradient_loss | -0.00579    |
|    std                  | 0.435       |
|    value_loss           | 3.52e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 922         |
| time/                   |             |
|    fps                  | 347         |
|    iterations           | 380         |
|    time_elapsed         | 2240        |
|    total_timesteps      | 778240      |
| train/                  |             |
|    approx_kl            | 0.010790566 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.11       |
|    explained_variance   | 0.288       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.53e+03    |
|    n_updates            | 3790        |
|    policy_gradient_loss | -0.00207    |
|    std                  | 0.427       |
|    value_loss           | 3.7e+03     |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 940         |
| time/                   |             |
|    fps                  | 347         |
|    iterations           | 389         |
|    time_elapsed         | 2292        |
|    total_timesteps      | 796672      |
| train/                  |             |
|    approx_kl            | 0.007904313 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.08       |
|    explained_variance   | 0.386       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.12e+03    |
|    n_updates            | 3880        |
|    policy_gradient_loss | -0.00258    |
|    std                  | 0.42        |
|    value_loss           | 2.83e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 947         |
| time/                   |             |
|    fps                  | 347         |
|    iterations           | 398         |
|    time_elapsed         | 2342        |
|    total_timesteps      | 815104      |
| train/                  |             |
|    approx_kl            | 0.013903421 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.02       |
|    explained_variance   | 0.359       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.73e+03    |
|    n_updates            | 3970        |
|    policy_gradient_loss | -0.00312    |
|    std                  | 0.413       |
|    value_loss           | 2.7e+03     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 948         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 407         |
|    time_elapsed         | 2395        |
|    total_timesteps      | 833536      |
| train/                  |             |
|    approx_kl            | 0.010346816 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.06       |
|    explained_variance   | 0.305       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.44e+03    |
|    n_updates            | 4060        |
|    policy_gradient_loss | -0.00429    |
|    std                  | 0.414       |
|    value_loss           | 3.62e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 238         |
|    ep_rew_mean          | 881         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 416         |
|    time_elapsed         | 2445        |
|    total_timesteps      | 851968      |
| train/                  |             |
|    approx_kl            | 0.010294322 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.951      |
|    explained_variance   | 0.507       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.44e+03    |
|    n_updates            | 4150        |
|    policy_gradient_loss | -0.00755    |
|    std                  | 0.393       |
|    value_loss           | 6.13e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 898         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 425         |
|    time_elapsed         | 2499        |
|    total_timesteps      | 870400      |
| train/                  |             |
|    approx_kl            | 0.013335604 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.888      |
|    explained_variance   | 0.335       |
|    learning_rate        | 0.0003      |
|    loss                 | 575         |
|    n_updates            | 4240        |
|    policy_gradient_loss | -0.00605    |
|    std                  | 0.385       |
|    value_loss           | 2.71e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 912         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 434         |
|    time_elapsed         | 2547        |
|    total_timesteps      | 888832      |
| train/                  |             |
|    approx_kl            | 0.008581354 |
|    clip_fraction        | 0.135       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.82       |
|    explained_variance   | 0.289       |
|    learning_rate        | 0.0003      |
|    loss                 | 854         |
|    n_updates            | 4330        |
|    policy_gradient_loss | -0.00125    |
|    std                  | 0.372       |
|    value_loss           | 2.77e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 903         |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 443         |
|    time_elapsed         | 2600        |
|    total_timesteps      | 907264      |
| train/                  |             |
|    approx_kl            | 0.011102966 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.866      |
|    explained_variance   | 0.422       |
|    learning_rate        | 0.0003      |
|    loss                 | 851         |
|    n_updates            | 4420        |
|    policy_gradient_loss | -0.00282    |
|    std                  | 0.383       |
|    value_loss           | 2.78e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 918         |
| time/                   |             |
|    fps                  | 349         |
|    iterations           | 452         |
|    time_elapsed         | 2649        |
|    total_timesteps      | 925696      |
| train/                  |             |
|    approx_kl            | 0.015546444 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.84       |
|    explained_variance   | 0.271       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.47e+03    |
|    n_updates            | 4510        |
|    policy_gradient_loss | -0.00949    |
|    std                  | 0.374       |
|    value_loss           | 3.66e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 240         |
|    ep_rew_mean          | 932         |
| time/                   |             |
|    fps                  | 349         |
|    iterations           | 461         |
|    time_elapsed         | 2698        |
|    total_timesteps      | 944128      |
| train/                  |             |
|    approx_kl            | 0.012015989 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.815      |
|    explained_variance   | 0.194       |
|    learning_rate        | 0.0003      |
|    loss                 | 974         |
|    n_updates            | 4600        |
|    policy_gradient_loss | 0.000931    |
|    std                  | 0.371       |
|    value_loss           | 3.32e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 238          |
|    ep_rew_mean          | 937          |
| time/                   |              |
|    fps                  | 350          |
|    iterations           | 470          |
|    time_elapsed         | 2747         |
|    total_timesteps      | 962560       |
| train/                  |              |
|    approx_kl            | 0.0142571125 |
|    clip_fraction        | 0.164        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.795       |
|    explained_variance   | 0.275        |
|    learning_rate        | 0.0003       |
|    loss                 | 993          |
|    n_updates            | 4690         |
|    policy_gradient_loss | -0.000482    |
|    std                  | 0.366        |
|    value_loss           | 3.38e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 239         |
|    ep_rew_mean          | 953         |
| time/                   |             |
|    fps                  | 351         |
|    iterations           | 479         |
|    time_elapsed         | 2791        |
|    total_timesteps      | 980992      |
| train/                  |             |
|    approx_kl            | 0.012817298 |
|    clip_fraction        | 0.154       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.773      |
|    explained_variance   | 0.296       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.75e+03    |
|    n_updates            | 4780        |
|    policy_gradient_loss | -0.00518    |
|    std                  | 0.363       |
|    value_loss           | 3.27e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 239         |
|    ep_rew_mean          | 915         |
| time/                   |             |
|    fps                  | 352         |
|    iterations           | 488         |
|    time_elapsed         | 2833        |
|    total_timesteps      | 999424      |
| train/                  |             |
|    approx_kl            | 0.012937615 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.781      |
|    explained_variance   | 0.359       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.3e+03     |
|    n_updates            | 4870        |
|    policy_gradient_loss | -0.0022     |
|    std                  | 0.367       |
|    value_loss           | 3.36e+03    |
-----------------------------------------


OSError: [WinError 1314] A required privilege is not held by the client: 'C:\\Users\\AVISH\\models\\5kda4cwh\\model.zip' -> 'C:\\Users\\AVISH\\wandb\\run-20231209_172614-5kda4cwh\\files\\model.zip'

## TEST TRAINED AGENT IN THE ENVIRONMENT

In [10]:
import torch as th
from stable_baselines3 import PPO
# Checkpoint of the trained PPO agent to evaluate.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
MODEL_PATH = r"C:\Users\AVISH\tmp\ppo_model_v1_1000000_steps.zip"
# Number of evaluation episodes to play.
N_EVAL_EPISODES = 5

# Create the environment: render_every_frame=True, mouse_target=False
env = droneEnv(True, False)

try:
    # Load the trained agent. Stable-Baselines3 wraps `env` internally
    # (Monitor + DummyVecEnv); the loop below still drives the raw env directly.
    model = PPO.load(MODEL_PATH, env=env)

    # Evaluate the agent: play full episodes and report the summed reward of each.
    for i in range(N_EVAL_EPISODES):
        obs = env.reset()
        done = False
        episode_reward = 0
        while not done:
            # deterministic=True: do not sample from the policy distribution
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            episode_reward += reward
        print("Episode reward", episode_reward)
        # Draw the final state of the finished episode.
        env.render("yes")
finally:
    # Always release the environment, even if loading or an episode fails,
    # so a re-run of this cell starts from a clean state.
    env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Episode reward 982.158490281774
Episode reward 1087.0984967706584
Episode reward 882.398535159258
Episode reward 781.6807484924246
Episode reward 777.6349398393277
