In [1]:
import pybullet as p
import pybullet_data
import time
import numpy as np
import gymnasium as gym
from gymnasium import spaces

from stable_baselines3 import PPO # Or A2C, DQN for discrete actions
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv
import time
from stable_baselines3.common.callbacks import ProgressBarCallback
from stable_baselines3.common.vec_env import SubprocVecEnv


pybullet build time: Jan 29 2025 23:16:28


In [2]:
class MazeCarEnv(gym.Env):
    metadata = {'render_modes': ['human'], "render_fps": 30}

    def __init__(self, render_mode=None):
        super().__init__()

        # --- Define Action Space ---
        # Hier exemplarisch: 2 diskrete Aktionen (links/rechts drehen + vorrücken)
        self.action_space = spaces.Discrete(2)  # 0: links, 1: rechts
        
        # --- Define Observation Space ---
        # Beobachtung: [Auto_x, Auto_y, Auto_yaw, Ziel_x, Ziel_y]
        low = np.array([-6, -6, -np.pi, -6, -6], dtype=np.float32)
        high = np.array([6, 6, np.pi, 6, 6], dtype=np.float32)
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        # --- PyBullet Setup ---
        self.render_mode = render_mode
        self.client = p.connect(p.DIRECT if render_mode is None else p.GUI)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, -9.81, physicsClientId=self.client)

        # Lade eine Plane als Boden
        self.planeId = p.loadURDF("plane.urdf", physicsClientId=self.client)

        # Maze als URDF laden (dieses sollte z. B. als "maze.urdf" im Arbeitsverzeichnis liegen)
        # Die Option useFixedBase=True sorgt dafür, dass das Maze statisch bleibt.
        self.mazeId = p.loadURDF("urdf/maze.urdf",
                                 basePosition=[0, 0, 0],
                                 useFixedBase=True,
                                 physicsClientId=self.client)

        # Ziele definieren (2 verschiedene, beispielhaft)
        # Passe sie an die tatsächlichen Gänge/Ausgänge in deinem Maze an.
        self.goal_area_1 = np.array([4.5, 4.5])   # oberer Ausgang
        self.goal_area_2 = np.array([4.5, -4.5])  # unterer Ausgang
        self.goal_radius = 0.5
        self.target_goal_pos = None  # Wird in reset() gesetzt
        self.correct_goal_index = -1

        # Falls im GUI-Modus: Visualisiere die Ziele als farbige Kugeln
        if p.getConnectionInfo()['connectionMethod'] == p.GUI:
            goal_visual_shape_1 = p.createVisualShape(
                p.GEOM_SPHERE,
                radius=self.goal_radius,
                rgbaColor=[0, 1, 0, 0.5]  # halbdurchsichtig grün
            )
            goal_visual_shape_2 = p.createVisualShape(
                p.GEOM_SPHERE,
                radius=self.goal_radius,
                rgbaColor=[1, 0, 0, 0.5]  # halbdurchsichtig rot
            )
            p.createMultiBody(baseVisualShapeIndex=goal_visual_shape_1,
                              basePosition=[self.goal_area_1[0], self.goal_area_1[1], 0.1])
            p.createMultiBody(baseVisualShapeIndex=goal_visual_shape_2,
                              basePosition=[self.goal_area_2[0], self.goal_area_2[1], 0.1])

        # Roboter (Auto) laden
        # Passe ggf. den Pfad an, wenn das URDF woanders liegt
        self.start_pos = [-4.5, 0.0, 0.1]  # Start nahe der linken Seite des Maze
        self.start_orn = p.getQuaternionFromEuler([0, 0, 0])
        self.carId = p.loadURDF("urdf/simple_two_wheel_car.urdf",
                                self.start_pos, self.start_orn,
                                physicsClientId=self.client)
        
        # IDs der Räder (müssen zu deinem Roboter-URDF passen)
        self.left_wheel_joint_index = 1
        self.right_wheel_joint_index = 0

        self.step_counter = 0
        self.max_steps_per_episode = 500  # z. B. mehr Schritte erlauben

        self.action_repeat = 25

    def _get_obs(self):
        pos, orn_quat = p.getBasePositionAndOrientation(self.carId, physicsClientId=self.client)
        euler = p.getEulerFromQuaternion(orn_quat)
        yaw = euler[2]
        return np.array([pos[0], pos[1], yaw,
                         self.target_goal_pos[0],
                         self.target_goal_pos[1]], dtype=np.float32)

    def _get_info(self):
        car_pos, _ = p.getBasePositionAndOrientation(self.carId, physicsClientId=self.client)
        dist_goal1 = np.linalg.norm(np.array(car_pos[:2]) - self.goal_area_1)
        dist_goal2 = np.linalg.norm(np.array(car_pos[:2]) - self.goal_area_2)
        return {
            "distance_goal1": dist_goal1,
            "distance_goal2": dist_goal2,
            "target_goal_index": self.correct_goal_index
        }

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.step_counter = 0

        # Auto zurücksetzen
        p.resetBasePositionAndOrientation(self.carId, self.start_pos, self.start_orn, physicsClientId=self.client)
        p.resetBaseVelocity(self.carId,
                            linearVelocity=[0, 0, 0],
                            angularVelocity=[0, 0, 0],
                            physicsClientId=self.client)

        # Zufällige Wahl des "richtigen" Ziels (oder fest vorgegeben)
        # self.correct_goal_index = self.np_random.integers(0, 2)
        self.correct_goal_index = 0  # zum Testen immer Goal 1
        if self.correct_goal_index == 0:
            self.target_goal_pos = self.goal_area_1
        else:
            self.target_goal_pos = self.goal_area_2

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()
            # time.sleep(0.02)

        return observation, info

    def step(self, action):

        if action == 0:
            left_vel, right_vel = 10.0, 10.0
        elif action == 1:
            left_vel, right_vel = 2.0, 10.0
        else:  # action == 2
            left_vel, right_vel = 10.0, 2.0

        # Setze die Motoren der beiden Räder
        p.setJointMotorControl2(
            bodyUniqueId=self.carId,
            jointIndex=self.left_wheel_joint_index,
            controlMode=p.VELOCITY_CONTROL,
            targetVelocity=left_vel,
            force=20.0
        )
        p.setJointMotorControl2(
            bodyUniqueId=self.carId,
            jointIndex=self.right_wheel_joint_index,
            controlMode=p.VELOCITY_CONTROL,
            targetVelocity=right_vel,
            force=20.0
        )


        # print(f"Action: {action}, Velocities: L={left_vel}, R={right_vel}, Step={self.step_counter}")
        # car_pos, _ = p.getBasePositionAndOrientation(self.carId, physicsClientId=self.client)
        # print(f"Position: {car_pos}")


        # Jetzt die Simulation mehrmals updaten,
        # damit der Roboter tatsächlich fährt und gegen Wände kollidieren kann
        for _ in range(self.action_repeat):
            p.stepSimulation()
            # if self.render_mode == "human":
            #     time.sleep(0.001) 
            #     print("Step Simulation")
        
        self.step_counter += 1

        # Beobachtung + Reward + Done bestimmen
        observation = self._get_obs()
        info = self._get_info()

        # Check goal conditions
        car_pos, _ = p.getBasePositionAndOrientation(self.carId, physicsClientId=self.client)
        in_goal_1 = np.linalg.norm(car_pos[:2] - self.goal_area_1) < self.goal_radius
        in_goal_2 = np.linalg.norm(car_pos[:2] - self.goal_area_2) < self.goal_radius

        terminated = False
        reward = -0.01  # kleiner Schritt-Penalty

        if in_goal_1:
            if self.correct_goal_index == 0:
                reward = 10.0
                terminated = True
            else:
                reward = -5.0
                terminated = True
        elif in_goal_2:
            if self.correct_goal_index == 1:
                reward = 10.0
                terminated = True
            else:
                reward = -5.0
                terminated = True

        truncated = (self.step_counter >= self.max_steps_per_episode)

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, truncated, info

    def render(self):
        # Bei PyBullet im GUI-Modus passiert das Rendering automatisch
        pass

    def _render_frame(self):
        # Debug-Anzeigen, falls erwünscht
        pass

    def close(self):
        p.disconnect(physicsClientId=self.client)


In [3]:
def make_env():
    def _init():
        return MazeCarEnv(render_mode=None)  # or "human" for visualization
    return _init


In [4]:
num_envs = 4
env = SubprocVecEnv([make_env() for _ in range(num_envs)])
# env = MazeCarEnv(render_mode=None) 
# env = MazeCarEnv(render_mode="human")

# try:
#     check_env(env)
#     print("Environment check passed!")
# except Exception as e:
#     print(f"Environment check failed: {e}")
#     env.close()
#     exit()

# model = SAC(
#     "MlpPolicy", 
#     env, 
#     verbose=1, 
#     tensorboard_log="./sac_mazecar_tensorboard/",
#     batch_size=64,
#     learning_rate=0.0003,
#     train_freq=1,
#     gradient_steps=1,
#     buffer_size=1000000,
#     tau=0.005,
#     gamma=0.99
# )

n_steps_calc = num_envs * 512

model = PPO("MlpPolicy", 
            env, 
            verbose=1, 
            tensorboard_log="./ppo_mazecar_tensorboard/",
            batch_size=256, # after n_steps the data is split into batches
            learning_rate=0.0003,
            n_steps=512,   # number of steps collected before a training
            device="cuda"
)


pybullet build time: Jan 29 2025 23:16:28
pybullet build time: Jan 29 2025 23:16:28
pybullet build time: Jan 29 2025 23:16:28
pybullet build time: Jan 29 2025 23:16:28


Using cuda device




In [None]:
# # Train the model
# check_env = DummyVecEnv([lambda: MazeCarEnv(render_mode="human")])
# check_callback = EvalCallback(
#     check_env, 
#     best_model_save_path='./logs/',
#     log_path='./logs/', 
#     eval_freq=100,
#     deterministic=True, 
#     render=True
# )


model.learn(total_timesteps=200000, progress_bar=True)
# model.learn(total_timesteps=5000)

model.save("ppo_mazecar_model_2_multi")
env.close()
# check_env.close()

Logging to ./ppo_mazecar_tensorboard/PPO_55


Output()

-----------------------------
| time/              |      |
|    fps             | 962  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1021         |
|    iterations           | 2            |
|    time_elapsed         | 4            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0014756119 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.692       |
|    explained_variance   | -36.3        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0145      |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00103     |
|    value_loss           | 0.00341      |
------------------------------------------


----------------------------------------
| time/                   |            |
|    fps                  | 1052       |
|    iterations           | 3          |
|    time_elapsed         | 5          |
|    total_timesteps      | 6144       |
| train/                  |            |
|    approx_kl            | 0.01787328 |
|    clip_fraction        | 0.0927     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.684     |
|    explained_variance   | -0.311     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0131    |
|    n_updates            | 20         |
|    policy_gradient_loss | -0.0052    |
|    value_loss           | 0.000293   |
----------------------------------------


----------------------------------------
| time/                   |            |
|    fps                  | 1082       |
|    iterations           | 4          |
|    time_elapsed         | 7          |
|    total_timesteps      | 8192       |
| train/                  |            |
|    approx_kl            | 0.01066561 |
|    clip_fraction        | 0.0198     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.66      |
|    explained_variance   | 0.0456     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00683   |
|    n_updates            | 30         |
|    policy_gradient_loss | -0.00155   |
|    value_loss           | 0.000203   |
----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1064        |
|    iterations           | 5           |
|    time_elapsed         | 9           |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.010316574 |
|    clip_fraction        | 0.0368      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.605      |
|    explained_variance   | -0.104      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00179    |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.00191    |
|    value_loss           | 0.000133    |
-----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1079        |
|    iterations           | 6           |
|    time_elapsed         | 11          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.008506973 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.541      |
|    explained_variance   | 0.364       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0119     |
|    n_updates            | 50          |
|    policy_gradient_loss | -0.00663    |
|    value_loss           | 0.000106    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1092         |
|    iterations           | 7            |
|    time_elapsed         | 13           |
|    total_timesteps      | 14336        |
| train/                  |              |
|    approx_kl            | 0.0046001542 |
|    clip_fraction        | 0.0128       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.557       |
|    explained_variance   | 0.616        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00217      |
|    n_updates            | 60           |
|    policy_gradient_loss | -0.000839    |
|    value_loss           | 7.81e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1079         |
|    iterations           | 8            |
|    time_elapsed         | 15           |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0065085446 |
|    clip_fraction        | 0.0234       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.532       |
|    explained_variance   | 0.0668       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00988      |
|    n_updates            | 70           |
|    policy_gradient_loss | -0.00142     |
|    value_loss           | 5.21e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1073         |
|    iterations           | 9            |
|    time_elapsed         | 17           |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.0053816065 |
|    clip_fraction        | 0.012        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.475       |
|    explained_variance   | 0.484        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00982     |
|    n_updates            | 80           |
|    policy_gradient_loss | -0.000857    |
|    value_loss           | 3.36e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1043         |
|    iterations           | 10           |
|    time_elapsed         | 19           |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 0.0044112517 |
|    clip_fraction        | 0.0148       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.418       |
|    explained_variance   | 0.768        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00849     |
|    n_updates            | 90           |
|    policy_gradient_loss | -0.000719    |
|    value_loss           | 3.2e-05      |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1024         |
|    iterations           | 11           |
|    time_elapsed         | 21           |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0023014974 |
|    clip_fraction        | 0.0164       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.432       |
|    explained_variance   | 0.743        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00191     |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.000689    |
|    value_loss           | 2.15e-05     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1020        |
|    iterations           | 12          |
|    time_elapsed         | 24          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.004246147 |
|    clip_fraction        | 0.0132      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.396      |
|    explained_variance   | 0.683       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00303     |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00156    |
|    value_loss           | 1.74e-05    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1013         |
|    iterations           | 13           |
|    time_elapsed         | 26           |
|    total_timesteps      | 26624        |
| train/                  |              |
|    approx_kl            | 0.0027235553 |
|    clip_fraction        | 0.0347       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.34        |
|    explained_variance   | 0.816        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00248      |
|    n_updates            | 120          |
|    policy_gradient_loss | -0.00331     |
|    value_loss           | 3.64e-05     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1020        |
|    iterations           | 14          |
|    time_elapsed         | 28          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.005043137 |
|    clip_fraction        | 0.07        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.288      |
|    explained_variance   | 0.93        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00626    |
|    n_updates            | 130         |
|    policy_gradient_loss | -0.00627    |
|    value_loss           | 0.000146    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1020         |
|    iterations           | 15           |
|    time_elapsed         | 30           |
|    total_timesteps      | 30720        |
| train/                  |              |
|    approx_kl            | 0.0014453018 |
|    clip_fraction        | 0.00718      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.288       |
|    explained_variance   | 0.95         |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00293     |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.000295    |
|    value_loss           | 0.000253     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1011         |
|    iterations           | 16           |
|    time_elapsed         | 32           |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0019088538 |
|    clip_fraction        | 0.0142       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.262       |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.000762     |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.0035      |
|    value_loss           | 0.000271     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1008         |
|    iterations           | 17           |
|    time_elapsed         | 34           |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 0.0023215448 |
|    clip_fraction        | 0.0407       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.231       |
|    explained_variance   | 0.95         |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00756     |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.00369     |
|    value_loss           | 0.000237     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1019         |
|    iterations           | 18           |
|    time_elapsed         | 36           |
|    total_timesteps      | 36864        |
| train/                  |              |
|    approx_kl            | 0.0021753083 |
|    clip_fraction        | 0.0291       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.214       |
|    explained_variance   | 0.934        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00169     |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.00257     |
|    value_loss           | 0.000223     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1023         |
|    iterations           | 19           |
|    time_elapsed         | 38           |
|    total_timesteps      | 38912        |
| train/                  |              |
|    approx_kl            | 0.0026616915 |
|    clip_fraction        | 0.0288       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.193       |
|    explained_variance   | 0.917        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00716     |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.00242     |
|    value_loss           | 0.000344     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1025         |
|    iterations           | 20           |
|    time_elapsed         | 39           |
|    total_timesteps      | 40960        |
| train/                  |              |
|    approx_kl            | 0.0006614199 |
|    clip_fraction        | 0.00596      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.217       |
|    explained_variance   | 0.817        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00386     |
|    n_updates            | 190          |
|    policy_gradient_loss | -0.000682    |
|    value_loss           | 0.000351     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1030         |
|    iterations           | 21           |
|    time_elapsed         | 41           |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0014183712 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.259       |
|    explained_variance   | 0.907        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00796     |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00249     |
|    value_loss           | 0.000383     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1032         |
|    iterations           | 22           |
|    time_elapsed         | 43           |
|    total_timesteps      | 45056        |
| train/                  |              |
|    approx_kl            | 0.0007798475 |
|    clip_fraction        | 0.000244     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.264       |
|    explained_variance   | 0.892        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00342      |
|    n_updates            | 210          |
|    policy_gradient_loss | -3.35e-05    |
|    value_loss           | 0.000166     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1037          |
|    iterations           | 23            |
|    time_elapsed         | 45            |
|    total_timesteps      | 47104         |
| train/                  |               |
|    approx_kl            | 0.00040049473 |
|    clip_fraction        | 0.000635      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.252        |
|    explained_variance   | 0.937         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.000208      |
|    n_updates            | 220           |
|    policy_gradient_loss | -0.000221     |
|    value_loss           | 0.000211      |
-------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1042        |
|    iterations           | 24          |
|    time_elapsed         | 47          |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.012039823 |
|    clip_fraction        | 0.0139      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.332      |
|    explained_variance   | 0.927       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0151     |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.00251    |
|    value_loss           | 0.000252    |
-----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1050        |
|    iterations           | 25          |
|    time_elapsed         | 48          |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.003980278 |
|    clip_fraction        | 0.0386      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.375      |
|    explained_variance   | 0.942       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.000646   |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.000841   |
|    value_loss           | 0.000152    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1056         |
|    iterations           | 26           |
|    time_elapsed         | 50           |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0050045205 |
|    clip_fraction        | 0.0743       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.431       |
|    explained_variance   | 0.923        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.000237     |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.00453     |
|    value_loss           | 0.000166     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1060        |
|    iterations           | 27          |
|    time_elapsed         | 52          |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.002067421 |
|    clip_fraction        | 0.0211      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.408      |
|    explained_variance   | 0.946       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00246    |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.00186    |
|    value_loss           | 7.96e-05    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1065         |
|    iterations           | 28           |
|    time_elapsed         | 53           |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 0.0022704203 |
|    clip_fraction        | 0.00977      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.396       |
|    explained_variance   | 0.88         |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00158     |
|    n_updates            | 270          |
|    policy_gradient_loss | -0.00198     |
|    value_loss           | 8.24e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1072         |
|    iterations           | 29           |
|    time_elapsed         | 55           |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0019939332 |
|    clip_fraction        | 0.00757      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.37        |
|    explained_variance   | 0.957        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.001        |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.00184     |
|    value_loss           | 3.98e-05     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1080        |
|    iterations           | 30          |
|    time_elapsed         | 56          |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.005262019 |
|    clip_fraction        | 0.057       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.436      |
|    explained_variance   | 0.941       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0141     |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.0027     |
|    value_loss           | 2.81e-05    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1083         |
|    iterations           | 31           |
|    time_elapsed         | 58           |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0021017604 |
|    clip_fraction        | 0.0111       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.407       |
|    explained_variance   | 0.927        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00132     |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.000773    |
|    value_loss           | 2.94e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1085         |
|    iterations           | 32           |
|    time_elapsed         | 60           |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0017574168 |
|    clip_fraction        | 0.00937      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.369       |
|    explained_variance   | 0.963        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00971     |
|    n_updates            | 310          |
|    policy_gradient_loss | -0.0023      |
|    value_loss           | 1.6e-05      |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1089        |
|    iterations           | 33          |
|    time_elapsed         | 62          |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.003792473 |
|    clip_fraction        | 0.0236      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.419      |
|    explained_variance   | 0.888       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00911    |
|    n_updates            | 320         |
|    policy_gradient_loss | -0.002      |
|    value_loss           | 1.92e-05    |
-----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1089        |
|    iterations           | 34          |
|    time_elapsed         | 63          |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.005453941 |
|    clip_fraction        | 0.0491      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.381      |
|    explained_variance   | 0.962       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0131     |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.00371    |
|    value_loss           | 1.09e-05    |
-----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1093        |
|    iterations           | 35          |
|    time_elapsed         | 65          |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.025844546 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.345      |
|    explained_variance   | 0.0689      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0311     |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.00858    |
|    value_loss           | 0.000145    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1096         |
|    iterations           | 36           |
|    time_elapsed         | 67           |
|    total_timesteps      | 73728        |
| train/                  |              |
|    approx_kl            | 0.0018972439 |
|    clip_fraction        | 0.0061       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.385       |
|    explained_variance   | 0.91         |
|    learning_rate        | 0.0003       |
|    loss                 | -9.01e-05    |
|    n_updates            | 350          |
|    policy_gradient_loss | -0.00085     |
|    value_loss           | 1.34e-05     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1096        |
|    iterations           | 37          |
|    time_elapsed         | 69          |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.002929945 |
|    clip_fraction        | 0.0153      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.433      |
|    explained_variance   | 0.946       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00544    |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.00266    |
|    value_loss           | 5.79e-06    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1098         |
|    iterations           | 38           |
|    time_elapsed         | 70           |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0022063837 |
|    clip_fraction        | 0.0336       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.448       |
|    explained_variance   | 0.947        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00405     |
|    n_updates            | 370          |
|    policy_gradient_loss | -0.00274     |
|    value_loss           | 3.48e-06     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1103        |
|    iterations           | 39          |
|    time_elapsed         | 72          |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.004133329 |
|    clip_fraction        | 0.0348      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.406      |
|    explained_variance   | 0.958       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00369    |
|    n_updates            | 380         |
|    policy_gradient_loss | -0.00166    |
|    value_loss           | 3.41e-06    |
-----------------------------------------


----------------------------------------
| time/                   |            |
|    fps                  | 1105       |
|    iterations           | 40         |
|    time_elapsed         | 74         |
|    total_timesteps      | 81920      |
| train/                  |            |
|    approx_kl            | 0.00418411 |
|    clip_fraction        | 0.0258     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.462     |
|    explained_variance   | 0.899      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00164   |
|    n_updates            | 390        |
|    policy_gradient_loss | -0.00209   |
|    value_loss           | 3.16e-06   |
----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1108         |
|    iterations           | 41           |
|    time_elapsed         | 75           |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0031442954 |
|    clip_fraction        | 0.00771      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.454       |
|    explained_variance   | 0.888        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.000828     |
|    n_updates            | 400          |
|    policy_gradient_loss | -0.00124     |
|    value_loss           | 2.62e-06     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1111        |
|    iterations           | 42          |
|    time_elapsed         | 77          |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.005012474 |
|    clip_fraction        | 0.045       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.449      |
|    explained_variance   | 0.938       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00488    |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.000904   |
|    value_loss           | 1.7e-06     |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1112         |
|    iterations           | 43           |
|    time_elapsed         | 79           |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0032263468 |
|    clip_fraction        | 0.0309       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.459       |
|    explained_variance   | 0.976        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00238     |
|    n_updates            | 420          |
|    policy_gradient_loss | -0.00213     |
|    value_loss           | 1.34e-06     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1109         |
|    iterations           | 44           |
|    time_elapsed         | 81           |
|    total_timesteps      | 90112        |
| train/                  |              |
|    approx_kl            | 0.0026493426 |
|    clip_fraction        | 0.00864      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.39        |
|    explained_variance   | 0.933        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00748      |
|    n_updates            | 430          |
|    policy_gradient_loss | -0.000789    |
|    value_loss           | 1.27e-06     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1108        |
|    iterations           | 45          |
|    time_elapsed         | 83          |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.015413116 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.454      |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0203     |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.012      |
|    value_loss           | 9.42e-07    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1103         |
|    iterations           | 46           |
|    time_elapsed         | 85           |
|    total_timesteps      | 94208        |
| train/                  |              |
|    approx_kl            | 0.0027971081 |
|    clip_fraction        | 0.0347       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.391       |
|    explained_variance   | 0.974        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00232     |
|    n_updates            | 450          |
|    policy_gradient_loss | -0.00263     |
|    value_loss           | 1.23e-06     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1105         |
|    iterations           | 47           |
|    time_elapsed         | 87           |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 0.0061643883 |
|    clip_fraction        | 0.0292       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.327       |
|    explained_variance   | 0.774        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00183     |
|    n_updates            | 460          |
|    policy_gradient_loss | -0.00165     |
|    value_loss           | 2.73e-06     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1106        |
|    iterations           | 48          |
|    time_elapsed         | 88          |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.007144546 |
|    clip_fraction        | 0.0462      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.392      |
|    explained_variance   | 0.0108      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.513       |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.000618   |
|    value_loss           | 1.04        |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1107         |
|    iterations           | 49           |
|    time_elapsed         | 90           |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0037020831 |
|    clip_fraction        | 0.0236       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.315       |
|    explained_variance   | 0.665        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0066       |
|    n_updates            | 480          |
|    policy_gradient_loss | -0.00129     |
|    value_loss           | 0.0161       |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1110        |
|    iterations           | 50          |
|    time_elapsed         | 92          |
|    total_timesteps      | 102400      |
| train/                  |             |
|    approx_kl            | 0.011456323 |
|    clip_fraction        | 0.0756      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.402      |
|    explained_variance   | 0.49        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.141       |
|    n_updates            | 490         |
|    policy_gradient_loss | -0.00623    |
|    value_loss           | 0.222       |
-----------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1112        |
|    iterations           | 51          |
|    time_elapsed         | 93          |
|    total_timesteps      | 104448      |
| train/                  |             |
|    approx_kl            | 0.004652575 |
|    clip_fraction        | 0.0796      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.404      |
|    explained_variance   | 0.657       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00798    |
|    n_updates            | 500         |
|    policy_gradient_loss | -0.00347    |
|    value_loss           | 0.000809    |
-----------------------------------------


----------------------------------------
| time/                   |            |
|    fps                  | 1111       |
|    iterations           | 52         |
|    time_elapsed         | 95         |
|    total_timesteps      | 106496     |
| train/                  |            |
|    approx_kl            | 0.00745029 |
|    clip_fraction        | 0.182      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.325     |
|    explained_variance   | 0.667      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00104   |
|    n_updates            | 510        |
|    policy_gradient_loss | -0.00542   |
|    value_loss           | 0.000273   |
----------------------------------------


----------------------------------------
| time/                   |            |
|    fps                  | 1112       |
|    iterations           | 53         |
|    time_elapsed         | 97         |
|    total_timesteps      | 108544     |
| train/                  |            |
|    approx_kl            | 0.12725887 |
|    clip_fraction        | 0.285      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.151     |
|    explained_variance   | 0.884      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0207    |
|    n_updates            | 520        |
|    policy_gradient_loss | -0.00773   |
|    value_loss           | 0.00013    |
----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1113         |
|    iterations           | 54           |
|    time_elapsed         | 99           |
|    total_timesteps      | 110592       |
| train/                  |              |
|    approx_kl            | 0.0005826403 |
|    clip_fraction        | 0.0107       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0928      |
|    explained_variance   | 0.868        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00132     |
|    n_updates            | 530          |
|    policy_gradient_loss | -0.000125    |
|    value_loss           | 4.49e-05     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1113          |
|    iterations           | 55            |
|    time_elapsed         | 101           |
|    total_timesteps      | 112640        |
| train/                  |               |
|    approx_kl            | 5.2176445e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0865       |
|    explained_variance   | 0.906         |
|    learning_rate        | 0.0003        |
|    loss                 | 7.28e-05      |
|    n_updates            | 540           |
|    policy_gradient_loss | 3.69e-05      |
|    value_loss           | 2.3e-05       |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1114          |
|    iterations           | 56            |
|    time_elapsed         | 102           |
|    total_timesteps      | 114688        |
| train/                  |               |
|    approx_kl            | 0.00052700995 |
|    clip_fraction        | 0.00166       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0839       |
|    explained_variance   | 0.847         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.000408      |
|    n_updates            | 550           |
|    policy_gradient_loss | -3.56e-05     |
|    value_loss           | 1.28e-05      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1112         |
|    iterations           | 57           |
|    time_elapsed         | 104          |
|    total_timesteps      | 116736       |
| train/                  |              |
|    approx_kl            | 0.0020969433 |
|    clip_fraction        | 0.00991      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0535      |
|    explained_variance   | 0.901        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.000799    |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.000603    |
|    value_loss           | 2.96e-05     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1111          |
|    iterations           | 58            |
|    time_elapsed         | 106           |
|    total_timesteps      | 118784        |
| train/                  |               |
|    approx_kl            | 0.00019942067 |
|    clip_fraction        | 0.00601       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0372       |
|    explained_variance   | 0.907         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.000268     |
|    n_updates            | 570           |
|    policy_gradient_loss | -0.000322     |
|    value_loss           | 2.56e-05      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1110          |
|    iterations           | 59            |
|    time_elapsed         | 108           |
|    total_timesteps      | 120832        |
| train/                  |               |
|    approx_kl            | 4.5458437e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.034        |
|    explained_variance   | 0.911         |
|    learning_rate        | 0.0003        |
|    loss                 | -7.63e-05     |
|    n_updates            | 580           |
|    policy_gradient_loss | -5.87e-05     |
|    value_loss           | 0.000103      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1111          |
|    iterations           | 60            |
|    time_elapsed         | 110           |
|    total_timesteps      | 122880        |
| train/                  |               |
|    approx_kl            | 8.5464126e-05 |
|    clip_fraction        | 4.88e-05      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0638       |
|    explained_variance   | 0.843         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.00513       |
|    n_updates            | 590           |
|    policy_gradient_loss | 0.000231      |
|    value_loss           | 0.00662       |
-------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1113        |
|    iterations           | 61          |
|    time_elapsed         | 112         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 7.55619e-05 |
|    clip_fraction        | 0.000684    |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0304     |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000125    |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.000106   |
|    value_loss           | 0.000102    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1108         |
|    iterations           | 62           |
|    time_elapsed         | 114          |
|    total_timesteps      | 126976       |
| train/                  |              |
|    approx_kl            | 0.0007494667 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.076       |
|    explained_variance   | 0.92         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0019       |
|    n_updates            | 610          |
|    policy_gradient_loss | -0.00109     |
|    value_loss           | 0.0024       |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1106          |
|    iterations           | 63            |
|    time_elapsed         | 116           |
|    total_timesteps      | 129024        |
| train/                  |               |
|    approx_kl            | 0.00063946104 |
|    clip_fraction        | 0.00835       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0602       |
|    explained_variance   | 0.941         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.00801      |
|    n_updates            | 620           |
|    policy_gradient_loss | -0.00282      |
|    value_loss           | 0.00139       |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1103          |
|    iterations           | 64            |
|    time_elapsed         | 118           |
|    total_timesteps      | 131072        |
| train/                  |               |
|    approx_kl            | 0.00017652605 |
|    clip_fraction        | 0.00293       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0522       |
|    explained_variance   | 0.769         |
|    learning_rate        | 0.0003        |
|    loss                 | -3.35e-05     |
|    n_updates            | 630           |
|    policy_gradient_loss | -0.000111     |
|    value_loss           | 9.18e-06      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1102          |
|    iterations           | 65            |
|    time_elapsed         | 120           |
|    total_timesteps      | 133120        |
| train/                  |               |
|    approx_kl            | 0.00013152446 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0511       |
|    explained_variance   | 0.755         |
|    learning_rate        | 0.0003        |
|    loss                 | -5.72e-05     |
|    n_updates            | 640           |
|    policy_gradient_loss | -4.17e-06     |
|    value_loss           | 6.89e-06      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1101          |
|    iterations           | 66            |
|    time_elapsed         | 122           |
|    total_timesteps      | 135168        |
| train/                  |               |
|    approx_kl            | 0.00015817911 |
|    clip_fraction        | 0.00142       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0429       |
|    explained_variance   | 0.792         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.000147     |
|    n_updates            | 650           |
|    policy_gradient_loss | -4.92e-05     |
|    value_loss           | 8.08e-06      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1100          |
|    iterations           | 67            |
|    time_elapsed         | 124           |
|    total_timesteps      | 137216        |
| train/                  |               |
|    approx_kl            | 0.00015969097 |
|    clip_fraction        | 0.00356       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.045        |
|    explained_variance   | 0.813         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.000672      |
|    n_updates            | 660           |
|    policy_gradient_loss | -7.06e-05     |
|    value_loss           | 7.43e-06      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1103         |
|    iterations           | 68           |
|    time_elapsed         | 126          |
|    total_timesteps      | 139264       |
| train/                  |              |
|    approx_kl            | 7.568422e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0446      |
|    explained_variance   | 0.887        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.000212    |
|    n_updates            | 670          |
|    policy_gradient_loss | -5.09e-06    |
|    value_loss           | 7.12e-06     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1102         |
|    iterations           | 69           |
|    time_elapsed         | 128          |
|    total_timesteps      | 141312       |
| train/                  |              |
|    approx_kl            | 3.672071e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0408      |
|    explained_variance   | 0.897        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00033      |
|    n_updates            | 680          |
|    policy_gradient_loss | 7.77e-07     |
|    value_loss           | 1e-05        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1102          |
|    iterations           | 70            |
|    time_elapsed         | 129           |
|    total_timesteps      | 143360        |
| train/                  |               |
|    approx_kl            | 0.00087810087 |
|    clip_fraction        | 0.00439       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0514       |
|    explained_variance   | 0.939         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.000946     |
|    n_updates            | 690           |
|    policy_gradient_loss | -0.000359     |
|    value_loss           | 1.7e-05       |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1104         |
|    iterations           | 71           |
|    time_elapsed         | 131          |
|    total_timesteps      | 145408       |
| train/                  |              |
|    approx_kl            | 0.0002248002 |
|    clip_fraction        | 0.00425      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0692      |
|    explained_variance   | 0.936        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00124      |
|    n_updates            | 700          |
|    policy_gradient_loss | -7.16e-05    |
|    value_loss           | 7.24e-06     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1107          |
|    iterations           | 72            |
|    time_elapsed         | 133           |
|    total_timesteps      | 147456        |
| train/                  |               |
|    approx_kl            | 0.00013442355 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0765       |
|    explained_variance   | 0.796         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.000322      |
|    n_updates            | 710           |
|    policy_gradient_loss | -8.67e-06     |
|    value_loss           | 4.91e-06      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1106          |
|    iterations           | 73            |
|    time_elapsed         | 135           |
|    total_timesteps      | 149504        |
| train/                  |               |
|    approx_kl            | 0.00018859911 |
|    clip_fraction        | 0.00156       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0847       |
|    explained_variance   | 0.834         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.00026      |
|    n_updates            | 720           |
|    policy_gradient_loss | -2.78e-05     |
|    value_loss           | 7.59e-06      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1107         |
|    iterations           | 74           |
|    time_elapsed         | 136          |
|    total_timesteps      | 151552       |
| train/                  |              |
|    approx_kl            | 6.091874e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0867      |
|    explained_variance   | 0.907        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.77e-05     |
|    n_updates            | 730          |
|    policy_gradient_loss | -8.1e-05     |
|    value_loss           | 0.000505     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1107         |
|    iterations           | 75           |
|    time_elapsed         | 138          |
|    total_timesteps      | 153600       |
| train/                  |              |
|    approx_kl            | 0.0045268442 |
|    clip_fraction        | 0.0391       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.098       |
|    explained_variance   | 0.969        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00711     |
|    n_updates            | 740          |
|    policy_gradient_loss | -0.00664     |
|    value_loss           | 0.000106     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1107         |
|    iterations           | 76           |
|    time_elapsed         | 140          |
|    total_timesteps      | 155648       |
| train/                  |              |
|    approx_kl            | 0.0003184868 |
|    clip_fraction        | 0.00381      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.065       |
|    explained_variance   | 0.625        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.000141     |
|    n_updates            | 750          |
|    policy_gradient_loss | -3.88e-05    |
|    value_loss           | 6.99e-06     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1108         |
|    iterations           | 77           |
|    time_elapsed         | 142          |
|    total_timesteps      | 157696       |
| train/                  |              |
|    approx_kl            | 6.808751e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0725      |
|    explained_variance   | 0.679        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.26e-05     |
|    n_updates            | 760          |
|    policy_gradient_loss | -4.06e-06    |
|    value_loss           | 5.59e-06     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1108          |
|    iterations           | 78            |
|    time_elapsed         | 144           |
|    total_timesteps      | 159744        |
| train/                  |               |
|    approx_kl            | 0.00041149318 |
|    clip_fraction        | 0.0042        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0672       |
|    explained_variance   | 0.727         |
|    learning_rate        | 0.0003        |
|    loss                 | -5.83e-05     |
|    n_updates            | 770           |
|    policy_gradient_loss | -0.000187     |
|    value_loss           | 6.8e-06       |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1109          |
|    iterations           | 79            |
|    time_elapsed         | 145           |
|    total_timesteps      | 161792        |
| train/                  |               |
|    approx_kl            | 3.4428667e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0651       |
|    explained_variance   | 0.781         |
|    learning_rate        | 0.0003        |
|    loss                 | -6.88e-05     |
|    n_updates            | 780           |
|    policy_gradient_loss | 7.31e-06      |
|    value_loss           | 7.6e-06       |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1110         |
|    iterations           | 80           |
|    time_elapsed         | 147          |
|    total_timesteps      | 163840       |
| train/                  |              |
|    approx_kl            | 0.0017480629 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.113       |
|    explained_variance   | 0.961        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00928     |
|    n_updates            | 790          |
|    policy_gradient_loss | -0.00447     |
|    value_loss           | 0.000401     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1111         |
|    iterations           | 81           |
|    time_elapsed         | 149          |
|    total_timesteps      | 165888       |
| train/                  |              |
|    approx_kl            | 7.794681e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0661      |
|    explained_variance   | 0.673        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.78e-06     |
|    n_updates            | 800          |
|    policy_gradient_loss | 1.75e-05     |
|    value_loss           | 6.64e-06     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1112          |
|    iterations           | 82            |
|    time_elapsed         | 150           |
|    total_timesteps      | 167936        |
| train/                  |               |
|    approx_kl            | 0.00038459155 |
|    clip_fraction        | 0.01          |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.092        |
|    explained_variance   | 0.751         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.0001       |
|    n_updates            | 810           |
|    policy_gradient_loss | -0.000601     |
|    value_loss           | 1.78e-05      |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1113          |
|    iterations           | 83            |
|    time_elapsed         | 152           |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00025593193 |
|    clip_fraction        | 0.00225       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.117        |
|    explained_variance   | 0.96          |
|    learning_rate        | 0.0003        |
|    loss                 | 0.000229      |
|    n_updates            | 820           |
|    policy_gradient_loss | 5.34e-05      |
|    value_loss           | 0.000357      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1113         |
|    iterations           | 84           |
|    time_elapsed         | 154          |
|    total_timesteps      | 172032       |
| train/                  |              |
|    approx_kl            | 0.0010267451 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.114       |
|    explained_variance   | 0.513        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.000382    |
|    n_updates            | 830          |
|    policy_gradient_loss | -0.000319    |
|    value_loss           | 1.39e-05     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1112          |
|    iterations           | 85            |
|    time_elapsed         | 156           |
|    total_timesteps      | 174080        |
| train/                  |               |
|    approx_kl            | 0.00029535894 |
|    clip_fraction        | 0.0292        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.185        |
|    explained_variance   | 0.961         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.00153       |
|    n_updates            | 840           |
|    policy_gradient_loss | -0.00127      |
|    value_loss           | 0.000308      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1113         |
|    iterations           | 86           |
|    time_elapsed         | 158          |
|    total_timesteps      | 176128       |
| train/                  |              |
|    approx_kl            | 0.0026833513 |
|    clip_fraction        | 0.0155       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.179       |
|    explained_variance   | 0.936        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00227     |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.00045     |
|    value_loss           | 0.000684     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1111         |
|    iterations           | 87           |
|    time_elapsed         | 160          |
|    total_timesteps      | 178176       |
| train/                  |              |
|    approx_kl            | 0.0026225867 |
|    clip_fraction        | 0.0284       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.124       |
|    explained_variance   | 0.951        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00711      |
|    n_updates            | 860          |
|    policy_gradient_loss | -0.000878    |
|    value_loss           | 0.000172     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1111          |
|    iterations           | 88            |
|    time_elapsed         | 162           |
|    total_timesteps      | 180224        |
| train/                  |               |
|    approx_kl            | 0.00048761518 |
|    clip_fraction        | 0.00845       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.09         |
|    explained_variance   | 0.485         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.00134      |
|    n_updates            | 870           |
|    policy_gradient_loss | -0.000184     |
|    value_loss           | 1.37e-05      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1109         |
|    iterations           | 89           |
|    time_elapsed         | 164          |
|    total_timesteps      | 182272       |
| train/                  |              |
|    approx_kl            | 0.0011190447 |
|    clip_fraction        | 0.0136       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.102       |
|    explained_variance   | 0.941        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0029       |
|    n_updates            | 880          |
|    policy_gradient_loss | -0.0018      |
|    value_loss           | 0.000221     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 1108          |
|    iterations           | 90            |
|    time_elapsed         | 166           |
|    total_timesteps      | 184320        |
| train/                  |               |
|    approx_kl            | 0.00083020725 |
|    clip_fraction        | 0.00518       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0969       |
|    explained_variance   | 0.816         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.000132     |
|    n_updates            | 890           |
|    policy_gradient_loss | -0.000164     |
|    value_loss           | 1.65e-05      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1106         |
|    iterations           | 91           |
|    time_elapsed         | 168          |
|    total_timesteps      | 186368       |
| train/                  |              |
|    approx_kl            | 0.0018086281 |
|    clip_fraction        | 0.00347      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.163       |
|    explained_variance   | 0.953        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0028      |
|    n_updates            | 900          |
|    policy_gradient_loss | -0.000493    |
|    value_loss           | 0.000161     |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 1102        |
|    iterations           | 92          |
|    time_elapsed         | 170         |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.001208057 |
|    clip_fraction        | 0.00654     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.157      |
|    explained_variance   | 0.949       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00318    |
|    n_updates            | 910         |
|    policy_gradient_loss | -0.00143    |
|    value_loss           | 0.00013     |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1101         |
|    iterations           | 93           |
|    time_elapsed         | 172          |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 0.0007435434 |
|    clip_fraction        | 0.00664      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.128       |
|    explained_variance   | 0.933        |
|    learning_rate        | 0.0003       |
|    loss                 | -9.21e-05    |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.000712    |
|    value_loss           | 5.89e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1101         |
|    iterations           | 94           |
|    time_elapsed         | 174          |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0033323949 |
|    clip_fraction        | 0.0168       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.237       |
|    explained_variance   | 0.953        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00442     |
|    n_updates            | 930          |
|    policy_gradient_loss | -0.00291     |
|    value_loss           | 9.78e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1098         |
|    iterations           | 95           |
|    time_elapsed         | 177          |
|    total_timesteps      | 194560       |
| train/                  |              |
|    approx_kl            | 0.0027433499 |
|    clip_fraction        | 0.0111       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.206       |
|    explained_variance   | 0.967        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00678     |
|    n_updates            | 940          |
|    policy_gradient_loss | -0.00188     |
|    value_loss           | 3.58e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1097         |
|    iterations           | 96           |
|    time_elapsed         | 179          |
|    total_timesteps      | 196608       |
| train/                  |              |
|    approx_kl            | 0.0015068908 |
|    clip_fraction        | 0.00933      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.226       |
|    explained_variance   | 0.914        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00021      |
|    n_updates            | 950          |
|    policy_gradient_loss | -0.00145     |
|    value_loss           | 3.54e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1096         |
|    iterations           | 97           |
|    time_elapsed         | 181          |
|    total_timesteps      | 198656       |
| train/                  |              |
|    approx_kl            | 0.0032570502 |
|    clip_fraction        | 0.0525       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.28        |
|    explained_variance   | 0.934        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00746     |
|    n_updates            | 960          |
|    policy_gradient_loss | -0.0024      |
|    value_loss           | 5.31e-05     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 1094         |
|    iterations           | 98           |
|    time_elapsed         | 183          |
|    total_timesteps      | 200704       |
| train/                  |              |
|    approx_kl            | 0.0013758931 |
|    clip_fraction        | 0.00933      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.308       |
|    explained_variance   | 0.953        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.00378     |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.0008      |
|    value_loss           | 3.39e-05     |
------------------------------------------


In [None]:
# --- To evaluate ---
eval_env = MazeCarEnv(render_mode="human") # Render during evaluation
model = PPO.load("ppo_mazecar_model_2_multi", env=eval_env)

obs, info = eval_env.reset()
for _ in range(200): # Max steps for evaluation
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = eval_env.step(action)
    time.sleep(1./120.0)  # Adjust sleep time for rendering speed
    if terminated or truncated:
        print(f"Evaluation episode finished. Reached target goal: {info.get('target_goal_index')}, Reward: {reward}")
        obs, info = eval_env.reset() # Reset for next evaluation episode

eval_env.close()

startThreads creating 1 threads.
starting thread 0
started thread 0 
argc=2
argv[0] = --unused
argv[1] = --start_demo_name=Physics Server
ExampleBrowserThreadFunc started
X11 functions dynamically loaded using dlopen/dlsym OK!
X11 functions dynamically loaded using dlopen/dlsym OK!
Creating context
Created GL 3.3 context
Direct GLX rendering context obtained
Making context current
GL_VENDOR=Intel
GL_RENDERER=Mesa Intel(R) Graphics (RPL-P)
GL_VERSION=4.6 (Core Profile) Mesa 23.2.1-1ubuntu3.1~22.04.3
GL_SHADING_LANGUAGE_VERSION=4.60
pthread_getconcurrency()=0
Version = 4.6 (Core Profile) Mesa 23.2.1-1ubuntu3.1~22.04.3
Vendor = Intel
Renderer = Mesa Intel(R) Graphics (RPL-P)
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started
ven = Intel
Workaround for some crash in the Intel OpenGL driver on Linux/Ubuntu
ven = Intel
Workaround for some crash in the Intel OpenGL driver on Linux/Ubuntu
Wrapping the env

In [7]:
env.close()

check_env.close()

eval_env.close()


AttributeError: 'function' object has no attribute 'close'

In [None]:
# ~/masterthesis/mt_start$   tensorboard --logdir=./ppo_mazecar_tensorboard/

In [None]:
import torch
print(torch.cuda.is_available())  # Should return True if GPU is available
print(torch.cuda.get_device_name(0))  # Prints the name of the GPU

True
NVIDIA GeForce RTX 4060 Laptop GPU
