In [None]:
SCENARIOS = 500

In [2]:
!pip install stable_baselines3[extra]



In [3]:
!pip install git+https://github.com/metadriverse/metadrive.git

Collecting git+https://github.com/metadriverse/metadrive.git
  Cloning https://github.com/metadriverse/metadrive.git to c:\users\colton\appdata\local\temp\pip-req-build-ziae32ff
  Resolved https://github.com/metadriverse/metadrive.git to commit a7f7d0b6fcf9b7422f9a9cd5c674fb1661c06a7e
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/metadriverse/metadrive.git 'C:\Users\Colton\AppData\Local\Temp\pip-req-build-ziae32ff'


## RL Environment

In [4]:
import copy
from metadrive.envs.safe_metadrive_env import SafeMetaDriveEnv

DEFAULT_CONFIG = {
    # The below are default configs copied from SafeMetaDriveEnv
    # Environment difficulty
    "accident_prob": 0.8,
    "traffic_density": 0.05,
    # Termination conditions
    "crash_vehicle_done": False,
    "crash_object_done": False,
    # Reward
    "success_reward": 10.0,
    "driving_reward": 1.0,
    "speed_reward": 0.1,
    # Penalty will be negated and added to reward
    "out_of_road_penalty": 5.0,
    "crash_vehicle_penalty": 1.0,
    "crash_object_penalty": 1.0,
    # Cost will be return in info["cost"] and you can do constrained optimization with it
    "crash_vehicle_cost": 1.0,
    "crash_object_cost": 1.0,
    "out_of_road_cost": 1.0,
}

# Use deepcopy to avoid modifying the DEFAULT_CONFIG
TRAINING_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
TRAINING_CONFIG.update(
    {  # Environment setting
        "num_scenarios": SCENARIOS,  # There are totally 50 possible maps.
        "start_seed": 100,  # We will use the map with seeds in [100, 150) as the default training environment.
    }
)


def get_training_env(extra_config=None):
    config = copy.deepcopy(TRAINING_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


VALIDATION_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
VALIDATION_CONFIG.update(
    {  # Environment setting
        "num_scenarios": SCENARIOS,  # There are totally 50 possible maps.
        "start_seed": 1000,  # We will use the map with seeds in [1000, 1050) as the default validation environment.
    }
)


def get_validation_env(extra_config=None):
    config = copy.deepcopy(VALIDATION_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


## Import and utilities

In [5]:
import argparse
import datetime
import logging
import os
import uuid
from collections import defaultdict
from pathlib import Path

import numpy as np
from metadrive.engine.logger import set_log_level
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO
from stable_baselines3.ppo.policies import ActorCriticPolicy

from stable_baselines3.td3 import TD3
from stable_baselines3.td3.policies import TD3Policy

from stable_baselines3.sac import SAC
from stable_baselines3.sac.policies import SACPolicy

from wandb.integration.sb3 import WandbCallback

import wandb


# Remove MetaDrive's logging information when episode ends.
set_log_level(logging.ERROR)

In [6]:

def get_time_str():
    return datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


def remove_reset_seed_and_add_monitor(make_env, trial_dir):
    """
    MetaDrive env's reset function takes a seed argument and use it to determine the map to load.
    However, in stable-baselines3, it calls reset function with a seed argument serving as the random seed,
    which is not what we want. We do a trick here to remap the random seed to map index.

    Stable-baselines3 recommends using Monitor wrapper to log training data. We add a Monitor wrapper here.
    """
    from gymnasium import Wrapper
    from stable_baselines3.common.monitor import Monitor
    class NewClass(Wrapper):
        def reset(self, seed=None, **kwargs):
            # PZH: We do a trick here to remap the seed to the map index. This can help randomize the maps.
            if seed is not None:
                new_seed = self.env.start_index + (seed % self.env.num_scenarios)
            else:
                new_seed = None
            return self.env.reset(seed=new_seed, **kwargs)

    def new_make_env():
        env = make_env()
        NewClass.__name__ = env.__class__.__name__ + "WithoutResetSeed"
        wrapped_env = NewClass(env)
        wrapped_env = Monitor(env=wrapped_env, filename=str(trial_dir))
        return wrapped_env

    return new_make_env


class CustomizedEvalCallback(EvalCallback):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.evaluations_info_buffer = defaultdict(list)
        self.training_info_buffer = defaultdict(list)
        self.train_timesteps = list()
        self.train_results = list()
        self.train_length = list()

    def _log_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.evaluations_info_buffer[k].append(info[k])
    
    def _log_train_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.training_info_buffer[k].append(info[k])

        if "raw_action" in info:
            self.evaluations_info_buffer["raw_action"].append(info["raw_action"])

    def _on_step(self) -> bool:
        """
        PZH Note: Overall this function is copied from original EvalCallback._on_step.
        We additionally record evaluations_info_buffer to the logger.
        """

        from stable_baselines3.common.evaluation import evaluate_policy
        from stable_baselines3.common.vec_env import sync_envs_normalization

        continue_training = True

        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Sync training and eval env if there is VecNormalize
            if self.model.get_vec_normalize_env() is not None:
                try:
                    sync_envs_normalization(self.training_env, self.eval_env)
                except AttributeError as e:
                    raise AssertionError(
                        "Training and eval env are not wrapped the same way, "
                        "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback "
                        "and warning above."
                    ) from e

            # Reset success rate buffer
            self._is_success_buffer = []

            episode_rewards, episode_lengths = evaluate_policy(
                self.model,
                self.eval_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_success_callback,
            )

            train_episode_rewards, train_episode_lengths = evaluate_policy(
                self.model,
                self.training_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_train_success_callback,
            )

            if self.log_path is not None:
                assert isinstance(episode_rewards, list)
                assert isinstance(episode_lengths, list)
                assert isinstance(train_episode_rewards, list)
                assert isinstance(train_episode_lengths, list)
                self.evaluations_timesteps.append(self.num_timesteps)
                self.evaluations_results.append(episode_rewards)
                self.evaluations_length.append(episode_lengths)

                kwargs = {}
                # Save success log if present
                if len(self._is_success_buffer) > 0:
                    self.evaluations_successes.append(self._is_success_buffer)
                    kwargs = dict(successes=self.evaluations_successes)

                # PZH: Save evaluations_info_buffer to the log file
                for k, v in self.evaluations_info_buffer.items():
                    kwargs[k] = v

                np.savez(
                    self.log_path,
                    timesteps=self.evaluations_timesteps,
                    results=self.evaluations_results,
                    ep_lengths=self.evaluations_length,
                    **kwargs,  # type: ignore[arg-type]
                )

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = float(mean_reward)

            if self.verbose >= 1:
                print(
                    f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
            self.logger.record("eval/mean_reward", float(mean_reward))
            self.logger.record("eval/mean_ep_length", mean_ep_length)

            # PZH: Add this metric.
            self.logger.record("eval/num_episodes", len(episode_rewards))

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
                if self.verbose >= 1:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

            # PZH: We record evaluations_info_buffer to the logger
            for k, v in self.evaluations_info_buffer.items():
                self.logger.record("eval/{}".format(k), np.mean(np.asarray(v)))

            for k, v in self.training_info_buffer.items():
                self.logger.record("train/{}".format(k), np.mean(np.asarray(v)))

            # Dump log so the evaluation results are printed with the correct timestep
            self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
                if self.verbose >= 1:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
                self.best_mean_reward = float(mean_reward)
                # Trigger callback on new best model, if needed
                if self.callback_on_new_best is not None:
                    continue_training = self.callback_on_new_best.on_step()

            # Trigger callback after every evaluation, if needed
            if self.callback is not None:
                continue_training = continue_training and self._on_event()

        return continue_training


## Setup PPO trainer


In [7]:

# ===== Set up some arguments =====
exp_name = "ppo_metadrive" + "_" + str(SCENARIOS) + "_scenarios"
use_wandb = True

experiment_batch_name = "{}".format(exp_name)
trial_name = "{}_{}_{}".format(experiment_batch_name, get_time_str(), uuid.uuid4().hex[:8])
experiment_dir = Path("runs") / experiment_batch_name
trial_dir = experiment_dir / trial_name
os.makedirs(experiment_dir, exist_ok=True)
os.makedirs(trial_dir, exist_ok=True)
print(f"We start logging training data into {trial_dir}")


We start logging training data into runs\ppo_metadrive_10_scenarios\ppo_metadrive_10_scenarios_2025-03-19_12-18-01_c4e7621c


In [8]:
# ===== Setup environment =====
num_train_envs = 10
num_eval_envs = 5
train_env = make_vec_env(remove_reset_seed_and_add_monitor(get_training_env, trial_dir), n_envs=num_train_envs,
                            vec_env_cls=SubprocVecEnv)
eval_env = make_vec_env(remove_reset_seed_and_add_monitor(get_validation_env, trial_dir), n_envs=num_eval_envs,
                        vec_env_cls=SubprocVecEnv)

In [9]:
# ===== Setup evaluation, checkpointing, and wandb =====
save_freq = 10_000  # Number of steps per model checkpoint
eval_freq = 10_000  # Number of steps per evaluation

wandb_save_freq = 10_000  # Number of steps per evaluation

num_eval_episodes = 5

checkpoint_callback = CheckpointCallback(
    name_prefix="rl_model",
    verbose=2,
    save_freq=save_freq,
    save_path=str(trial_dir / "models")
)
eval_callback = CustomizedEvalCallback(
    eval_env,
    best_model_save_path=str(trial_dir / "eval"),
    log_path=str(trial_dir / "eval"),
    eval_freq=max(eval_freq // num_train_envs, 1),
    n_eval_episodes=num_eval_episodes,
)
callbacks = [checkpoint_callback, eval_callback]
if use_wandb:
    wandb.init(
        project="cs260r",
        id=trial_name,
        name=experiment_batch_name,
        sync_tensorboard=True,
        dir=str(trial_dir),
    )
    callbacks.append(WandbCallback(model_save_path=str(trial_dir / "wandb_models"), model_save_freq=wandb_save_freq))
callbacks = CallbackList(callbacks)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: coltonrowe (coltonrowe-ucla) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [10]:

# ===== Setup the training algorithm =====
# model = SAC(
#     env=train_env,
#     policy=SACPolicy,
#     verbose=2,
#     batch_size = 256,
#     buffer_size = 1000000,
#     learning_rate = 5e-5,
#     gamma=0.98,
#     tau = 0.002,
#     device = "cuda",
#     tensorboard_log=str(trial_dir))
# model = TD3(
#     env=train_env,
#     policy=TD3Policy,
#     learning_rate=1e-3,
#     buffer_size=1_000_000,
#     learning_starts=100,
#     batch_size=256,
#     tau=0.005,
#     gamma = 0.99,
#     train_freq=1,
#     gradient_steps=1,
#     action_noise=None,
#     replay_buffer_class=None,
#     replay_buffer_kwargs=None,
#     optimize_memory_usage=False,
#     policy_delay=2,
#     target_policy_noise=0.2,
#     target_noise_clip=0.5,
#     stats_window_size=100,
#     tensorboard_log=None,
#     policy_kwargs=None,
#     verbose=2,
#     seed=None,
#     device='auto',
#     _init_setup_model=True
#     )
model = PPO(
    env=train_env,
    policy=ActorCriticPolicy,
    n_steps=256,  # n_steps * n_envs = total_batch_size
    n_epochs=20,
    learning_rate=5e-5,
    batch_size=256,
    clip_range=0.1,
    vf_coef=0.5,
    ent_coef=0.0,
    max_grad_norm=10.0,
    tensorboard_log=str(trial_dir),
    verbose=2,
    device="auto",
)


Using cpu device


In [11]:
ckpt = None
if ckpt:
    ckpt = Path(ckpt)
    print(f"Loading checkpoint from {ckpt}!")
    from stable_baselines3.common.save_util import load_from_zip_file
    data, params, pytorch_variables = load_from_zip_file(ckpt, device=model.device, print_system_info=False)
    model.set_parameters(params, exact_match=True, device=model.device)


In [None]:
# ===== Launch training =====
total_timesteps = 2_000_000  # 1M steps
model.learn(
    total_timesteps=total_timesteps,
    callback=callbacks,
    reset_num_timesteps=True,
    tb_log_name=experiment_batch_name,
    log_interval=1,
    progress_bar=True,
)

Logging to runs\ppo_metadrive_10_scenarios\ppo_metadrive_10_scenarios_2025-03-19_12-18-01_c4e7621c\ppo_metadrive_10_scenarios_1


-----------------------------
| time/              |      |
|    fps             | 1412 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2560 |
-----------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 397        |
|    ep_rew_mean          | -0.97      |
| time/                   |            |
|    fps                  | 1194       |
|    iterations           | 2          |
|    time_elapsed         | 4          |
|    total_timesteps      | 5120       |
| train/                  |            |
|    approx_kl            | 0.00389062 |
|    clip_fraction        | 0.22       |
|    clip_range           | 0.1        |
|    entropy_loss         | -2.84      |
|    explained_variance   | -0.0263    |
|    learning_rate        | 5e-05      |
|    loss                 | -0.00838   |
|    n_updates            | 20         |
|    policy_gradient_loss | -0.014     |
|    std   

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 67.6         |
|    mean_reward          | 9.54         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.08253882   |
|    route_completion     | 0.0547       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 10000        |
| train/                  |              |
|    approx_kl            | 0.0030311928 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.164        |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.83        |
|    explained_variance   | -0.00567     |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 551      |
|    ep_rew_mean     | 1.59     |
| time/              |          |
|    fps             | 784      |
|    iterations      | 4        |
|    time_elapsed    | 13       |
|    total_timesteps | 10240    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 495         |
|    ep_rew_mean          | 1.39        |
| time/                   |             |
|    fps                  | 846         |
|    iterations           | 5           |
|    time_elapsed         | 15          |
|    total_timesteps      | 12800       |
| train/                  |             |
|    approx_kl            | 0.003623811 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.83       |
|    explained_variance   | -0.0389     |
|    learning_rate        | 5e

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 53.4         |
|    mean_reward          | 15.7         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.13814469   |
|    route_completion     | 0.0648       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 20000        |
| train/                  |              |
|    approx_kl            | 0.0020089336 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.118        |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.81        |
|    explained_variance   | -0.00957     |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 407      |
|    ep_rew_mean     | 5.21     |
| time/              |          |
|    fps             | 779      |
|    iterations      | 8        |
|    time_elapsed    | 26       |
|    total_timesteps | 20480    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 354          |
|    ep_rew_mean          | 4.74         |
| time/                   |              |
|    fps                  | 796          |
|    iterations           | 9            |
|    time_elapsed         | 28           |
|    total_timesteps      | 23040        |
| train/                  |              |
|    approx_kl            | 0.0011651834 |
|    clip_fraction        | 0.0277       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.81        |
|    explained_variance   | 0.0021       |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 77           |
|    mean_reward          | 45           |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.17646788   |
|    route_completion     | 0.0913       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 30000        |
| train/                  |              |
|    approx_kl            | 0.0014455203 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0568       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.78        |
|    explained_variance   | -0.00469     |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 326      |
|    ep_rew_mean     | 5.6      |
| time/              |          |
|    fps             | 771      |
|    iterations      | 12       |
|    time_elapsed    | 39       |
|    total_timesteps | 30720    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 308          |
|    ep_rew_mean          | 5.4          |
| time/                   |              |
|    fps                  | 788          |
|    iterations           | 13           |
|    time_elapsed         | 42           |
|    total_timesteps      | 33280        |
| train/                  |              |
|    approx_kl            | 0.0018299359 |
|    clip_fraction        | 0.0944       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.78        |
|    explained_variance   | -0.00597     |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.05         |
|    max_step             | 0            |
|    mean_ep_length       | 53.6         |
|    mean_reward          | 31           |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.21997134   |
|    route_completion     | 0.0944       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 40000        |
| train/                  |              |
|    approx_kl            | 0.0010498565 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0215       |
|    clip_range           | 0.1          |
|    crash                | 0.1          |
|    entropy_loss         | -2.76        |
|    explained_variance   | -0.000599    |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.04         |
|    max_step             | 0            |
|    mean_ep_length       | 45.6         |
|    mean_reward          | 22.4         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.25256306   |
|    route_completion     | 0.09         |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 50000        |
| train/                  |              |
|    approx_kl            | 0.0016658638 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0627       |
|    clip_range           | 0.1          |
|    crash                | 0.08         |
|    entropy_loss         | -2.75        |
|    explained_variance   | 0.0249       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0333       |
|    max_step             | 0            |
|    mean_ep_length       | 33.8         |
|    mean_reward          | 10.9         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.26868466   |
|    route_completion     | 0.0842       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 60000        |
| train/                  |              |
|    approx_kl            | 0.0016305441 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0582       |
|    clip_range           | 0.1          |
|    crash                | 0.0667       |
|    entropy_loss         | -2.73        |
|    explained_variance   | 0.0103       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0857       |
|    max_step             | 0            |
|    mean_ep_length       | 67.8         |
|    mean_reward          | 58.9         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.30798125   |
|    route_completion     | 0.098        |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 70000        |
| train/                  |              |
|    approx_kl            | 0.0020414323 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0866       |
|    clip_range           | 0.1          |
|    crash                | 0.143        |
|    entropy_loss         | -2.7         |
|    explained_variance   | 0.0469       |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 138      |
|    ep_rew_mean     | 11.8     |
| time/              |          |
|    fps             | 776      |
|    iterations      | 28       |
|    time_elapsed    | 92       |
|    total_timesteps | 71680    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 136         |
|    ep_rew_mean          | 13.1        |
| time/                   |             |
|    fps                  | 782         |
|    iterations           | 29          |
|    time_elapsed         | 94          |
|    total_timesteps      | 74240       |
| train/                  |             |
|    approx_kl            | 0.001276755 |
|    clip_fraction        | 0.0496      |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.69       |
|    explained_variance   | 0.0137      |
|    learning_rate        | 5e

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.125        |
|    max_step             | 0            |
|    mean_ep_length       | 88.2         |
|    mean_reward          | 84.7         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.3424902    |
|    route_completion     | 0.124        |
|    success_rate         | 0            |
|    total_cost           | 1.68         |
| time/                   |              |
|    total_timesteps      | 80000        |
| train/                  |              |
|    approx_kl            | 0.0015400138 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0689       |
|    clip_range           | 0.1          |
|    crash                | 0.125        |
|    entropy_loss         | -2.67        |
|    explained_variance   | 0.00456      |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 117      |
|    ep_rew_mean     | 17.8     |
| time/              |          |
|    fps             | 777      |
|    iterations      | 32       |
|    time_elapsed    | 105      |
|    total_timesteps | 81920    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 120          |
|    ep_rew_mean          | 17.4         |
| time/                   |              |
|    fps                  | 781          |
|    iterations           | 33           |
|    time_elapsed         | 108          |
|    total_timesteps      | 84480        |
| train/                  |              |
|    approx_kl            | 0.0007413561 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.67        |
|    explained_variance   | 0.0214       |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.111        |
|    max_step             | 0            |
|    mean_ep_length       | 129          |
|    mean_reward          | 121          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.36921233   |
|    route_completion     | 0.155        |
|    success_rate         | 0            |
|    total_cost           | 3.56         |
| time/                   |              |
|    total_timesteps      | 90000        |
| train/                  |              |
|    approx_kl            | 0.0015371607 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0474       |
|    clip_range           | 0.1          |
|    crash                | 0.156        |
|    entropy_loss         | -2.66        |
|    explained_variance   | -0.0307      |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 116      |
|    ep_rew_mean     | 22.6     |
| time/              |          |
|    fps             | 773      |
|    iterations      | 36       |
|    time_elapsed    | 119      |
|    total_timesteps | 92160    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 120          |
|    ep_rew_mean          | 21.8         |
| time/                   |              |
|    fps                  | 778          |
|    iterations           | 37           |
|    time_elapsed         | 121          |
|    total_timesteps      | 94720        |
| train/                  |              |
|    approx_kl            | 0.0005653278 |
|    clip_fraction        | 0.00914      |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.66        |
|    explained_variance   | -0.00425     |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.12         |
|    max_step             | 0            |
|    mean_ep_length       | 77.8         |
|    mean_reward          | 83.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.38441485   |
|    route_completion     | 0.164        |
|    success_rate         | 0            |
|    total_cost           | 3.3          |
| time/                   |              |
|    total_timesteps      | 100000       |
| train/                  |              |
|    approx_kl            | 0.0013426123 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0531       |
|    clip_range           | 0.1          |
|    crash                | 0.14         |
|    entropy_loss         | -2.63        |
|    explained_variance   | 0.0149       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.127        |
|    max_step             | 0            |
|    mean_ep_length       | 90.2         |
|    mean_reward          | 106          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.39815438   |
|    route_completion     | 0.172        |
|    success_rate         | 0            |
|    total_cost           | 3.09         |
| time/                   |              |
|    total_timesteps      | 110000       |
| train/                  |              |
|    approx_kl            | 0.0013367791 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0259       |
|    clip_range           | 0.1          |
|    crash                | 0.145        |
|    entropy_loss         | -2.62        |
|    explained_variance   | 0.000369     |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0             |
|    crash                | 0.117         |
|    max_step             | 0             |
|    mean_ep_length       | 95.6          |
|    mean_reward          | 90.1          |
|    num_episodes         | 5             |
|    out_of_road          | 1             |
|    raw_action           | 0.41456276    |
|    route_completion     | 0.184         |
|    success_rate         | 0             |
|    total_cost           | 3.57          |
| time/                   |               |
|    total_timesteps      | 120000        |
| train/                  |               |
|    approx_kl            | 0.00052401505 |
|    arrive_dest          | 0             |
|    clip_fraction        | 0.029         |
|    clip_range           | 0.1           |
|    crash                | 0.167         |
|    entropy_loss         | -2.6          |
|    explained_variance   | 0.64

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.108       |
|    max_step             | 0           |
|    mean_ep_length       | 110         |
|    mean_reward          | 123         |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.4264239   |
|    route_completion     | 0.197       |
|    success_rate         | 0.1         |
|    total_cost           | 3.77        |
| time/                   |             |
|    total_timesteps      | 130000      |
| train/                  |             |
|    approx_kl            | 0.001452387 |
|    arrive_dest          | 0.0154      |
|    clip_fraction        | 0.0391      |
|    clip_range           | 0.1         |
|    crash                | 0.169       |
|    entropy_loss         | -2.58       |
|    explained_variance   | 0.687       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 175      |
|    ep_rew_mean     | 57       |
| time/              |          |
|    fps             | 783      |
|    iterations      | 51       |
|    time_elapsed    | 166      |
|    total_timesteps | 130560   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 180          |
|    ep_rew_mean          | 61.2         |
| time/                   |              |
|    fps                  | 788          |
|    iterations           | 52           |
|    time_elapsed         | 168          |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 0.0016321277 |
|    clip_fraction        | 0.0316       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.58        |
|    explained_variance   | 0.268        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.129        |
|    max_step             | 0            |
|    mean_ep_length       | 89.6         |
|    mean_reward          | 91.1         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.43809047   |
|    route_completion     | 0.201        |
|    success_rate         | 0            |
|    total_cost           | 3.89         |
| time/                   |              |
|    total_timesteps      | 140000       |
| train/                  |              |
|    approx_kl            | 0.0017274168 |
|    arrive_dest          | 0.0143       |
|    clip_fraction        | 0.0685       |
|    clip_range           | 0.1          |
|    crash                | 0.2          |
|    entropy_loss         | -2.57        |
|    explained_variance   | 0.0631       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.187        |
|    max_step             | 0            |
|    mean_ep_length       | 121          |
|    mean_reward          | 153          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.44447136   |
|    route_completion     | 0.224        |
|    success_rate         | 0            |
|    total_cost           | 4.17         |
| time/                   |              |
|    total_timesteps      | 150000       |
| train/                  |              |
|    approx_kl            | 0.0020399515 |
|    arrive_dest          | 0.0133       |
|    clip_fraction        | 0.0677       |
|    clip_range           | 0.1          |
|    crash                | 0.2          |
|    entropy_loss         | -2.55        |
|    explained_variance   | 0.375        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 203      |
|    ep_rew_mean     | 84.7     |
| time/              |          |
|    fps             | 790      |
|    iterations      | 59       |
|    time_elapsed    | 191      |
|    total_timesteps | 151040   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 206          |
|    ep_rew_mean          | 90.8         |
| time/                   |              |
|    fps                  | 794          |
|    iterations           | 60           |
|    time_elapsed         | 193          |
|    total_timesteps      | 153600       |
| train/                  |              |
|    approx_kl            | 0.0012946471 |
|    clip_fraction        | 0.0667       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.55        |
|    explained_variance   | 0.118        |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.188       |
|    max_step             | 0           |
|    mean_ep_length       | 111         |
|    mean_reward          | 144         |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.45194     |
|    route_completion     | 0.236       |
|    success_rate         | 0.1         |
|    total_cost           | 4.09        |
| time/                   |             |
|    total_timesteps      | 160000      |
| train/                  |             |
|    approx_kl            | 0.001468861 |
|    arrive_dest          | 0.025       |
|    clip_fraction        | 0.0791      |
|    clip_range           | 0.1         |
|    crash                | 0.188       |
|    entropy_loss         | -2.54       |
|    explained_variance   | 0.0936      |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.176        |
|    max_step             | 0            |
|    mean_ep_length       | 93.6         |
|    mean_reward          | 111          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.4558228    |
|    route_completion     | 0.239        |
|    success_rate         | 0            |
|    total_cost           | 3.91         |
| time/                   |              |
|    total_timesteps      | 170000       |
| train/                  |              |
|    approx_kl            | 0.0013470423 |
|    arrive_dest          | 0.0235       |
|    clip_fraction        | 0.107        |
|    clip_range           | 0.1          |
|    crash                | 0.2          |
|    entropy_loss         | -2.52        |
|    explained_variance   | 0.0512       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.178        |
|    max_step             | 0            |
|    mean_ep_length       | 106          |
|    mean_reward          | 126          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.46172205   |
|    route_completion     | 0.246        |
|    success_rate         | 0            |
|    total_cost           | 4.01         |
| time/                   |              |
|    total_timesteps      | 180000       |
| train/                  |              |
|    approx_kl            | 0.0021151397 |
|    arrive_dest          | 0.0222       |
|    clip_fraction        | 0.092        |
|    clip_range           | 0.1          |
|    crash                | 0.211        |
|    entropy_loss         | -2.52        |
|    explained_variance   | 0.109        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.168        |
|    max_step             | 0            |
|    mean_ep_length       | 107          |
|    mean_reward          | 130          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.4663886    |
|    route_completion     | 0.254        |
|    success_rate         | 0            |
|    total_cost           | 3.98         |
| time/                   |              |
|    total_timesteps      | 190000       |
| train/                  |              |
|    approx_kl            | 0.0024694526 |
|    arrive_dest          | 0.0211       |
|    clip_fraction        | 0.102        |
|    clip_range           | 0.1          |
|    crash                | 0.221        |
|    entropy_loss         | -2.49        |
|    explained_variance   | 0.247        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.16         |
|    max_step             | 0            |
|    mean_ep_length       | 80.8         |
|    mean_reward          | 85.2         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.47046143   |
|    route_completion     | 0.253        |
|    success_rate         | 0.1          |
|    total_cost           | 3.83         |
| time/                   |              |
|    total_timesteps      | 200000       |
| train/                  |              |
|    approx_kl            | 0.0007852408 |
|    arrive_dest          | 0.03         |
|    clip_fraction        | 0.0376       |
|    clip_range           | 0.1          |
|    crash                | 0.24         |
|    entropy_loss         | -2.47        |
|    explained_variance   | 0.0372       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.152        |
|    max_step             | 0            |
|    mean_ep_length       | 84.2         |
|    mean_reward          | 95.1         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.47526774   |
|    route_completion     | 0.253        |
|    success_rate         | 0.1          |
|    total_cost           | 3.7          |
| time/                   |              |
|    total_timesteps      | 210000       |
| train/                  |              |
|    approx_kl            | 0.0011926756 |
|    arrive_dest          | 0.0381       |
|    clip_fraction        | 0.109        |
|    clip_range           | 0.1          |
|    crash                | 0.248        |
|    entropy_loss         | -2.46        |
|    explained_variance   | -0.00248     |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.155        |
|    max_step             | 0            |
|    mean_ep_length       | 149          |
|    mean_reward          | 170          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.47853386   |
|    route_completion     | 0.264        |
|    success_rate         | 0            |
|    total_cost           | 3.95         |
| time/                   |              |
|    total_timesteps      | 220000       |
| train/                  |              |
|    approx_kl            | 0.0019409845 |
|    arrive_dest          | 0.0364       |
|    clip_fraction        | 0.0949       |
|    clip_range           | 0.1          |
|    crash                | 0.245        |
|    entropy_loss         | -2.46        |
|    explained_variance   | 0.611        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 306      |
|    ep_rew_mean     | 185      |
| time/              |          |
|    fps             | 800      |
|    iterations      | 86       |
|    time_elapsed    | 275      |
|    total_timesteps | 220160   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 303          |
|    ep_rew_mean          | 186          |
| time/                   |              |
|    fps                  | 803          |
|    iterations           | 87           |
|    time_elapsed         | 277          |
|    total_timesteps      | 222720       |
| train/                  |              |
|    approx_kl            | 0.0019232206 |
|    clip_fraction        | 0.0689       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.45        |
|    explained_variance   | 0.728        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0087       |
|    crash                | 0.148        |
|    max_step             | 0            |
|    mean_ep_length       | 125          |
|    mean_reward          | 134          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.47978193   |
|    route_completion     | 0.27         |
|    success_rate         | 0.2          |
|    total_cost           | 4.15         |
| time/                   |              |
|    total_timesteps      | 230000       |
| train/                  |              |
|    approx_kl            | 0.0018156485 |
|    arrive_dest          | 0.0435       |
|    clip_fraction        | 0.118        |
|    clip_range           | 0.1          |
|    crash                | 0.261        |
|    entropy_loss         | -2.43        |
|    explained_variance   | -0.0138      |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00833      |
|    crash                | 0.15         |
|    max_step             | 0            |
|    mean_ep_length       | 120          |
|    mean_reward          | 140          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.4813707    |
|    route_completion     | 0.276        |
|    success_rate         | 0            |
|    total_cost           | 4.27         |
| time/                   |              |
|    total_timesteps      | 240000       |
| train/                  |              |
|    approx_kl            | 0.0024645843 |
|    arrive_dest          | 0.0417       |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.1          |
|    crash                | 0.267        |
|    entropy_loss         | -2.43        |
|    explained_variance   | 0.0348       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.008        |
|    crash                | 0.144        |
|    max_step             | 0            |
|    mean_ep_length       | 103          |
|    mean_reward          | 121          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.48199686   |
|    route_completion     | 0.278        |
|    success_rate         | 0            |
|    total_cost           | 4.14         |
| time/                   |              |
|    total_timesteps      | 250000       |
| train/                  |              |
|    approx_kl            | 0.0020948243 |
|    arrive_dest          | 0.04         |
|    clip_fraction        | 0.0656       |
|    clip_range           | 0.1          |
|    crash                | 0.288        |
|    entropy_loss         | -2.43        |
|    explained_variance   | 0.828        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00769      |
|    crash                | 0.138        |
|    max_step             | 0            |
|    mean_ep_length       | 122          |
|    mean_reward          | 147          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.486798     |
|    route_completion     | 0.284        |
|    success_rate         | 0            |
|    total_cost           | 4.13         |
| time/                   |              |
|    total_timesteps      | 260000       |
| train/                  |              |
|    approx_kl            | 0.0037397735 |
|    arrive_dest          | 0.0385       |
|    clip_fraction        | 0.082        |
|    clip_range           | 0.1          |
|    crash                | 0.277        |
|    entropy_loss         | -2.42        |
|    explained_variance   | 0.156        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00741      |
|    crash                | 0.148        |
|    max_step             | 0            |
|    mean_ep_length       | 113          |
|    mean_reward          | 146          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.48800406   |
|    route_completion     | 0.287        |
|    success_rate         | 0.1          |
|    total_cost           | 4.01         |
| time/                   |              |
|    total_timesteps      | 270000       |
| train/                  |              |
|    approx_kl            | 0.0018802655 |
|    arrive_dest          | 0.0444       |
|    clip_fraction        | 0.154        |
|    clip_range           | 0.1          |
|    crash                | 0.281        |
|    entropy_loss         | -2.4         |
|    explained_variance   | 0.195        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00714      |
|    crash                | 0.143        |
|    max_step             | 0            |
|    mean_ep_length       | 117          |
|    mean_reward          | 138          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.48636818   |
|    route_completion     | 0.293        |
|    success_rate         | 0            |
|    total_cost           | 4.05         |
| time/                   |              |
|    total_timesteps      | 280000       |
| train/                  |              |
|    approx_kl            | 0.0017055536 |
|    arrive_dest          | 0.0429       |
|    clip_fraction        | 0.103        |
|    clip_range           | 0.1          |
|    crash                | 0.279        |
|    entropy_loss         | -2.38        |
|    explained_variance   | 0.0776       |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0069      |
|    crash                | 0.159       |
|    max_step             | 0           |
|    mean_ep_length       | 105         |
|    mean_reward          | 132         |
|    num_episodes         | 5           |
|    out_of_road          | 0.993       |
|    raw_action           | 0.48755747  |
|    route_completion     | 0.297       |
|    success_rate         | 0.1         |
|    total_cost           | 3.94        |
| time/                   |             |
|    total_timesteps      | 290000      |
| train/                  |             |
|    approx_kl            | 0.007635355 |
|    arrive_dest          | 0.0483      |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.1         |
|    crash                | 0.283       |
|    entropy_loss         | -2.35       |
|    explained_variance   | 0.227       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00667     |
|    crash                | 0.16        |
|    max_step             | 0           |
|    mean_ep_length       | 108         |
|    mean_reward          | 125         |
|    num_episodes         | 5           |
|    out_of_road          | 0.993       |
|    raw_action           | 0.48761448  |
|    route_completion     | 0.298       |
|    success_rate         | 0           |
|    total_cost           | 3.85        |
| time/                   |             |
|    total_timesteps      | 300000      |
| train/                  |             |
|    approx_kl            | 0.002081839 |
|    arrive_dest          | 0.0467      |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.1         |
|    crash                | 0.293       |
|    entropy_loss         | -2.34       |
|    explained_variance   | 0.0868      |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00645      |
|    crash                | 0.161        |
|    max_step             | 0            |
|    mean_ep_length       | 115          |
|    mean_reward          | 137          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.48982176   |
|    route_completion     | 0.301        |
|    success_rate         | 0            |
|    total_cost           | 3.87         |
| time/                   |              |
|    total_timesteps      | 310000       |
| train/                  |              |
|    approx_kl            | 0.0015358672 |
|    arrive_dest          | 0.0452       |
|    clip_fraction        | 0.146        |
|    clip_range           | 0.1          |
|    crash                | 0.297        |
|    entropy_loss         | -2.33        |
|    explained_variance   | 0.182        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00625      |
|    crash                | 0.163        |
|    max_step             | 0            |
|    mean_ep_length       | 104          |
|    mean_reward          | 128          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.4888222    |
|    route_completion     | 0.302        |
|    success_rate         | 0.2          |
|    total_cost           | 3.78         |
| time/                   |              |
|    total_timesteps      | 320000       |
| train/                  |              |
|    approx_kl            | 0.0011433637 |
|    arrive_dest          | 0.0563       |
|    clip_fraction        | 0.0752       |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -2.33        |
|    explained_variance   | 0.0337       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00606      |
|    crash                | 0.158        |
|    max_step             | 0            |
|    mean_ep_length       | 90.6         |
|    mean_reward          | 104          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.49045274   |
|    route_completion     | 0.301        |
|    success_rate         | 0            |
|    total_cost           | 3.7          |
| time/                   |              |
|    total_timesteps      | 330000       |
| train/                  |              |
|    approx_kl            | 0.0047472348 |
|    arrive_dest          | 0.0545       |
|    clip_fraction        | 0.186        |
|    clip_range           | 0.1          |
|    crash                | 0.297        |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.138        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0118       |
|    crash                | 0.159        |
|    max_step             | 0            |
|    mean_ep_length       | 138          |
|    mean_reward          | 145          |
|    num_episodes         | 5            |
|    out_of_road          | 0.988        |
|    raw_action           | 0.49097866   |
|    route_completion     | 0.306        |
|    success_rate         | 0.2          |
|    total_cost           | 4.07         |
| time/                   |              |
|    total_timesteps      | 340000       |
| train/                  |              |
|    approx_kl            | 0.0013385771 |
|    arrive_dest          | 0.0588       |
|    clip_fraction        | 0.153        |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -2.3         |
|    explained_variance   | 0.119        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0114       |
|    crash                | 0.154        |
|    max_step             | 0            |
|    mean_ep_length       | 92.8         |
|    mean_reward          | 97.5         |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.49319193   |
|    route_completion     | 0.305        |
|    success_rate         | 0            |
|    total_cost           | 3.98         |
| time/                   |              |
|    total_timesteps      | 350000       |
| train/                  |              |
|    approx_kl            | 0.0017757828 |
|    arrive_dest          | 0.0571       |
|    clip_fraction        | 0.141        |
|    clip_range           | 0.1          |
|    crash                | 0.303        |
|    entropy_loss         | -2.28        |
|    explained_variance   | 0.0641       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0111       |
|    crash                | 0.15         |
|    max_step             | 0            |
|    mean_ep_length       | 97.4         |
|    mean_reward          | 118          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.49402237   |
|    route_completion     | 0.305        |
|    success_rate         | 0            |
|    total_cost           | 3.92         |
| time/                   |              |
|    total_timesteps      | 360000       |
| train/                  |              |
|    approx_kl            | 0.0015932694 |
|    arrive_dest          | 0.0556       |
|    clip_fraction        | 0.0787       |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -2.28        |
|    explained_variance   | 0.251        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0108       |
|    crash                | 0.157        |
|    max_step             | 0            |
|    mean_ep_length       | 123          |
|    mean_reward          | 165          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.49493366   |
|    route_completion     | 0.308        |
|    success_rate         | 0.2          |
|    total_cost           | 3.85         |
| time/                   |              |
|    total_timesteps      | 370000       |
| train/                  |              |
|    approx_kl            | 0.0007315325 |
|    arrive_dest          | 0.0649       |
|    clip_fraction        | 0.0438       |
|    clip_range           | 0.1          |
|    crash                | 0.297        |
|    entropy_loss         | -2.27        |
|    explained_variance   | 0.235        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0105       |
|    crash                | 0.163        |
|    max_step             | 0            |
|    mean_ep_length       | 91.6         |
|    mean_reward          | 108          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.49741706   |
|    route_completion     | 0.308        |
|    success_rate         | 0.2          |
|    total_cost           | 3.78         |
| time/                   |              |
|    total_timesteps      | 380000       |
| train/                  |              |
|    approx_kl            | 0.0012093742 |
|    arrive_dest          | 0.0737       |
|    clip_fraction        | 0.0867       |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -2.26        |
|    explained_variance   | 0.276        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0103       |
|    crash                | 0.169        |
|    max_step             | 0            |
|    mean_ep_length       | 91.6         |
|    mean_reward          | 104          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.4979086    |
|    route_completion     | 0.308        |
|    success_rate         | 0.1          |
|    total_cost           | 3.72         |
| time/                   |              |
|    total_timesteps      | 390000       |
| train/                  |              |
|    approx_kl            | 0.0018617765 |
|    arrive_dest          | 0.0769       |
|    clip_fraction        | 0.0742       |
|    clip_range           | 0.1          |
|    crash                | 0.292        |
|    entropy_loss         | -2.25        |
|    explained_variance   | 0.404        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.01         |
|    crash                | 0.17         |
|    max_step             | 0            |
|    mean_ep_length       | 86.6         |
|    mean_reward          | 98.1         |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.49799109   |
|    route_completion     | 0.307        |
|    success_rate         | 0            |
|    total_cost           | 3.65         |
| time/                   |              |
|    total_timesteps      | 400000       |
| train/                  |              |
|    approx_kl            | 0.0027723536 |
|    arrive_dest          | 0.075        |
|    clip_fraction        | 0.16         |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -2.24        |
|    explained_variance   | 0.312        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00976     |
|    crash                | 0.176       |
|    max_step             | 0           |
|    mean_ep_length       | 123         |
|    mean_reward          | 164         |
|    num_episodes         | 5           |
|    out_of_road          | 0.99        |
|    raw_action           | 0.4969955   |
|    route_completion     | 0.311       |
|    success_rate         | 0           |
|    total_cost           | 3.6         |
| time/                   |             |
|    total_timesteps      | 410000      |
| train/                  |             |
|    approx_kl            | 0.011830799 |
|    arrive_dest          | 0.0732      |
|    clip_fraction        | 0.156       |
|    clip_range           | 0.1         |
|    crash                | 0.293       |
|    entropy_loss         | -2.22       |
|    explained_variance   | 0.27        |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00952      |
|    crash                | 0.176        |
|    max_step             | 0            |
|    mean_ep_length       | 103          |
|    mean_reward          | 123          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.49745792   |
|    route_completion     | 0.313        |
|    success_rate         | 0            |
|    total_cost           | 3.58         |
| time/                   |              |
|    total_timesteps      | 420000       |
| train/                  |              |
|    approx_kl            | 0.0014132444 |
|    arrive_dest          | 0.0714       |
|    clip_fraction        | 0.0696       |
|    clip_range           | 0.1          |
|    crash                | 0.29         |
|    entropy_loss         | -2.21        |
|    explained_variance   | 0.262        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0093      |
|    crash                | 0.172       |
|    max_step             | 0           |
|    mean_ep_length       | 82.4        |
|    mean_reward          | 86.7        |
|    num_episodes         | 5           |
|    out_of_road          | 0.991       |
|    raw_action           | 0.49935225  |
|    route_completion     | 0.31        |
|    success_rate         | 0.2         |
|    total_cost           | 3.52        |
| time/                   |             |
|    total_timesteps      | 430000      |
| train/                  |             |
|    approx_kl            | 0.001120758 |
|    arrive_dest          | 0.0791      |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.1         |
|    crash                | 0.293       |
|    entropy_loss         | -2.2        |
|    explained_variance   | 0.314       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00909      |
|    crash                | 0.177        |
|    max_step             | 0            |
|    mean_ep_length       | 134          |
|    mean_reward          | 176          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.49878395   |
|    route_completion     | 0.315        |
|    success_rate         | 0            |
|    total_cost           | 3.52         |
| time/                   |              |
|    total_timesteps      | 440000       |
| train/                  |              |
|    approx_kl            | 0.0048934286 |
|    arrive_dest          | 0.0773       |
|    clip_fraction        | 0.152        |
|    clip_range           | 0.1          |
|    crash                | 0.286        |
|    entropy_loss         | -2.19        |
|    explained_variance   | 0.209        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 287      |
|    ep_rew_mean     | 245      |
| time/              |          |
|    fps             | 809      |
|    iterations      | 172      |
|    time_elapsed    | 544      |
|    total_timesteps | 440320   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 283          |
|    ep_rew_mean          | 242          |
| time/                   |              |
|    fps                  | 810          |
|    iterations           | 173          |
|    time_elapsed         | 546          |
|    total_timesteps      | 442880       |
| train/                  |              |
|    approx_kl            | 0.0028545987 |
|    clip_fraction        | 0.0512       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.19        |
|    explained_variance   | 0.176        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00889      |
|    crash                | 0.178        |
|    max_step             | 0            |
|    mean_ep_length       | 120          |
|    mean_reward          | 141          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.49944472   |
|    route_completion     | 0.316        |
|    success_rate         | 0            |
|    total_cost           | 3.52         |
| time/                   |              |
|    total_timesteps      | 450000       |
| train/                  |              |
|    approx_kl            | 0.0010758158 |
|    arrive_dest          | 0.0756       |
|    clip_fraction        | 0.0656       |
|    clip_range           | 0.1          |
|    crash                | 0.302        |
|    entropy_loss         | -2.19        |
|    explained_variance   | 0.426        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0087      |
|    crash                | 0.187       |
|    max_step             | 0           |
|    mean_ep_length       | 114         |
|    mean_reward          | 129         |
|    num_episodes         | 5           |
|    out_of_road          | 0.991       |
|    raw_action           | 0.5003773   |
|    route_completion     | 0.317       |
|    success_rate         | 0           |
|    total_cost           | 3.54        |
| time/                   |             |
|    total_timesteps      | 460000      |
| train/                  |             |
|    approx_kl            | 0.005445419 |
|    arrive_dest          | 0.0739      |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.1         |
|    crash                | 0.309       |
|    entropy_loss         | -2.17       |
|    explained_variance   | 0.144       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00851      |
|    crash                | 0.183        |
|    max_step             | 0            |
|    mean_ep_length       | 99.2         |
|    mean_reward          | 102          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.49919423   |
|    route_completion     | 0.316        |
|    success_rate         | 0            |
|    total_cost           | 3.54         |
| time/                   |              |
|    total_timesteps      | 470000       |
| train/                  |              |
|    approx_kl            | 0.0041236817 |
|    arrive_dest          | 0.0723       |
|    clip_fraction        | 0.0974       |
|    clip_range           | 0.1          |
|    crash                | 0.315        |
|    entropy_loss         | -2.16        |
|    explained_variance   | 0.459        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00833      |
|    crash                | 0.183        |
|    max_step             | 0            |
|    mean_ep_length       | 85.6         |
|    mean_reward          | 85.5         |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.50009733   |
|    route_completion     | 0.314        |
|    success_rate         | 0.2          |
|    total_cost           | 3.48         |
| time/                   |              |
|    total_timesteps      | 480000       |
| train/                  |              |
|    approx_kl            | 0.0110158045 |
|    arrive_dest          | 0.0792       |
|    clip_fraction        | 0.123        |
|    clip_range           | 0.1          |
|    crash                | 0.308        |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.421        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00816      |
|    crash                | 0.18         |
|    max_step             | 0            |
|    mean_ep_length       | 125          |
|    mean_reward          | 161          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.5002503    |
|    route_completion     | 0.317        |
|    success_rate         | 0            |
|    total_cost           | 3.44         |
| time/                   |              |
|    total_timesteps      | 490000       |
| train/                  |              |
|    approx_kl            | 0.0015321232 |
|    arrive_dest          | 0.0776       |
|    clip_fraction        | 0.0909       |
|    clip_range           | 0.1          |
|    crash                | 0.302        |
|    entropy_loss         | -2.16        |
|    explained_variance   | 0.435        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.008        |
|    crash                | 0.176        |
|    max_step             | 0            |
|    mean_ep_length       | 145          |
|    mean_reward          | 123          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.50163335   |
|    route_completion     | 0.319        |
|    success_rate         | 0.2          |
|    total_cost           | 3.67         |
| time/                   |              |
|    total_timesteps      | 500000       |
| train/                  |              |
|    approx_kl            | 0.0012801288 |
|    arrive_dest          | 0.084        |
|    clip_fraction        | 0.0556       |
|    clip_range           | 0.1          |
|    crash                | 0.304        |
|    entropy_loss         | -2.15        |
|    explained_variance   | 0.472        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00784     |
|    crash                | 0.173       |
|    max_step             | 0           |
|    mean_ep_length       | 147         |
|    mean_reward          | 205         |
|    num_episodes         | 5           |
|    out_of_road          | 0.992       |
|    raw_action           | 0.5016348   |
|    route_completion     | 0.323       |
|    success_rate         | 0.1         |
|    total_cost           | 3.65        |
| time/                   |             |
|    total_timesteps      | 510000      |
| train/                  |             |
|    approx_kl            | 0.002375093 |
|    arrive_dest          | 0.0863      |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.1         |
|    crash                | 0.302       |
|    entropy_loss         | -2.15       |
|    explained_variance   | 0.582       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 269      |
|    ep_rew_mean     | 241      |
| time/              |          |
|    fps             | 809      |
|    iterations      | 200      |
|    time_elapsed    | 632      |
|    total_timesteps | 512000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 266          |
|    ep_rew_mean          | 239          |
| time/                   |              |
|    fps                  | 809          |
|    iterations           | 201          |
|    time_elapsed         | 635          |
|    total_timesteps      | 514560       |
| train/                  |              |
|    approx_kl            | 0.0015254931 |
|    clip_fraction        | 0.0723       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.14        |
|    explained_variance   | 0.462        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00769      |
|    crash                | 0.181        |
|    max_step             | 0            |
|    mean_ep_length       | 103          |
|    mean_reward          | 126          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.5033636    |
|    route_completion     | 0.324        |
|    success_rate         | 0.2          |
|    total_cost           | 3.62         |
| time/                   |              |
|    total_timesteps      | 520000       |
| train/                  |              |
|    approx_kl            | 0.0011089094 |
|    arrive_dest          | 0.0923       |
|    clip_fraction        | 0.105        |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -2.13        |
|    explained_variance   | 0.479        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0113       |
|    crash                | 0.177        |
|    max_step             | 0            |
|    mean_ep_length       | 153          |
|    mean_reward          | 174          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.50354415   |
|    route_completion     | 0.326        |
|    success_rate         | 0.1          |
|    total_cost           | 3.71         |
| time/                   |              |
|    total_timesteps      | 530000       |
| train/                  |              |
|    approx_kl            | 0.0026783666 |
|    arrive_dest          | 0.0906       |
|    clip_fraction        | 0.0656       |
|    clip_range           | 0.1          |
|    crash                | 0.306        |
|    entropy_loss         | -2.13        |
|    explained_variance   | 0.483        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0111       |
|    crash                | 0.181        |
|    max_step             | 0            |
|    mean_ep_length       | 151          |
|    mean_reward          | 211          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.5031879    |
|    route_completion     | 0.329        |
|    success_rate         | 0            |
|    total_cost           | 3.68         |
| time/                   |              |
|    total_timesteps      | 540000       |
| train/                  |              |
|    approx_kl            | 0.0104407165 |
|    arrive_dest          | 0.0889       |
|    clip_fraction        | 0.159        |
|    clip_range           | 0.1          |
|    crash                | 0.311        |
|    entropy_loss         | -2.12        |
|    explained_variance   | 0.582        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 275      |
|    ep_rew_mean     | 246      |
| time/              |          |
|    fps             | 806      |
|    iterations      | 211      |
|    time_elapsed    | 669      |
|    total_timesteps | 540160   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 269          |
|    ep_rew_mean          | 240          |
| time/                   |              |
|    fps                  | 807          |
|    iterations           | 212          |
|    time_elapsed         | 672          |
|    total_timesteps      | 542720       |
| train/                  |              |
|    approx_kl            | 0.0012970776 |
|    clip_fraction        | 0.0912       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.12        |
|    explained_variance   | 0.39         |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0109      |
|    crash                | 0.178       |
|    max_step             | 0           |
|    mean_ep_length       | 127         |
|    mean_reward          | 167         |
|    num_episodes         | 5           |
|    out_of_road          | 0.989       |
|    raw_action           | 0.5029231   |
|    route_completion     | 0.331       |
|    success_rate         | 0           |
|    total_cost           | 3.63        |
| time/                   |             |
|    total_timesteps      | 550000      |
| train/                  |             |
|    approx_kl            | 0.004594213 |
|    arrive_dest          | 0.0873      |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.1         |
|    crash                | 0.305       |
|    entropy_loss         | -2.11       |
|    explained_variance   | 0.37        |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0107      |
|    crash                | 0.179       |
|    max_step             | 0           |
|    mean_ep_length       | 114         |
|    mean_reward          | 125         |
|    num_episodes         | 5           |
|    out_of_road          | 0.989       |
|    raw_action           | 0.5030721   |
|    route_completion     | 0.331       |
|    success_rate         | 0.1         |
|    total_cost           | 3.59        |
| time/                   |             |
|    total_timesteps      | 560000      |
| train/                  |             |
|    approx_kl            | 0.004582475 |
|    arrive_dest          | 0.0893      |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.1         |
|    crash                | 0.3         |
|    entropy_loss         | -2.1        |
|    explained_variance   | 0.225       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0105       |
|    crash                | 0.179        |
|    max_step             | 0            |
|    mean_ep_length       | 138          |
|    mean_reward          | 185          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.50380504   |
|    route_completion     | 0.334        |
|    success_rate         | 0            |
|    total_cost           | 3.56         |
| time/                   |              |
|    total_timesteps      | 570000       |
| train/                  |              |
|    approx_kl            | 0.0017954893 |
|    arrive_dest          | 0.0877       |
|    clip_fraction        | 0.157        |
|    clip_range           | 0.1          |
|    crash                | 0.298        |
|    entropy_loss         | -2.09        |
|    explained_variance   | 0.361        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0103       |
|    crash                | 0.179        |
|    max_step             | 0            |
|    mean_ep_length       | 92.2         |
|    mean_reward          | 105          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.504178     |
|    route_completion     | 0.333        |
|    success_rate         | 0            |
|    total_cost           | 3.53         |
| time/                   |              |
|    total_timesteps      | 580000       |
| train/                  |              |
|    approx_kl            | 0.0019125605 |
|    arrive_dest          | 0.0862       |
|    clip_fraction        | 0.209        |
|    clip_range           | 0.1          |
|    crash                | 0.307        |
|    entropy_loss         | -2.09        |
|    explained_variance   | 0.284        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0102       |
|    crash                | 0.176        |
|    max_step             | 0            |
|    mean_ep_length       | 88.8         |
|    mean_reward          | 95.1         |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.50448394   |
|    route_completion     | 0.333        |
|    success_rate         | 0            |
|    total_cost           | 3.5          |
| time/                   |              |
|    total_timesteps      | 590000       |
| train/                  |              |
|    approx_kl            | 0.0018405365 |
|    arrive_dest          | 0.0847       |
|    clip_fraction        | 0.0886       |
|    clip_range           | 0.1          |
|    crash                | 0.305        |
|    entropy_loss         | -2.08        |
|    explained_variance   | 0.521        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.01        |
|    crash                | 0.183       |
|    max_step             | 0           |
|    mean_ep_length       | 108         |
|    mean_reward          | 146         |
|    num_episodes         | 5           |
|    out_of_road          | 0.99        |
|    raw_action           | 0.50453985  |
|    route_completion     | 0.335       |
|    success_rate         | 0.1         |
|    total_cost           | 3.46        |
| time/                   |             |
|    total_timesteps      | 600000      |
| train/                  |             |
|    approx_kl            | 0.002165205 |
|    arrive_dest          | 0.0867      |
|    clip_fraction        | 0.0698      |
|    clip_range           | 0.1         |
|    crash                | 0.303       |
|    entropy_loss         | -2.06       |
|    explained_variance   | 0.564       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00984      |
|    crash                | 0.184        |
|    max_step             | 0            |
|    mean_ep_length       | 121          |
|    mean_reward          | 158          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.5043231    |
|    route_completion     | 0.336        |
|    success_rate         | 0            |
|    total_cost           | 3.43         |
| time/                   |              |
|    total_timesteps      | 610000       |
| train/                  |              |
|    approx_kl            | 0.0011106912 |
|    arrive_dest          | 0.0852       |
|    clip_fraction        | 0.116        |
|    clip_range           | 0.1          |
|    crash                | 0.302        |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.575        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00968     |
|    crash                | 0.181       |
|    max_step             | 0           |
|    mean_ep_length       | 138         |
|    mean_reward          | 171         |
|    num_episodes         | 5           |
|    out_of_road          | 0.99        |
|    raw_action           | 0.50365573  |
|    route_completion     | 0.337       |
|    success_rate         | 0.2         |
|    total_cost           | 3.43        |
| time/                   |             |
|    total_timesteps      | 620000      |
| train/                  |             |
|    approx_kl            | 0.002448033 |
|    arrive_dest          | 0.0903      |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.1         |
|    crash                | 0.303       |
|    entropy_loss         | -2.05       |
|    explained_variance   | 0.467       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00952      |
|    crash                | 0.178        |
|    max_step             | 0            |
|    mean_ep_length       | 101          |
|    mean_reward          | 113          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.5044866    |
|    route_completion     | 0.337        |
|    success_rate         | 0.2          |
|    total_cost           | 3.39         |
| time/                   |              |
|    total_timesteps      | 630000       |
| train/                  |              |
|    approx_kl            | 0.0051714154 |
|    arrive_dest          | 0.0952       |
|    clip_fraction        | 0.151        |
|    clip_range           | 0.1          |
|    crash                | 0.302        |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.473        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00937      |
|    crash                | 0.178        |
|    max_step             | 0            |
|    mean_ep_length       | 113          |
|    mean_reward          | 134          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.50471956   |
|    route_completion     | 0.337        |
|    success_rate         | 0.1          |
|    total_cost           | 3.38         |
| time/                   |              |
|    total_timesteps      | 640000       |
| train/                  |              |
|    approx_kl            | 0.0013121326 |
|    arrive_dest          | 0.0969       |
|    clip_fraction        | 0.152        |
|    clip_range           | 0.1          |
|    crash                | 0.303        |
|    entropy_loss         | -2.04        |
|    explained_variance   | 0.322        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00923      |
|    crash                | 0.178        |
|    max_step             | 0            |
|    mean_ep_length       | 130          |
|    mean_reward          | 170          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.5054919    |
|    route_completion     | 0.339        |
|    success_rate         | 0            |
|    total_cost           | 3.34         |
| time/                   |              |
|    total_timesteps      | 650000       |
| train/                  |              |
|    approx_kl            | 0.0017918786 |
|    arrive_dest          | 0.0954       |
|    clip_fraction        | 0.125        |
|    clip_range           | 0.1          |
|    crash                | 0.302        |
|    entropy_loss         | -2.03        |
|    explained_variance   | 0.365        |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.00909       |
|    crash                | 0.182         |
|    max_step             | 0             |
|    mean_ep_length       | 107           |
|    mean_reward          | 131           |
|    num_episodes         | 5             |
|    out_of_road          | 0.991         |
|    raw_action           | 0.5057526     |
|    route_completion     | 0.339         |
|    success_rate         | 0.1           |
|    total_cost           | 3.32          |
| time/                   |               |
|    total_timesteps      | 660000        |
| train/                  |               |
|    approx_kl            | 0.00072814326 |
|    arrive_dest          | 0.097         |
|    clip_fraction        | 0.0819        |
|    clip_range           | 0.1           |
|    crash                | 0.297         |
|    entropy_loss         | -2.04         |
|    explained_variance   | 0.54

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00896     |
|    crash                | 0.182       |
|    max_step             | 0           |
|    mean_ep_length       | 123         |
|    mean_reward          | 169         |
|    num_episodes         | 5           |
|    out_of_road          | 0.991       |
|    raw_action           | 0.5062322   |
|    route_completion     | 0.341       |
|    success_rate         | 0.1         |
|    total_cost           | 3.28        |
| time/                   |             |
|    total_timesteps      | 670000      |
| train/                  |             |
|    approx_kl            | 0.002674142 |
|    arrive_dest          | 0.0985      |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.1         |
|    crash                | 0.299       |
|    entropy_loss         | -2.02       |
|    explained_variance   | 0.703       |
|    learning_rate        | 5e-05 

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.00882    |
|    crash                | 0.185      |
|    max_step             | 0          |
|    mean_ep_length       | 100        |
|    mean_reward          | 119        |
|    num_episodes         | 5          |
|    out_of_road          | 0.991      |
|    raw_action           | 0.50538224 |
|    route_completion     | 0.34       |
|    success_rate         | 0          |
|    total_cost           | 3.25       |
| time/                   |            |
|    total_timesteps      | 680000     |
| train/                  |            |
|    approx_kl            | 0.06561373 |
|    arrive_dest          | 0.0971     |
|    clip_fraction        | 0.396      |
|    clip_range           | 0.1        |
|    crash                | 0.297      |
|    entropy_loss         | -2.02      |
|    explained_variance   | 0.74       |
|    learning_rate        | 5e-05      |
|    loss       

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0087      |
|    crash                | 0.197       |
|    max_step             | 0           |
|    mean_ep_length       | 151         |
|    mean_reward          | 231         |
|    num_episodes         | 5           |
|    out_of_road          | 0.991       |
|    raw_action           | 0.5055856   |
|    route_completion     | 0.344       |
|    success_rate         | 0           |
|    total_cost           | 3.23        |
| time/                   |             |
|    total_timesteps      | 690000      |
| train/                  |             |
|    approx_kl            | 0.002374458 |
|    arrive_dest          | 0.0957      |
|    clip_fraction        | 0.0573      |
|    clip_range           | 0.1         |
|    crash                | 0.293       |
|    entropy_loss         | -2.01       |
|    explained_variance   | 0.664       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 271      |
|    ep_rew_mean     | 259      |
| time/              |          |
|    fps             | 803      |
|    iterations      | 270      |
|    time_elapsed    | 860      |
|    total_timesteps | 691200   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 263          |
|    ep_rew_mean          | 257          |
| time/                   |              |
|    fps                  | 804          |
|    iterations           | 271          |
|    time_elapsed         | 862          |
|    total_timesteps      | 693760       |
| train/                  |              |
|    approx_kl            | 0.0022210516 |
|    clip_fraction        | 0.108        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.489        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00857      |
|    crash                | 0.194        |
|    max_step             | 0            |
|    mean_ep_length       | 92           |
|    mean_reward          | 101          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.5063836    |
|    route_completion     | 0.344        |
|    success_rate         | 0.1          |
|    total_cost           | 3.2          |
| time/                   |              |
|    total_timesteps      | 700000       |
| train/                  |              |
|    approx_kl            | 0.0016769336 |
|    arrive_dest          | 0.0971       |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.1          |
|    crash                | 0.289        |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.509        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00845      |
|    crash                | 0.2          |
|    max_step             | 0            |
|    mean_ep_length       | 145          |
|    mean_reward          | 209          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.5050227    |
|    route_completion     | 0.347        |
|    success_rate         | 0.1          |
|    total_cost           | 3.17         |
| time/                   |              |
|    total_timesteps      | 710000       |
| train/                  |              |
|    approx_kl            | 0.0024287808 |
|    arrive_dest          | 0.0986       |
|    clip_fraction        | 0.138        |
|    clip_range           | 0.1          |
|    crash                | 0.285        |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.315        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00833      |
|    crash                | 0.203        |
|    max_step             | 0            |
|    mean_ep_length       | 132          |
|    mean_reward          | 171          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.5046897    |
|    route_completion     | 0.348        |
|    success_rate         | 0.1          |
|    total_cost           | 3.16         |
| time/                   |              |
|    total_timesteps      | 720000       |
| train/                  |              |
|    approx_kl            | 0.0017205518 |
|    arrive_dest          | 0.1          |
|    clip_fraction        | 0.102        |
|    clip_range           | 0.1          |
|    crash                | 0.281        |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.527        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00822     |
|    crash                | 0.203       |
|    max_step             | 0           |
|    mean_ep_length       | 109         |
|    mean_reward          | 126         |
|    num_episodes         | 5           |
|    out_of_road          | 0.992       |
|    raw_action           | 0.5043543   |
|    route_completion     | 0.348       |
|    success_rate         | 0.1         |
|    total_cost           | 3.13        |
| time/                   |             |
|    total_timesteps      | 730000      |
| train/                  |             |
|    approx_kl            | 0.005002088 |
|    arrive_dest          | 0.101       |
|    clip_fraction        | 0.0978      |
|    clip_range           | 0.1         |
|    crash                | 0.282       |
|    entropy_loss         | -2.01       |
|    explained_variance   | 0.623       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00811      |
|    crash                | 0.2          |
|    max_step             | 0            |
|    mean_ep_length       | 104          |
|    mean_reward          | 128          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.5044266    |
|    route_completion     | 0.348        |
|    success_rate         | 0.2          |
|    total_cost           | 3.12         |
| time/                   |              |
|    total_timesteps      | 740000       |
| train/                  |              |
|    approx_kl            | 0.0032032307 |
|    arrive_dest          | 0.105        |
|    clip_fraction        | 0.19         |
|    clip_range           | 0.1          |
|    crash                | 0.286        |
|    entropy_loss         | -2           |
|    explained_variance   | 0.736        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.008        |
|    crash                | 0.2          |
|    max_step             | 0            |
|    mean_ep_length       | 105          |
|    mean_reward          | 125          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.50477785   |
|    route_completion     | 0.348        |
|    success_rate         | 0            |
|    total_cost           | 3.09         |
| time/                   |              |
|    total_timesteps      | 750000       |
| train/                  |              |
|    approx_kl            | 0.0019484675 |
|    arrive_dest          | 0.104        |
|    clip_fraction        | 0.117        |
|    clip_range           | 0.1          |
|    crash                | 0.283        |
|    entropy_loss         | -2           |
|    explained_variance   | 0.577        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00789     |
|    crash                | 0.208       |
|    max_step             | 0           |
|    mean_ep_length       | 121         |
|    mean_reward          | 148         |
|    num_episodes         | 5           |
|    out_of_road          | 0.992       |
|    raw_action           | 0.50474334  |
|    route_completion     | 0.35        |
|    success_rate         | 0           |
|    total_cost           | 3.11        |
| time/                   |             |
|    total_timesteps      | 760000      |
| train/                  |             |
|    approx_kl            | 0.003507678 |
|    arrive_dest          | 0.103       |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.1         |
|    crash                | 0.287       |
|    entropy_loss         | -1.99       |
|    explained_variance   | 0.593       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00779     |
|    crash                | 0.21        |
|    max_step             | 0           |
|    mean_ep_length       | 117         |
|    mean_reward          | 135         |
|    num_episodes         | 5           |
|    out_of_road          | 0.992       |
|    raw_action           | 0.5051413   |
|    route_completion     | 0.351       |
|    success_rate         | 0.3         |
|    total_cost           | 3.15        |
| time/                   |             |
|    total_timesteps      | 770000      |
| train/                  |             |
|    approx_kl            | 0.012491569 |
|    arrive_dest          | 0.109       |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.1         |
|    crash                | 0.283       |
|    entropy_loss         | -1.98       |
|    explained_variance   | 0.394       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00769     |
|    crash                | 0.208       |
|    max_step             | 0           |
|    mean_ep_length       | 104         |
|    mean_reward          | 117         |
|    num_episodes         | 5           |
|    out_of_road          | 0.992       |
|    raw_action           | 0.50511634  |
|    route_completion     | 0.35        |
|    success_rate         | 0.2         |
|    total_cost           | 3.14        |
| time/                   |             |
|    total_timesteps      | 780000      |
| train/                  |             |
|    approx_kl            | 0.001243911 |
|    arrive_dest          | 0.113       |
|    clip_fraction        | 0.067       |
|    clip_range           | 0.1         |
|    crash                | 0.282       |
|    entropy_loss         | -1.97       |
|    explained_variance   | 0.491       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00759      |
|    crash                | 0.208        |
|    max_step             | 0            |
|    mean_ep_length       | 113          |
|    mean_reward          | 148          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.50487536   |
|    route_completion     | 0.351        |
|    success_rate         | 0.1          |
|    total_cost           | 3.12         |
| time/                   |              |
|    total_timesteps      | 790000       |
| train/                  |              |
|    approx_kl            | 0.0009553937 |
|    arrive_dest          | 0.114        |
|    clip_fraction        | 0.0692       |
|    clip_range           | 0.1          |
|    crash                | 0.281        |
|    entropy_loss         | -1.96        |
|    explained_variance   | 0.551        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0075       |
|    crash                | 0.212        |
|    max_step             | 0            |
|    mean_ep_length       | 97.2         |
|    mean_reward          | 111          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.5054172    |
|    route_completion     | 0.351        |
|    success_rate         | 0.1          |
|    total_cost           | 3.11         |
| time/                   |              |
|    total_timesteps      | 800000       |
| train/                  |              |
|    approx_kl            | 0.0012024216 |
|    arrive_dest          | 0.115        |
|    clip_fraction        | 0.0913       |
|    clip_range           | 0.1          |
|    crash                | 0.285        |
|    entropy_loss         | -1.95        |
|    explained_variance   | 0.8          |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00741      |
|    crash                | 0.217        |
|    max_step             | 0            |
|    mean_ep_length       | 153          |
|    mean_reward          | 221          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.5055329    |
|    route_completion     | 0.354        |
|    success_rate         | 0.1          |
|    total_cost           | 3.11         |
| time/                   |              |
|    total_timesteps      | 810000       |
| train/                  |              |
|    approx_kl            | 0.0020214142 |
|    arrive_dest          | 0.116        |
|    clip_fraction        | 0.14         |
|    clip_range           | 0.1          |
|    crash                | 0.286        |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.442        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00732      |
|    crash                | 0.215        |
|    max_step             | 0            |
|    mean_ep_length       | 114          |
|    mean_reward          | 148          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.5058248    |
|    route_completion     | 0.354        |
|    success_rate         | 0.1          |
|    total_cost           | 3.09         |
| time/                   |              |
|    total_timesteps      | 820000       |
| train/                  |              |
|    approx_kl            | 0.0017926326 |
|    arrive_dest          | 0.117        |
|    clip_fraction        | 0.175        |
|    clip_range           | 0.1          |
|    crash                | 0.29         |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.564        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00723      |
|    crash                | 0.212        |
|    max_step             | 0            |
|    mean_ep_length       | 131          |
|    mean_reward          | 151          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.5053559    |
|    route_completion     | 0.355        |
|    success_rate         | 0            |
|    total_cost           | 3.1          |
| time/                   |              |
|    total_timesteps      | 830000       |
| train/                  |              |
|    approx_kl            | 0.0016992319 |
|    arrive_dest          | 0.116        |
|    clip_fraction        | 0.124        |
|    clip_range           | 0.1          |
|    crash                | 0.294        |
|    entropy_loss         | -1.91        |
|    explained_variance   | 0.561        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00714      |
|    crash                | 0.21         |
|    max_step             | 0            |
|    mean_ep_length       | 108          |
|    mean_reward          | 124          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.5056198    |
|    route_completion     | 0.355        |
|    success_rate         | 0.1          |
|    total_cost           | 3.07         |
| time/                   |              |
|    total_timesteps      | 840000       |
| train/                  |              |
|    approx_kl            | 0.0027283393 |
|    arrive_dest          | 0.117        |
|    clip_fraction        | 0.156        |
|    clip_range           | 0.1          |
|    crash                | 0.293        |
|    entropy_loss         | -1.9         |
|    explained_variance   | 0.608        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00706      |
|    crash                | 0.212        |
|    max_step             | 0            |
|    mean_ep_length       | 127          |
|    mean_reward          | 169          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.50547475   |
|    route_completion     | 0.357        |
|    success_rate         | 0            |
|    total_cost           | 3.06         |
| time/                   |              |
|    total_timesteps      | 850000       |
| train/                  |              |
|    approx_kl            | 0.0039635794 |
|    arrive_dest          | 0.115        |
|    clip_fraction        | 0.0918       |
|    clip_range           | 0.1          |
|    crash                | 0.296        |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.622        |
|    learni

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.00698    |
|    crash                | 0.212      |
|    max_step             | 0          |
|    mean_ep_length       | 140        |
|    mean_reward          | 202        |
|    num_episodes         | 5          |
|    out_of_road          | 0.993      |
|    raw_action           | 0.5050492  |
|    route_completion     | 0.359      |
|    success_rate         | 0.1        |
|    total_cost           | 3.04       |
| time/                   |            |
|    total_timesteps      | 860000     |
| train/                  |            |
|    approx_kl            | 0.04903429 |
|    arrive_dest          | 0.116      |
|    clip_fraction        | 0.143      |
|    clip_range           | 0.1        |
|    crash                | 0.295      |
|    entropy_loss         | -1.89      |
|    explained_variance   | 0.7        |
|    learning_rate        | 5e-05      |
|    loss       

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0069       |
|    crash                | 0.211        |
|    max_step             | 0            |
|    mean_ep_length       | 108          |
|    mean_reward          | 124          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.50482863   |
|    route_completion     | 0.36         |
|    success_rate         | 0.1          |
|    total_cost           | 3.04         |
| time/                   |              |
|    total_timesteps      | 870000       |
| train/                  |              |
|    approx_kl            | 0.0035007414 |
|    arrive_dest          | 0.117        |
|    clip_fraction        | 0.186        |
|    clip_range           | 0.1          |
|    crash                | 0.297        |
|    entropy_loss         | -1.88        |
|    explained_variance   | 0.491        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00682     |
|    crash                | 0.214       |
|    max_step             | 0           |
|    mean_ep_length       | 119         |
|    mean_reward          | 157         |
|    num_episodes         | 5           |
|    out_of_road          | 0.993       |
|    raw_action           | 0.5052759   |
|    route_completion     | 0.361       |
|    success_rate         | 0           |
|    total_cost           | 3.03        |
| time/                   |             |
|    total_timesteps      | 880000      |
| train/                  |             |
|    approx_kl            | 0.005692319 |
|    arrive_dest          | 0.116       |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.1         |
|    crash                | 0.3         |
|    entropy_loss         | -1.87       |
|    explained_variance   | 0.749       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00674     |
|    crash                | 0.218       |
|    max_step             | 0           |
|    mean_ep_length       | 159         |
|    mean_reward          | 241         |
|    num_episodes         | 5           |
|    out_of_road          | 0.993       |
|    raw_action           | 0.5046685   |
|    route_completion     | 0.364       |
|    success_rate         | 0.1         |
|    total_cost           | 3.02        |
| time/                   |             |
|    total_timesteps      | 890000      |
| train/                  |             |
|    approx_kl            | 0.028057862 |
|    arrive_dest          | 0.117       |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.1         |
|    crash                | 0.299       |
|    entropy_loss         | -1.87       |
|    explained_variance   | 0.713       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 248      |
|    ep_rew_mean     | 254      |
| time/              |          |
|    fps             | 789      |
|    iterations      | 348      |
|    time_elapsed    | 1127     |
|    total_timesteps | 890880   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 239          |
|    ep_rew_mean          | 249          |
| time/                   |              |
|    fps                  | 790          |
|    iterations           | 349          |
|    time_elapsed         | 1130         |
|    total_timesteps      | 893440       |
| train/                  |              |
|    approx_kl            | 0.0019176077 |
|    clip_fraction        | 0.0867       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.87        |
|    explained_variance   | 0.355        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00667      |
|    crash                | 0.22         |
|    max_step             | 0            |
|    mean_ep_length       | 133          |
|    mean_reward          | 175          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.50447094   |
|    route_completion     | 0.366        |
|    success_rate         | 0.1          |
|    total_cost           | 3.01         |
| time/                   |              |
|    total_timesteps      | 900000       |
| train/                  |              |
|    approx_kl            | 0.0030423794 |
|    arrive_dest          | 0.118        |
|    clip_fraction        | 0.159        |
|    clip_range           | 0.1          |
|    crash                | 0.3          |
|    entropy_loss         | -1.86        |
|    explained_variance   | 0.451        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00659     |
|    crash                | 0.22        |
|    max_step             | 0           |
|    mean_ep_length       | 116         |
|    mean_reward          | 153         |
|    num_episodes         | 5           |
|    out_of_road          | 0.993       |
|    raw_action           | 0.504226    |
|    route_completion     | 0.366       |
|    success_rate         | 0           |
|    total_cost           | 2.99        |
| time/                   |             |
|    total_timesteps      | 910000      |
| train/                  |             |
|    approx_kl            | 0.009121658 |
|    arrive_dest          | 0.116       |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.1         |
|    crash                | 0.301       |
|    entropy_loss         | -1.85       |
|    explained_variance   | 0.76        |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00652     |
|    crash                | 0.217       |
|    max_step             | 0           |
|    mean_ep_length       | 117         |
|    mean_reward          | 144         |
|    num_episodes         | 5           |
|    out_of_road          | 0.993       |
|    raw_action           | 0.50385326  |
|    route_completion     | 0.366       |
|    success_rate         | 0.1         |
|    total_cost           | 3.01        |
| time/                   |             |
|    total_timesteps      | 920000      |
| train/                  |             |
|    approx_kl            | 0.005702075 |
|    arrive_dest          | 0.117       |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.1         |
|    crash                | 0.302       |
|    entropy_loss         | -1.85       |
|    explained_variance   | 0.766       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00645      |
|    crash                | 0.224        |
|    max_step             | 0            |
|    mean_ep_length       | 114          |
|    mean_reward          | 153          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.5042244    |
|    route_completion     | 0.367        |
|    success_rate         | 0.1          |
|    total_cost           | 2.99         |
| time/                   |              |
|    total_timesteps      | 930000       |
| train/                  |              |
|    approx_kl            | 0.0032837254 |
|    arrive_dest          | 0.118        |
|    clip_fraction        | 0.0561       |
|    clip_range           | 0.1          |
|    crash                | 0.301        |
|    entropy_loss         | -1.84        |
|    explained_variance   | 0.415        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00638     |
|    crash                | 0.223       |
|    max_step             | 0           |
|    mean_ep_length       | 126         |
|    mean_reward          | 173         |
|    num_episodes         | 5           |
|    out_of_road          | 0.994       |
|    raw_action           | 0.5041177   |
|    route_completion     | 0.368       |
|    success_rate         | 0.1         |
|    total_cost           | 2.98        |
| time/                   |             |
|    total_timesteps      | 940000      |
| train/                  |             |
|    approx_kl            | 0.003130878 |
|    arrive_dest          | 0.119       |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.1         |
|    crash                | 0.302       |
|    entropy_loss         | -1.84       |
|    explained_variance   | 0.815       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00632      |
|    crash                | 0.223        |
|    max_step             | 0            |
|    mean_ep_length       | 118          |
|    mean_reward          | 158          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.50419664   |
|    route_completion     | 0.368        |
|    success_rate         | 0            |
|    total_cost           | 2.96         |
| time/                   |              |
|    total_timesteps      | 950000       |
| train/                  |              |
|    approx_kl            | 0.0025411847 |
|    arrive_dest          | 0.118        |
|    clip_fraction        | 0.14         |
|    clip_range           | 0.1          |
|    crash                | 0.303        |
|    entropy_loss         | -1.83        |
|    explained_variance   | 0.677        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00625      |
|    crash                | 0.221        |
|    max_step             | 0            |
|    mean_ep_length       | 105          |
|    mean_reward          | 125          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.5045645    |
|    route_completion     | 0.367        |
|    success_rate         | 0.2          |
|    total_cost           | 2.95         |
| time/                   |              |
|    total_timesteps      | 960000       |
| train/                  |              |
|    approx_kl            | 0.0011515182 |
|    arrive_dest          | 0.121        |
|    clip_fraction        | 0.139        |
|    clip_range           | 0.1          |
|    crash                | 0.306        |
|    entropy_loss         | -1.82        |
|    explained_variance   | 0.677        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00619      |
|    crash                | 0.219        |
|    max_step             | 0            |
|    mean_ep_length       | 112          |
|    mean_reward          | 150          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.5051603    |
|    route_completion     | 0.368        |
|    success_rate         | 0.2          |
|    total_cost           | 2.94         |
| time/                   |              |
|    total_timesteps      | 970000       |
| train/                  |              |
|    approx_kl            | 0.0013483902 |
|    arrive_dest          | 0.124        |
|    clip_fraction        | 0.0989       |
|    clip_range           | 0.1          |
|    crash                | 0.307        |
|    entropy_loss         | -1.82        |
|    explained_variance   | 0.581        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00816      |
|    crash                | 0.216        |
|    max_step             | 0            |
|    mean_ep_length       | 206          |
|    mean_reward          | 314          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.50500315   |
|    route_completion     | 0.372        |
|    success_rate         | 0.3          |
|    total_cost           | 2.97         |
| time/                   |              |
|    total_timesteps      | 980000       |
| train/                  |              |
|    approx_kl            | 0.0026812986 |
|    arrive_dest          | 0.127        |
|    clip_fraction        | 0.18         |
|    clip_range           | 0.1          |
|    crash                | 0.304        |
|    entropy_loss         | -1.81        |
|    explained_variance   | 0.579        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 242      |
|    ep_rew_mean     | 253      |
| time/              |          |
|    fps             | 782      |
|    iterations      | 383      |
|    time_elapsed    | 1253     |
|    total_timesteps | 980480   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 230         |
|    ep_rew_mean          | 242         |
| time/                   |             |
|    fps                  | 782         |
|    iterations           | 384         |
|    time_elapsed         | 1255        |
|    total_timesteps      | 983040      |
| train/                  |             |
|    approx_kl            | 0.008551387 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.81       |
|    explained_variance   | 0.65        |
|    learning_rate        | 5e

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00808      |
|    crash                | 0.216        |
|    max_step             | 0            |
|    mean_ep_length       | 172          |
|    mean_reward          | 259          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.50478524   |
|    route_completion     | 0.375        |
|    success_rate         | 0.2          |
|    total_cost           | 2.96         |
| time/                   |              |
|    total_timesteps      | 990000       |
| train/                  |              |
|    approx_kl            | 0.0010815787 |
|    arrive_dest          | 0.129        |
|    clip_fraction        | 0.137        |
|    clip_range           | 0.1          |
|    crash                | 0.303        |
|    entropy_loss         | -1.81        |
|    explained_variance   | 0.664        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.008        |
|    crash                | 0.216        |
|    max_step             | 0            |
|    mean_ep_length       | 137          |
|    mean_reward          | 178          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.5048416    |
|    route_completion     | 0.376        |
|    success_rate         | 0.2          |
|    total_cost           | 2.97         |
| time/                   |              |
|    total_timesteps      | 1000000      |
| train/                  |              |
|    approx_kl            | 0.0015018651 |
|    arrive_dest          | 0.132        |
|    clip_fraction        | 0.187        |
|    clip_range           | 0.1          |
|    crash                | 0.302        |
|    entropy_loss         | -1.8         |
|    explained_variance   | 0.698        |
|    learni

<stable_baselines3.ppo.ppo.PPO at 0x1585e1fd350>