In [1]:
SCENARIOS = 1
VALUATION_SCENARIOS = 50

In [2]:
!pip install stable_baselines3[extra]



In [3]:
!pip install git+https://github.com/metadriverse/metadrive.git

Collecting git+https://github.com/metadriverse/metadrive.git
  Cloning https://github.com/metadriverse/metadrive.git to c:\users\colton\appdata\local\temp\pip-req-build-b104pse2
  Resolved https://github.com/metadriverse/metadrive.git to commit a7f7d0b6fcf9b7422f9a9cd5c674fb1661c06a7e
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/metadriverse/metadrive.git 'C:\Users\Colton\AppData\Local\Temp\pip-req-build-b104pse2'


## RL Environment

In [4]:
import copy
from metadrive.envs.safe_metadrive_env import SafeMetaDriveEnv

DEFAULT_CONFIG = {
    # The below are default configs copied from SafeMetaDriveEnv
    # Environment difficulty
    "accident_prob": 0.8,
    "traffic_density": 0.05,
    # Termination conditions
    "crash_vehicle_done": False,
    "crash_object_done": False,
    # Reward
    "success_reward": 10.0,
    "driving_reward": 1.0,
    "speed_reward": 0.1,
    # Penalty will be negated and added to reward
    "out_of_road_penalty": 5.0,
    "crash_vehicle_penalty": 1.0,
    "crash_object_penalty": 1.0,
    # Cost will be return in info["cost"] and you can do constrained optimization with it
    "crash_vehicle_cost": 1.0,
    "crash_object_cost": 1.0,
    "out_of_road_cost": 1.0,
}

# Use deepcopy to avoid modifying the DEFAULT_CONFIG
TRAINING_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
TRAINING_CONFIG.update(
    {  # Environment setting
        "num_scenarios": SCENARIOS,  # There are totally 50 possible maps.
        "start_seed": 100,  # We will use the map with seeds in [100, 150) as the default training environment.
    }
)


def get_training_env(extra_config=None):
    config = copy.deepcopy(TRAINING_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


VALIDATION_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
VALIDATION_CONFIG.update(
    {  # Environment setting
        "num_scenarios": VALUATION_SCENARIOS,  # There are totally 50 possible maps.
        "start_seed": 1000,  # We will use the map with seeds in [1000, 1050) as the default validation environment.
    }
)


def get_validation_env(extra_config=None):
    config = copy.deepcopy(VALIDATION_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


## Import and utilities

In [5]:
import argparse
import datetime
import logging
import os
import uuid
from collections import defaultdict
from pathlib import Path

import numpy as np
from metadrive.engine.logger import set_log_level
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO
from stable_baselines3.ppo.policies import ActorCriticPolicy

from stable_baselines3.td3 import TD3
from stable_baselines3.td3.policies import TD3Policy

from stable_baselines3.sac import SAC
from stable_baselines3.sac.policies import SACPolicy

from wandb.integration.sb3 import WandbCallback

import wandb


# Remove MetaDrive's logging information when episode ends.
set_log_level(logging.ERROR)

In [6]:

def get_time_str():
    return datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


def remove_reset_seed_and_add_monitor(make_env, trial_dir):
    """
    MetaDrive env's reset function takes a seed argument and use it to determine the map to load.
    However, in stable-baselines3, it calls reset function with a seed argument serving as the random seed,
    which is not what we want. We do a trick here to remap the random seed to map index.

    Stable-baselines3 recommends using Monitor wrapper to log training data. We add a Monitor wrapper here.
    """
    from gymnasium import Wrapper
    from stable_baselines3.common.monitor import Monitor
    class NewClass(Wrapper):
        def reset(self, seed=None, **kwargs):
            # PZH: We do a trick here to remap the seed to the map index. This can help randomize the maps.
            if seed is not None:
                new_seed = self.env.start_index + (seed % self.env.num_scenarios)
            else:
                new_seed = None
            return self.env.reset(seed=new_seed, **kwargs)

    def new_make_env():
        env = make_env()
        NewClass.__name__ = env.__class__.__name__ + "WithoutResetSeed"
        wrapped_env = NewClass(env)
        wrapped_env = Monitor(env=wrapped_env, filename=str(trial_dir))
        return wrapped_env

    return new_make_env


class CustomizedEvalCallback(EvalCallback):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.evaluations_info_buffer = defaultdict(list)
        self.training_info_buffer = defaultdict(list)
        self.train_timesteps = list()
        self.train_results = list()
        self.train_length = list()

    def _log_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.evaluations_info_buffer[k].append(info[k])
    
    def _log_train_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.training_info_buffer[k].append(info[k])

        if "raw_action" in info:
            self.evaluations_info_buffer["raw_action"].append(info["raw_action"])

    def _on_step(self) -> bool:
        """
        PZH Note: Overall this function is copied from original EvalCallback._on_step.
        We additionally record evaluations_info_buffer to the logger.
        """

        from stable_baselines3.common.evaluation import evaluate_policy
        from stable_baselines3.common.vec_env import sync_envs_normalization

        continue_training = True

        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Sync training and eval env if there is VecNormalize
            if self.model.get_vec_normalize_env() is not None:
                try:
                    sync_envs_normalization(self.training_env, self.eval_env)
                except AttributeError as e:
                    raise AssertionError(
                        "Training and eval env are not wrapped the same way, "
                        "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback "
                        "and warning above."
                    ) from e

            # Reset success rate buffer
            self._is_success_buffer = []

            episode_rewards, episode_lengths = evaluate_policy(
                self.model,
                self.eval_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_success_callback,
            )

            train_episode_rewards, train_episode_lengths = evaluate_policy(
                self.model,
                self.training_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_train_success_callback,
            )

            if self.log_path is not None:
                assert isinstance(episode_rewards, list)
                assert isinstance(episode_lengths, list)
                assert isinstance(train_episode_rewards, list)
                assert isinstance(train_episode_lengths, list)
                self.evaluations_timesteps.append(self.num_timesteps)
                self.evaluations_results.append(episode_rewards)
                self.evaluations_length.append(episode_lengths)

                kwargs = {}
                # Save success log if present
                if len(self._is_success_buffer) > 0:
                    self.evaluations_successes.append(self._is_success_buffer)
                    kwargs = dict(successes=self.evaluations_successes)

                # PZH: Save evaluations_info_buffer to the log file
                for k, v in self.evaluations_info_buffer.items():
                    kwargs[k] = v

                np.savez(
                    self.log_path,
                    timesteps=self.evaluations_timesteps,
                    results=self.evaluations_results,
                    ep_lengths=self.evaluations_length,
                    **kwargs,  # type: ignore[arg-type]
                )

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = float(mean_reward)

            if self.verbose >= 1:
                print(
                    f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
            self.logger.record("eval/mean_reward", float(mean_reward))
            self.logger.record("eval/mean_ep_length", mean_ep_length)

            # PZH: Add this metric.
            self.logger.record("eval/num_episodes", len(episode_rewards))

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
                if self.verbose >= 1:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

            # PZH: We record evaluations_info_buffer to the logger
            for k, v in self.evaluations_info_buffer.items():
                self.logger.record("eval/{}".format(k), np.mean(np.asarray(v)))

            for k, v in self.training_info_buffer.items():
                self.logger.record("train/{}".format(k), np.mean(np.asarray(v)))

            # Dump log so the evaluation results are printed with the correct timestep
            self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
                if self.verbose >= 1:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
                self.best_mean_reward = float(mean_reward)
                # Trigger callback on new best model, if needed
                if self.callback_on_new_best is not None:
                    continue_training = self.callback_on_new_best.on_step()

            # Trigger callback after every evaluation, if needed
            if self.callback is not None:
                continue_training = continue_training and self._on_event()

        return continue_training


## Setup PPO trainer


In [7]:

# ===== Set up some arguments =====
exp_name = "ppo_metadrive" + "_" + str(SCENARIOS) + "_scenarios"
use_wandb = True

experiment_batch_name = "{}".format(exp_name)
trial_name = "{}_{}_{}".format(experiment_batch_name, get_time_str(), uuid.uuid4().hex[:8])
experiment_dir = Path("runs") / experiment_batch_name
trial_dir = experiment_dir / trial_name
os.makedirs(experiment_dir, exist_ok=True)
os.makedirs(trial_dir, exist_ok=True)
print(f"We start logging training data into {trial_dir}")


We start logging training data into runs\ppo_metadrive_1_scenarios\ppo_metadrive_1_scenarios_2025-03-19_13-31-01_e2c76247


In [8]:
# ===== Setup environment =====
num_train_envs = 10
num_eval_envs = 5
train_env = make_vec_env(remove_reset_seed_and_add_monitor(get_training_env, trial_dir), n_envs=num_train_envs,
                            vec_env_cls=SubprocVecEnv)
eval_env = make_vec_env(remove_reset_seed_and_add_monitor(get_validation_env, trial_dir), n_envs=num_eval_envs,
                        vec_env_cls=SubprocVecEnv)

In [9]:
# ===== Setup evaluation, checkpointing, and wandb =====
save_freq = 10_000  # Number of steps per model checkpoint
eval_freq = 10_000  # Number of steps per evaluation

wandb_save_freq = 10_000  # Number of steps per evaluation

num_eval_episodes = 5

checkpoint_callback = CheckpointCallback(
    name_prefix="rl_model",
    verbose=2,
    save_freq=save_freq,
    save_path=str(trial_dir / "models")
)
eval_callback = CustomizedEvalCallback(
    eval_env,
    best_model_save_path=str(trial_dir / "eval"),
    log_path=str(trial_dir / "eval"),
    eval_freq=max(eval_freq // num_train_envs, 1),
    n_eval_episodes=num_eval_episodes,
)
callbacks = [checkpoint_callback, eval_callback]
if use_wandb:
    wandb.init(
        project="cs260r",
        id=trial_name,
        name=experiment_batch_name,
        sync_tensorboard=True,
        dir=str(trial_dir),
    )
    callbacks.append(WandbCallback(model_save_path=str(trial_dir / "wandb_models"), model_save_freq=wandb_save_freq))
callbacks = CallbackList(callbacks)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: coltonrowe (coltonrowe-ucla) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [10]:

# ===== Setup the training algorithm =====
# model = SAC(
#     env=train_env,
#     policy=SACPolicy,
#     verbose=2,
#     batch_size = 256,
#     buffer_size = 1000000,
#     learning_rate = 5e-5,
#     gamma=0.98,
#     tau = 0.002,
#     device = "cuda",
#     tensorboard_log=str(trial_dir))
# model = TD3(
#     env=train_env,
#     policy=TD3Policy,
#     learning_rate=1e-3,
#     buffer_size=1_000_000,
#     learning_starts=100,
#     batch_size=256,
#     tau=0.005,
#     gamma = 0.99,
#     train_freq=1,
#     gradient_steps=1,
#     action_noise=None,
#     replay_buffer_class=None,
#     replay_buffer_kwargs=None,
#     optimize_memory_usage=False,
#     policy_delay=2,
#     target_policy_noise=0.2,
#     target_noise_clip=0.5,
#     stats_window_size=100,
#     tensorboard_log=None,
#     policy_kwargs=None,
#     verbose=2,
#     seed=None,
#     device='auto',
#     _init_setup_model=True
#     )
model = PPO(
    env=train_env,
    policy=ActorCriticPolicy,
    n_steps=256,  # n_steps * n_envs = total_batch_size
    n_epochs=20,
    learning_rate=5e-5,
    batch_size=256,
    clip_range=0.1,
    vf_coef=0.5,
    ent_coef=0.0,
    max_grad_norm=10.0,
    tensorboard_log=str(trial_dir),
    verbose=2,
    device="auto",
)


Using cpu device


In [11]:
ckpt = None
if ckpt:
    ckpt = Path(ckpt)
    print(f"Loading checkpoint from {ckpt}!")
    from stable_baselines3.common.save_util import load_from_zip_file
    data, params, pytorch_variables = load_from_zip_file(ckpt, device=model.device, print_system_info=False)
    model.set_parameters(params, exact_match=True, device=model.device)


In [None]:
# ===== Launch training =====
total_timesteps = 2_000_000  # 1M steps
model.learn(
    total_timesteps=total_timesteps,
    callback=callbacks,
    reset_num_timesteps=True,
    tb_log_name=experiment_batch_name,
    log_interval=1,
    progress_bar=True,
)

Logging to runs\ppo_metadrive_1_scenarios\ppo_metadrive_1_scenarios_2025-03-19_13-31-01_e2c76247\ppo_metadrive_1_scenarios_1


-----------------------------
| time/              |      |
|    fps             | 1477 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2560 |
-----------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 420          |
|    ep_rew_mean          | 0.386        |
| time/                   |              |
|    fps                  | 1392         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 5120         |
| train/                  |              |
|    approx_kl            | 0.0034976683 |
|    clip_fraction        | 0.197        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.84        |
|    explained_variance   | -0.0985      |
|    learning_rate        | 5e-05        |
|    loss                 | -0.00616     |
|    n_updates            | 20           |
|    policy_grad

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 59.8         |
|    mean_reward          | 5.18         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.1489305    |
|    route_completion     | 0.0436       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 10000        |
| train/                  |              |
|    approx_kl            | 0.0030138248 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.156        |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.84        |
|    explained_variance   | 0.00642      |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 404      |
|    ep_rew_mean     | 0.224    |
| time/              |          |
|    fps             | 1116     |
|    iterations      | 4        |
|    time_elapsed    | 9        |
|    total_timesteps | 10240    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 404          |
|    ep_rew_mean          | 0.224        |
| time/                   |              |
|    fps                  | 1135         |
|    iterations           | 5            |
|    time_elapsed         | 11           |
|    total_timesteps      | 12800        |
| train/                  |              |
|    approx_kl            | 0.0040975437 |
|    clip_fraction        | 0.259        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.83        |
|    explained_variance   | 0.0122       |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 30.4         |
|    mean_reward          | 1.88         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.21275437   |
|    route_completion     | 0.0389       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 20000        |
| train/                  |              |
|    approx_kl            | 0.0022839517 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.136        |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.81        |
|    explained_variance   | -0.00141     |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 40.6         |
|    mean_reward          | 11.4         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.26597214   |
|    route_completion     | 0.0447       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 30000        |
| train/                  |              |
|    approx_kl            | 0.0017422615 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.086        |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.78        |
|    explained_variance   | 0.261        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 445      |
|    ep_rew_mean     | 10       |
| time/              |          |
|    fps             | 1055     |
|    iterations      | 12       |
|    time_elapsed    | 29       |
|    total_timesteps | 30720    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 427          |
|    ep_rew_mean          | 10.3         |
| time/                   |              |
|    fps                  | 1064         |
|    iterations           | 13           |
|    time_elapsed         | 31           |
|    total_timesteps      | 33280        |
| train/                  |              |
|    approx_kl            | 0.0015327324 |
|    clip_fraction        | 0.0608       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.78        |
|    explained_variance   | 0.387        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.05         |
|    max_step             | 0            |
|    mean_ep_length       | 58.4         |
|    mean_reward          | 44.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.32368365   |
|    route_completion     | 0.072        |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 40000        |
| train/                  |              |
|    approx_kl            | 0.0017607443 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0676       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.75        |
|    explained_variance   | 0.589        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 356      |
|    ep_rew_mean     | 11.6     |
| time/              |          |
|    fps             | 1002     |
|    iterations      | 16       |
|    time_elapsed    | 40       |
|    total_timesteps | 40960    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 334          |
|    ep_rew_mean          | 11.7         |
| time/                   |              |
|    fps                  | 997          |
|    iterations           | 17           |
|    time_elapsed         | 43           |
|    total_timesteps      | 43520        |
| train/                  |              |
|    approx_kl            | 0.0013040805 |
|    clip_fraction        | 0.0478       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.74        |
|    explained_variance   | 0.204        |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.04        |
|    max_step             | 0           |
|    mean_ep_length       | 26.8        |
|    mean_reward          | 5.98        |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.3551948   |
|    route_completion     | 0.065       |
|    success_rate         | 0           |
|    total_cost           | 1           |
| time/                   |             |
|    total_timesteps      | 50000       |
| train/                  |             |
|    approx_kl            | 0.001252122 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.0393      |
|    clip_range           | 0.1         |
|    crash                | 0           |
|    entropy_loss         | -2.72       |
|    explained_variance   | 0.426       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0333       |
|    max_step             | 0            |
|    mean_ep_length       | 36.8         |
|    mean_reward          | 14.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.37895107   |
|    route_completion     | 0.066        |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 60000        |
| train/                  |              |
|    approx_kl            | 0.0011053956 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0278       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.7         |
|    explained_variance   | 0.498        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0286       |
|    max_step             | 0            |
|    mean_ep_length       | 35.8         |
|    mean_reward          | 12.3         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.39718556   |
|    route_completion     | 0.0654       |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 70000        |
| train/                  |              |
|    approx_kl            | 0.0016348973 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0679       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.69        |
|    explained_variance   | 0.388        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.05         |
|    max_step             | 0            |
|    mean_ep_length       | 74.4         |
|    mean_reward          | 68.4         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.41932294   |
|    route_completion     | 0.0833       |
|    success_rate         | 0            |
|    total_cost           | 1.07         |
| time/                   |              |
|    total_timesteps      | 80000        |
| train/                  |              |
|    approx_kl            | 0.0014686355 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0538       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.67        |
|    explained_variance   | 0.688        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | 12.1     |
| time/              |          |
|    fps             | 890      |
|    iterations      | 32       |
|    time_elapsed    | 92       |
|    total_timesteps | 81920    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 104          |
|    ep_rew_mean          | 13.2         |
| time/                   |              |
|    fps                  | 889          |
|    iterations           | 33           |
|    time_elapsed         | 95           |
|    total_timesteps      | 84480        |
| train/                  |              |
|    approx_kl            | 0.0012274153 |
|    clip_fraction        | 0.054        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.66        |
|    explained_variance   | 0.372        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0444       |
|    max_step             | 0            |
|    mean_ep_length       | 65           |
|    mean_reward          | 57.1         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.4295478    |
|    route_completion     | 0.0925       |
|    success_rate         | 0            |
|    total_cost           | 1.07         |
| time/                   |              |
|    total_timesteps      | 90000        |
| train/                  |              |
|    approx_kl            | 0.0008392199 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0129       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.65        |
|    explained_variance   | 0.542        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.06        |
|    max_step             | 0           |
|    mean_ep_length       | 113         |
|    mean_reward          | 79.1        |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.4392219   |
|    route_completion     | 0.115       |
|    success_rate         | 0           |
|    total_cost           | 2.98        |
| time/                   |             |
|    total_timesteps      | 100000      |
| train/                  |             |
|    approx_kl            | 0.001374891 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.0393      |
|    clip_range           | 0.1         |
|    crash                | 0           |
|    entropy_loss         | -2.62       |
|    explained_variance   | 0.393       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 92.1     |
|    ep_rew_mean     | 15.8     |
| time/              |          |
|    fps             | 841      |
|    iterations      | 40       |
|    time_elapsed    | 121      |
|    total_timesteps | 102400   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 95.7          |
|    ep_rew_mean          | 17.6          |
| time/                   |               |
|    fps                  | 842           |
|    iterations           | 41            |
|    time_elapsed         | 124           |
|    total_timesteps      | 104960        |
| train/                  |               |
|    approx_kl            | 0.00056433864 |
|    clip_fraction        | 0.0104        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.61         |
|    explained_variance   | 0.332         |


------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0545       |
|    max_step             | 0            |
|    mean_ep_length       | 80           |
|    mean_reward          | 83           |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.44685218   |
|    route_completion     | 0.129        |
|    success_rate         | 0            |
|    total_cost           | 2.85         |
| time/                   |              |
|    total_timesteps      | 110000       |
| train/                  |              |
|    approx_kl            | 0.0011694848 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0326       |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.6         |
|    explained_variance   | 0.341        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 109      |
|    ep_rew_mean     | 22.1     |
| time/              |          |
|    fps             | 831      |
|    iterations      | 43       |
|    time_elapsed    | 132      |
|    total_timesteps | 110080   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 103           |
|    ep_rew_mean          | 21.1          |
| time/                   |               |
|    fps                  | 833           |
|    iterations           | 44            |
|    time_elapsed         | 135           |
|    total_timesteps      | 112640        |
| train/                  |               |
|    approx_kl            | 0.00089991046 |
|    clip_fraction        | 0.0219        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.6          |
|    explained_variance   | 0.364         |


-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0             |
|    crash                | 0.0833        |
|    max_step             | 0             |
|    mean_ep_length       | 102           |
|    mean_reward          | 126           |
|    num_episodes         | 5             |
|    out_of_road          | 1             |
|    raw_action           | 0.45386443    |
|    route_completion     | 0.151         |
|    success_rate         | 0             |
|    total_cost           | 2.92          |
| time/                   |               |
|    total_timesteps      | 120000        |
| train/                  |               |
|    approx_kl            | 0.00095700036 |
|    arrive_dest          | 0             |
|    clip_fraction        | 0.0107        |
|    clip_range           | 0.1           |
|    crash                | 0.0833        |
|    entropy_loss         | -2.58         |
|    explained_variance   | 0.33

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 125      |
|    ep_rew_mean     | 27.8     |
| time/              |          |
|    fps             | 823      |
|    iterations      | 47       |
|    time_elapsed    | 146      |
|    total_timesteps | 120320   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 129          |
|    ep_rew_mean          | 29.6         |
| time/                   |              |
|    fps                  | 826          |
|    iterations           | 48           |
|    time_elapsed         | 148          |
|    total_timesteps      | 122880       |
| train/                  |              |
|    approx_kl            | 0.0011453987 |
|    clip_fraction        | 0.0325       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.57        |
|    explained_variance   | 0.252        |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.138       |
|    max_step             | 0           |
|    mean_ep_length       | 88          |
|    mean_reward          | 92.4        |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.45959774  |
|    route_completion     | 0.167       |
|    success_rate         | 0           |
|    total_cost           | 2.8         |
| time/                   |             |
|    total_timesteps      | 130000      |
| train/                  |             |
|    approx_kl            | 0.001085806 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.0462      |
|    clip_range           | 0.1         |
|    crash                | 0.0769      |
|    entropy_loss         | -2.55       |
|    explained_variance   | 0.327       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.143        |
|    max_step             | 0            |
|    mean_ep_length       | 93.4         |
|    mean_reward          | 96.1         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.46373758   |
|    route_completion     | 0.178        |
|    success_rate         | 0            |
|    total_cost           | 3.01         |
| time/                   |              |
|    total_timesteps      | 140000       |
| train/                  |              |
|    approx_kl            | 0.0012948388 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0579       |
|    clip_range           | 0.1          |
|    crash                | 0.0714       |
|    entropy_loss         | -2.54        |
|    explained_variance   | 0.473        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.16        |
|    max_step             | 0           |
|    mean_ep_length       | 88.4        |
|    mean_reward          | 101         |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.46639666  |
|    route_completion     | 0.187       |
|    success_rate         | 0           |
|    total_cost           | 2.88        |
| time/                   |             |
|    total_timesteps      | 150000      |
| train/                  |             |
|    approx_kl            | 0.001826104 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.0526      |
|    clip_range           | 0.1         |
|    crash                | 0.0667      |
|    entropy_loss         | -2.52       |
|    explained_variance   | 0.15        |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.163        |
|    max_step             | 0            |
|    mean_ep_length       | 84.8         |
|    mean_reward          | 82.5         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.46882787   |
|    route_completion     | 0.195        |
|    success_rate         | 0            |
|    total_cost           | 2.83         |
| time/                   |              |
|    total_timesteps      | 160000       |
| train/                  |              |
|    approx_kl            | 0.0017821701 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0892       |
|    clip_range           | 0.1          |
|    crash                | 0.0625       |
|    entropy_loss         | -2.5         |
|    explained_variance   | 0.0108       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.176        |
|    max_step             | 0            |
|    mean_ep_length       | 94.8         |
|    mean_reward          | 97.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.4699917    |
|    route_completion     | 0.204        |
|    success_rate         | 0            |
|    total_cost           | 2.81         |
| time/                   |              |
|    total_timesteps      | 170000       |
| train/                  |              |
|    approx_kl            | 0.0014976977 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0746       |
|    clip_range           | 0.1          |
|    crash                | 0.0588       |
|    entropy_loss         | -2.48        |
|    explained_variance   | 0.0792       |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0.167       |
|    max_step             | 0           |
|    mean_ep_length       | 92          |
|    mean_reward          | 103         |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.4666259   |
|    route_completion     | 0.21        |
|    success_rate         | 0           |
|    total_cost           | 2.77        |
| time/                   |             |
|    total_timesteps      | 180000      |
| train/                  |             |
|    approx_kl            | 0.004471784 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.1         |
|    crash                | 0.0556      |
|    entropy_loss         | -2.45       |
|    explained_variance   | 0.0259      |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0105       |
|    crash                | 0.179        |
|    max_step             | 0            |
|    mean_ep_length       | 172          |
|    mean_reward          | 220          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.4628448    |
|    route_completion     | 0.235        |
|    success_rate         | 0.1          |
|    total_cost           | 3.16         |
| time/                   |              |
|    total_timesteps      | 190000       |
| train/                  |              |
|    approx_kl            | 0.0027125317 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0602       |
|    clip_range           | 0.1          |
|    crash                | 0.0632       |
|    entropy_loss         | -2.42        |
|    explained_variance   | 0.0171       |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 293      |
|    ep_rew_mean     | 135      |
| time/              |          |
|    fps             | 802      |
|    iterations      | 75       |
|    time_elapsed    | 239      |
|    total_timesteps | 192000   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 291         |
|    ep_rew_mean          | 138         |
| time/                   |             |
|    fps                  | 803         |
|    iterations           | 76          |
|    time_elapsed         | 242         |
|    total_timesteps      | 194560      |
| train/                  |             |
|    approx_kl            | 0.001609922 |
|    clip_fraction        | 0.0491      |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.41       |
|    explained_variance   | 0.0557      |
|    learning_rate        | 5e

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.01        |
|    crash                | 0.17        |
|    max_step             | 0           |
|    mean_ep_length       | 79.4        |
|    mean_reward          | 78.3        |
|    num_episodes         | 5           |
|    out_of_road          | 0.99        |
|    raw_action           | 0.46275747  |
|    route_completion     | 0.236       |
|    success_rate         | 0           |
|    total_cost           | 3.06        |
| time/                   |             |
|    total_timesteps      | 200000      |
| train/                  |             |
|    approx_kl            | 0.004028824 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.0847      |
|    clip_range           | 0.1         |
|    crash                | 0.07        |
|    entropy_loss         | -2.41       |
|    explained_variance   | 0.0441      |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.00952     |
|    crash                | 0.19        |
|    max_step             | 0           |
|    mean_ep_length       | 136         |
|    mean_reward          | 153         |
|    num_episodes         | 5           |
|    out_of_road          | 0.99        |
|    raw_action           | 0.4635569   |
|    route_completion     | 0.25        |
|    success_rate         | 0           |
|    total_cost           | 3.17        |
| time/                   |             |
|    total_timesteps      | 210000      |
| train/                  |             |
|    approx_kl            | 0.000873875 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.0831      |
|    clip_range           | 0.1         |
|    crash                | 0.0952      |
|    entropy_loss         | -2.39       |
|    explained_variance   | 0.126       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00909      |
|    crash                | 0.209        |
|    max_step             | 0            |
|    mean_ep_length       | 140          |
|    mean_reward          | 188          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.46295664   |
|    route_completion     | 0.267        |
|    success_rate         | 0            |
|    total_cost           | 3.21         |
| time/                   |              |
|    total_timesteps      | 220000       |
| train/                  |              |
|    approx_kl            | 0.0010505744 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.1          |
|    crash                | 0.0909       |
|    entropy_loss         | -2.37        |
|    explained_variance   | 0.0715       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0087       |
|    crash                | 0.2          |
|    max_step             | 0            |
|    mean_ep_length       | 85           |
|    mean_reward          | 92.2         |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.46513566   |
|    route_completion     | 0.267        |
|    success_rate         | 0            |
|    total_cost           | 3.13         |
| time/                   |              |
|    total_timesteps      | 230000       |
| train/                  |              |
|    approx_kl            | 0.0017314838 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0925       |
|    clip_range           | 0.1          |
|    crash                | 0.087        |
|    entropy_loss         | -2.34        |
|    explained_variance   | 0.0979       |
|    learni

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.00833    |
|    crash                | 0.2        |
|    max_step             | 0          |
|    mean_ep_length       | 131        |
|    mean_reward          | 162        |
|    num_episodes         | 5          |
|    out_of_road          | 0.992      |
|    raw_action           | 0.46593037 |
|    route_completion     | 0.275      |
|    success_rate         | 0          |
|    total_cost           | 3.08       |
| time/                   |            |
|    total_timesteps      | 240000     |
| train/                  |            |
|    approx_kl            | 0.00632412 |
|    arrive_dest          | 0          |
|    clip_fraction        | 0.118      |
|    clip_range           | 0.1        |
|    crash                | 0.0833     |
|    entropy_loss         | -2.33      |
|    explained_variance   | 0.0192     |
|    learning_rate        | 5e-05      |
|    loss       

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.008        |
|    crash                | 0.208        |
|    max_step             | 0            |
|    mean_ep_length       | 109          |
|    mean_reward          | 114          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.46923858   |
|    route_completion     | 0.28         |
|    success_rate         | 0            |
|    total_cost           | 3.13         |
| time/                   |              |
|    total_timesteps      | 250000       |
| train/                  |              |
|    approx_kl            | 0.0008802861 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0599       |
|    clip_range           | 0.1          |
|    crash                | 0.096        |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.27         |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00769      |
|    crash                | 0.2          |
|    max_step             | 0            |
|    mean_ep_length       | 89.4         |
|    mean_reward          | 86.5         |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.46953472   |
|    route_completion     | 0.279        |
|    success_rate         | 0            |
|    total_cost           | 3.05         |
| time/                   |              |
|    total_timesteps      | 260000       |
| train/                  |              |
|    approx_kl            | 0.0010933296 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0553       |
|    clip_range           | 0.1          |
|    crash                | 0.0923       |
|    entropy_loss         | -2.3         |
|    explained_variance   | 0.138        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00741      |
|    crash                | 0.215        |
|    max_step             | 0            |
|    mean_ep_length       | 103          |
|    mean_reward          | 131          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.47031292   |
|    route_completion     | 0.284        |
|    success_rate         | 0            |
|    total_cost           | 2.98         |
| time/                   |              |
|    total_timesteps      | 270000       |
| train/                  |              |
|    approx_kl            | 0.0025541713 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0708       |
|    clip_range           | 0.1          |
|    crash                | 0.0963       |
|    entropy_loss         | -2.29        |
|    explained_variance   | 0.429        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00714      |
|    crash                | 0.221        |
|    max_step             | 0            |
|    mean_ep_length       | 132          |
|    mean_reward          | 144          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.47072724   |
|    route_completion     | 0.29         |
|    success_rate         | 0            |
|    total_cost           | 3.11         |
| time/                   |              |
|    total_timesteps      | 280000       |
| train/                  |              |
|    approx_kl            | 0.0014957695 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0699       |
|    clip_range           | 0.1          |
|    crash                | 0.107        |
|    entropy_loss         | -2.28        |
|    explained_variance   | 0.627        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0069       |
|    crash                | 0.221        |
|    max_step             | 0            |
|    mean_ep_length       | 120          |
|    mean_reward          | 144          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.47016189   |
|    route_completion     | 0.295        |
|    success_rate         | 0            |
|    total_cost           | 3.08         |
| time/                   |              |
|    total_timesteps      | 290000       |
| train/                  |              |
|    approx_kl            | 0.0041207904 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.117        |
|    clip_range           | 0.1          |
|    crash                | 0.103        |
|    entropy_loss         | -2.26        |
|    explained_variance   | 0.375        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00667      |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 99.8         |
|    mean_reward          | 104          |
|    num_episodes         | 5            |
|    out_of_road          | 0.993        |
|    raw_action           | 0.46953416   |
|    route_completion     | 0.299        |
|    success_rate         | 0            |
|    total_cost           | 3.07         |
| time/                   |              |
|    total_timesteps      | 300000       |
| train/                  |              |
|    approx_kl            | 0.0009066608 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.11         |
|    clip_range           | 0.1          |
|    crash                | 0.107        |
|    entropy_loss         | -2.25        |
|    explained_variance   | 0.0546       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00645      |
|    crash                | 0.232        |
|    max_step             | 0            |
|    mean_ep_length       | 138          |
|    mean_reward          | 163          |
|    num_episodes         | 5            |
|    out_of_road          | 0.994        |
|    raw_action           | 0.4725283    |
|    route_completion     | 0.308        |
|    success_rate         | 0            |
|    total_cost           | 3.16         |
| time/                   |              |
|    total_timesteps      | 310000       |
| train/                  |              |
|    approx_kl            | 0.0018622061 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0862       |
|    clip_range           | 0.1          |
|    crash                | 0.116        |
|    entropy_loss         | -2.23        |
|    explained_variance   | 0.323        |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.00625       |
|    crash                | 0.244         |
|    max_step             | 0             |
|    mean_ep_length       | 127           |
|    mean_reward          | 162           |
|    num_episodes         | 5             |
|    out_of_road          | 0.994         |
|    raw_action           | 0.47002548    |
|    route_completion     | 0.314         |
|    success_rate         | 0             |
|    total_cost           | 3.23          |
| time/                   |               |
|    total_timesteps      | 320000        |
| train/                  |               |
|    approx_kl            | 0.00064169645 |
|    arrive_dest          | 0             |
|    clip_fraction        | 0.0127        |
|    clip_range           | 0.1           |
|    crash                | 0.113         |
|    entropy_loss         | -2.22         |
|    explained_variance   | 0.24

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0121       |
|    crash                | 0.242        |
|    max_step             | 0            |
|    mean_ep_length       | 118          |
|    mean_reward          | 137          |
|    num_episodes         | 5            |
|    out_of_road          | 0.988        |
|    raw_action           | 0.46929067   |
|    route_completion     | 0.318        |
|    success_rate         | 0.1          |
|    total_cost           | 3.26         |
| time/                   |              |
|    total_timesteps      | 330000       |
| train/                  |              |
|    approx_kl            | 0.0068990765 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.162        |
|    clip_range           | 0.1          |
|    crash                | 0.139        |
|    entropy_loss         | -2.21        |
|    explained_variance   | 0.299        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0118       |
|    crash                | 0.241        |
|    max_step             | 0            |
|    mean_ep_length       | 126          |
|    mean_reward          | 147          |
|    num_episodes         | 5            |
|    out_of_road          | 0.988        |
|    raw_action           | 0.46954674   |
|    route_completion     | 0.319        |
|    success_rate         | 0            |
|    total_cost           | 3.28         |
| time/                   |              |
|    total_timesteps      | 340000       |
| train/                  |              |
|    approx_kl            | 0.0011171727 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0649       |
|    clip_range           | 0.1          |
|    crash                | 0.153        |
|    entropy_loss         | -2.21        |
|    explained_variance   | 0.436        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0114       |
|    crash                | 0.24         |
|    max_step             | 0            |
|    mean_ep_length       | 153          |
|    mean_reward          | 127          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.4697273    |
|    route_completion     | 0.322        |
|    success_rate         | 0            |
|    total_cost           | 3.87         |
| time/                   |              |
|    total_timesteps      | 350000       |
| train/                  |              |
|    approx_kl            | 0.0017137539 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.12         |
|    clip_range           | 0.1          |
|    crash                | 0.154        |
|    entropy_loss         | -2.19        |
|    explained_variance   | 0.364        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0111       |
|    crash                | 0.244        |
|    max_step             | 0            |
|    mean_ep_length       | 102          |
|    mean_reward          | 113          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.46903703   |
|    route_completion     | 0.321        |
|    success_rate         | 0            |
|    total_cost           | 3.82         |
| time/                   |              |
|    total_timesteps      | 360000       |
| train/                  |              |
|    approx_kl            | 0.0017615821 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.154        |
|    clip_range           | 0.1          |
|    crash                | 0.161        |
|    entropy_loss         | -2.19        |
|    explained_variance   | 0.198        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0162       |
|    crash                | 0.243        |
|    max_step             | 0            |
|    mean_ep_length       | 146          |
|    mean_reward          | 172          |
|    num_episodes         | 5            |
|    out_of_road          | 0.984        |
|    raw_action           | 0.4687788    |
|    route_completion     | 0.328        |
|    success_rate         | 0.1          |
|    total_cost           | 3.88         |
| time/                   |              |
|    total_timesteps      | 370000       |
| train/                  |              |
|    approx_kl            | 0.0010183941 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.192        |
|    clip_range           | 0.1          |
|    crash                | 0.173        |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.352        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0211       |
|    crash                | 0.242        |
|    max_step             | 0            |
|    mean_ep_length       | 183          |
|    mean_reward          | 135          |
|    num_episodes         | 5            |
|    out_of_road          | 0.979        |
|    raw_action           | 0.4675228    |
|    route_completion     | 0.333        |
|    success_rate         | 0.1          |
|    total_cost           | 4.59         |
| time/                   |              |
|    total_timesteps      | 380000       |
| train/                  |              |
|    approx_kl            | 0.0010839336 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0689       |
|    clip_range           | 0.1          |
|    crash                | 0.179        |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.341        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0205       |
|    crash                | 0.251        |
|    max_step             | 0            |
|    mean_ep_length       | 91.8         |
|    mean_reward          | 102          |
|    num_episodes         | 5            |
|    out_of_road          | 0.979        |
|    raw_action           | 0.46860245   |
|    route_completion     | 0.336        |
|    success_rate         | 0            |
|    total_cost           | 4.53         |
| time/                   |              |
|    total_timesteps      | 390000       |
| train/                  |              |
|    approx_kl            | 0.0021319962 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0661       |
|    clip_range           | 0.1          |
|    crash                | 0.185        |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.73         |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.02         |
|    crash                | 0.245        |
|    max_step             | 0            |
|    mean_ep_length       | 156          |
|    mean_reward          | 143          |
|    num_episodes         | 5            |
|    out_of_road          | 0.98         |
|    raw_action           | 0.46882      |
|    route_completion     | 0.339        |
|    success_rate         | 0            |
|    total_cost           | 4.75         |
| time/                   |              |
|    total_timesteps      | 400000       |
| train/                  |              |
|    approx_kl            | 0.0015563723 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0448       |
|    clip_range           | 0.1          |
|    crash                | 0.19         |
|    entropy_loss         | -2.16        |
|    explained_variance   | 0.443        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0244       |
|    crash                | 0.244        |
|    max_step             | 0            |
|    mean_ep_length       | 183          |
|    mean_reward          | 213          |
|    num_episodes         | 5            |
|    out_of_road          | 0.976        |
|    raw_action           | 0.46870154   |
|    route_completion     | 0.346        |
|    success_rate         | 0.1          |
|    total_cost           | 5.05         |
| time/                   |              |
|    total_timesteps      | 410000       |
| train/                  |              |
|    approx_kl            | 0.0021204012 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.126        |
|    clip_range           | 0.1          |
|    crash                | 0.185        |
|    entropy_loss         | -2.15        |
|    explained_variance   | 0.337        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0286      |
|    crash                | 0.238       |
|    max_step             | 0           |
|    mean_ep_length       | 215         |
|    mean_reward          | 170         |
|    num_episodes         | 5           |
|    out_of_road          | 0.971       |
|    raw_action           | 0.590944    |
|    route_completion     | 0.352       |
|    success_rate         | 0.1         |
|    total_cost           | 5.74        |
| time/                   |             |
|    total_timesteps      | 420000      |
| train/                  |             |
|    approx_kl            | 0.006762162 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.197       |
|    clip_range           | 0.1         |
|    crash                | 0.19        |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.254       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0326       |
|    crash                | 0.242        |
|    max_step             | 0            |
|    mean_ep_length       | 183          |
|    mean_reward          | 169          |
|    num_episodes         | 5            |
|    out_of_road          | 0.967        |
|    raw_action           | 0.5889123    |
|    route_completion     | 0.357        |
|    success_rate         | 0.1          |
|    total_cost           | 5.96         |
| time/                   |              |
|    total_timesteps      | 430000       |
| train/                  |              |
|    approx_kl            | 0.0022589853 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0871       |
|    clip_range           | 0.1          |
|    crash                | 0.195        |
|    entropy_loss         | -2.13        |
|    explained_variance   | 0.788        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0318      |
|    crash                | 0.245       |
|    max_step             | 0           |
|    mean_ep_length       | 117         |
|    mean_reward          | 143         |
|    num_episodes         | 5           |
|    out_of_road          | 0.968       |
|    raw_action           | 0.5871543   |
|    route_completion     | 0.359       |
|    success_rate         | 0           |
|    total_cost           | 5.87        |
| time/                   |             |
|    total_timesteps      | 440000      |
| train/                  |             |
|    approx_kl            | 0.001377874 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.1         |
|    crash                | 0.205       |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.431       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0311       |
|    crash                | 0.249        |
|    max_step             | 0            |
|    mean_ep_length       | 73.6         |
|    mean_reward          | 68.9         |
|    num_episodes         | 5            |
|    out_of_road          | 0.969        |
|    raw_action           | 0.58546346   |
|    route_completion     | 0.356        |
|    success_rate         | 0            |
|    total_cost           | 5.76         |
| time/                   |              |
|    total_timesteps      | 450000       |
| train/                  |              |
|    approx_kl            | 0.0013443971 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0758       |
|    clip_range           | 0.1          |
|    crash                | 0.2          |
|    entropy_loss         | -2.12        |
|    explained_variance   | 0.684        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0304       |
|    crash                | 0.248        |
|    max_step             | 0            |
|    mean_ep_length       | 128          |
|    mean_reward          | 129          |
|    num_episodes         | 5            |
|    out_of_road          | 0.97         |
|    raw_action           | 0.58365595   |
|    route_completion     | 0.357        |
|    success_rate         | 0            |
|    total_cost           | 5.79         |
| time/                   |              |
|    total_timesteps      | 460000       |
| train/                  |              |
|    approx_kl            | 0.0022526602 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.0987       |
|    clip_range           | 0.1          |
|    crash                | 0.204        |
|    entropy_loss         | -2.11        |
|    explained_variance   | 0.475        |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.034         |
|    crash                | 0.255         |
|    max_step             | 0             |
|    mean_ep_length       | 194           |
|    mean_reward          | 192           |
|    num_episodes         | 5             |
|    out_of_road          | 0.966         |
|    raw_action           | 0.5818018     |
|    route_completion     | 0.362         |
|    success_rate         | 0.1           |
|    total_cost           | 6.07          |
| time/                   |               |
|    total_timesteps      | 470000        |
| train/                  |               |
|    approx_kl            | 0.00095049496 |
|    arrive_dest          | 0             |
|    clip_fraction        | 0.104         |
|    clip_range           | 0.1           |
|    crash                | 0.204         |
|    entropy_loss         | -2.09         |
|    explained_variance   | 0.31

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.0417     |
|    crash                | 0.25       |
|    max_step             | 0          |
|    mean_ep_length       | 185        |
|    mean_reward          | 221        |
|    num_episodes         | 5          |
|    out_of_road          | 0.958      |
|    raw_action           | 0.58020914 |
|    route_completion     | 0.368      |
|    success_rate         | 0.2        |
|    total_cost           | 6.15       |
| time/                   |            |
|    total_timesteps      | 480000     |
| train/                  |            |
|    approx_kl            | 0.01506494 |
|    arrive_dest          | 0          |
|    clip_fraction        | 0.163      |
|    clip_range           | 0.1        |
|    crash                | 0.2        |
|    entropy_loss         | -2.09      |
|    explained_variance   | 0.645      |
|    learning_rate        | 5e-05      |
|    loss       

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 414      |
|    ep_rew_mean     | 347      |
| time/              |          |
|    fps             | 341      |
|    iterations      | 188      |
|    time_elapsed    | 1410     |
|    total_timesteps | 481280   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 412          |
|    ep_rew_mean          | 348          |
| time/                   |              |
|    fps                  | 342          |
|    iterations           | 189          |
|    time_elapsed         | 1414         |
|    total_timesteps      | 483840       |
| train/                  |              |
|    approx_kl            | 0.0012704583 |
|    clip_fraction        | 0.0737       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.09        |
|    explained_variance   | 0.412        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0408       |
|    crash                | 0.253        |
|    max_step             | 0            |
|    mean_ep_length       | 113          |
|    mean_reward          | 138          |
|    num_episodes         | 5            |
|    out_of_road          | 0.959        |
|    raw_action           | 0.57850254   |
|    route_completion     | 0.37         |
|    success_rate         | 0            |
|    total_cost           | 6.09         |
| time/                   |              |
|    total_timesteps      | 490000       |
| train/                  |              |
|    approx_kl            | 0.0016155582 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.076        |
|    clip_range           | 0.1          |
|    crash                | 0.204        |
|    entropy_loss         | -2.1         |
|    explained_variance   | 0.646        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.052       |
|    crash                | 0.248       |
|    max_step             | 0           |
|    mean_ep_length       | 220         |
|    mean_reward          | 162         |
|    num_episodes         | 5           |
|    out_of_road          | 0.948       |
|    raw_action           | 0.5765129   |
|    route_completion     | 0.377       |
|    success_rate         | 0.5         |
|    total_cost           | 6.83        |
| time/                   |             |
|    total_timesteps      | 500000      |
| train/                  |             |
|    approx_kl            | 0.004152738 |
|    arrive_dest          | 0.008       |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.1         |
|    crash                | 0.208       |
|    entropy_loss         | -2.09       |
|    explained_variance   | 0.722       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0549       |
|    crash                | 0.251        |
|    max_step             | 0            |
|    mean_ep_length       | 171          |
|    mean_reward          | 143          |
|    num_episodes         | 5            |
|    out_of_road          | 0.945        |
|    raw_action           | 0.5753163    |
|    route_completion     | 0.38         |
|    success_rate         | 0.1          |
|    total_cost           | 7.18         |
| time/                   |              |
|    total_timesteps      | 510000       |
| train/                  |              |
|    approx_kl            | 0.0018049134 |
|    arrive_dest          | 0.00784      |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.1          |
|    crash                | 0.212        |
|    entropy_loss         | -2.08        |
|    explained_variance   | 0.713        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0577       |
|    crash                | 0.246        |
|    max_step             | 0            |
|    mean_ep_length       | 149          |
|    mean_reward          | 167          |
|    num_episodes         | 5            |
|    out_of_road          | 0.942        |
|    raw_action           | 0.57425916   |
|    route_completion     | 0.383        |
|    success_rate         | 0.1          |
|    total_cost           | 7.25         |
| time/                   |              |
|    total_timesteps      | 520000       |
| train/                  |              |
|    approx_kl            | 0.0015058592 |
|    arrive_dest          | 0.00769      |
|    clip_fraction        | 0.172        |
|    clip_range           | 0.1          |
|    crash                | 0.208        |
|    entropy_loss         | -2.07        |
|    explained_variance   | 0.795        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0604      |
|    crash                | 0.242       |
|    max_step             | 0           |
|    mean_ep_length       | 141         |
|    mean_reward          | 106         |
|    num_episodes         | 5           |
|    out_of_road          | 0.94        |
|    raw_action           | 0.5729673   |
|    route_completion     | 0.384       |
|    success_rate         | 0.3         |
|    total_cost           | 7.5         |
| time/                   |             |
|    total_timesteps      | 530000      |
| train/                  |             |
|    approx_kl            | 0.004115051 |
|    arrive_dest          | 0.0151      |
|    clip_fraction        | 0.0905      |
|    clip_range           | 0.1         |
|    crash                | 0.208       |
|    entropy_loss         | -2.08       |
|    explained_variance   | 0.834       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0593       |
|    crash                | 0.241        |
|    max_step             | 0            |
|    mean_ep_length       | 148          |
|    mean_reward          | 182          |
|    num_episodes         | 5            |
|    out_of_road          | 0.941        |
|    raw_action           | 0.5718364    |
|    route_completion     | 0.385        |
|    success_rate         | 0            |
|    total_cost           | 7.49         |
| time/                   |              |
|    total_timesteps      | 540000       |
| train/                  |              |
|    approx_kl            | 0.0024224464 |
|    arrive_dest          | 0.0148       |
|    clip_fraction        | 0.129        |
|    clip_range           | 0.1          |
|    crash                | 0.204        |
|    entropy_loss         | -2.07        |
|    explained_variance   | 0.703        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0582       |
|    crash                | 0.244        |
|    max_step             | 0            |
|    mean_ep_length       | 219          |
|    mean_reward          | 245          |
|    num_episodes         | 5            |
|    out_of_road          | 0.942        |
|    raw_action           | 0.57033163   |
|    route_completion     | 0.391        |
|    success_rate         | 0.1          |
|    total_cost           | 7.8          |
| time/                   |              |
|    total_timesteps      | 550000       |
| train/                  |              |
|    approx_kl            | 0.0025656873 |
|    arrive_dest          | 0.0182       |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    crash                | 0.204        |
|    entropy_loss         | -2.07        |
|    explained_variance   | 0.715        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 418      |
|    ep_rew_mean     | 361      |
| time/              |          |
|    fps             | 358      |
|    iterations      | 215      |
|    time_elapsed    | 1536     |
|    total_timesteps | 550400   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 414          |
|    ep_rew_mean          | 360          |
| time/                   |              |
|    fps                  | 359          |
|    iterations           | 216          |
|    time_elapsed         | 1539         |
|    total_timesteps      | 552960       |
| train/                  |              |
|    approx_kl            | 0.0012941146 |
|    clip_fraction        | 0.185        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.06        |
|    explained_variance   | 0.658        |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0607      |
|    crash                | 0.246       |
|    max_step             | 0           |
|    mean_ep_length       | 156         |
|    mean_reward          | 157         |
|    num_episodes         | 5           |
|    out_of_road          | 0.939       |
|    raw_action           | 0.56899124  |
|    route_completion     | 0.395       |
|    success_rate         | 0.2         |
|    total_cost           | 7.88        |
| time/                   |             |
|    total_timesteps      | 560000      |
| train/                  |             |
|    approx_kl            | 0.002214248 |
|    arrive_dest          | 0.0214      |
|    clip_fraction        | 0.184       |
|    clip_range           | 0.1         |
|    crash                | 0.2         |
|    entropy_loss         | -2.05       |
|    explained_variance   | 0.773       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0702       |
|    crash                | 0.246        |
|    max_step             | 0            |
|    mean_ep_length       | 301          |
|    mean_reward          | 181          |
|    num_episodes         | 5            |
|    out_of_road          | 0.93         |
|    raw_action           | 0.5674536    |
|    route_completion     | 0.401        |
|    success_rate         | 0.5          |
|    total_cost           | 9.01         |
| time/                   |              |
|    total_timesteps      | 570000       |
| train/                  |              |
|    approx_kl            | 0.0010038359 |
|    arrive_dest          | 0.0281       |
|    clip_fraction        | 0.0587       |
|    clip_range           | 0.1          |
|    crash                | 0.204        |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.704        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0759       |
|    crash                | 0.248        |
|    max_step             | 0            |
|    mean_ep_length       | 225          |
|    mean_reward          | 173          |
|    num_episodes         | 5            |
|    out_of_road          | 0.924        |
|    raw_action           | 0.566014     |
|    route_completion     | 0.407        |
|    success_rate         | 0.3          |
|    total_cost           | 9.63         |
| time/                   |              |
|    total_timesteps      | 580000       |
| train/                  |              |
|    approx_kl            | 0.0034133862 |
|    arrive_dest          | 0.031        |
|    clip_fraction        | 0.0862       |
|    clip_range           | 0.1          |
|    crash                | 0.2          |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.657        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0746      |
|    crash                | 0.247       |
|    max_step             | 0           |
|    mean_ep_length       | 204         |
|    mean_reward          | 256         |
|    num_episodes         | 5           |
|    out_of_road          | 0.925       |
|    raw_action           | 0.56459624  |
|    route_completion     | 0.414       |
|    success_rate         | 0.2         |
|    total_cost           | 9.75        |
| time/                   |             |
|    total_timesteps      | 590000      |
| train/                  |             |
|    approx_kl            | 0.001731599 |
|    arrive_dest          | 0.0373      |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.1         |
|    crash                | 0.197       |
|    entropy_loss         | -2.04       |
|    explained_variance   | 0.507       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 463      |
|    ep_rew_mean     | 399      |
| time/              |          |
|    fps             | 365      |
|    iterations      | 231      |
|    time_elapsed    | 1618     |
|    total_timesteps | 591360   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 455          |
|    ep_rew_mean          | 394          |
| time/                   |              |
|    fps                  | 366          |
|    iterations           | 232          |
|    time_elapsed         | 1621         |
|    total_timesteps      | 593920       |
| train/                  |              |
|    approx_kl            | 0.0019520961 |
|    clip_fraction        | 0.146        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.04        |
|    explained_variance   | 0.51         |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0767       |
|    crash                | 0.243        |
|    max_step             | 0            |
|    mean_ep_length       | 178          |
|    mean_reward          | 156          |
|    num_episodes         | 5            |
|    out_of_road          | 0.923        |
|    raw_action           | 0.5632633    |
|    route_completion     | 0.417        |
|    success_rate         | 0.3          |
|    total_cost           | 9.93         |
| time/                   |              |
|    total_timesteps      | 600000       |
| train/                  |              |
|    approx_kl            | 0.0038559367 |
|    arrive_dest          | 0.0433       |
|    clip_fraction        | 0.105        |
|    clip_range           | 0.1          |
|    crash                | 0.193        |
|    entropy_loss         | -2.04        |
|    explained_variance   | 0.724        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0754      |
|    crash                | 0.246       |
|    max_step             | 0           |
|    mean_ep_length       | 78          |
|    mean_reward          | 70.2        |
|    num_episodes         | 5           |
|    out_of_road          | 0.925       |
|    raw_action           | 0.5624381   |
|    route_completion     | 0.414       |
|    success_rate         | 0.1         |
|    total_cost           | 9.78        |
| time/                   |             |
|    total_timesteps      | 610000      |
| train/                  |             |
|    approx_kl            | 0.002923158 |
|    arrive_dest          | 0.0459      |
|    clip_fraction        | 0.258       |
|    clip_range           | 0.1         |
|    crash                | 0.19        |
|    entropy_loss         | -2.04       |
|    explained_variance   | 0.819       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0774       |
|    crash                | 0.245        |
|    max_step             | 0            |
|    mean_ep_length       | 167          |
|    mean_reward          | 134          |
|    num_episodes         | 5            |
|    out_of_road          | 0.923        |
|    raw_action           | 0.5613916    |
|    route_completion     | 0.415        |
|    success_rate         | 0.4          |
|    total_cost           | 10.1         |
| time/                   |              |
|    total_timesteps      | 620000       |
| train/                  |              |
|    approx_kl            | 0.0009341057 |
|    arrive_dest          | 0.0548       |
|    clip_fraction        | 0.0517       |
|    clip_range           | 0.1          |
|    crash                | 0.19         |
|    entropy_loss         | -2.04        |
|    explained_variance   | 0.758        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0762       |
|    crash                | 0.251        |
|    max_step             | 0            |
|    mean_ep_length       | 159          |
|    mean_reward          | 208          |
|    num_episodes         | 5            |
|    out_of_road          | 0.924        |
|    raw_action           | 0.55979544   |
|    route_completion     | 0.419        |
|    success_rate         | 0.1          |
|    total_cost           | 10           |
| time/                   |              |
|    total_timesteps      | 630000       |
| train/                  |              |
|    approx_kl            | 0.0026156558 |
|    arrive_dest          | 0.0571       |
|    clip_fraction        | 0.232        |
|    clip_range           | 0.1          |
|    crash                | 0.194        |
|    entropy_loss         | -2.03        |
|    explained_variance   | 0.825        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.075       |
|    crash                | 0.253       |
|    max_step             | 0           |
|    mean_ep_length       | 120         |
|    mean_reward          | 164         |
|    num_episodes         | 5           |
|    out_of_road          | 0.925       |
|    raw_action           | 0.5586368   |
|    route_completion     | 0.42        |
|    success_rate         | 0.1         |
|    total_cost           | 9.88        |
| time/                   |             |
|    total_timesteps      | 640000      |
| train/                  |             |
|    approx_kl            | 0.028421124 |
|    arrive_dest          | 0.0594      |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.1         |
|    crash                | 0.194       |
|    entropy_loss         | -2.02       |
|    explained_variance   | 0.847       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.08         |
|    crash                | 0.255        |
|    max_step             | 0            |
|    mean_ep_length       | 246          |
|    mean_reward          | 230          |
|    num_episodes         | 5            |
|    out_of_road          | 0.92         |
|    raw_action           | 0.5575165    |
|    route_completion     | 0.426        |
|    success_rate         | 0.3          |
|    total_cost           | 10.3         |
| time/                   |              |
|    total_timesteps      | 650000       |
| train/                  |              |
|    approx_kl            | 0.0012910704 |
|    arrive_dest          | 0.0615       |
|    clip_fraction        | 0.124        |
|    clip_range           | 0.1          |
|    crash                | 0.191        |
|    entropy_loss         | -2.02        |
|    explained_variance   | 0.85         |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0848       |
|    crash                | 0.255        |
|    max_step             | 0            |
|    mean_ep_length       | 278          |
|    mean_reward          | 128          |
|    num_episodes         | 5            |
|    out_of_road          | 0.915        |
|    raw_action           | 0.55627507   |
|    route_completion     | 0.43         |
|    success_rate         | 0.5          |
|    total_cost           | 11.4         |
| time/                   |              |
|    total_timesteps      | 660000       |
| train/                  |              |
|    approx_kl            | 0.0020631265 |
|    arrive_dest          | 0.0697       |
|    clip_fraction        | 0.212        |
|    clip_range           | 0.1          |
|    crash                | 0.194        |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.765        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0836       |
|    crash                | 0.251        |
|    max_step             | 0            |
|    mean_ep_length       | 195          |
|    mean_reward          | 198          |
|    num_episodes         | 5            |
|    out_of_road          | 0.916        |
|    raw_action           | 0.5554842    |
|    route_completion     | 0.432        |
|    success_rate         | 0            |
|    total_cost           | 11.4         |
| time/                   |              |
|    total_timesteps      | 670000       |
| train/                  |              |
|    approx_kl            | 0.0017936614 |
|    arrive_dest          | 0.0687       |
|    clip_fraction        | 0.127        |
|    clip_range           | 0.1          |
|    crash                | 0.194        |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.835        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0882       |
|    crash                | 0.253        |
|    max_step             | 0            |
|    mean_ep_length       | 190          |
|    mean_reward          | 144          |
|    num_episodes         | 5            |
|    out_of_road          | 0.912        |
|    raw_action           | 0.5545048    |
|    route_completion     | 0.436        |
|    success_rate         | 0.4          |
|    total_cost           | 11.9         |
| time/                   |              |
|    total_timesteps      | 680000       |
| train/                  |              |
|    approx_kl            | 0.0014280205 |
|    arrive_dest          | 0.0735       |
|    clip_fraction        | 0.057        |
|    clip_range           | 0.1          |
|    crash                | 0.191        |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.749        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0928      |
|    crash                | 0.255       |
|    max_step             | 0           |
|    mean_ep_length       | 212         |
|    mean_reward          | 161         |
|    num_episodes         | 5           |
|    out_of_road          | 0.907       |
|    raw_action           | 0.553953    |
|    route_completion     | 0.44        |
|    success_rate         | 0.2         |
|    total_cost           | 12.4        |
| time/                   |             |
|    total_timesteps      | 690000      |
| train/                  |             |
|    approx_kl            | 0.003985566 |
|    arrive_dest          | 0.0725      |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.1         |
|    crash                | 0.188       |
|    entropy_loss         | -2.01       |
|    explained_variance   | 0.928       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0971       |
|    crash                | 0.254        |
|    max_step             | 0            |
|    mean_ep_length       | 204          |
|    mean_reward          | 177          |
|    num_episodes         | 5            |
|    out_of_road          | 0.903        |
|    raw_action           | 0.5530188    |
|    route_completion     | 0.443        |
|    success_rate         | 0.2          |
|    total_cost           | 12.7         |
| time/                   |              |
|    total_timesteps      | 700000       |
| train/                  |              |
|    approx_kl            | 0.0016122315 |
|    arrive_dest          | 0.0714       |
|    clip_fraction        | 0.0977       |
|    clip_range           | 0.1          |
|    crash                | 0.186        |
|    entropy_loss         | -2.02        |
|    explained_variance   | 0.789        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.101       |
|    crash                | 0.256       |
|    max_step             | 0           |
|    mean_ep_length       | 216         |
|    mean_reward          | 198         |
|    num_episodes         | 5           |
|    out_of_road          | 0.899       |
|    raw_action           | 0.5521048   |
|    route_completion     | 0.447       |
|    success_rate         | 0.2         |
|    total_cost           | 13          |
| time/                   |             |
|    total_timesteps      | 710000      |
| train/                  |             |
|    approx_kl            | 0.001629803 |
|    arrive_dest          | 0.0704      |
|    clip_fraction        | 0.0894      |
|    clip_range           | 0.1         |
|    crash                | 0.183       |
|    entropy_loss         | -2.01       |
|    explained_variance   | 0.699       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.103       |
|    crash                | 0.256       |
|    max_step             | 0           |
|    mean_ep_length       | 192         |
|    mean_reward          | 146         |
|    num_episodes         | 5           |
|    out_of_road          | 0.897       |
|    raw_action           | 0.55125487  |
|    route_completion     | 0.447       |
|    success_rate         | 0.4         |
|    total_cost           | 13.2        |
| time/                   |             |
|    total_timesteps      | 720000      |
| train/                  |             |
|    approx_kl            | 0.002705874 |
|    arrive_dest          | 0.0778      |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.1         |
|    crash                | 0.181       |
|    entropy_loss         | -2          |
|    explained_variance   | 0.837       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.107       |
|    crash                | 0.255       |
|    max_step             | 0           |
|    mean_ep_length       | 270         |
|    mean_reward          | 198         |
|    num_episodes         | 5           |
|    out_of_road          | 0.893       |
|    raw_action           | 0.5500409   |
|    route_completion     | 0.45        |
|    success_rate         | 0.4         |
|    total_cost           | 13.8        |
| time/                   |             |
|    total_timesteps      | 730000      |
| train/                  |             |
|    approx_kl            | 0.003289301 |
|    arrive_dest          | 0.0822      |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.1         |
|    crash                | 0.184       |
|    entropy_loss         | -1.99       |
|    explained_variance   | 0.873       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.105        |
|    crash                | 0.257        |
|    max_step             | 0            |
|    mean_ep_length       | 144          |
|    mean_reward          | 192          |
|    num_episodes         | 5            |
|    out_of_road          | 0.895        |
|    raw_action           | 0.54913783   |
|    route_completion     | 0.452        |
|    success_rate         | 0.1          |
|    total_cost           | 13.6         |
| time/                   |              |
|    total_timesteps      | 740000       |
| train/                  |              |
|    approx_kl            | 0.0040416652 |
|    arrive_dest          | 0.0838       |
|    clip_fraction        | 0.13         |
|    clip_range           | 0.1          |
|    crash                | 0.181        |
|    entropy_loss         | -1.99        |
|    explained_variance   | 0.864        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.109        |
|    crash                | 0.253        |
|    max_step             | 0            |
|    mean_ep_length       | 163          |
|    mean_reward          | 175          |
|    num_episodes         | 5            |
|    out_of_road          | 0.891        |
|    raw_action           | 0.54820544   |
|    route_completion     | 0.454        |
|    success_rate         | 0.3          |
|    total_cost           | 13.6         |
| time/                   |              |
|    total_timesteps      | 750000       |
| train/                  |              |
|    approx_kl            | 0.0010532858 |
|    arrive_dest          | 0.0853       |
|    clip_fraction        | 0.0979       |
|    clip_range           | 0.1          |
|    crash                | 0.179        |
|    entropy_loss         | -1.98        |
|    explained_variance   | 0.774        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.108       |
|    crash                | 0.255       |
|    max_step             | 0           |
|    mean_ep_length       | 195         |
|    mean_reward          | 166         |
|    num_episodes         | 5           |
|    out_of_road          | 0.892       |
|    raw_action           | 0.5475573   |
|    route_completion     | 0.455       |
|    success_rate         | 0           |
|    total_cost           | 13.8        |
| time/                   |             |
|    total_timesteps      | 760000      |
| train/                  |             |
|    approx_kl            | 0.011550272 |
|    arrive_dest          | 0.0842      |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.1         |
|    crash                | 0.176       |
|    entropy_loss         | -1.97       |
|    explained_variance   | 0.825       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.106        |
|    crash                | 0.252        |
|    max_step             | 0            |
|    mean_ep_length       | 100          |
|    mean_reward          | 120          |
|    num_episodes         | 5            |
|    out_of_road          | 0.894        |
|    raw_action           | 0.54674405   |
|    route_completion     | 0.454        |
|    success_rate         | 0            |
|    total_cost           | 13.7         |
| time/                   |              |
|    total_timesteps      | 770000       |
| train/                  |              |
|    approx_kl            | 0.0012631483 |
|    arrive_dest          | 0.0831       |
|    clip_fraction        | 0.0708       |
|    clip_range           | 0.1          |
|    crash                | 0.174        |
|    entropy_loss         | -1.97        |
|    explained_variance   | 0.762        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.105        |
|    crash                | 0.254        |
|    max_step             | 0            |
|    mean_ep_length       | 149          |
|    mean_reward          | 185          |
|    num_episodes         | 5            |
|    out_of_road          | 0.895        |
|    raw_action           | 0.5464452    |
|    route_completion     | 0.455        |
|    success_rate         | 0.2          |
|    total_cost           | 13.6         |
| time/                   |              |
|    total_timesteps      | 780000       |
| train/                  |              |
|    approx_kl            | 0.0061455774 |
|    arrive_dest          | 0.0872       |
|    clip_fraction        | 0.104        |
|    clip_range           | 0.1          |
|    crash                | 0.174        |
|    entropy_loss         | -1.97        |
|    explained_variance   | 0.881        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.109        |
|    crash                | 0.253        |
|    max_step             | 0            |
|    mean_ep_length       | 277          |
|    mean_reward          | 227          |
|    num_episodes         | 5            |
|    out_of_road          | 0.891        |
|    raw_action           | 0.5451493    |
|    route_completion     | 0.459        |
|    success_rate         | 0.5          |
|    total_cost           | 13.8         |
| time/                   |              |
|    total_timesteps      | 790000       |
| train/                  |              |
|    approx_kl            | 0.0051647453 |
|    arrive_dest          | 0.0937       |
|    clip_fraction        | 0.376        |
|    clip_range           | 0.1          |
|    crash                | 0.175        |
|    entropy_loss         | -1.97        |
|    explained_variance   | 0.912        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.107        |
|    crash                | 0.25         |
|    max_step             | 0            |
|    mean_ep_length       | 89.4         |
|    mean_reward          | 99.6         |
|    num_episodes         | 5            |
|    out_of_road          | 0.892        |
|    raw_action           | 0.54489666   |
|    route_completion     | 0.456        |
|    success_rate         | 0            |
|    total_cost           | 13.6         |
| time/                   |              |
|    total_timesteps      | 800000       |
| train/                  |              |
|    approx_kl            | 0.0025254448 |
|    arrive_dest          | 0.0925       |
|    clip_fraction        | 0.204        |
|    clip_range           | 0.1          |
|    crash                | 0.172        |
|    entropy_loss         | -1.97        |
|    explained_variance   | 0.894        |
|    learni