In [None]:
SCENARIOS = 10
VALUATION_SCENARIOS = 50

In [2]:
!pip install stable_baselines3[extra]



In [3]:
!pip install git+https://github.com/metadriverse/metadrive.git

Collecting git+https://github.com/metadriverse/metadrive.git
  Cloning https://github.com/metadriverse/metadrive.git to c:\users\colton\appdata\local\temp\pip-req-build-icpffgkz
  Resolved https://github.com/metadriverse/metadrive.git to commit a7f7d0b6fcf9b7422f9a9cd5c674fb1661c06a7e
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/metadriverse/metadrive.git 'C:\Users\Colton\AppData\Local\Temp\pip-req-build-icpffgkz'


## RL Environment

In [None]:
import copy
from metadrive.envs.safe_metadrive_env import SafeMetaDriveEnv

DEFAULT_CONFIG = {
    # The below are default configs copied from SafeMetaDriveEnv
    # Environment difficulty
    "accident_prob": 0.8,
    "traffic_density": 0.05,
    # Termination conditions
    "crash_vehicle_done": False,
    "crash_object_done": False,
    # Reward
    "success_reward": 10.0,
    "driving_reward": 1.0,
    "speed_reward": 0.1,
    # Penalty will be negated and added to reward
    "out_of_road_penalty": 5.0,
    "crash_vehicle_penalty": 1.0,
    "crash_object_penalty": 1.0,
    # Cost will be return in info["cost"] and you can do constrained optimization with it
    "crash_vehicle_cost": 1.0,
    "crash_object_cost": 1.0,
    "out_of_road_cost": 1.0,
}

# Use deepcopy to avoid modifying the DEFAULT_CONFIG
TRAINING_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
TRAINING_CONFIG.update(
    {  # Environment setting
        "num_scenarios": SCENARIOS,  # There are totally 50 possible maps.
        "start_seed": 100,  # We will use the map with seeds in [100, 150) as the default training environment.
    }
)


def get_training_env(extra_config=None):
    config = copy.deepcopy(TRAINING_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


VALIDATION_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
VALIDATION_CONFIG.update(
    {  # Environment setting
        "num_scenarios": VALUATION_SCENARIOS,  # There are totally 50 possible maps.
        "start_seed": 1000,  # We will use the map with seeds in [1000, 1050) as the default validation environment.
    }
)


def get_validation_env(extra_config=None):
    config = copy.deepcopy(VALIDATION_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


## Import and utilities

In [5]:
import argparse
import datetime
import logging
import os
import uuid
from collections import defaultdict
from pathlib import Path

import numpy as np
from metadrive.engine.logger import set_log_level
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO
from stable_baselines3.ppo.policies import ActorCriticPolicy

from stable_baselines3.td3 import TD3
from stable_baselines3.td3.policies import TD3Policy

from stable_baselines3.sac import SAC
from stable_baselines3.sac.policies import SACPolicy

from wandb.integration.sb3 import WandbCallback

import wandb


# Remove MetaDrive's logging information when episode ends.
set_log_level(logging.ERROR)

In [6]:

def get_time_str():
    return datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


def remove_reset_seed_and_add_monitor(make_env, trial_dir):
    """
    MetaDrive env's reset function takes a seed argument and use it to determine the map to load.
    However, in stable-baselines3, it calls reset function with a seed argument serving as the random seed,
    which is not what we want. We do a trick here to remap the random seed to map index.

    Stable-baselines3 recommends using Monitor wrapper to log training data. We add a Monitor wrapper here.
    """
    from gymnasium import Wrapper
    from stable_baselines3.common.monitor import Monitor
    class NewClass(Wrapper):
        def reset(self, seed=None, **kwargs):
            # PZH: We do a trick here to remap the seed to the map index. This can help randomize the maps.
            if seed is not None:
                new_seed = self.env.start_index + (seed % self.env.num_scenarios)
            else:
                new_seed = None
            return self.env.reset(seed=new_seed, **kwargs)

    def new_make_env():
        env = make_env()
        NewClass.__name__ = env.__class__.__name__ + "WithoutResetSeed"
        wrapped_env = NewClass(env)
        wrapped_env = Monitor(env=wrapped_env, filename=str(trial_dir))
        return wrapped_env

    return new_make_env


class CustomizedEvalCallback(EvalCallback):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.evaluations_info_buffer = defaultdict(list)
        self.training_info_buffer = defaultdict(list)
        self.train_timesteps = list()
        self.train_results = list()
        self.train_length = list()

    def _log_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.evaluations_info_buffer[k].append(info[k])
    
    def _log_train_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.training_info_buffer[k].append(info[k])

        if "raw_action" in info:
            self.evaluations_info_buffer["raw_action"].append(info["raw_action"])

    def _on_step(self) -> bool:
        """
        PZH Note: Overall this function is copied from original EvalCallback._on_step.
        We additionally record evaluations_info_buffer to the logger.
        """

        from stable_baselines3.common.evaluation import evaluate_policy
        from stable_baselines3.common.vec_env import sync_envs_normalization

        continue_training = True

        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Sync training and eval env if there is VecNormalize
            if self.model.get_vec_normalize_env() is not None:
                try:
                    sync_envs_normalization(self.training_env, self.eval_env)
                except AttributeError as e:
                    raise AssertionError(
                        "Training and eval env are not wrapped the same way, "
                        "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback "
                        "and warning above."
                    ) from e

            # Reset success rate buffer
            self._is_success_buffer = []

            episode_rewards, episode_lengths = evaluate_policy(
                self.model,
                self.eval_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_success_callback,
            )

            train_episode_rewards, train_episode_lengths = evaluate_policy(
                self.model,
                self.training_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_train_success_callback,
            )

            if self.log_path is not None:
                assert isinstance(episode_rewards, list)
                assert isinstance(episode_lengths, list)
                assert isinstance(train_episode_rewards, list)
                assert isinstance(train_episode_lengths, list)
                self.evaluations_timesteps.append(self.num_timesteps)
                self.evaluations_results.append(episode_rewards)
                self.evaluations_length.append(episode_lengths)

                kwargs = {}
                # Save success log if present
                if len(self._is_success_buffer) > 0:
                    self.evaluations_successes.append(self._is_success_buffer)
                    kwargs = dict(successes=self.evaluations_successes)

                # PZH: Save evaluations_info_buffer to the log file
                for k, v in self.evaluations_info_buffer.items():
                    kwargs[k] = v

                np.savez(
                    self.log_path,
                    timesteps=self.evaluations_timesteps,
                    results=self.evaluations_results,
                    ep_lengths=self.evaluations_length,
                    **kwargs,  # type: ignore[arg-type]
                )

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = float(mean_reward)

            if self.verbose >= 1:
                print(
                    f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
            self.logger.record("eval/mean_reward", float(mean_reward))
            self.logger.record("eval/mean_ep_length", mean_ep_length)

            # PZH: Add this metric.
            self.logger.record("eval/num_episodes", len(episode_rewards))

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
                if self.verbose >= 1:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

            # PZH: We record evaluations_info_buffer to the logger
            for k, v in self.evaluations_info_buffer.items():
                self.logger.record("eval/{}".format(k), np.mean(np.asarray(v)))

            for k, v in self.training_info_buffer.items():
                self.logger.record("train/{}".format(k), np.mean(np.asarray(v)))

            # Dump log so the evaluation results are printed with the correct timestep
            self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
                if self.verbose >= 1:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
                self.best_mean_reward = float(mean_reward)
                # Trigger callback on new best model, if needed
                if self.callback_on_new_best is not None:
                    continue_training = self.callback_on_new_best.on_step()

            # Trigger callback after every evaluation, if needed
            if self.callback is not None:
                continue_training = continue_training and self._on_event()

        return continue_training


## Setup PPO trainer


In [7]:

# ===== Set up some arguments =====
exp_name = "ppo_metadrive" + "_" + str(SCENARIOS) + "_scenarios"
use_wandb = True

experiment_batch_name = "{}".format(exp_name)
trial_name = "{}_{}_{}".format(experiment_batch_name, get_time_str(), uuid.uuid4().hex[:8])
experiment_dir = Path("runs") / experiment_batch_name
trial_dir = experiment_dir / trial_name
os.makedirs(experiment_dir, exist_ok=True)
os.makedirs(trial_dir, exist_ok=True)
print(f"We start logging training data into {trial_dir}")


We start logging training data into runs\ppo_metadrive_500_scenarios\ppo_metadrive_500_scenarios_2025-03-19_12-57-43_55d523b4


In [8]:
# ===== Setup environment =====
num_train_envs = 10
num_eval_envs = 5
train_env = make_vec_env(remove_reset_seed_and_add_monitor(get_training_env, trial_dir), n_envs=num_train_envs,
                            vec_env_cls=SubprocVecEnv)
eval_env = make_vec_env(remove_reset_seed_and_add_monitor(get_validation_env, trial_dir), n_envs=num_eval_envs,
                        vec_env_cls=SubprocVecEnv)

In [9]:
# ===== Setup evaluation, checkpointing, and wandb =====
save_freq = 10_000  # Number of steps per model checkpoint
eval_freq = 10_000  # Number of steps per evaluation

wandb_save_freq = 10_000  # Number of steps per evaluation

num_eval_episodes = 5

checkpoint_callback = CheckpointCallback(
    name_prefix="rl_model",
    verbose=2,
    save_freq=save_freq,
    save_path=str(trial_dir / "models")
)
eval_callback = CustomizedEvalCallback(
    eval_env,
    best_model_save_path=str(trial_dir / "eval"),
    log_path=str(trial_dir / "eval"),
    eval_freq=max(eval_freq // num_train_envs, 1),
    n_eval_episodes=num_eval_episodes,
)
callbacks = [checkpoint_callback, eval_callback]
if use_wandb:
    wandb.init(
        project="cs260r",
        id=trial_name,
        name=experiment_batch_name,
        sync_tensorboard=True,
        dir=str(trial_dir),
    )
    callbacks.append(WandbCallback(model_save_path=str(trial_dir / "wandb_models"), model_save_freq=wandb_save_freq))
callbacks = CallbackList(callbacks)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: coltonrowe (coltonrowe-ucla) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [10]:

# ===== Setup the training algorithm =====
# model = SAC(
#     env=train_env,
#     policy=SACPolicy,
#     verbose=2,
#     batch_size = 256,
#     buffer_size = 1000000,
#     learning_rate = 5e-5,
#     gamma=0.98,
#     tau = 0.002,
#     device = "cuda",
#     tensorboard_log=str(trial_dir))
# model = TD3(
#     env=train_env,
#     policy=TD3Policy,
#     learning_rate=1e-3,
#     buffer_size=1_000_000,
#     learning_starts=100,
#     batch_size=256,
#     tau=0.005,
#     gamma = 0.99,
#     train_freq=1,
#     gradient_steps=1,
#     action_noise=None,
#     replay_buffer_class=None,
#     replay_buffer_kwargs=None,
#     optimize_memory_usage=False,
#     policy_delay=2,
#     target_policy_noise=0.2,
#     target_noise_clip=0.5,
#     stats_window_size=100,
#     tensorboard_log=None,
#     policy_kwargs=None,
#     verbose=2,
#     seed=None,
#     device='auto',
#     _init_setup_model=True
#     )
model = PPO(
    env=train_env,
    policy=ActorCriticPolicy,
    n_steps=256,  # n_steps * n_envs = total_batch_size
    n_epochs=20,
    learning_rate=5e-5,
    batch_size=256,
    clip_range=0.1,
    vf_coef=0.5,
    ent_coef=0.0,
    max_grad_norm=10.0,
    tensorboard_log=str(trial_dir),
    verbose=2,
    device="auto",
)


Using cpu device


In [11]:
ckpt = None
if ckpt:
    ckpt = Path(ckpt)
    print(f"Loading checkpoint from {ckpt}!")
    from stable_baselines3.common.save_util import load_from_zip_file
    data, params, pytorch_variables = load_from_zip_file(ckpt, device=model.device, print_system_info=False)
    model.set_parameters(params, exact_match=True, device=model.device)


In [None]:
# ===== Launch training =====
total_timesteps = 2_000_000  # 1M steps
model.learn(
    total_timesteps=total_timesteps,
    callback=callbacks,
    reset_num_timesteps=True,
    tb_log_name=experiment_batch_name,
    log_interval=1,
    progress_bar=True,
)

Logging to runs\ppo_metadrive_500_scenarios\ppo_metadrive_500_scenarios_2025-03-19_12-57-43_55d523b4\ppo_metadrive_500_scenarios_1


-----------------------------
| time/              |      |
|    fps             | 1558 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2560 |
-----------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 447          |
|    ep_rew_mean          | -0.279       |
| time/                   |              |
|    fps                  | 1396         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 5120         |
| train/                  |              |
|    approx_kl            | 0.0035182114 |
|    clip_fraction        | 0.211        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.84        |
|    explained_variance   | -0.102       |
|    learning_rate        | 5e-05        |
|    loss                 | -0.00904     |
|    n_updates            | 20           |
|    policy_grad

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 125          |
|    mean_reward          | 53.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.12552252   |
|    route_completion     | 0.157        |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 10000        |
| train/                  |              |
|    approx_kl            | 0.0041001295 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.254        |
|    clip_range           | 0.1          |
|    crash                | 0            |
|    entropy_loss         | -2.83        |
|    explained_variance   | -0.0529      |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 231      |
|    ep_rew_mean     | 33.9     |
| time/              |          |
|    fps             | 786      |
|    iterations      | 4        |
|    time_elapsed    | 13       |
|    total_timesteps | 10240    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 197          |
|    ep_rew_mean          | 24.7         |
| time/                   |              |
|    fps                  | 817          |
|    iterations           | 5            |
|    time_elapsed         | 15           |
|    total_timesteps      | 12800        |
| train/                  |              |
|    approx_kl            | 0.0023020091 |
|    clip_fraction        | 0.117        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.82        |
|    explained_variance   | 0.0117       |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0           |
|    crash                | 0           |
|    max_step             | 0           |
|    mean_ep_length       | 82.6        |
|    mean_reward          | 52.9        |
|    num_episodes         | 5           |
|    out_of_road          | 1           |
|    raw_action           | 0.17694575  |
|    route_completion     | 0.167       |
|    success_rate         | 0           |
|    total_cost           | 1           |
| time/                   |             |
|    total_timesteps      | 20000       |
| train/                  |             |
|    approx_kl            | 0.002908679 |
|    arrive_dest          | 0           |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.1         |
|    crash                | 0.1         |
|    entropy_loss         | -2.79       |
|    explained_variance   | 0.0424      |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0667       |
|    max_step             | 0            |
|    mean_ep_length       | 109          |
|    mean_reward          | 110          |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.25039086   |
|    route_completion     | 0.218        |
|    success_rate         | 0            |
|    total_cost           | 1.13         |
| time/                   |              |
|    total_timesteps      | 30000        |
| train/                  |              |
|    approx_kl            | 0.0019709968 |
|    arrive_dest          | 0            |
|    clip_fraction        | 0.095        |
|    clip_range           | 0.1          |
|    crash                | 0.0667       |
|    entropy_loss         | -2.76        |
|    explained_variance   | 0.0205       |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 363      |
|    ep_rew_mean     | 24       |
| time/              |          |
|    fps             | 636      |
|    iterations      | 12       |
|    time_elapsed    | 48       |
|    total_timesteps | 30720    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 333          |
|    ep_rew_mean          | 22           |
| time/                   |              |
|    fps                  | 646          |
|    iterations           | 13           |
|    time_elapsed         | 51           |
|    total_timesteps      | 33280        |
| train/                  |              |
|    approx_kl            | 0.0016228377 |
|    clip_fraction        | 0.0639       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.76        |
|    explained_variance   | -0.00524     |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.05         |
|    crash                | 0.1          |
|    max_step             | 0            |
|    mean_ep_length       | 107          |
|    mean_reward          | 126          |
|    num_episodes         | 5            |
|    out_of_road          | 0.95         |
|    raw_action           | 0.33198738   |
|    route_completion     | 0.269        |
|    success_rate         | 0.2          |
|    total_cost           | 1.95         |
| time/                   |              |
|    total_timesteps      | 40000        |
| train/                  |              |
|    approx_kl            | 0.0013444412 |
|    arrive_dest          | 0.05         |
|    clip_fraction        | 0.0438       |
|    clip_range           | 0.1          |
|    crash                | 0.05         |
|    entropy_loss         | -2.73        |
|    explained_variance   | -0.0123      |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 283      |
|    ep_rew_mean     | 25.2     |
| time/              |          |
|    fps             | 524      |
|    iterations      | 16       |
|    time_elapsed    | 78       |
|    total_timesteps | 40960    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 276          |
|    ep_rew_mean          | 27.9         |
| time/                   |              |
|    fps                  | 531          |
|    iterations           | 17           |
|    time_elapsed         | 81           |
|    total_timesteps      | 43520        |
| train/                  |              |
|    approx_kl            | 0.0013075708 |
|    clip_fraction        | 0.0516       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.73        |
|    explained_variance   | 0.00318      |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.04         |
|    crash                | 0.08         |
|    max_step             | 0            |
|    mean_ep_length       | 43.4         |
|    mean_reward          | 21.8         |
|    num_episodes         | 5            |
|    out_of_road          | 0.96         |
|    raw_action           | 0.3478486    |
|    route_completion     | 0.234        |
|    success_rate         | 0            |
|    total_cost           | 1.76         |
| time/                   |              |
|    total_timesteps      | 50000        |
| train/                  |              |
|    approx_kl            | 0.0017752241 |
|    arrive_dest          | 0.04         |
|    clip_fraction        | 0.0604       |
|    clip_range           | 0.1          |
|    crash                | 0.04         |
|    entropy_loss         | -2.7         |
|    explained_variance   | 0.00704      |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0333      |
|    crash                | 0.1         |
|    max_step             | 0           |
|    mean_ep_length       | 40.2        |
|    mean_reward          | 18.5        |
|    num_episodes         | 5           |
|    out_of_road          | 0.967       |
|    raw_action           | 0.35820052  |
|    route_completion     | 0.211       |
|    success_rate         | 0           |
|    total_cost           | 1.63        |
| time/                   |             |
|    total_timesteps      | 60000       |
| train/                  |             |
|    approx_kl            | 0.001132246 |
|    arrive_dest          | 0.0333      |
|    clip_fraction        | 0.0307      |
|    clip_range           | 0.1         |
|    crash                | 0.0667      |
|    entropy_loss         | -2.68       |
|    explained_variance   | 0.00164     |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0286      |
|    crash                | 0.0857      |
|    max_step             | 0           |
|    mean_ep_length       | 49.4        |
|    mean_reward          | 30.6        |
|    num_episodes         | 5           |
|    out_of_road          | 0.971       |
|    raw_action           | 0.3683983   |
|    route_completion     | 0.197       |
|    success_rate         | 0           |
|    total_cost           | 1.54        |
| time/                   |             |
|    total_timesteps      | 70000       |
| train/                  |             |
|    approx_kl            | 0.001199639 |
|    arrive_dest          | 0.0286      |
|    clip_fraction        | 0.0396      |
|    clip_range           | 0.1         |
|    crash                | 0.0857      |
|    entropy_loss         | -2.65       |
|    explained_variance   | -0.00706    |
|    learning_rate        | 5e-05 

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.025         |
|    crash                | 0.1           |
|    max_step             | 0             |
|    mean_ep_length       | 104           |
|    mean_reward          | 133           |
|    num_episodes         | 5             |
|    out_of_road          | 0.975         |
|    raw_action           | 0.3898862     |
|    route_completion     | 0.23          |
|    success_rate         | 0             |
|    total_cost           | 1.55          |
| time/                   |               |
|    total_timesteps      | 80000         |
| train/                  |               |
|    approx_kl            | 0.00028010106 |
|    arrive_dest          | 0.025         |
|    clip_fraction        | 0.00227       |
|    clip_range           | 0.1           |
|    crash                | 0.075         |
|    entropy_loss         | -2.62         |
|    explained_variance   | 0.02

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 112      |
|    ep_rew_mean     | 17.9     |
| time/              |          |
|    fps             | 428      |
|    iterations      | 32       |
|    time_elapsed    | 191      |
|    total_timesteps | 81920    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 104          |
|    ep_rew_mean          | 16.5         |
| time/                   |              |
|    fps                  | 423          |
|    iterations           | 33           |
|    time_elapsed         | 199          |
|    total_timesteps      | 84480        |
| train/                  |              |
|    approx_kl            | 0.0016858369 |
|    clip_fraction        | 0.0779       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.61        |
|    explained_variance   | 0.0302       |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0222       |
|    crash                | 0.111        |
|    max_step             | 0            |
|    mean_ep_length       | 68.8         |
|    mean_reward          | 40.9         |
|    num_episodes         | 5            |
|    out_of_road          | 0.978        |
|    raw_action           | 0.39643162   |
|    route_completion     | 0.223        |
|    success_rate         | 0            |
|    total_cost           | 1.58         |
| time/                   |              |
|    total_timesteps      | 90000        |
| train/                  |              |
|    approx_kl            | 0.0011343759 |
|    arrive_dest          | 0.0222       |
|    clip_fraction        | 0.037        |
|    clip_range           | 0.1          |
|    crash                | 0.0889       |
|    entropy_loss         | -2.61        |
|    explained_variance   | 0.0311       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.02         |
|    crash                | 0.1          |
|    max_step             | 0            |
|    mean_ep_length       | 39.2         |
|    mean_reward          | 18.4         |
|    num_episodes         | 5            |
|    out_of_road          | 0.98         |
|    raw_action           | 0.40219608   |
|    route_completion     | 0.207        |
|    success_rate         | 0            |
|    total_cost           | 1.52         |
| time/                   |              |
|    total_timesteps      | 100000       |
| train/                  |              |
|    approx_kl            | 0.0008103464 |
|    arrive_dest          | 0.02         |
|    clip_fraction        | 0.0091       |
|    clip_range           | 0.1          |
|    crash                | 0.08         |
|    entropy_loss         | -2.6         |
|    explained_variance   | 0.0511       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0182       |
|    crash                | 0.127        |
|    max_step             | 0            |
|    mean_ep_length       | 66.6         |
|    mean_reward          | 55.2         |
|    num_episodes         | 5            |
|    out_of_road          | 0.982        |
|    raw_action           | 0.40936884   |
|    route_completion     | 0.206        |
|    success_rate         | 0            |
|    total_cost           | 1.47         |
| time/                   |              |
|    total_timesteps      | 110000       |
| train/                  |              |
|    approx_kl            | 0.0009922581 |
|    arrive_dest          | 0.0182       |
|    clip_fraction        | 0.021        |
|    clip_range           | 0.1          |
|    crash                | 0.0727       |
|    entropy_loss         | -2.59        |
|    explained_variance   | 0.149        |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.0167        |
|    crash                | 0.15          |
|    max_step             | 0             |
|    mean_ep_length       | 84.4          |
|    mean_reward          | 81.7          |
|    num_episodes         | 5             |
|    out_of_road          | 0.983         |
|    raw_action           | 0.41829288    |
|    route_completion     | 0.212         |
|    success_rate         | 0             |
|    total_cost           | 1.6           |
| time/                   |               |
|    total_timesteps      | 120000        |
| train/                  |               |
|    approx_kl            | 0.00095725973 |
|    arrive_dest          | 0.0167        |
|    clip_fraction        | 0.0341        |
|    clip_range           | 0.1           |
|    crash                | 0.0833        |
|    entropy_loss         | -2.57         |
|    explained_variance   | 0.19

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.0154        |
|    crash                | 0.138         |
|    max_step             | 0             |
|    mean_ep_length       | 52            |
|    mean_reward          | 34.5          |
|    num_episodes         | 5             |
|    out_of_road          | 0.985         |
|    raw_action           | 0.4217875     |
|    route_completion     | 0.205         |
|    success_rate         | 0             |
|    total_cost           | 1.55          |
| time/                   |               |
|    total_timesteps      | 130000        |
| train/                  |               |
|    approx_kl            | 0.00092398294 |
|    arrive_dest          | 0.0154        |
|    clip_fraction        | 0.0331        |
|    clip_range           | 0.1           |
|    crash                | 0.0769        |
|    entropy_loss         | -2.56         |
|    explained_variance   | 0.33

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0143       |
|    crash                | 0.143        |
|    max_step             | 0            |
|    mean_ep_length       | 98.2         |
|    mean_reward          | 114          |
|    num_episodes         | 5            |
|    out_of_road          | 0.986        |
|    raw_action           | 0.4317081    |
|    route_completion     | 0.217        |
|    success_rate         | 0.1          |
|    total_cost           | 1.77         |
| time/                   |              |
|    total_timesteps      | 140000       |
| train/                  |              |
|    approx_kl            | 0.0011000761 |
|    arrive_dest          | 0.0286       |
|    clip_fraction        | 0.0312       |
|    clip_range           | 0.1          |
|    crash                | 0.114        |
|    entropy_loss         | -2.55        |
|    explained_variance   | 0.401        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0133       |
|    crash                | 0.187        |
|    max_step             | 0            |
|    mean_ep_length       | 70.4         |
|    mean_reward          | 61.2         |
|    num_episodes         | 5            |
|    out_of_road          | 0.987        |
|    raw_action           | 0.43645912   |
|    route_completion     | 0.216        |
|    success_rate         | 0            |
|    total_cost           | 1.72         |
| time/                   |              |
|    total_timesteps      | 150000       |
| train/                  |              |
|    approx_kl            | 0.0018781092 |
|    arrive_dest          | 0.0267       |
|    clip_fraction        | 0.064        |
|    clip_range           | 0.1          |
|    crash                | 0.12         |
|    entropy_loss         | -2.54        |
|    explained_variance   | 0.396        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0125       |
|    crash                | 0.188        |
|    max_step             | 0            |
|    mean_ep_length       | 73.4         |
|    mean_reward          | 72.9         |
|    num_episodes         | 5            |
|    out_of_road          | 0.988        |
|    raw_action           | 0.44297045   |
|    route_completion     | 0.217        |
|    success_rate         | 0.1          |
|    total_cost           | 1.68         |
| time/                   |              |
|    total_timesteps      | 160000       |
| train/                  |              |
|    approx_kl            | 0.0009861523 |
|    arrive_dest          | 0.0375       |
|    clip_fraction        | 0.0769       |
|    clip_range           | 0.1          |
|    crash                | 0.15         |
|    entropy_loss         | -2.53        |
|    explained_variance   | 0.486        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0353       |
|    crash                | 0.212        |
|    max_step             | 0            |
|    mean_ep_length       | 259          |
|    mean_reward          | 99           |
|    num_episodes         | 5            |
|    out_of_road          | 0.965        |
|    raw_action           | 0.45254835   |
|    route_completion     | 0.24         |
|    success_rate         | 0.4          |
|    total_cost           | 6.71         |
| time/                   |              |
|    total_timesteps      | 170000       |
| train/                  |              |
|    approx_kl            | 0.0018136324 |
|    arrive_dest          | 0.0588       |
|    clip_fraction        | 0.0891       |
|    clip_range           | 0.1          |
|    crash                | 0.165        |
|    entropy_loss         | -2.51        |
|    explained_variance   | 0.448        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0333       |
|    crash                | 0.222        |
|    max_step             | 0            |
|    mean_ep_length       | 71           |
|    mean_reward          | 62.3         |
|    num_episodes         | 5            |
|    out_of_road          | 0.967        |
|    raw_action           | 0.45543137   |
|    route_completion     | 0.238        |
|    success_rate         | 0            |
|    total_cost           | 6.39         |
| time/                   |              |
|    total_timesteps      | 180000       |
| train/                  |              |
|    approx_kl            | 0.0024371226 |
|    arrive_dest          | 0.0556       |
|    clip_fraction        | 0.0419       |
|    clip_range           | 0.1          |
|    crash                | 0.167        |
|    entropy_loss         | -2.48        |
|    explained_variance   | 0.343        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0421       |
|    crash                | 0.232        |
|    max_step             | 0            |
|    mean_ep_length       | 148          |
|    mean_reward          | 89.4         |
|    num_episodes         | 5            |
|    out_of_road          | 0.958        |
|    raw_action           | 0.45887783   |
|    route_completion     | 0.248        |
|    success_rate         | 0.1          |
|    total_cost           | 7.94         |
| time/                   |              |
|    total_timesteps      | 190000       |
| train/                  |              |
|    approx_kl            | 0.0008832732 |
|    arrive_dest          | 0.0526       |
|    clip_fraction        | 0.059        |
|    clip_range           | 0.1          |
|    crash                | 0.158        |
|    entropy_loss         | -2.46        |
|    explained_variance   | -0.0396      |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.05          |
|    crash                | 0.24          |
|    max_step             | 0             |
|    mean_ep_length       | 121           |
|    mean_reward          | 142           |
|    num_episodes         | 5             |
|    out_of_road          | 0.95          |
|    raw_action           | 0.46239087    |
|    route_completion     | 0.257         |
|    success_rate         | 0.2           |
|    total_cost           | 7.87          |
| time/                   |               |
|    total_timesteps      | 200000        |
| train/                  |               |
|    approx_kl            | 0.00094684225 |
|    arrive_dest          | 0.06          |
|    clip_fraction        | 0.0418        |
|    clip_range           | 0.1           |
|    crash                | 0.16          |
|    entropy_loss         | -2.45         |
|    explained_variance   | 0.23

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 191      |
|    ep_rew_mean     | 90.2     |
| time/              |          |
|    fps             | 346      |
|    iterations      | 79       |
|    time_elapsed    | 584      |
|    total_timesteps | 202240   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 192           |
|    ep_rew_mean          | 93.2          |
| time/                   |               |
|    fps                  | 348           |
|    iterations           | 80            |
|    time_elapsed         | 588           |
|    total_timesteps      | 204800        |
| train/                  |               |
|    approx_kl            | 0.00036688306 |
|    clip_fraction        | 0.0458        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.44         |
|    explained_variance   | 0.303         |


-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0571      |
|    crash                | 0.257       |
|    max_step             | 0           |
|    mean_ep_length       | 140         |
|    mean_reward          | 171         |
|    num_episodes         | 5           |
|    out_of_road          | 0.943       |
|    raw_action           | 0.4658269   |
|    route_completion     | 0.272       |
|    success_rate         | 0.1         |
|    total_cost           | 8.13        |
| time/                   |             |
|    total_timesteps      | 210000      |
| train/                  |             |
|    approx_kl            | 0.001188679 |
|    arrive_dest          | 0.0571      |
|    clip_fraction        | 0.0616      |
|    clip_range           | 0.1         |
|    crash                | 0.171       |
|    entropy_loss         | -2.43       |
|    explained_variance   | 0.376       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 207      |
|    ep_rew_mean     | 102      |
| time/              |          |
|    fps             | 350      |
|    iterations      | 83       |
|    time_elapsed    | 606      |
|    total_timesteps | 212480   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 213           |
|    ep_rew_mean          | 103           |
| time/                   |               |
|    fps                  | 351           |
|    iterations           | 84            |
|    time_elapsed         | 611           |
|    total_timesteps      | 215040        |
| train/                  |               |
|    approx_kl            | 0.00046560625 |
|    clip_fraction        | 0.0268        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.43         |
|    explained_variance   | 0.335         |


------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0636       |
|    crash                | 0.255        |
|    max_step             | 0            |
|    mean_ep_length       | 119          |
|    mean_reward          | 125          |
|    num_episodes         | 5            |
|    out_of_road          | 0.936        |
|    raw_action           | 0.46906608   |
|    route_completion     | 0.278        |
|    success_rate         | 0.3          |
|    total_cost           | 8.23         |
| time/                   |              |
|    total_timesteps      | 220000       |
| train/                  |              |
|    approx_kl            | 0.0007339701 |
|    arrive_dest          | 0.0727       |
|    clip_fraction        | 0.037        |
|    clip_range           | 0.1          |
|    crash                | 0.182        |
|    entropy_loss         | -2.42        |
|    explained_variance   | 0.365        |
|    learni

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.0696        |
|    crash                | 0.252         |
|    max_step             | 0             |
|    mean_ep_length       | 137           |
|    mean_reward          | 109           |
|    num_episodes         | 5             |
|    out_of_road          | 0.93          |
|    raw_action           | 0.47013813    |
|    route_completion     | 0.286         |
|    success_rate         | 0.2           |
|    total_cost           | 9             |
| time/                   |               |
|    total_timesteps      | 230000        |
| train/                  |               |
|    approx_kl            | 0.00095076876 |
|    arrive_dest          | 0.0783        |
|    clip_fraction        | 0.105         |
|    clip_range           | 0.1           |
|    crash                | 0.191         |
|    entropy_loss         | -2.4          |
|    explained_variance   | 0.16

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0667       |
|    crash                | 0.25         |
|    max_step             | 0            |
|    mean_ep_length       | 107          |
|    mean_reward          | 146          |
|    num_episodes         | 5            |
|    out_of_road          | 0.933        |
|    raw_action           | 0.47191602   |
|    route_completion     | 0.295        |
|    success_rate         | 0.1          |
|    total_cost           | 8.68         |
| time/                   |              |
|    total_timesteps      | 240000       |
| train/                  |              |
|    approx_kl            | 0.0020103762 |
|    arrive_dest          | 0.0833       |
|    clip_fraction        | 0.0788       |
|    clip_range           | 0.1          |
|    crash                | 0.2          |
|    entropy_loss         | -2.39        |
|    explained_variance   | 0.345        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.064        |
|    crash                | 0.248        |
|    max_step             | 0            |
|    mean_ep_length       | 133          |
|    mean_reward          | 155          |
|    num_episodes         | 5            |
|    out_of_road          | 0.936        |
|    raw_action           | 0.47092095   |
|    route_completion     | 0.299        |
|    success_rate         | 0.1          |
|    total_cost           | 8.58         |
| time/                   |              |
|    total_timesteps      | 250000       |
| train/                  |              |
|    approx_kl            | 0.0023479273 |
|    arrive_dest          | 0.088        |
|    clip_fraction        | 0.0583       |
|    clip_range           | 0.1          |
|    crash                | 0.208        |
|    entropy_loss         | -2.38        |
|    explained_variance   | 0.339        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0846       |
|    crash                | 0.254        |
|    max_step             | 0            |
|    mean_ep_length       | 209          |
|    mean_reward          | 196          |
|    num_episodes         | 5            |
|    out_of_road          | 0.915        |
|    raw_action           | 0.47172058   |
|    route_completion     | 0.318        |
|    success_rate         | 0.5          |
|    total_cost           | 9.92         |
| time/                   |              |
|    total_timesteps      | 260000       |
| train/                  |              |
|    approx_kl            | 0.0019094227 |
|    arrive_dest          | 0.1          |
|    clip_fraction        | 0.107        |
|    clip_range           | 0.1          |
|    crash                | 0.208        |
|    entropy_loss         | -2.37        |
|    explained_variance   | 0.444        |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 271      |
|    ep_rew_mean     | 153      |
| time/              |          |
|    fps             | 358      |
|    iterations      | 102      |
|    time_elapsed    | 729      |
|    total_timesteps | 261120   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 262          |
|    ep_rew_mean          | 147          |
| time/                   |              |
|    fps                  | 359          |
|    iterations           | 103          |
|    time_elapsed         | 733          |
|    total_timesteps      | 263680       |
| train/                  |              |
|    approx_kl            | 0.0009943468 |
|    clip_fraction        | 0.0594       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.36        |
|    explained_variance   | 0.221        |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0815      |
|    crash                | 0.259       |
|    max_step             | 0           |
|    mean_ep_length       | 84.2        |
|    mean_reward          | 82.1        |
|    num_episodes         | 5           |
|    out_of_road          | 0.919       |
|    raw_action           | 0.47300285  |
|    route_completion     | 0.316       |
|    success_rate         | 0.1         |
|    total_cost           | 9.66        |
| time/                   |             |
|    total_timesteps      | 270000      |
| train/                  |             |
|    approx_kl            | 0.001697525 |
|    arrive_dest          | 0.104       |
|    clip_fraction        | 0.0445      |
|    clip_range           | 0.1         |
|    crash                | 0.207       |
|    entropy_loss         | -2.35       |
|    explained_variance   | 0.801       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0929       |
|    crash                | 0.25         |
|    max_step             | 0            |
|    mean_ep_length       | 200          |
|    mean_reward          | 204          |
|    num_episodes         | 5            |
|    out_of_road          | 0.907        |
|    raw_action           | 0.47348824   |
|    route_completion     | 0.328        |
|    success_rate         | 0.3          |
|    total_cost           | 10.4         |
| time/                   |              |
|    total_timesteps      | 280000       |
| train/                  |              |
|    approx_kl            | 0.0062280158 |
|    arrive_dest          | 0.107        |
|    clip_fraction        | 0.105        |
|    clip_range           | 0.1          |
|    crash                | 0.207        |
|    entropy_loss         | -2.34        |
|    explained_variance   | 0.0535       |
|    learni

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 294      |
|    ep_rew_mean     | 178      |
| time/              |          |
|    fps             | 362      |
|    iterations      | 110      |
|    time_elapsed    | 777      |
|    total_timesteps | 281600   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 288           |
|    ep_rew_mean          | 178           |
| time/                   |               |
|    fps                  | 363           |
|    iterations           | 111           |
|    time_elapsed         | 781           |
|    total_timesteps      | 284160        |
| train/                  |               |
|    approx_kl            | 0.00052417995 |
|    clip_fraction        | 0.0515        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.34         |
|    explained_variance   | -0.0038       |


------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0966       |
|    crash                | 0.248        |
|    max_step             | 0            |
|    mean_ep_length       | 153          |
|    mean_reward          | 168          |
|    num_episodes         | 5            |
|    out_of_road          | 0.903        |
|    raw_action           | 0.47359383   |
|    route_completion     | 0.336        |
|    success_rate         | 0.1          |
|    total_cost           | 10.7         |
| time/                   |              |
|    total_timesteps      | 290000       |
| train/                  |              |
|    approx_kl            | 0.0016925186 |
|    arrive_dest          | 0.103        |
|    clip_fraction        | 0.109        |
|    clip_range           | 0.1          |
|    crash                | 0.228        |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.204        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.1          |
|    crash                | 0.247        |
|    max_step             | 0            |
|    mean_ep_length       | 174          |
|    mean_reward          | 108          |
|    num_episodes         | 5            |
|    out_of_road          | 0.9          |
|    raw_action           | 0.4736491    |
|    route_completion     | 0.343        |
|    success_rate         | 0.1          |
|    total_cost           | 11.7         |
| time/                   |              |
|    total_timesteps      | 300000       |
| train/                  |              |
|    approx_kl            | 0.0011548521 |
|    arrive_dest          | 0.1          |
|    clip_fraction        | 0.036        |
|    clip_range           | 0.1          |
|    crash                | 0.233        |
|    entropy_loss         | -2.31        |
|    explained_variance   | 0.124        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.103       |
|    crash                | 0.245       |
|    max_step             | 0           |
|    mean_ep_length       | 131         |
|    mean_reward          | 181         |
|    num_episodes         | 5           |
|    out_of_road          | 0.897       |
|    raw_action           | 0.47522143  |
|    route_completion     | 0.348       |
|    success_rate         | 0.2         |
|    total_cost           | 11.3        |
| time/                   |             |
|    total_timesteps      | 310000      |
| train/                  |             |
|    approx_kl            | 0.004429861 |
|    arrive_dest          | 0.103       |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.1         |
|    crash                | 0.226       |
|    entropy_loss         | -2.3        |
|    explained_variance   | 0.274       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.1          |
|    crash                | 0.244        |
|    max_step             | 0            |
|    mean_ep_length       | 236          |
|    mean_reward          | 91.8         |
|    num_episodes         | 5            |
|    out_of_road          | 0.9          |
|    raw_action           | 0.4728783    |
|    route_completion     | 0.349        |
|    success_rate         | 0.1          |
|    total_cost           | 12.1         |
| time/                   |              |
|    total_timesteps      | 320000       |
| train/                  |              |
|    approx_kl            | 0.0019485619 |
|    arrive_dest          | 0.106        |
|    clip_fraction        | 0.0881       |
|    clip_range           | 0.1          |
|    crash                | 0.219        |
|    entropy_loss         | -2.29        |
|    explained_variance   | 0.293        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.103        |
|    crash                | 0.242        |
|    max_step             | 0            |
|    mean_ep_length       | 150          |
|    mean_reward          | 169          |
|    num_episodes         | 5            |
|    out_of_road          | 0.897        |
|    raw_action           | 0.47308958   |
|    route_completion     | 0.353        |
|    success_rate         | 0.2          |
|    total_cost           | 12           |
| time/                   |              |
|    total_timesteps      | 330000       |
| train/                  |              |
|    approx_kl            | 0.0012415589 |
|    arrive_dest          | 0.109        |
|    clip_fraction        | 0.0871       |
|    clip_range           | 0.1          |
|    crash                | 0.218        |
|    entropy_loss         | -2.28        |
|    explained_variance   | 0.261        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.1         |
|    crash                | 0.247       |
|    max_step             | 0           |
|    mean_ep_length       | 127         |
|    mean_reward          | 173         |
|    num_episodes         | 5           |
|    out_of_road          | 0.9         |
|    raw_action           | 0.47226906  |
|    route_completion     | 0.359       |
|    success_rate         | 0.1         |
|    total_cost           | 11.7        |
| time/                   |             |
|    total_timesteps      | 340000      |
| train/                  |             |
|    approx_kl            | 0.010957631 |
|    arrive_dest          | 0.112       |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.1         |
|    crash                | 0.218       |
|    entropy_loss         | -2.27       |
|    explained_variance   | -0.0868     |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0971       |
|    crash                | 0.246        |
|    max_step             | 0            |
|    mean_ep_length       | 110          |
|    mean_reward          | 135          |
|    num_episodes         | 5            |
|    out_of_road          | 0.903        |
|    raw_action           | 0.47326702   |
|    route_completion     | 0.36         |
|    success_rate         | 0            |
|    total_cost           | 11.5         |
| time/                   |              |
|    total_timesteps      | 350000       |
| train/                  |              |
|    approx_kl            | 0.0019657505 |
|    arrive_dest          | 0.109        |
|    clip_fraction        | 0.12         |
|    clip_range           | 0.1          |
|    crash                | 0.217        |
|    entropy_loss         | -2.27        |
|    explained_variance   | 0.626        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.1         |
|    crash                | 0.244       |
|    max_step             | 0           |
|    mean_ep_length       | 185         |
|    mean_reward          | 215         |
|    num_episodes         | 5           |
|    out_of_road          | 0.9         |
|    raw_action           | 0.47293934  |
|    route_completion     | 0.367       |
|    success_rate         | 0.1         |
|    total_cost           | 11.5        |
| time/                   |             |
|    total_timesteps      | 360000      |
| train/                  |             |
|    approx_kl            | 0.001096523 |
|    arrive_dest          | 0.106       |
|    clip_fraction        | 0.0321      |
|    clip_range           | 0.1         |
|    crash                | 0.211       |
|    entropy_loss         | -2.27       |
|    explained_variance   | 0.565       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 333      |
|    ep_rew_mean     | 248      |
| time/              |          |
|    fps             | 375      |
|    iterations      | 141      |
|    time_elapsed    | 961      |
|    total_timesteps | 360960   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 330          |
|    ep_rew_mean          | 245          |
| time/                   |              |
|    fps                  | 376          |
|    iterations           | 142          |
|    time_elapsed         | 964          |
|    total_timesteps      | 363520       |
| train/                  |              |
|    approx_kl            | 0.0023345228 |
|    clip_fraction        | 0.0523       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.27        |
|    explained_variance   | 0.294        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0973       |
|    crash                | 0.238        |
|    max_step             | 0            |
|    mean_ep_length       | 88.8         |
|    mean_reward          | 99.4         |
|    num_episodes         | 5            |
|    out_of_road          | 0.903        |
|    raw_action           | 0.4726658    |
|    route_completion     | 0.365        |
|    success_rate         | 0            |
|    total_cost           | 11.2         |
| time/                   |              |
|    total_timesteps      | 370000       |
| train/                  |              |
|    approx_kl            | 0.0003466638 |
|    arrive_dest          | 0.103        |
|    clip_fraction        | 0.0341       |
|    clip_range           | 0.1          |
|    crash                | 0.216        |
|    entropy_loss         | -2.26        |
|    explained_variance   | 0.21         |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0947       |
|    crash                | 0.232        |
|    max_step             | 0            |
|    mean_ep_length       | 101          |
|    mean_reward          | 125          |
|    num_episodes         | 5            |
|    out_of_road          | 0.905        |
|    raw_action           | 0.4736204    |
|    route_completion     | 0.364        |
|    success_rate         | 0.1          |
|    total_cost           | 10.9         |
| time/                   |              |
|    total_timesteps      | 380000       |
| train/                  |              |
|    approx_kl            | 0.0014691947 |
|    arrive_dest          | 0.105        |
|    clip_fraction        | 0.0832       |
|    clip_range           | 0.1          |
|    crash                | 0.211        |
|    entropy_loss         | -2.25        |
|    explained_variance   | 0.377        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0974      |
|    crash                | 0.241       |
|    max_step             | 0           |
|    mean_ep_length       | 222         |
|    mean_reward          | 188         |
|    num_episodes         | 5           |
|    out_of_road          | 0.903       |
|    raw_action           | 0.47323862  |
|    route_completion     | 0.373       |
|    success_rate         | 0.2         |
|    total_cost           | 11.7        |
| time/                   |             |
|    total_timesteps      | 390000      |
| train/                  |             |
|    approx_kl            | 0.003971712 |
|    arrive_dest          | 0.108       |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.1         |
|    crash                | 0.221       |
|    entropy_loss         | -2.24       |
|    explained_variance   | 0.0223      |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.105       |
|    crash                | 0.245       |
|    max_step             | 0           |
|    mean_ep_length       | 209         |
|    mean_reward          | 260         |
|    num_episodes         | 5           |
|    out_of_road          | 0.895       |
|    raw_action           | 0.47452208  |
|    route_completion     | 0.381       |
|    success_rate         | 0.3         |
|    total_cost           | 11.7        |
| time/                   |             |
|    total_timesteps      | 400000      |
| train/                  |             |
|    approx_kl            | 0.004443941 |
|    arrive_dest          | 0.11        |
|    clip_fraction        | 0.139       |
|    clip_range           | 0.1         |
|    crash                | 0.22        |
|    entropy_loss         | -2.23       |
|    explained_variance   | 0.481       |
|    learning_rate        | 5e-05 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 323      |
|    ep_rew_mean     | 247      |
| time/              |          |
|    fps             | 382      |
|    iterations      | 157      |
|    time_elapsed    | 1051     |
|    total_timesteps | 401920   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 328           |
|    ep_rew_mean          | 252           |
| time/                   |               |
|    fps                  | 383           |
|    iterations           | 158           |
|    time_elapsed         | 1054          |
|    total_timesteps      | 404480        |
| train/                  |               |
|    approx_kl            | 0.00073453493 |
|    clip_fraction        | 0.0458        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.23         |
|    explained_variance   | 0.356         |


-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.112       |
|    crash                | 0.244       |
|    max_step             | 0           |
|    mean_ep_length       | 170         |
|    mean_reward          | 193         |
|    num_episodes         | 5           |
|    out_of_road          | 0.888       |
|    raw_action           | 0.47749263  |
|    route_completion     | 0.385       |
|    success_rate         | 0.2         |
|    total_cost           | 11.8        |
| time/                   |             |
|    total_timesteps      | 410000      |
| train/                  |             |
|    approx_kl            | 0.001916737 |
|    arrive_dest          | 0.107       |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.1         |
|    crash                | 0.22        |
|    entropy_loss         | -2.22       |
|    explained_variance   | 0.129       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.119        |
|    crash                | 0.243        |
|    max_step             | 0            |
|    mean_ep_length       | 172          |
|    mean_reward          | 153          |
|    num_episodes         | 5            |
|    out_of_road          | 0.881        |
|    raw_action           | 0.47767624   |
|    route_completion     | 0.39         |
|    success_rate         | 0.5          |
|    total_cost           | 12           |
| time/                   |              |
|    total_timesteps      | 420000       |
| train/                  |              |
|    approx_kl            | 0.0059725363 |
|    arrive_dest          | 0.119        |
|    clip_fraction        | 0.0925       |
|    clip_range           | 0.1          |
|    crash                | 0.219        |
|    entropy_loss         | -2.21        |
|    explained_variance   | 0.781        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.121        |
|    crash                | 0.242        |
|    max_step             | 0            |
|    mean_ep_length       | 210          |
|    mean_reward          | 124          |
|    num_episodes         | 5            |
|    out_of_road          | 0.879        |
|    raw_action           | 0.47859755   |
|    route_completion     | 0.393        |
|    success_rate         | 0.3          |
|    total_cost           | 12.8         |
| time/                   |              |
|    total_timesteps      | 430000       |
| train/                  |              |
|    approx_kl            | 0.0052703223 |
|    arrive_dest          | 0.126        |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    crash                | 0.219        |
|    entropy_loss         | -2.2         |
|    explained_variance   | 0.0535       |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.123        |
|    crash                | 0.241        |
|    max_step             | 0            |
|    mean_ep_length       | 186          |
|    mean_reward          | 243          |
|    num_episodes         | 5            |
|    out_of_road          | 0.877        |
|    raw_action           | 0.4771823    |
|    route_completion     | 0.4          |
|    success_rate         | 0.1          |
|    total_cost           | 12.8         |
| time/                   |              |
|    total_timesteps      | 440000       |
| train/                  |              |
|    approx_kl            | 0.0016636144 |
|    arrive_dest          | 0.123        |
|    clip_fraction        | 0.0701       |
|    clip_range           | 0.1          |
|    crash                | 0.214        |
|    entropy_loss         | -2.19        |
|    explained_variance   | 0.662        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.12         |
|    crash                | 0.249        |
|    max_step             | 0            |
|    mean_ep_length       | 138          |
|    mean_reward          | 132          |
|    num_episodes         | 5            |
|    out_of_road          | 0.88         |
|    raw_action           | 0.47851443   |
|    route_completion     | 0.403        |
|    success_rate         | 0            |
|    total_cost           | 12.9         |
| time/                   |              |
|    total_timesteps      | 450000       |
| train/                  |              |
|    approx_kl            | 0.0012942277 |
|    arrive_dest          | 0.12         |
|    clip_fraction        | 0.109        |
|    clip_range           | 0.1          |
|    crash                | 0.213        |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.395        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.122        |
|    crash                | 0.252        |
|    max_step             | 0            |
|    mean_ep_length       | 173          |
|    mean_reward          | 110          |
|    num_episodes         | 5            |
|    out_of_road          | 0.878        |
|    raw_action           | 0.47865662   |
|    route_completion     | 0.405        |
|    success_rate         | 0.3          |
|    total_cost           | 13.4         |
| time/                   |              |
|    total_timesteps      | 460000       |
| train/                  |              |
|    approx_kl            | 0.0024895004 |
|    arrive_dest          | 0.126        |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.1          |
|    crash                | 0.213        |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.472        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.119        |
|    crash                | 0.26         |
|    max_step             | 0            |
|    mean_ep_length       | 124          |
|    mean_reward          | 152          |
|    num_episodes         | 5            |
|    out_of_road          | 0.881        |
|    raw_action           | 0.47959664   |
|    route_completion     | 0.407        |
|    success_rate         | 0            |
|    total_cost           | 13.2         |
| time/                   |              |
|    total_timesteps      | 470000       |
| train/                  |              |
|    approx_kl            | 0.0005176229 |
|    arrive_dest          | 0.123        |
|    clip_fraction        | 0.106        |
|    clip_range           | 0.1          |
|    crash                | 0.217        |
|    entropy_loss         | -2.16        |
|    explained_variance   | 0.46         |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.125        |
|    crash                | 0.263        |
|    max_step             | 0            |
|    mean_ep_length       | 168          |
|    mean_reward          | 182          |
|    num_episodes         | 5            |
|    out_of_road          | 0.875        |
|    raw_action           | 0.47989133   |
|    route_completion     | 0.412        |
|    success_rate         | 0.3          |
|    total_cost           | 13.5         |
| time/                   |              |
|    total_timesteps      | 480000       |
| train/                  |              |
|    approx_kl            | 0.0012637668 |
|    arrive_dest          | 0.125        |
|    clip_fraction        | 0.0937       |
|    clip_range           | 0.1          |
|    crash                | 0.212        |
|    entropy_loss         | -2.15        |
|    explained_variance   | 0.707        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.131        |
|    crash                | 0.261        |
|    max_step             | 0            |
|    mean_ep_length       | 150          |
|    mean_reward          | 191          |
|    num_episodes         | 5            |
|    out_of_road          | 0.869        |
|    raw_action           | 0.4806681    |
|    route_completion     | 0.416        |
|    success_rate         | 0.3          |
|    total_cost           | 13.4         |
| time/                   |              |
|    total_timesteps      | 490000       |
| train/                  |              |
|    approx_kl            | 0.0015425779 |
|    arrive_dest          | 0.127        |
|    clip_fraction        | 0.0837       |
|    clip_range           | 0.1          |
|    crash                | 0.212        |
|    entropy_loss         | -2.14        |
|    explained_variance   | 0.656        |
|    learni

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.132      |
|    crash                | 0.264      |
|    max_step             | 0          |
|    mean_ep_length       | 172        |
|    mean_reward          | 138        |
|    num_episodes         | 5          |
|    out_of_road          | 0.868      |
|    raw_action           | 0.48013866 |
|    route_completion     | 0.42       |
|    success_rate         | 0.3        |
|    total_cost           | 13.8       |
| time/                   |            |
|    total_timesteps      | 500000     |
| train/                  |            |
|    approx_kl            | 0.00626289 |
|    arrive_dest          | 0.132      |
|    clip_fraction        | 0.153      |
|    clip_range           | 0.1        |
|    crash                | 0.212      |
|    entropy_loss         | -2.13      |
|    explained_variance   | 0.431      |
|    learning_rate        | 5e-05      |
|    loss       

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.141        |
|    crash                | 0.267        |
|    max_step             | 0            |
|    mean_ep_length       | 195          |
|    mean_reward          | 252          |
|    num_episodes         | 5            |
|    out_of_road          | 0.859        |
|    raw_action           | 0.48042876   |
|    route_completion     | 0.426        |
|    success_rate         | 0.4          |
|    total_cost           | 13.8         |
| time/                   |              |
|    total_timesteps      | 510000       |
| train/                  |              |
|    approx_kl            | 0.0024222606 |
|    arrive_dest          | 0.133        |
|    clip_fraction        | 0.139        |
|    clip_range           | 0.1          |
|    crash                | 0.212        |
|    entropy_loss         | -2.12        |
|    explained_variance   | 0.613        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.146       |
|    crash                | 0.262       |
|    max_step             | 0           |
|    mean_ep_length       | 139         |
|    mean_reward          | 184         |
|    num_episodes         | 5           |
|    out_of_road          | 0.854       |
|    raw_action           | 0.4804883   |
|    route_completion     | 0.43        |
|    success_rate         | 0.2         |
|    total_cost           | 13.7        |
| time/                   |             |
|    total_timesteps      | 520000      |
| train/                  |             |
|    approx_kl            | 0.003985368 |
|    arrive_dest          | 0.131       |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.1         |
|    crash                | 0.208       |
|    entropy_loss         | -2.11       |
|    explained_variance   | 0.548       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.147        |
|    crash                | 0.257        |
|    max_step             | 0            |
|    mean_ep_length       | 115          |
|    mean_reward          | 130          |
|    num_episodes         | 5            |
|    out_of_road          | 0.853        |
|    raw_action           | 0.4806332    |
|    route_completion     | 0.43         |
|    success_rate         | 0.3          |
|    total_cost           | 13.5         |
| time/                   |              |
|    total_timesteps      | 530000       |
| train/                  |              |
|    approx_kl            | 0.0027722842 |
|    arrive_dest          | 0.136        |
|    clip_fraction        | 0.0731       |
|    clip_range           | 0.1          |
|    crash                | 0.208        |
|    entropy_loss         | -2.09        |
|    explained_variance   | 0.565        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.144        |
|    crash                | 0.256        |
|    max_step             | 0            |
|    mean_ep_length       | 134          |
|    mean_reward          | 163          |
|    num_episodes         | 5            |
|    out_of_road          | 0.856        |
|    raw_action           | 0.48189113   |
|    route_completion     | 0.43         |
|    success_rate         | 0.1          |
|    total_cost           | 13.3         |
| time/                   |              |
|    total_timesteps      | 540000       |
| train/                  |              |
|    approx_kl            | 0.0050596357 |
|    arrive_dest          | 0.137        |
|    clip_fraction        | 0.274        |
|    clip_range           | 0.1          |
|    crash                | 0.215        |
|    entropy_loss         | -2.09        |
|    explained_variance   | 0.487        |
|    learni

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.145       |
|    crash                | 0.255       |
|    max_step             | 0           |
|    mean_ep_length       | 144         |
|    mean_reward          | 165         |
|    num_episodes         | 5           |
|    out_of_road          | 0.855       |
|    raw_action           | 0.4818801   |
|    route_completion     | 0.432       |
|    success_rate         | 0.1         |
|    total_cost           | 13.1        |
| time/                   |             |
|    total_timesteps      | 550000      |
| train/                  |             |
|    approx_kl            | 0.001204178 |
|    arrive_dest          | 0.135       |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.1         |
|    crash                | 0.211       |
|    entropy_loss         | -2.08       |
|    explained_variance   | 0.784       |
|    learning_rate        | 5e-05 

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.146       |
|    crash                | 0.257       |
|    max_step             | 0           |
|    mean_ep_length       | 135         |
|    mean_reward          | 137         |
|    num_episodes         | 5           |
|    out_of_road          | 0.854       |
|    raw_action           | 0.48334777  |
|    route_completion     | 0.434       |
|    success_rate         | 0.3         |
|    total_cost           | 13          |
| time/                   |             |
|    total_timesteps      | 560000      |
| train/                  |             |
|    approx_kl            | 0.005751005 |
|    arrive_dest          | 0.139       |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.1         |
|    crash                | 0.211       |
|    entropy_loss         | -2.06       |
|    explained_variance   | 0.598       |
|    learning_rate        | 5e-05 

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.147        |
|    crash                | 0.263        |
|    max_step             | 0            |
|    mean_ep_length       | 176          |
|    mean_reward          | 225          |
|    num_episodes         | 5            |
|    out_of_road          | 0.853        |
|    raw_action           | 0.48313123   |
|    route_completion     | 0.437        |
|    success_rate         | 0.2          |
|    total_cost           | 12.8         |
| time/                   |              |
|    total_timesteps      | 570000       |
| train/                  |              |
|    approx_kl            | 0.0074601076 |
|    arrive_dest          | 0.14         |
|    clip_fraction        | 0.166        |
|    clip_range           | 0.1          |
|    crash                | 0.214        |
|    entropy_loss         | -2.06        |
|    explained_variance   | 0.557        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.145        |
|    crash                | 0.266        |
|    max_step             | 0            |
|    mean_ep_length       | 118          |
|    mean_reward          | 167          |
|    num_episodes         | 5            |
|    out_of_road          | 0.855        |
|    raw_action           | 0.48387364   |
|    route_completion     | 0.437        |
|    success_rate         | 0            |
|    total_cost           | 12.6         |
| time/                   |              |
|    total_timesteps      | 580000       |
| train/                  |              |
|    approx_kl            | 0.0007721883 |
|    arrive_dest          | 0.138        |
|    clip_fraction        | 0.157        |
|    clip_range           | 0.1          |
|    crash                | 0.214        |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.643        |
|    learni

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.142        |
|    crash                | 0.268        |
|    max_step             | 0            |
|    mean_ep_length       | 123          |
|    mean_reward          | 162          |
|    num_episodes         | 5            |
|    out_of_road          | 0.858        |
|    raw_action           | 0.48294708   |
|    route_completion     | 0.437        |
|    success_rate         | 0.4          |
|    total_cost           | 12.4         |
| time/                   |              |
|    total_timesteps      | 590000       |
| train/                  |              |
|    approx_kl            | 0.0019184966 |
|    arrive_dest          | 0.149        |
|    clip_fraction        | 0.0763       |
|    clip_range           | 0.1          |
|    crash                | 0.214        |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.523        |
|    learni