In [1]:
!pip install stable_baselines3[extra]



In [2]:
!pip install git+https://github.com/metadriverse/metadrive.git

Collecting git+https://github.com/metadriverse/metadrive.git
  Cloning https://github.com/metadriverse/metadrive.git to c:\users\colton\appdata\local\temp\pip-req-build-xw239x35
  Resolved https://github.com/metadriverse/metadrive.git to commit a09bc963b067c9ce7e348586f43e7253cba55875
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/metadriverse/metadrive.git 'C:\Users\Colton\AppData\Local\Temp\pip-req-build-xw239x35'


In [3]:
!pip install wandb



## RL Environment

In [4]:
import copy
from metadrive.envs.safe_metadrive_env import SafeMetaDriveEnv

DEFAULT_CONFIG = {
    # The below are default configs copied from SafeMetaDriveEnv
    # Environment difficulty
    "accident_prob": 0.8,
    "traffic_density": 0.05,
    # Termination conditions
    "crash_vehicle_done": False,
    "crash_object_done": False,
    # Reward
    "success_reward": 10.0,
    "driving_reward": 1.0,
    "speed_reward": 0.1,
    # Penalty will be negated and added to reward
    "out_of_road_penalty": 5.0,
    "crash_vehicle_penalty": 1.0,
    "crash_object_penalty": 1.0,
    # Cost will be return in info["cost"] and you can do constrained optimization with it
    "crash_vehicle_cost": 1.0,
    "crash_object_cost": 1.0,
    "out_of_road_cost": 1.0,
}

# Use deepcopy to avoid modifying the DEFAULT_CONFIG
TRAINING_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
TRAINING_CONFIG.update(
    {  # Environment setting
        "num_scenarios": 50,  # There are totally 50 possible maps.
        "start_seed": 100,  # We will use the map with seeds in [100, 150) as the default training environment.
    }
)


def get_training_env(extra_config=None):
    config = copy.deepcopy(TRAINING_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


VALIDATION_CONFIG = copy.deepcopy(DEFAULT_CONFIG)
VALIDATION_CONFIG.update(
    {  # Environment setting
        "num_scenarios": 50,  # There are totally 50 possible maps.
        "start_seed": 1000,  # We will use the map with seeds in [1000, 1050) as the default validation environment.
    }
)


def get_validation_env(extra_config=None):
    config = copy.deepcopy(VALIDATION_CONFIG)
    if extra_config:
        config.update(extra_config)
    return SafeMetaDriveEnv(config)


## Import and utilities

In [5]:
import argparse
import datetime
import logging
import os
import uuid
from collections import defaultdict
from pathlib import Path

import numpy as np
from metadrive.engine.logger import set_log_level
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO
from stable_baselines3.ppo.policies import ActorCriticPolicy
from wandb.integration.sb3 import WandbCallback

import wandb


# Remove MetaDrive's logging information when episode ends.
set_log_level(logging.ERROR)

In [6]:

def get_time_str():
    return datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


def remove_reset_seed_and_add_monitor(make_env, trial_dir):
    """
    MetaDrive env's reset function takes a seed argument and use it to determine the map to load.
    However, in stable-baselines3, it calls reset function with a seed argument serving as the random seed,
    which is not what we want. We do a trick here to remap the random seed to map index.

    Stable-baselines3 recommends using Monitor wrapper to log training data. We add a Monitor wrapper here.
    """
    from gymnasium import Wrapper
    from stable_baselines3.common.monitor import Monitor
    class NewClass(Wrapper):
        def reset(self, seed=None, **kwargs):
            # PZH: We do a trick here to remap the seed to the map index. This can help randomize the maps.
            if seed is not None:
                new_seed = self.env.start_index + (seed % self.env.num_scenarios)
            else:
                new_seed = None
            return self.env.reset(seed=new_seed, **kwargs)

    def new_make_env():
        env = make_env()
        NewClass.__name__ = env.__class__.__name__ + "WithoutResetSeed"
        wrapped_env = NewClass(env)
        wrapped_env = Monitor(env=wrapped_env, filename=str(trial_dir))
        return wrapped_env

    return new_make_env


class CustomizedEvalCallback(EvalCallback):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.evaluations_info_buffer = defaultdict(list)

    def _log_success_callback(self, locals_, globals_):
        info = locals_["info"]

        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

            maybe_is_success2 = info.get("arrive_dest", None)
            if maybe_is_success2 is not None:
                self._is_success_buffer.append(maybe_is_success2)

            assert (maybe_is_success is None) or (maybe_is_success2 is None), "We cannot have two success flags!"

            for k in ["route_completion", "total_cost", "arrive_dest", "max_step", "out_of_road", "crash"]:
                if k in info:
                    self.evaluations_info_buffer[k].append(info[k])

        if "raw_action" in info:
            self.evaluations_info_buffer["raw_action"].append(info["raw_action"])

    def _on_step(self) -> bool:
        """
        PZH Note: Overall this function is copied from original EvalCallback._on_step.
        We additionally record evaluations_info_buffer to the logger.
        """

        from stable_baselines3.common.evaluation import evaluate_policy
        from stable_baselines3.common.vec_env import sync_envs_normalization

        continue_training = True

        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Sync training and eval env if there is VecNormalize
            if self.model.get_vec_normalize_env() is not None:
                try:
                    sync_envs_normalization(self.training_env, self.eval_env)
                except AttributeError as e:
                    raise AssertionError(
                        "Training and eval env are not wrapped the same way, "
                        "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback "
                        "and warning above."
                    ) from e

            # Reset success rate buffer
            self._is_success_buffer = []

            episode_rewards, episode_lengths = evaluate_policy(
                self.model,
                self.eval_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_success_callback,
            )

            if self.log_path is not None:
                assert isinstance(episode_rewards, list)
                assert isinstance(episode_lengths, list)
                self.evaluations_timesteps.append(self.num_timesteps)
                self.evaluations_results.append(episode_rewards)
                self.evaluations_length.append(episode_lengths)

                kwargs = {}
                # Save success log if present
                if len(self._is_success_buffer) > 0:
                    self.evaluations_successes.append(self._is_success_buffer)
                    kwargs = dict(successes=self.evaluations_successes)

                # PZH: Save evaluations_info_buffer to the log file
                for k, v in self.evaluations_info_buffer.items():
                    kwargs[k] = v

                np.savez(
                    self.log_path,
                    timesteps=self.evaluations_timesteps,
                    results=self.evaluations_results,
                    ep_lengths=self.evaluations_length,
                    **kwargs,  # type: ignore[arg-type]
                )

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = float(mean_reward)

            if self.verbose >= 1:
                print(
                    f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
            self.logger.record("eval/mean_reward", float(mean_reward))
            self.logger.record("eval/mean_ep_length", mean_ep_length)

            # PZH: Add this metric.
            self.logger.record("eval/num_episodes", len(episode_rewards))

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
                if self.verbose >= 1:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

            # PZH: We record evaluations_info_buffer to the logger
            for k, v in self.evaluations_info_buffer.items():
                self.logger.record("eval/{}".format(k), np.mean(np.asarray(v)))

            # Dump log so the evaluation results are printed with the correct timestep
            self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
                if self.verbose >= 1:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
                self.best_mean_reward = float(mean_reward)
                # Trigger callback on new best model, if needed
                if self.callback_on_new_best is not None:
                    continue_training = self.callback_on_new_best.on_step()

            # Trigger callback after every evaluation, if needed
            if self.callback is not None:
                continue_training = continue_training and self._on_event()

        return continue_training


## Setup PPO trainer

In [7]:

# ===== Set up some arguments =====
exp_name = "ppo_metadrive"
use_wandb = True

experiment_batch_name = "{}".format(exp_name)
trial_name = "{}_{}_{}".format(experiment_batch_name, get_time_str(), uuid.uuid4().hex[:8])
experiment_dir = Path("runs") / experiment_batch_name
trial_dir = experiment_dir / trial_name
os.makedirs(experiment_dir, exist_ok=True)
os.makedirs(trial_dir, exist_ok=True)
print(f"We start logging training data into {trial_dir}")


We start logging training data into runs\ppo_metadrive\ppo_metadrive_2025-03-16_17-43-31_a51009e7


In [8]:
# ===== Setup environment =====
num_train_envs = 10
num_eval_envs = 5
train_env = make_vec_env(remove_reset_seed_and_add_monitor(get_training_env, trial_dir), n_envs=num_train_envs,
                            vec_env_cls=SubprocVecEnv)
eval_env = make_vec_env(remove_reset_seed_and_add_monitor(get_validation_env, trial_dir), n_envs=num_eval_envs,
                        vec_env_cls=SubprocVecEnv)

In [9]:
# ===== Setup evaluation, checkpointing, and wandb =====
save_freq = 10_000  # Number of steps per model checkpoint
eval_freq = 10_000  # Number of steps per evaluation

wandb_save_freq = 10_000  # Number of steps per evaluation

num_eval_episodes = 5

checkpoint_callback = CheckpointCallback(
    name_prefix="rl_model",
    verbose=2,
    save_freq=save_freq,
    save_path=str(trial_dir / "models")
)
eval_callback = CustomizedEvalCallback(
    eval_env,
    best_model_save_path=str(trial_dir / "eval"),
    log_path=str(trial_dir / "eval"),
    eval_freq=max(eval_freq // num_train_envs, 1),
    n_eval_episodes=num_eval_episodes,
)
callbacks = [checkpoint_callback, eval_callback]
if use_wandb:
    wandb.init(
        project="cs260r",
        id=trial_name,
        name=experiment_batch_name,
        sync_tensorboard=True,
        dir=str(trial_dir),
    )
    callbacks.append(WandbCallback(model_save_path=str(trial_dir / "wandb_models"), model_save_freq=wandb_save_freq))
callbacks = CallbackList(callbacks)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: coltonrowe (coltonrowe-ucla) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [10]:

# ===== Setup the training algorithm =====
model = PPO(
    env=train_env,
    policy=ActorCriticPolicy,
    n_steps=500,  # n_steps * n_envs = total_batch_size
    n_epochs=20,
    learning_rate=5e-5,
    batch_size=256,
    clip_range=0.1,
    vf_coef=0.5,
    ent_coef=0.0,
    max_grad_norm=10.0,
    tensorboard_log=str(trial_dir),
    verbose=2,
    device="auto",
)


Using cpu device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=500 and n_envs=10)


In [11]:
ckpt = None
if ckpt:
    ckpt = Path(ckpt)
    print(f"Loading checkpoint from {ckpt}!")
    from stable_baselines3.common.save_util import load_from_zip_file
    data, params, pytorch_variables = load_from_zip_file(ckpt, device=model.device, print_system_info=False)
    model.set_parameters(params, exact_match=True, device=model.device)


In [12]:
# ===== Launch training =====
total_timesteps = 1_000_000  # 1M steps
model.learn(
    total_timesteps=total_timesteps,
    callback=callbacks,
    reset_num_timesteps=True,
    tb_log_name=experiment_batch_name,
    log_interval=1,
    progress_bar=True,
)

Logging to runs\ppo_metadrive\ppo_metadrive_2025-03-16_17-43-31_a51009e7\ppo_metadrive_1


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 92       |
|    ep_rew_mean     | -2.64    |
| time/              |          |
|    fps             | 1439     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 5000     |
---------------------------------


------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0            |
|    max_step             | 0            |
|    mean_ep_length       | 206          |
|    mean_reward          | 47.9         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.033996757  |
|    route_completion     | 0.17         |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 10000        |
| train/                  |              |
|    approx_kl            | 0.0026634517 |
|    clip_fraction        | 0.175        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.84        |
|    explained_variance   | -0.0353      |
|    learning_rate        | 5e-05        |
|    loss                 | 0.00569      |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 348      |
|    ep_rew_mean     | -1.29    |
| time/              |          |
|    fps             | 932      |
|    iterations      | 2        |
|    time_elapsed    | 10       |
|    total_timesteps | 10000    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 984         |
|    ep_rew_mean          | 8.19        |
| time/                   |             |
|    fps                  | 982         |
|    iterations           | 3           |
|    time_elapsed         | 15          |
|    total_timesteps      | 15000       |
| train/                  |             |
|    approx_kl            | 0.003381066 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.83       |
|    explained_variance   | 0.0133      |
|    learning_rate        | 5e

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.1          |
|    max_step             | 0            |
|    mean_ep_length       | 105          |
|    mean_reward          | 34.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.058816493  |
|    route_completion     | 0.158        |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 20000        |
| train/                  |              |
|    approx_kl            | 0.0021977765 |
|    clip_fraction        | 0.128        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.82        |
|    explained_variance   | -0.0191      |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0236       |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.0667       |
|    max_step             | 0            |
|    mean_ep_length       | 71.4         |
|    mean_reward          | 25.8         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.081829436  |
|    route_completion     | 0.134        |
|    success_rate         | 0            |
|    total_cost           | 1            |
| time/                   |              |
|    total_timesteps      | 30000        |
| train/                  |              |
|    approx_kl            | 0.0023435077 |
|    clip_fraction        | 0.145        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.8         |
|    explained_variance   | 0.0371       |
|    learning_rate        | 5e-05        |
|    loss                 | 0.129        |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.05         |
|    max_step             | 0            |
|    mean_ep_length       | 132          |
|    mean_reward          | 86.8         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.12483836   |
|    route_completion     | 0.174        |
|    success_rate         | 0            |
|    total_cost           | 3.1          |
| time/                   |              |
|    total_timesteps      | 40000        |
| train/                  |              |
|    approx_kl            | 0.0020069717 |
|    clip_fraction        | 0.0723       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.78        |
|    explained_variance   | 0.0279       |
|    learning_rate        | 5e-05        |
|    loss                 | 0.31         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.17e+03 |
|    ep_rew_mean     | 19.8     |
| time/              |          |
|    fps             | 818      |
|    iterations      | 8        |
|    time_elapsed    | 48       |
|    total_timesteps | 40000    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 977          |
|    ep_rew_mean          | 18.5         |
| time/                   |              |
|    fps                  | 805          |
|    iterations           | 9            |
|    time_elapsed         | 55           |
|    total_timesteps      | 45000        |
| train/                  |              |
|    approx_kl            | 0.0030468146 |
|    clip_fraction        | 0.152        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.76        |
|    explained_variance   | -0.0223      |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.04         |
|    max_step             | 0            |
|    mean_ep_length       | 74.4         |
|    mean_reward          | 48           |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.14997362   |
|    route_completion     | 0.166        |
|    success_rate         | 0            |
|    total_cost           | 2.68         |
| time/                   |              |
|    total_timesteps      | 50000        |
| train/                  |              |
|    approx_kl            | 0.0020844506 |
|    clip_fraction        | 0.0957       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.75        |
|    explained_variance   | 0.023        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.549        |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.133        |
|    max_step             | 0            |
|    mean_ep_length       | 74.6         |
|    mean_reward          | 58.2         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.1761587    |
|    route_completion     | 0.172        |
|    success_rate         | 0            |
|    total_cost           | 2.5          |
| time/                   |              |
|    total_timesteps      | 60000        |
| train/                  |              |
|    approx_kl            | 0.0021749625 |
|    clip_fraction        | 0.126        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.73        |
|    explained_variance   | 0.00674      |
|    learning_rate        | 5e-05        |
|    loss                 | 0.3          |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.114        |
|    max_step             | 0            |
|    mean_ep_length       | 65           |
|    mean_reward          | 50.5         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.19784625   |
|    route_completion     | 0.17         |
|    success_rate         | 0            |
|    total_cost           | 2.29         |
| time/                   |              |
|    total_timesteps      | 70000        |
| train/                  |              |
|    approx_kl            | 0.0022834758 |
|    clip_fraction        | 0.118        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.7         |
|    explained_variance   | 0.0183       |
|    learning_rate        | 5e-05        |
|    loss                 | 0.846        |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.15         |
|    max_step             | 0            |
|    mean_ep_length       | 46           |
|    mean_reward          | 24.4         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.21283023   |
|    route_completion     | 0.162        |
|    success_rate         | 0            |
|    total_cost           | 2.12         |
| time/                   |              |
|    total_timesteps      | 80000        |
| train/                  |              |
|    approx_kl            | 0.0013771876 |
|    clip_fraction        | 0.0725       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.68        |
|    explained_variance   | 0.00227      |
|    learning_rate        | 5e-05        |
|    loss                 | 1.52         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.156        |
|    max_step             | 0            |
|    mean_ep_length       | 33.8         |
|    mean_reward          | 11.6         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.22374982   |
|    route_completion     | 0.15         |
|    success_rate         | 0            |
|    total_cost           | 2            |
| time/                   |              |
|    total_timesteps      | 90000        |
| train/                  |              |
|    approx_kl            | 0.0017809641 |
|    clip_fraction        | 0.0667       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.66        |
|    explained_variance   | -0.00222     |
|    learning_rate        | 5e-05        |
|    loss                 | 2.52         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.18         |
|    max_step             | 0            |
|    mean_ep_length       | 58.4         |
|    mean_reward          | 45.1         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.2416878    |
|    route_completion     | 0.154        |
|    success_rate         | 0            |
|    total_cost           | 1.9          |
| time/                   |              |
|    total_timesteps      | 100000       |
| train/                  |              |
|    approx_kl            | 0.0015193938 |
|    clip_fraction        | 0.056        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.63        |
|    explained_variance   | 0.0766       |
|    learning_rate        | 5e-05        |
|    loss                 | 2.37         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.164        |
|    max_step             | 0            |
|    mean_ep_length       | 59.8         |
|    mean_reward          | 47.7         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.25702918   |
|    route_completion     | 0.153        |
|    success_rate         | 0            |
|    total_cost           | 1.82         |
| time/                   |              |
|    total_timesteps      | 110000       |
| train/                  |              |
|    approx_kl            | 0.0012741673 |
|    clip_fraction        | 0.0459       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.61        |
|    explained_variance   | 0.0368       |
|    learning_rate        | 5e-05        |
|    loss                 | 4            |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.183        |
|    max_step             | 0            |
|    mean_ep_length       | 101          |
|    mean_reward          | 95.8         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.2792714    |
|    route_completion     | 0.17         |
|    success_rate         | 0            |
|    total_cost           | 2.4          |
| time/                   |              |
|    total_timesteps      | 120000       |
| train/                  |              |
|    approx_kl            | 0.0015684167 |
|    clip_fraction        | 0.065        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.6         |
|    explained_variance   | 0.304        |
|    learning_rate        | 5e-05        |
|    loss                 | 2.71         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 211      |
|    ep_rew_mean     | 25.8     |
| time/              |          |
|    fps             | 651      |
|    iterations      | 24       |
|    time_elapsed    | 184      |
|    total_timesteps | 120000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 215          |
|    ep_rew_mean          | 29.9         |
| time/                   |              |
|    fps                  | 644          |
|    iterations           | 25           |
|    time_elapsed         | 193          |
|    total_timesteps      | 125000       |
| train/                  |              |
|    approx_kl            | 0.0017037019 |
|    clip_fraction        | 0.085        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.58        |
|    explained_variance   | 0.194        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0            |
|    crash                | 0.185        |
|    max_step             | 0            |
|    mean_ep_length       | 95           |
|    mean_reward          | 96.7         |
|    num_episodes         | 5            |
|    out_of_road          | 1            |
|    raw_action           | 0.29696774   |
|    route_completion     | 0.179        |
|    success_rate         | 0            |
|    total_cost           | 2.55         |
| time/                   |              |
|    total_timesteps      | 130000       |
| train/                  |              |
|    approx_kl            | 0.0016538494 |
|    clip_fraction        | 0.0464       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.57        |
|    explained_variance   | 0.0143       |
|    learning_rate        | 5e-05        |
|    loss                 | 5.23         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 219      |
|    ep_rew_mean     | 33.8     |
| time/              |          |
|    fps             | 646      |
|    iterations      | 26       |
|    time_elapsed    | 201      |
|    total_timesteps | 130000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 229          |
|    ep_rew_mean          | 38.2         |
| time/                   |              |
|    fps                  | 649          |
|    iterations           | 27           |
|    time_elapsed         | 207          |
|    total_timesteps      | 135000       |
| train/                  |              |
|    approx_kl            | 0.0016003707 |
|    clip_fraction        | 0.0497       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.56        |
|    explained_variance   | 0.0778       |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0143       |
|    crash                | 0.186        |
|    max_step             | 0            |
|    mean_ep_length       | 144          |
|    mean_reward          | 104          |
|    num_episodes         | 5            |
|    out_of_road          | 0.986        |
|    raw_action           | 0.32120183   |
|    route_completion     | 0.203        |
|    success_rate         | 0.2          |
|    total_cost           | 4.66         |
| time/                   |              |
|    total_timesteps      | 140000       |
| train/                  |              |
|    approx_kl            | 0.0015746423 |
|    clip_fraction        | 0.0775       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.54        |
|    explained_variance   | 0.172        |
|    learning_rate        | 5e-05        |
|    loss                 | 4.99         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 243      |
|    ep_rew_mean     | 44.7     |
| time/              |          |
|    fps             | 643      |
|    iterations      | 28       |
|    time_elapsed    | 217      |
|    total_timesteps | 140000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 262          |
|    ep_rew_mean          | 51.1         |
| time/                   |              |
|    fps                  | 648          |
|    iterations           | 29           |
|    time_elapsed         | 223          |
|    total_timesteps      | 145000       |
| train/                  |              |
|    approx_kl            | 0.0012389034 |
|    clip_fraction        | 0.0565       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.53        |
|    explained_variance   | 0.363        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0133       |
|    crash                | 0.227        |
|    max_step             | 0            |
|    mean_ep_length       | 109          |
|    mean_reward          | 118          |
|    num_episodes         | 5            |
|    out_of_road          | 0.987        |
|    raw_action           | 0.33585605   |
|    route_completion     | 0.216        |
|    success_rate         | 0            |
|    total_cost           | 4.77         |
| time/                   |              |
|    total_timesteps      | 150000       |
| train/                  |              |
|    approx_kl            | 0.0013855045 |
|    clip_fraction        | 0.0767       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.52        |
|    explained_variance   | 0.211        |
|    learning_rate        | 5e-05        |
|    loss                 | 4.8          |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 280      |
|    ep_rew_mean     | 58.6     |
| time/              |          |
|    fps             | 649      |
|    iterations      | 30       |
|    time_elapsed    | 230      |
|    total_timesteps | 150000   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 309         |
|    ep_rew_mean          | 67.7        |
| time/                   |             |
|    fps                  | 655         |
|    iterations           | 31          |
|    time_elapsed         | 236         |
|    total_timesteps      | 155000      |
| train/                  |             |
|    approx_kl            | 0.002576915 |
|    clip_fraction        | 0.0915      |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.51       |
|    explained_variance   | 0.203       |
|    learning_rate        | 5e

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0125       |
|    crash                | 0.225        |
|    max_step             | 0            |
|    mean_ep_length       | 74.6         |
|    mean_reward          | 65.6         |
|    num_episodes         | 5            |
|    out_of_road          | 0.988        |
|    raw_action           | 0.3450317    |
|    route_completion     | 0.219        |
|    success_rate         | 0            |
|    total_cost           | 4.65         |
| time/                   |              |
|    total_timesteps      | 160000       |
| train/                  |              |
|    approx_kl            | 0.0019665752 |
|    clip_fraction        | 0.0845       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.5         |
|    explained_variance   | 0.522        |
|    learning_rate        | 5e-05        |
|    loss                 | 4.33         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0118      |
|    crash                | 0.235       |
|    max_step             | 0           |
|    mean_ep_length       | 132         |
|    mean_reward          | 141         |
|    num_episodes         | 5           |
|    out_of_road          | 0.988       |
|    raw_action           | 0.35849148  |
|    route_completion     | 0.233       |
|    success_rate         | 0           |
|    total_cost           | 5.02        |
| time/                   |             |
|    total_timesteps      | 170000      |
| train/                  |             |
|    approx_kl            | 0.001842917 |
|    clip_fraction        | 0.0652      |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.48       |
|    explained_variance   | 0.803       |
|    learning_rate        | 5e-05       |
|    loss                 | 4.09        |
|    n_updates            | 660   

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 367      |
|    ep_rew_mean     | 95       |
| time/              |          |
|    fps             | 659      |
|    iterations      | 34       |
|    time_elapsed    | 257      |
|    total_timesteps | 170000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 364          |
|    ep_rew_mean          | 97.3         |
| time/                   |              |
|    fps                  | 663          |
|    iterations           | 35           |
|    time_elapsed         | 263          |
|    total_timesteps      | 175000       |
| train/                  |              |
|    approx_kl            | 0.0011107788 |
|    clip_fraction        | 0.0614       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.47        |
|    explained_variance   | 0.454        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0111       |
|    crash                | 0.244        |
|    max_step             | 0            |
|    mean_ep_length       | 107          |
|    mean_reward          | 110          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.366094     |
|    route_completion     | 0.243        |
|    success_rate         | 0            |
|    total_cost           | 5.24         |
| time/                   |              |
|    total_timesteps      | 180000       |
| train/                  |              |
|    approx_kl            | 0.0015239009 |
|    clip_fraction        | 0.0892       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.45        |
|    explained_variance   | 0.324        |
|    learning_rate        | 5e-05        |
|    loss                 | 9.01         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0105       |
|    crash                | 0.232        |
|    max_step             | 0            |
|    mean_ep_length       | 118          |
|    mean_reward          | 149          |
|    num_episodes         | 5            |
|    out_of_road          | 0.989        |
|    raw_action           | 0.37313223   |
|    route_completion     | 0.251        |
|    success_rate         | 0            |
|    total_cost           | 5.08         |
| time/                   |              |
|    total_timesteps      | 190000       |
| train/                  |              |
|    approx_kl            | 0.0012522198 |
|    clip_fraction        | 0.0837       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.43        |
|    explained_variance   | 0.447        |
|    learning_rate        | 5e-05        |
|    loss                 | 9.37         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 397      |
|    ep_rew_mean     | 123      |
| time/              |          |
|    fps             | 667      |
|    iterations      | 38       |
|    time_elapsed    | 284      |
|    total_timesteps | 190000   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 405         |
|    ep_rew_mean          | 133         |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 39          |
|    time_elapsed         | 290         |
|    total_timesteps      | 195000      |
| train/                  |             |
|    approx_kl            | 0.001366463 |
|    clip_fraction        | 0.0725      |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.42       |
|    explained_variance   | 0.419       |
|    learning_rate        | 5e

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.01         |
|    crash                | 0.23         |
|    max_step             | 0            |
|    mean_ep_length       | 123          |
|    mean_reward          | 152          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.38044482   |
|    route_completion     | 0.263        |
|    success_rate         | 0            |
|    total_cost           | 5.14         |
| time/                   |              |
|    total_timesteps      | 200000       |
| train/                  |              |
|    approx_kl            | 0.0015084939 |
|    clip_fraction        | 0.107        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.41        |
|    explained_variance   | 0.178        |
|    learning_rate        | 5e-05        |
|    loss                 | 10.7         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 415      |
|    ep_rew_mean     | 140      |
| time/              |          |
|    fps             | 672      |
|    iterations      | 40       |
|    time_elapsed    | 297      |
|    total_timesteps | 200000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 420          |
|    ep_rew_mean          | 149          |
| time/                   |              |
|    fps                  | 674          |
|    iterations           | 41           |
|    time_elapsed         | 304          |
|    total_timesteps      | 205000       |
| train/                  |              |
|    approx_kl            | 0.0017640574 |
|    clip_fraction        | 0.0866       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.39        |
|    explained_variance   | 0.218        |
|    learning_r

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00952      |
|    crash                | 0.229        |
|    max_step             | 0            |
|    mean_ep_length       | 109          |
|    mean_reward          | 119          |
|    num_episodes         | 5            |
|    out_of_road          | 0.99         |
|    raw_action           | 0.38426098   |
|    route_completion     | 0.267        |
|    success_rate         | 0            |
|    total_cost           | 5.24         |
| time/                   |              |
|    total_timesteps      | 210000       |
| train/                  |              |
|    approx_kl            | 0.0018637588 |
|    clip_fraction        | 0.0645       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.38        |
|    explained_variance   | 0.119        |
|    learning_rate        | 5e-05        |
|    loss                 | 5.27         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00909      |
|    crash                | 0.227        |
|    max_step             | 0            |
|    mean_ep_length       | 105          |
|    mean_reward          | 130          |
|    num_episodes         | 5            |
|    out_of_road          | 0.991        |
|    raw_action           | 0.38908494   |
|    route_completion     | 0.271        |
|    success_rate         | 0            |
|    total_cost           | 5.11         |
| time/                   |              |
|    total_timesteps      | 220000       |
| train/                  |              |
|    approx_kl            | 0.0016062362 |
|    clip_fraction        | 0.0607       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.36        |
|    explained_variance   | 0.209        |
|    learning_rate        | 5e-05        |
|    loss                 | 16.2         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0087      |
|    crash                | 0.217       |
|    max_step             | 0           |
|    mean_ep_length       | 89          |
|    mean_reward          | 97          |
|    num_episodes         | 5           |
|    out_of_road          | 0.991       |
|    raw_action           | 0.392813    |
|    route_completion     | 0.273       |
|    success_rate         | 0           |
|    total_cost           | 4.97        |
| time/                   |             |
|    total_timesteps      | 230000      |
| train/                  |             |
|    approx_kl            | 0.001475627 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.34       |
|    explained_variance   | 0.0944      |
|    learning_rate        | 5e-05       |
|    loss                 | 8.41        |
|    n_updates            | 900   

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.00833      |
|    crash                | 0.208        |
|    max_step             | 0            |
|    mean_ep_length       | 125          |
|    mean_reward          | 146          |
|    num_episodes         | 5            |
|    out_of_road          | 0.992        |
|    raw_action           | 0.3944366    |
|    route_completion     | 0.277        |
|    success_rate         | 0            |
|    total_cost           | 4.96         |
| time/                   |              |
|    total_timesteps      | 240000       |
| train/                  |              |
|    approx_kl            | 0.0015160354 |
|    clip_fraction        | 0.134        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.0755       |
|    learning_rate        | 5e-05        |
|    loss                 | 18           |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.016        |
|    crash                | 0.216        |
|    max_step             | 0            |
|    mean_ep_length       | 145          |
|    mean_reward          | 202          |
|    num_episodes         | 5            |
|    out_of_road          | 0.984        |
|    raw_action           | 0.39979178   |
|    route_completion     | 0.288        |
|    success_rate         | 0.2          |
|    total_cost           | 4.91         |
| time/                   |              |
|    total_timesteps      | 250000       |
| train/                  |              |
|    approx_kl            | 0.0007106776 |
|    clip_fraction        | 0.0986       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.31        |
|    explained_variance   | 0.215        |
|    learning_rate        | 5e-05        |
|    loss                 | 20.5         |
|    n_upda

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 362      |
|    ep_rew_mean     | 178      |
| time/              |          |
|    fps             | 683      |
|    iterations      | 50       |
|    time_elapsed    | 365      |
|    total_timesteps | 250000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 361          |
|    ep_rew_mean          | 184          |
| time/                   |              |
|    fps                  | 685          |
|    iterations           | 51           |
|    time_elapsed         | 371          |
|    total_timesteps      | 255000       |
| train/                  |              |
|    approx_kl            | 0.0019312318 |
|    clip_fraction        | 0.0743       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.3         |
|    explained_variance   | 0.274        |
|    learning_r

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.0154        |
|    crash                | 0.223         |
|    max_step             | 0             |
|    mean_ep_length       | 154           |
|    mean_reward          | 113           |
|    num_episodes         | 5             |
|    out_of_road          | 0.985         |
|    raw_action           | 0.40444338    |
|    route_completion     | 0.294         |
|    success_rate         | 0             |
|    total_cost           | 5.78          |
| time/                   |               |
|    total_timesteps      | 260000        |
| train/                  |               |
|    approx_kl            | 0.00085497985 |
|    clip_fraction        | 0.0698        |
|    clip_range           | 0.1           |
|    entropy_loss         | -2.29         |
|    explained_variance   | 0.276         |
|    learning_rate        | 5e-05         |
|    loss                 | 16.2

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0148       |
|    crash                | 0.23         |
|    max_step             | 0            |
|    mean_ep_length       | 103          |
|    mean_reward          | 127          |
|    num_episodes         | 5            |
|    out_of_road          | 0.985        |
|    raw_action           | 0.4103741    |
|    route_completion     | 0.297        |
|    success_rate         | 0            |
|    total_cost           | 5.61         |
| time/                   |              |
|    total_timesteps      | 270000       |
| train/                  |              |
|    approx_kl            | 0.0015905257 |
|    clip_fraction        | 0.129        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.28        |
|    explained_variance   | 0.125        |
|    learning_rate        | 5e-05        |
|    loss                 | 19.7         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0143       |
|    crash                | 0.221        |
|    max_step             | 0            |
|    mean_ep_length       | 77.2         |
|    mean_reward          | 75.2         |
|    num_episodes         | 5            |
|    out_of_road          | 0.986        |
|    raw_action           | 0.41255745   |
|    route_completion     | 0.295        |
|    success_rate         | 0            |
|    total_cost           | 5.44         |
| time/                   |              |
|    total_timesteps      | 280000       |
| train/                  |              |
|    approx_kl            | 0.0044126883 |
|    clip_fraction        | 0.111        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.27        |
|    explained_variance   | 0.112        |
|    learning_rate        | 5e-05        |
|    loss                 | 17.9         |
|    n_upda

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.0138     |
|    crash                | 0.214      |
|    max_step             | 0          |
|    mean_ep_length       | 98.4       |
|    mean_reward          | 90.7       |
|    num_episodes         | 5          |
|    out_of_road          | 0.986      |
|    raw_action           | 0.41385093 |
|    route_completion     | 0.296      |
|    success_rate         | 0          |
|    total_cost           | 5.5        |
| time/                   |            |
|    total_timesteps      | 290000     |
| train/                  |            |
|    approx_kl            | 0.00292307 |
|    clip_fraction        | 0.109      |
|    clip_range           | 0.1        |
|    entropy_loss         | -2.26      |
|    explained_variance   | 0.149      |
|    learning_rate        | 5e-05      |
|    loss                 | 20.3       |
|    n_updates            | 1140       |
|    policy_grad

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.02        |
|    crash                | 0.207       |
|    max_step             | 0           |
|    mean_ep_length       | 162         |
|    mean_reward          | 162         |
|    num_episodes         | 5           |
|    out_of_road          | 0.98        |
|    raw_action           | 0.41905218  |
|    route_completion     | 0.307       |
|    success_rate         | 0.2         |
|    total_cost           | 5.89        |
| time/                   |             |
|    total_timesteps      | 300000      |
| train/                  |             |
|    approx_kl            | 0.003908264 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.25       |
|    explained_variance   | 0.592       |
|    learning_rate        | 5e-05       |
|    loss                 | 12.5        |
|    n_updates            | 1180  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0258       |
|    crash                | 0.206        |
|    max_step             | 0            |
|    mean_ep_length       | 120          |
|    mean_reward          | 141          |
|    num_episodes         | 5            |
|    out_of_road          | 0.974        |
|    raw_action           | 0.42381582   |
|    route_completion     | 0.312        |
|    success_rate         | 0.2          |
|    total_cost           | 5.88         |
| time/                   |              |
|    total_timesteps      | 310000       |
| train/                  |              |
|    approx_kl            | 0.0011302407 |
|    clip_fraction        | 0.0531       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.24        |
|    explained_variance   | 0.315        |
|    learning_rate        | 5e-05        |
|    loss                 | 19.5         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.025        |
|    crash                | 0.206        |
|    max_step             | 0            |
|    mean_ep_length       | 167          |
|    mean_reward          | 128          |
|    num_episodes         | 5            |
|    out_of_road          | 0.975        |
|    raw_action           | 0.4296858    |
|    route_completion     | 0.317        |
|    success_rate         | 0            |
|    total_cost           | 6.39         |
| time/                   |              |
|    total_timesteps      | 320000       |
| train/                  |              |
|    approx_kl            | 0.0027951784 |
|    clip_fraction        | 0.0915       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.24        |
|    explained_variance   | 0.203        |
|    learning_rate        | 5e-05        |
|    loss                 | 26.7         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0303      |
|    crash                | 0.212       |
|    max_step             | 0           |
|    mean_ep_length       | 163         |
|    mean_reward          | 194         |
|    num_episodes         | 5           |
|    out_of_road          | 0.97        |
|    raw_action           | 0.43397808  |
|    route_completion     | 0.327       |
|    success_rate         | 0.2         |
|    total_cost           | 6.67        |
| time/                   |             |
|    total_timesteps      | 330000      |
| train/                  |             |
|    approx_kl            | 0.016417982 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.22       |
|    explained_variance   | 0.359       |
|    learning_rate        | 5e-05       |
|    loss                 | 18.5        |
|    n_updates            | 1300  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0353       |
|    crash                | 0.212        |
|    max_step             | 0            |
|    mean_ep_length       | 148          |
|    mean_reward          | 95.2         |
|    num_episodes         | 5            |
|    out_of_road          | 0.965        |
|    raw_action           | 0.4357877    |
|    route_completion     | 0.332        |
|    success_rate         | 0.2          |
|    total_cost           | 7.39         |
| time/                   |              |
|    total_timesteps      | 340000       |
| train/                  |              |
|    approx_kl            | 0.0034473422 |
|    clip_fraction        | 0.0822       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.2         |
|    explained_variance   | 0.279        |
|    learning_rate        | 5e-05        |
|    loss                 | 30.6         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.04         |
|    crash                | 0.223        |
|    max_step             | 0            |
|    mean_ep_length       | 161          |
|    mean_reward          | 122          |
|    num_episodes         | 5            |
|    out_of_road          | 0.96         |
|    raw_action           | 0.4385816    |
|    route_completion     | 0.337        |
|    success_rate         | 0.2          |
|    total_cost           | 8.01         |
| time/                   |              |
|    total_timesteps      | 350000       |
| train/                  |              |
|    approx_kl            | 0.0012599599 |
|    clip_fraction        | 0.0985       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.18        |
|    explained_variance   | 0.312        |
|    learning_rate        | 5e-05        |
|    loss                 | 29.4         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0389       |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 102          |
|    mean_reward          | 102          |
|    num_episodes         | 5            |
|    out_of_road          | 0.961        |
|    raw_action           | 0.43845674   |
|    route_completion     | 0.336        |
|    success_rate         | 0            |
|    total_cost           | 7.84         |
| time/                   |              |
|    total_timesteps      | 360000       |
| train/                  |              |
|    approx_kl            | 0.0028784208 |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.17        |
|    explained_variance   | 0.25         |
|    learning_rate        | 5e-05        |
|    loss                 | 20.8         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0378       |
|    crash                | 0.232        |
|    max_step             | 0            |
|    mean_ep_length       | 124          |
|    mean_reward          | 135          |
|    num_episodes         | 5            |
|    out_of_road          | 0.962        |
|    raw_action           | 0.43909484   |
|    route_completion     | 0.338        |
|    success_rate         | 0            |
|    total_cost           | 7.71         |
| time/                   |              |
|    total_timesteps      | 370000       |
| train/                  |              |
|    approx_kl            | 0.0047182753 |
|    clip_fraction        | 0.148        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.15        |
|    explained_variance   | 0.308        |
|    learning_rate        | 5e-05        |
|    loss                 | 19.9         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0368      |
|    crash                | 0.237       |
|    max_step             | 0           |
|    mean_ep_length       | 114         |
|    mean_reward          | 109         |
|    num_episodes         | 5           |
|    out_of_road          | 0.963       |
|    raw_action           | 0.44021443  |
|    route_completion     | 0.339       |
|    success_rate         | 0           |
|    total_cost           | 7.74        |
| time/                   |             |
|    total_timesteps      | 380000      |
| train/                  |             |
|    approx_kl            | 0.002455233 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.14       |
|    explained_variance   | 0.33        |
|    learning_rate        | 5e-05       |
|    loss                 | 16.7        |
|    n_updates            | 1500  

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0359      |
|    crash                | 0.236       |
|    max_step             | 0           |
|    mean_ep_length       | 168         |
|    mean_reward          | 230         |
|    num_episodes         | 5           |
|    out_of_road          | 0.964       |
|    raw_action           | 0.4423072   |
|    route_completion     | 0.346       |
|    success_rate         | 0           |
|    total_cost           | 7.59        |
| time/                   |             |
|    total_timesteps      | 390000      |
| train/                  |             |
|    approx_kl            | 0.003034351 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.309       |
|    learning_rate        | 5e-05       |
|    loss                 | 17.2        |
|    n_updates            | 1540  

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 360      |
|    ep_rew_mean     | 247      |
| time/              |          |
|    fps             | 704      |
|    iterations      | 78       |
|    time_elapsed    | 553      |
|    total_timesteps | 390000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 366          |
|    ep_rew_mean          | 254          |
| time/                   |              |
|    fps                  | 707          |
|    iterations           | 79           |
|    time_elapsed         | 558          |
|    total_timesteps      | 395000       |
| train/                  |              |
|    approx_kl            | 0.0011041415 |
|    clip_fraction        | 0.0959       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.12        |
|    explained_variance   | 0.405        |
|    learning_r

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.045       |
|    crash                | 0.235       |
|    max_step             | 0           |
|    mean_ep_length       | 186         |
|    mean_reward          | 211         |
|    num_episodes         | 5           |
|    out_of_road          | 0.955       |
|    raw_action           | 0.44515857  |
|    route_completion     | 0.353       |
|    success_rate         | 0.4         |
|    total_cost           | 7.72        |
| time/                   |             |
|    total_timesteps      | 400000      |
| train/                  |             |
|    approx_kl            | 0.002093027 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.11       |
|    explained_variance   | 0.326       |
|    learning_rate        | 5e-05       |
|    loss                 | 33.5        |
|    n_updates            | 1580  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0488       |
|    crash                | 0.234        |
|    max_step             | 0            |
|    mean_ep_length       | 155          |
|    mean_reward          | 135          |
|    num_episodes         | 5            |
|    out_of_road          | 0.951        |
|    raw_action           | 0.4456217    |
|    route_completion     | 0.356        |
|    success_rate         | 0.2          |
|    total_cost           | 8.12         |
| time/                   |              |
|    total_timesteps      | 410000       |
| train/                  |              |
|    approx_kl            | 0.0026373935 |
|    clip_fraction        | 0.172        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.1         |
|    explained_variance   | 0.393        |
|    learning_rate        | 5e-05        |
|    loss                 | 19.7         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0476       |
|    crash                | 0.238        |
|    max_step             | 0            |
|    mean_ep_length       | 114          |
|    mean_reward          | 143          |
|    num_episodes         | 5            |
|    out_of_road          | 0.952        |
|    raw_action           | 0.4469892    |
|    route_completion     | 0.356        |
|    success_rate         | 0            |
|    total_cost           | 7.95         |
| time/                   |              |
|    total_timesteps      | 420000       |
| train/                  |              |
|    approx_kl            | 0.0014550366 |
|    clip_fraction        | 0.0789       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.08        |
|    explained_variance   | 0.631        |
|    learning_rate        | 5e-05        |
|    loss                 | 23           |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0465      |
|    crash                | 0.237       |
|    max_step             | 0           |
|    mean_ep_length       | 119         |
|    mean_reward          | 144         |
|    num_episodes         | 5           |
|    out_of_road          | 0.953       |
|    raw_action           | 0.44605944  |
|    route_completion     | 0.357       |
|    success_rate         | 0           |
|    total_cost           | 7.8         |
| time/                   |             |
|    total_timesteps      | 430000      |
| train/                  |             |
|    approx_kl            | 0.016840424 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.09       |
|    explained_variance   | 0.401       |
|    learning_rate        | 5e-05       |
|    loss                 | 20.8        |
|    n_updates            | 1700  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0455       |
|    crash                | 0.241        |
|    max_step             | 0            |
|    mean_ep_length       | 127          |
|    mean_reward          | 138          |
|    num_episodes         | 5            |
|    out_of_road          | 0.955        |
|    raw_action           | 0.4476871    |
|    route_completion     | 0.36         |
|    success_rate         | 0            |
|    total_cost           | 7.82         |
| time/                   |              |
|    total_timesteps      | 440000       |
| train/                  |              |
|    approx_kl            | 0.0012328071 |
|    clip_fraction        | 0.0863       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.08        |
|    explained_variance   | 0.614        |
|    learning_rate        | 5e-05        |
|    loss                 | 23.1         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0444      |
|    crash                | 0.236       |
|    max_step             | 0           |
|    mean_ep_length       | 77          |
|    mean_reward          | 75.5        |
|    num_episodes         | 5           |
|    out_of_road          | 0.956       |
|    raw_action           | 0.44824728  |
|    route_completion     | 0.357       |
|    success_rate         | 0           |
|    total_cost           | 7.67        |
| time/                   |             |
|    total_timesteps      | 450000      |
| train/                  |             |
|    approx_kl            | 0.014898786 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.07       |
|    explained_variance   | 0.515       |
|    learning_rate        | 5e-05       |
|    loss                 | 25          |
|    n_updates            | 1780  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0435       |
|    crash                | 0.23         |
|    max_step             | 0            |
|    mean_ep_length       | 133          |
|    mean_reward          | 150          |
|    num_episodes         | 5            |
|    out_of_road          | 0.957        |
|    raw_action           | 0.44893825   |
|    route_completion     | 0.36         |
|    success_rate         | 0            |
|    total_cost           | 7.7          |
| time/                   |              |
|    total_timesteps      | 460000       |
| train/                  |              |
|    approx_kl            | 0.0027336474 |
|    clip_fraction        | 0.103        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.05        |
|    explained_variance   | 0.38         |
|    learning_rate        | 5e-05        |
|    loss                 | 50.6         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0426      |
|    crash                | 0.23        |
|    max_step             | 0           |
|    mean_ep_length       | 92.8        |
|    mean_reward          | 99.7        |
|    num_episodes         | 5           |
|    out_of_road          | 0.957       |
|    raw_action           | 0.45052528  |
|    route_completion     | 0.36        |
|    success_rate         | 0           |
|    total_cost           | 7.58        |
| time/                   |             |
|    total_timesteps      | 470000      |
| train/                  |             |
|    approx_kl            | 0.003292445 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.05       |
|    explained_variance   | 0.747       |
|    learning_rate        | 5e-05       |
|    loss                 | 21.1        |
|    n_updates            | 1860  

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.05       |
|    crash                | 0.229      |
|    max_step             | 0          |
|    mean_ep_length       | 144        |
|    mean_reward          | 149        |
|    num_episodes         | 5          |
|    out_of_road          | 0.95       |
|    raw_action           | 0.45271283 |
|    route_completion     | 0.364      |
|    success_rate         | 0.4        |
|    total_cost           | 7.6        |
| time/                   |            |
|    total_timesteps      | 480000     |
| train/                  |            |
|    approx_kl            | 0.01507427 |
|    clip_fraction        | 0.13       |
|    clip_range           | 0.1        |
|    entropy_loss         | -2.04      |
|    explained_variance   | 0.537      |
|    learning_rate        | 5e-05      |
|    loss                 | 27.5       |
|    n_updates            | 1900       |
|    policy_grad

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.0531     |
|    crash                | 0.229      |
|    max_step             | 0          |
|    mean_ep_length       | 171        |
|    mean_reward          | 128        |
|    num_episodes         | 5          |
|    out_of_road          | 0.947      |
|    raw_action           | 0.45416123 |
|    route_completion     | 0.366      |
|    success_rate         | 0.2        |
|    total_cost           | 7.98       |
| time/                   |            |
|    total_timesteps      | 490000     |
| train/                  |            |
|    approx_kl            | 0.00463545 |
|    clip_fraction        | 0.16       |
|    clip_range           | 0.1        |
|    entropy_loss         | -2.03      |
|    explained_variance   | 0.523      |
|    learning_rate        | 5e-05      |
|    loss                 | 28.2       |
|    n_updates            | 1940       |
|    policy_grad

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.056        |
|    crash                | 0.232        |
|    max_step             | 0            |
|    mean_ep_length       | 162          |
|    mean_reward          | 191          |
|    num_episodes         | 5            |
|    out_of_road          | 0.944        |
|    raw_action           | 0.45573094   |
|    route_completion     | 0.372        |
|    success_rate         | 0.2          |
|    total_cost           | 8.09         |
| time/                   |              |
|    total_timesteps      | 500000       |
| train/                  |              |
|    approx_kl            | 0.0019258012 |
|    clip_fraction        | 0.0865       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.02        |
|    explained_variance   | 0.531        |
|    learning_rate        | 5e-05        |
|    loss                 | 30.8         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0588       |
|    crash                | 0.235        |
|    max_step             | 0            |
|    mean_ep_length       | 147          |
|    mean_reward          | 202          |
|    num_episodes         | 5            |
|    out_of_road          | 0.941        |
|    raw_action           | 0.4566249    |
|    route_completion     | 0.376        |
|    success_rate         | 0.2          |
|    total_cost           | 8.01         |
| time/                   |              |
|    total_timesteps      | 510000       |
| train/                  |              |
|    approx_kl            | 0.0022608102 |
|    clip_fraction        | 0.115        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.02        |
|    explained_variance   | 0.497        |
|    learning_rate        | 5e-05        |
|    loss                 | 57.6         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0615       |
|    crash                | 0.242        |
|    max_step             | 0            |
|    mean_ep_length       | 157          |
|    mean_reward          | 153          |
|    num_episodes         | 5            |
|    out_of_road          | 0.938        |
|    raw_action           | 0.45856723   |
|    route_completion     | 0.38         |
|    success_rate         | 0.2          |
|    total_cost           | 8.28         |
| time/                   |              |
|    total_timesteps      | 520000       |
| train/                  |              |
|    approx_kl            | 0.0014395001 |
|    clip_fraction        | 0.134        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2           |
|    explained_variance   | 0.634        |
|    learning_rate        | 5e-05        |
|    loss                 | 36.6         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0604       |
|    crash                | 0.245        |
|    max_step             | 0            |
|    mean_ep_length       | 163          |
|    mean_reward          | 186          |
|    num_episodes         | 5            |
|    out_of_road          | 0.94         |
|    raw_action           | 0.4593429    |
|    route_completion     | 0.384        |
|    success_rate         | 0            |
|    total_cost           | 8.35         |
| time/                   |              |
|    total_timesteps      | 530000       |
| train/                  |              |
|    approx_kl            | 0.0019632378 |
|    clip_fraction        | 0.143        |
|    clip_range           | 0.1          |
|    entropy_loss         | -2           |
|    explained_variance   | 0.507        |
|    learning_rate        | 5e-05        |
|    loss                 | 63.2         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0593       |
|    crash                | 0.244        |
|    max_step             | 0            |
|    mean_ep_length       | 111          |
|    mean_reward          | 121          |
|    num_episodes         | 5            |
|    out_of_road          | 0.941        |
|    raw_action           | 0.4588907    |
|    route_completion     | 0.384        |
|    success_rate         | 0            |
|    total_cost           | 8.32         |
| time/                   |              |
|    total_timesteps      | 540000       |
| train/                  |              |
|    approx_kl            | 0.0011301633 |
|    clip_fraction        | 0.114        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.99        |
|    explained_variance   | 0.576        |
|    learning_rate        | 5e-05        |
|    loss                 | 31.3         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0582      |
|    crash                | 0.244       |
|    max_step             | 0           |
|    mean_ep_length       | 104         |
|    mean_reward          | 103         |
|    num_episodes         | 5           |
|    out_of_road          | 0.942       |
|    raw_action           | 0.45899388  |
|    route_completion     | 0.383       |
|    success_rate         | 0           |
|    total_cost           | 8.25        |
| time/                   |             |
|    total_timesteps      | 550000      |
| train/                  |             |
|    approx_kl            | 0.002420627 |
|    clip_fraction        | 0.0904      |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.99       |
|    explained_variance   | 0.517       |
|    learning_rate        | 5e-05       |
|    loss                 | 28.1        |
|    n_updates            | 2180  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0571       |
|    crash                | 0.239        |
|    max_step             | 0            |
|    mean_ep_length       | 122          |
|    mean_reward          | 125          |
|    num_episodes         | 5            |
|    out_of_road          | 0.943        |
|    raw_action           | 0.45923236   |
|    route_completion     | 0.383        |
|    success_rate         | 0            |
|    total_cost           | 8.27         |
| time/                   |              |
|    total_timesteps      | 560000       |
| train/                  |              |
|    approx_kl            | 0.0015172607 |
|    clip_fraction        | 0.174        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.98        |
|    explained_variance   | 0.744        |
|    learning_rate        | 5e-05        |
|    loss                 | 27.1         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0561       |
|    crash                | 0.239        |
|    max_step             | 0            |
|    mean_ep_length       | 84.8         |
|    mean_reward          | 82.6         |
|    num_episodes         | 5            |
|    out_of_road          | 0.944        |
|    raw_action           | 0.45869      |
|    route_completion     | 0.382        |
|    success_rate         | 0            |
|    total_cost           | 8.17         |
| time/                   |              |
|    total_timesteps      | 570000       |
| train/                  |              |
|    approx_kl            | 0.0043407744 |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.98        |
|    explained_variance   | 0.708        |
|    learning_rate        | 5e-05        |
|    loss                 | 40.2         |
|    n_upda

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.0552        |
|    crash                | 0.234         |
|    max_step             | 0             |
|    mean_ep_length       | 109           |
|    mean_reward          | 130           |
|    num_episodes         | 5             |
|    out_of_road          | 0.945         |
|    raw_action           | 0.4584982     |
|    route_completion     | 0.382         |
|    success_rate         | 0             |
|    total_cost           | 8.07          |
| time/                   |               |
|    total_timesteps      | 580000        |
| train/                  |               |
|    approx_kl            | 0.00094536936 |
|    clip_fraction        | 0.0687        |
|    clip_range           | 0.1           |
|    entropy_loss         | -1.98         |
|    explained_variance   | 0.697         |
|    learning_rate        | 5e-05         |
|    loss                 | 61.5

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0542       |
|    crash                | 0.234        |
|    max_step             | 0            |
|    mean_ep_length       | 194          |
|    mean_reward          | 173          |
|    num_episodes         | 5            |
|    out_of_road          | 0.946        |
|    raw_action           | 0.4591161    |
|    route_completion     | 0.385        |
|    success_rate         | 0            |
|    total_cost           | 8.38         |
| time/                   |              |
|    total_timesteps      | 590000       |
| train/                  |              |
|    approx_kl            | 0.0026405773 |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.96        |
|    explained_variance   | 0.859        |
|    learning_rate        | 5e-05        |
|    loss                 | 28           |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0533       |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 135          |
|    mean_reward          | 175          |
|    num_episodes         | 5            |
|    out_of_road          | 0.947        |
|    raw_action           | 0.45938647   |
|    route_completion     | 0.388        |
|    success_rate         | 0            |
|    total_cost           | 8.3          |
| time/                   |              |
|    total_timesteps      | 600000       |
| train/                  |              |
|    approx_kl            | 0.0026783838 |
|    clip_fraction        | 0.108        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.681        |
|    learning_rate        | 5e-05        |
|    loss                 | 25.9         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0525       |
|    crash                | 0.236        |
|    max_step             | 0            |
|    mean_ep_length       | 149          |
|    mean_reward          | 204          |
|    num_episodes         | 5            |
|    out_of_road          | 0.948        |
|    raw_action           | 0.4600119    |
|    route_completion     | 0.391        |
|    success_rate         | 0            |
|    total_cost           | 8.25         |
| time/                   |              |
|    total_timesteps      | 610000       |
| train/                  |              |
|    approx_kl            | 0.0018975653 |
|    clip_fraction        | 0.0852       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.823        |
|    learning_rate        | 5e-05        |
|    loss                 | 29.4         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0548       |
|    crash                | 0.235        |
|    max_step             | 0            |
|    mean_ep_length       | 132          |
|    mean_reward          | 123          |
|    num_episodes         | 5            |
|    out_of_road          | 0.945        |
|    raw_action           | 0.461105     |
|    route_completion     | 0.39         |
|    success_rate         | 0.2          |
|    total_cost           | 8.39         |
| time/                   |              |
|    total_timesteps      | 620000       |
| train/                  |              |
|    approx_kl            | 0.0015291976 |
|    clip_fraction        | 0.208        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.721        |
|    learning_rate        | 5e-05        |
|    loss                 | 41.5         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0571      |
|    crash                | 0.235       |
|    max_step             | 0           |
|    mean_ep_length       | 176         |
|    mean_reward          | 181         |
|    num_episodes         | 5           |
|    out_of_road          | 0.943       |
|    raw_action           | 0.46072963  |
|    route_completion     | 0.393       |
|    success_rate         | 0.2         |
|    total_cost           | 8.65        |
| time/                   |             |
|    total_timesteps      | 630000      |
| train/                  |             |
|    approx_kl            | 0.008572719 |
|    clip_fraction        | 0.19        |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.93       |
|    explained_variance   | 0.661       |
|    learning_rate        | 5e-05       |
|    loss                 | 47.5        |
|    n_updates            | 2500  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0563       |
|    crash                | 0.241        |
|    max_step             | 0            |
|    mean_ep_length       | 95.4         |
|    mean_reward          | 90.3         |
|    num_episodes         | 5            |
|    out_of_road          | 0.944        |
|    raw_action           | 0.46111807   |
|    route_completion     | 0.393        |
|    success_rate         | 0            |
|    total_cost           | 8.58         |
| time/                   |              |
|    total_timesteps      | 640000       |
| train/                  |              |
|    approx_kl            | 0.0023555637 |
|    clip_fraction        | 0.199        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.754        |
|    learning_rate        | 5e-05        |
|    loss                 | 20.5         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0554       |
|    crash                | 0.24         |
|    max_step             | 0            |
|    mean_ep_length       | 126          |
|    mean_reward          | 162          |
|    num_episodes         | 5            |
|    out_of_road          | 0.945        |
|    raw_action           | 0.46184558   |
|    route_completion     | 0.393        |
|    success_rate         | 0            |
|    total_cost           | 8.49         |
| time/                   |              |
|    total_timesteps      | 650000       |
| train/                  |              |
|    approx_kl            | 0.0015351316 |
|    clip_fraction        | 0.124        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 0.702        |
|    learning_rate        | 5e-05        |
|    loss                 | 35.2         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0545      |
|    crash                | 0.242       |
|    max_step             | 0           |
|    mean_ep_length       | 129         |
|    mean_reward          | 153         |
|    num_episodes         | 5           |
|    out_of_road          | 0.945       |
|    raw_action           | 0.4617454   |
|    route_completion     | 0.395       |
|    success_rate         | 0           |
|    total_cost           | 8.43        |
| time/                   |             |
|    total_timesteps      | 660000      |
| train/                  |             |
|    approx_kl            | 0.001116737 |
|    clip_fraction        | 0.0804      |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.92       |
|    explained_variance   | 0.707       |
|    learning_rate        | 5e-05       |
|    loss                 | 50.2        |
|    n_updates            | 2620  

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0537      |
|    crash                | 0.239       |
|    max_step             | 0           |
|    mean_ep_length       | 88.8        |
|    mean_reward          | 97.3        |
|    num_episodes         | 5           |
|    out_of_road          | 0.946       |
|    raw_action           | 0.46184334  |
|    route_completion     | 0.393       |
|    success_rate         | 0           |
|    total_cost           | 8.32        |
| time/                   |             |
|    total_timesteps      | 670000      |
| train/                  |             |
|    approx_kl            | 0.004387529 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.91       |
|    explained_variance   | 0.714       |
|    learning_rate        | 5e-05       |
|    loss                 | 27.4        |
|    n_updates            | 2660  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0529       |
|    crash                | 0.238        |
|    max_step             | 0            |
|    mean_ep_length       | 111          |
|    mean_reward          | 97.7         |
|    num_episodes         | 5            |
|    out_of_road          | 0.947        |
|    raw_action           | 0.46195564   |
|    route_completion     | 0.393        |
|    success_rate         | 0            |
|    total_cost           | 8.35         |
| time/                   |              |
|    total_timesteps      | 680000       |
| train/                  |              |
|    approx_kl            | 0.0012545893 |
|    clip_fraction        | 0.136        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.9         |
|    explained_variance   | 0.651        |
|    learning_rate        | 5e-05        |
|    loss                 | 56.5         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0522       |
|    crash                | 0.235        |
|    max_step             | 0            |
|    mean_ep_length       | 158          |
|    mean_reward          | 195          |
|    num_episodes         | 5            |
|    out_of_road          | 0.948        |
|    raw_action           | 0.46223566   |
|    route_completion     | 0.395        |
|    success_rate         | 0            |
|    total_cost           | 8.3          |
| time/                   |              |
|    total_timesteps      | 690000       |
| train/                  |              |
|    approx_kl            | 0.0011712838 |
|    clip_fraction        | 0.108        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.9         |
|    explained_variance   | 0.644        |
|    learning_rate        | 5e-05        |
|    loss                 | 40.3         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0514      |
|    crash                | 0.231       |
|    max_step             | 0           |
|    mean_ep_length       | 150         |
|    mean_reward          | 195         |
|    num_episodes         | 5           |
|    out_of_road          | 0.949       |
|    raw_action           | 0.46190226  |
|    route_completion     | 0.397       |
|    success_rate         | 0           |
|    total_cost           | 8.26        |
| time/                   |             |
|    total_timesteps      | 700000      |
| train/                  |             |
|    approx_kl            | 0.009255468 |
|    clip_fraction        | 0.165       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.9        |
|    explained_variance   | 0.741       |
|    learning_rate        | 5e-05       |
|    loss                 | 34.3        |
|    n_updates            | 2780  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0535       |
|    crash                | 0.228        |
|    max_step             | 0            |
|    mean_ep_length       | 140          |
|    mean_reward          | 179          |
|    num_episodes         | 5            |
|    out_of_road          | 0.946        |
|    raw_action           | 0.46233445   |
|    route_completion     | 0.398        |
|    success_rate         | 0.2          |
|    total_cost           | 8.19         |
| time/                   |              |
|    total_timesteps      | 710000       |
| train/                  |              |
|    approx_kl            | 0.0028044293 |
|    clip_fraction        | 0.0817       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.761        |
|    learning_rate        | 5e-05        |
|    loss                 | 24.9         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0556       |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 143          |
|    mean_reward          | 181          |
|    num_episodes         | 5            |
|    out_of_road          | 0.944        |
|    raw_action           | 0.46296272   |
|    route_completion     | 0.4          |
|    success_rate         | 0.2          |
|    total_cost           | 8.17         |
| time/                   |              |
|    total_timesteps      | 720000       |
| train/                  |              |
|    approx_kl            | 0.0017288171 |
|    clip_fraction        | 0.12         |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.646        |
|    learning_rate        | 5e-05        |
|    loss                 | 29.5         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0548       |
|    crash                | 0.23         |
|    max_step             | 0            |
|    mean_ep_length       | 145          |
|    mean_reward          | 170          |
|    num_episodes         | 5            |
|    out_of_road          | 0.945        |
|    raw_action           | 0.46337488   |
|    route_completion     | 0.401        |
|    success_rate         | 0            |
|    total_cost           | 8.16         |
| time/                   |              |
|    total_timesteps      | 730000       |
| train/                  |              |
|    approx_kl            | 0.0019644871 |
|    clip_fraction        | 0.0874       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.88        |
|    explained_variance   | 0.77         |
|    learning_rate        | 5e-05        |
|    loss                 | 30.1         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0541       |
|    crash                | 0.23         |
|    max_step             | 0            |
|    mean_ep_length       | 120          |
|    mean_reward          | 157          |
|    num_episodes         | 5            |
|    out_of_road          | 0.946        |
|    raw_action           | 0.4638999    |
|    route_completion     | 0.402        |
|    success_rate         | 0            |
|    total_cost           | 8.09         |
| time/                   |              |
|    total_timesteps      | 740000       |
| train/                  |              |
|    approx_kl            | 0.0012717682 |
|    clip_fraction        | 0.118        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.86        |
|    explained_variance   | 0.915        |
|    learning_rate        | 5e-05        |
|    loss                 | 48.4         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.056        |
|    crash                | 0.227        |
|    max_step             | 0            |
|    mean_ep_length       | 165          |
|    mean_reward          | 136          |
|    num_episodes         | 5            |
|    out_of_road          | 0.944        |
|    raw_action           | 0.4650784    |
|    route_completion     | 0.404        |
|    success_rate         | 0.2          |
|    total_cost           | 8.34         |
| time/                   |              |
|    total_timesteps      | 750000       |
| train/                  |              |
|    approx_kl            | 0.0069511333 |
|    clip_fraction        | 0.148        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.86        |
|    explained_variance   | 0.894        |
|    learning_rate        | 5e-05        |
|    loss                 | 60.3         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0553      |
|    crash                | 0.229       |
|    max_step             | 0           |
|    mean_ep_length       | 195         |
|    mean_reward          | 255         |
|    num_episodes         | 5           |
|    out_of_road          | 0.945       |
|    raw_action           | 0.46490926  |
|    route_completion     | 0.408       |
|    success_rate         | 0           |
|    total_cost           | 8.42        |
| time/                   |             |
|    total_timesteps      | 760000      |
| train/                  |             |
|    approx_kl            | 0.002554124 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.86       |
|    explained_variance   | 0.864       |
|    learning_rate        | 5e-05       |
|    loss                 | 34.9        |
|    n_updates            | 3020  

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 313      |
|    ep_rew_mean     | 272      |
| time/              |          |
|    fps             | 758      |
|    iterations      | 152      |
|    time_elapsed    | 1001     |
|    total_timesteps | 760000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 315          |
|    ep_rew_mean          | 275          |
| time/                   |              |
|    fps                  | 760          |
|    iterations           | 153          |
|    time_elapsed         | 1006         |
|    total_timesteps      | 765000       |
| train/                  |              |
|    approx_kl            | 0.0046933247 |
|    clip_fraction        | 0.0856       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.87        |
|    explained_variance   | 0.866        |
|    learning_r

----------------------------------------
| eval/                   |            |
|    arrive_dest          | 0.0545     |
|    crash                | 0.226      |
|    max_step             | 0          |
|    mean_ep_length       | 104        |
|    mean_reward          | 124        |
|    num_episodes         | 5          |
|    out_of_road          | 0.945      |
|    raw_action           | 0.46475112 |
|    route_completion     | 0.408      |
|    success_rate         | 0          |
|    total_cost           | 8.34       |
| time/                   |            |
|    total_timesteps      | 770000     |
| train/                  |            |
|    approx_kl            | 0.01288798 |
|    clip_fraction        | 0.154      |
|    clip_range           | 0.1        |
|    entropy_loss         | -1.87      |
|    explained_variance   | 0.826      |
|    learning_rate        | 5e-05      |
|    loss                 | 29.1       |
|    n_updates            | 3060       |
|    policy_grad

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0564      |
|    crash                | 0.226       |
|    max_step             | 0           |
|    mean_ep_length       | 129         |
|    mean_reward          | 124         |
|    num_episodes         | 5           |
|    out_of_road          | 0.944       |
|    raw_action           | 0.46576092  |
|    route_completion     | 0.408       |
|    success_rate         | 0.2         |
|    total_cost           | 8.36        |
| time/                   |             |
|    total_timesteps      | 780000      |
| train/                  |             |
|    approx_kl            | 0.002290646 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.86       |
|    explained_variance   | 0.872       |
|    learning_rate        | 5e-05       |
|    loss                 | 29.9        |
|    n_updates            | 3100  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0557       |
|    crash                | 0.23         |
|    max_step             | 0            |
|    mean_ep_length       | 109          |
|    mean_reward          | 148          |
|    num_episodes         | 5            |
|    out_of_road          | 0.944        |
|    raw_action           | 0.4662986    |
|    route_completion     | 0.409        |
|    success_rate         | 0            |
|    total_cost           | 8.27         |
| time/                   |              |
|    total_timesteps      | 790000       |
| train/                  |              |
|    approx_kl            | 0.0025845172 |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.86        |
|    explained_variance   | 0.887        |
|    learning_rate        | 5e-05        |
|    loss                 | 36.9         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.055        |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 116          |
|    mean_reward          | 103          |
|    num_episodes         | 5            |
|    out_of_road          | 0.945        |
|    raw_action           | 0.46597445   |
|    route_completion     | 0.408        |
|    success_rate         | 0            |
|    total_cost           | 8.36         |
| time/                   |              |
|    total_timesteps      | 800000       |
| train/                  |              |
|    approx_kl            | 0.0017692477 |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.85        |
|    explained_variance   | 0.928        |
|    learning_rate        | 5e-05        |
|    loss                 | 27.1         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0568       |
|    crash                | 0.235        |
|    max_step             | 0            |
|    mean_ep_length       | 158          |
|    mean_reward          | 135          |
|    num_episodes         | 5            |
|    out_of_road          | 0.943        |
|    raw_action           | 0.46650156   |
|    route_completion     | 0.41         |
|    success_rate         | 0.2          |
|    total_cost           | 8.57         |
| time/                   |              |
|    total_timesteps      | 810000       |
| train/                  |              |
|    approx_kl            | 0.0031125757 |
|    clip_fraction        | 0.207        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.84        |
|    explained_variance   | 0.931        |
|    learning_rate        | 5e-05        |
|    loss                 | 27.7         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0634      |
|    crash                | 0.234       |
|    max_step             | 0           |
|    mean_ep_length       | 189         |
|    mean_reward          | 216         |
|    num_episodes         | 5           |
|    out_of_road          | 0.937       |
|    raw_action           | 0.4678248   |
|    route_completion     | 0.414       |
|    success_rate         | 0.6         |
|    total_cost           | 8.66        |
| time/                   |             |
|    total_timesteps      | 820000      |
| train/                  |             |
|    approx_kl            | 0.003750932 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.83       |
|    explained_variance   | 0.865       |
|    learning_rate        | 5e-05       |
|    loss                 | 27.3        |
|    n_updates            | 3260  

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0627      |
|    crash                | 0.231       |
|    max_step             | 0           |
|    mean_ep_length       | 138         |
|    mean_reward          | 173         |
|    num_episodes         | 5           |
|    out_of_road          | 0.937       |
|    raw_action           | 0.4680888   |
|    route_completion     | 0.414       |
|    success_rate         | 0           |
|    total_cost           | 8.6         |
| time/                   |             |
|    total_timesteps      | 830000      |
| train/                  |             |
|    approx_kl            | 0.002583946 |
|    clip_fraction        | 0.156       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.81       |
|    explained_variance   | 0.698       |
|    learning_rate        | 5e-05       |
|    loss                 | 37.6        |
|    n_updates            | 3300  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0619       |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 101          |
|    mean_reward          | 116          |
|    num_episodes         | 5            |
|    out_of_road          | 0.938        |
|    raw_action           | 0.46831536   |
|    route_completion     | 0.413        |
|    success_rate         | 0            |
|    total_cost           | 8.52         |
| time/                   |              |
|    total_timesteps      | 840000       |
| train/                  |              |
|    approx_kl            | 0.0025514937 |
|    clip_fraction        | 0.168        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.81        |
|    explained_variance   | 0.595        |
|    learning_rate        | 5e-05        |
|    loss                 | 54.3         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0612      |
|    crash                | 0.233       |
|    max_step             | 0           |
|    mean_ep_length       | 118         |
|    mean_reward          | 151         |
|    num_episodes         | 5           |
|    out_of_road          | 0.939       |
|    raw_action           | 0.46816647  |
|    route_completion     | 0.414       |
|    success_rate         | 0           |
|    total_cost           | 8.45        |
| time/                   |             |
|    total_timesteps      | 850000      |
| train/                  |             |
|    approx_kl            | 0.002586177 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.8        |
|    explained_variance   | 0.701       |
|    learning_rate        | 5e-05       |
|    loss                 | 53.5        |
|    n_updates            | 3380  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0628       |
|    crash                | 0.235        |
|    max_step             | 0            |
|    mean_ep_length       | 156          |
|    mean_reward          | 207          |
|    num_episodes         | 5            |
|    out_of_road          | 0.937        |
|    raw_action           | 0.46835357   |
|    route_completion     | 0.416        |
|    success_rate         | 0.2          |
|    total_cost           | 8.45         |
| time/                   |              |
|    total_timesteps      | 860000       |
| train/                  |              |
|    approx_kl            | 0.0032259382 |
|    clip_fraction        | 0.117        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.8         |
|    explained_variance   | 0.732        |
|    learning_rate        | 5e-05        |
|    loss                 | 41.2         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0621       |
|    crash                | 0.237        |
|    max_step             | 0            |
|    mean_ep_length       | 97.6         |
|    mean_reward          | 98.8         |
|    num_episodes         | 5            |
|    out_of_road          | 0.938        |
|    raw_action           | 0.46900705   |
|    route_completion     | 0.415        |
|    success_rate         | 0            |
|    total_cost           | 8.38         |
| time/                   |              |
|    total_timesteps      | 870000       |
| train/                  |              |
|    approx_kl            | 0.0048948023 |
|    clip_fraction        | 0.145        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.79        |
|    explained_variance   | 0.656        |
|    learning_rate        | 5e-05        |
|    loss                 | 32.2         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0614      |
|    crash                | 0.239       |
|    max_step             | 0           |
|    mean_ep_length       | 116         |
|    mean_reward          | 152         |
|    num_episodes         | 5           |
|    out_of_road          | 0.939       |
|    raw_action           | 0.46882692  |
|    route_completion     | 0.416       |
|    success_rate         | 0           |
|    total_cost           | 8.31        |
| time/                   |             |
|    total_timesteps      | 880000      |
| train/                  |             |
|    approx_kl            | 0.004197233 |
|    clip_fraction        | 0.164       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.78       |
|    explained_variance   | 0.719       |
|    learning_rate        | 5e-05       |
|    loss                 | 37.7        |
|    n_updates            | 3500  

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0607      |
|    crash                | 0.238       |
|    max_step             | 0           |
|    mean_ep_length       | 94.6        |
|    mean_reward          | 110         |
|    num_episodes         | 5           |
|    out_of_road          | 0.939       |
|    raw_action           | 0.46960637  |
|    route_completion     | 0.416       |
|    success_rate         | 0           |
|    total_cost           | 8.23        |
| time/                   |             |
|    total_timesteps      | 890000      |
| train/                  |             |
|    approx_kl            | 0.008883422 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.765       |
|    learning_rate        | 5e-05       |
|    loss                 | 53.2        |
|    n_updates            | 3540  

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0622      |
|    crash                | 0.236       |
|    max_step             | 0           |
|    mean_ep_length       | 136         |
|    mean_reward          | 146         |
|    num_episodes         | 5           |
|    out_of_road          | 0.938       |
|    raw_action           | 0.470301    |
|    route_completion     | 0.416       |
|    success_rate         | 0.2         |
|    total_cost           | 8.19        |
| time/                   |             |
|    total_timesteps      | 900000      |
| train/                  |             |
|    approx_kl            | 0.007962584 |
|    clip_fraction        | 0.18        |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.673       |
|    learning_rate        | 5e-05       |
|    loss                 | 44          |
|    n_updates            | 3580  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0615       |
|    crash                | 0.233        |
|    max_step             | 0            |
|    mean_ep_length       | 128          |
|    mean_reward          | 116          |
|    num_episodes         | 5            |
|    out_of_road          | 0.938        |
|    raw_action           | 0.47109938   |
|    route_completion     | 0.417        |
|    success_rate         | 0            |
|    total_cost           | 8.29         |
| time/                   |              |
|    total_timesteps      | 910000       |
| train/                  |              |
|    approx_kl            | 0.0077061467 |
|    clip_fraction        | 0.234        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.76        |
|    explained_variance   | 0.596        |
|    learning_rate        | 5e-05        |
|    loss                 | 38.6         |
|    n_upda

-------------------------------------------
| eval/                   |               |
|    arrive_dest          | 0.0609        |
|    crash                | 0.235         |
|    max_step             | 0             |
|    mean_ep_length       | 113           |
|    mean_reward          | 143           |
|    num_episodes         | 5             |
|    out_of_road          | 0.939         |
|    raw_action           | 0.47127813    |
|    route_completion     | 0.417         |
|    success_rate         | 0             |
|    total_cost           | 8.22          |
| time/                   |               |
|    total_timesteps      | 920000        |
| train/                  |               |
|    approx_kl            | 0.00092391326 |
|    clip_fraction        | 0.0949        |
|    clip_range           | 0.1           |
|    entropy_loss         | -1.76         |
|    explained_variance   | 0.665         |
|    learning_rate        | 5e-05         |
|    loss                 | 57  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0624       |
|    crash                | 0.234        |
|    max_step             | 0            |
|    mean_ep_length       | 185          |
|    mean_reward          | 220          |
|    num_episodes         | 5            |
|    out_of_road          | 0.938        |
|    raw_action           | 0.4730851    |
|    route_completion     | 0.421        |
|    success_rate         | 0.2          |
|    total_cost           | 8.24         |
| time/                   |              |
|    total_timesteps      | 930000       |
| train/                  |              |
|    approx_kl            | 0.0017661791 |
|    clip_fraction        | 0.16         |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.75        |
|    explained_variance   | 0.698        |
|    learning_rate        | 5e-05        |
|    loss                 | 43.5         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0617      |
|    crash                | 0.238       |
|    max_step             | 0           |
|    mean_ep_length       | 138         |
|    mean_reward          | 197         |
|    num_episodes         | 5           |
|    out_of_road          | 0.938       |
|    raw_action           | 0.47342178  |
|    route_completion     | 0.422       |
|    success_rate         | 0           |
|    total_cost           | 8.17        |
| time/                   |             |
|    total_timesteps      | 940000      |
| train/                  |             |
|    approx_kl            | 0.001781694 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.74       |
|    explained_variance   | 0.65        |
|    learning_rate        | 5e-05       |
|    loss                 | 44.4        |
|    n_updates            | 3740  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0611       |
|    crash                | 0.238        |
|    max_step             | 0            |
|    mean_ep_length       | 99.8         |
|    mean_reward          | 121          |
|    num_episodes         | 5            |
|    out_of_road          | 0.939        |
|    raw_action           | 0.4740604    |
|    route_completion     | 0.422        |
|    success_rate         | 0            |
|    total_cost           | 8.09         |
| time/                   |              |
|    total_timesteps      | 950000       |
| train/                  |              |
|    approx_kl            | 0.0010033199 |
|    clip_fraction        | 0.121        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.73        |
|    explained_variance   | 0.846        |
|    learning_rate        | 5e-05        |
|    loss                 | 30.6         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0604      |
|    crash                | 0.235       |
|    max_step             | 0           |
|    mean_ep_length       | 126         |
|    mean_reward          | 162         |
|    num_episodes         | 5           |
|    out_of_road          | 0.94        |
|    raw_action           | 0.47390705  |
|    route_completion     | 0.422       |
|    success_rate         | 0           |
|    total_cost           | 8.05        |
| time/                   |             |
|    total_timesteps      | 960000      |
| train/                  |             |
|    approx_kl            | 0.001614628 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.73       |
|    explained_variance   | 0.909       |
|    learning_rate        | 5e-05       |
|    loss                 | 48.7        |
|    n_updates            | 3820  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0598       |
|    crash                | 0.235        |
|    max_step             | 0            |
|    mean_ep_length       | 107          |
|    mean_reward          | 122          |
|    num_episodes         | 5            |
|    out_of_road          | 0.94         |
|    raw_action           | 0.47443315   |
|    route_completion     | 0.422        |
|    success_rate         | 0            |
|    total_cost           | 7.99         |
| time/                   |              |
|    total_timesteps      | 970000       |
| train/                  |              |
|    approx_kl            | 0.0013747392 |
|    clip_fraction        | 0.147        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.72        |
|    explained_variance   | 0.891        |
|    learning_rate        | 5e-05        |
|    loss                 | 82.4         |
|    n_upda

-----------------------------------------
| eval/                   |             |
|    arrive_dest          | 0.0592      |
|    crash                | 0.235       |
|    max_step             | 0           |
|    mean_ep_length       | 138         |
|    mean_reward          | 174         |
|    num_episodes         | 5           |
|    out_of_road          | 0.941       |
|    raw_action           | 0.4745727   |
|    route_completion     | 0.423       |
|    success_rate         | 0           |
|    total_cost           | 7.97        |
| time/                   |             |
|    total_timesteps      | 980000      |
| train/                  |             |
|    approx_kl            | 0.002338577 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.72       |
|    explained_variance   | 0.925       |
|    learning_rate        | 5e-05       |
|    loss                 | 64.4        |
|    n_updates            | 3900  

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.0586       |
|    crash                | 0.236        |
|    max_step             | 0            |
|    mean_ep_length       | 127          |
|    mean_reward          | 157          |
|    num_episodes         | 5            |
|    out_of_road          | 0.941        |
|    raw_action           | 0.47443172   |
|    route_completion     | 0.423        |
|    success_rate         | 0            |
|    total_cost           | 7.92         |
| time/                   |              |
|    total_timesteps      | 990000       |
| train/                  |              |
|    approx_kl            | 0.0015454286 |
|    clip_fraction        | 0.167        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.72        |
|    explained_variance   | 0.915        |
|    learning_rate        | 5e-05        |
|    loss                 | 28.6         |
|    n_upda

------------------------------------------
| eval/                   |              |
|    arrive_dest          | 0.058        |
|    crash                | 0.236        |
|    max_step             | 0            |
|    mean_ep_length       | 108          |
|    mean_reward          | 119          |
|    num_episodes         | 5            |
|    out_of_road          | 0.942        |
|    raw_action           | 0.4746339    |
|    route_completion     | 0.422        |
|    success_rate         | 0            |
|    total_cost           | 7.89         |
| time/                   |              |
|    total_timesteps      | 1000000      |
| train/                  |              |
|    approx_kl            | 0.0031997715 |
|    clip_fraction        | 0.155        |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.71        |
|    explained_variance   | 0.875        |
|    learning_rate        | 5e-05        |
|    loss                 | 38.3         |
|    n_upda

<stable_baselines3.ppo.ppo.PPO at 0x27316794490>