## Importing dependencies

In [1]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import  VecMonitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
import metadrive


## Creating a MetaDrive environment

In [2]:
# Environment configuration passed to MetaDriveEnv.
# The "map" key used to appear twice in this literal ("X", then 5); Python keeps
# only the last value, so the effective setting was always 5. The dead "X"
# entry has been removed so the literal states what is actually used.
config = {
    "use_render": False,       # log output reports "Render Mode: none" for this setting
    "manual_control": False,
    "traffic_density": 0.3,
    "map": 5,  # random 5-block map
    "random_lane_width": True,
    "random_agent_model": True,
    "random_traffic": True,
    "num_agents": 1,
    "allow_respawn": True,
}

# Instantiate the MetaDrive environment from the `config` dict.
env = metadrive.MetaDriveEnv(config=config)

[38;20m[INFO] Environment: MetaDriveEnv[0m
[38;20m[INFO] MetaDrive version: 0.4.3[0m
[38;20m[INFO] Sensors: [lidar: Lidar(), side_detector: SideDetector(), lane_line_detector: LaneLineDetector()][0m
[38;20m[INFO] Render Mode: none[0m
[38;20m[INFO] Horizon (Max steps per agent): 1000[0m


## Exploring the environment

In [11]:
# Show the observation space first, then the action space.
for space in (env.observation_space, env.action_space):
    print(space)

Box(-0.0, 1.0, (261,), float32)
Box(-1.0, 1.0, (2,), float32)


In [24]:
# Smoke-test the environment by driving it with a few random actions.
# reset() is unpacked as (obs, info) here, exactly as it is inside the loop;
# binding the whole return value to `obs` would leave it holding a tuple.
obs, info = env.reset()
for step in range(10):
    action = env.action_space.sample()   # random action
    obs, reward, terminated, truncated, info = env.step(action)
    # An episode is over when it either terminates or is truncated.
    done = terminated or truncated

    if done:
        # Start a fresh episode so the remaining steps stay valid.
        obs, info = env.reset()


[38;20m[INFO] Assets version: 0.4.3[0m
[38;20m[INFO] Known Pipes: wglGraphicsPipe[0m
[38;20m[INFO] Start Scenario Index: 0, Num Scenarios : 1[0m


In [9]:
# Close the exploration environment.
# NOTE(review): a later cell passes this same `env` to PPO; confirm the
# environment can be reused after close() when running the notebook top to bottom.
env.close()

In [3]:
# PPO agent with the "MlpPolicy" policy on `env`; verbose=1 prints training
# progress and TensorBoard logs are written under ./tb_logs.
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./tb_logs",
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [3]:
# Vectorised environment made of four MetaDriveEnv instances sharing `config`.
# DummyVecEnv is already imported in the notebook's import cell, so it is not
# re-imported here.
# NOTE(review): `vec_env` is not passed to PPO in the cells visible here (the
# model is built on `env`) — confirm whether training was meant to use it.
# NOTE(review): DummyVecEnv steps every environment inside this process;
# verify that MetaDrive supports several environment instances per process,
# otherwise SubprocVecEnv may be required — TODO confirm against MetaDrive docs.
vec_env = DummyVecEnv([lambda: metadrive.MetaDriveEnv(config) for _ in range(4)])


[38;20m[INFO] Environment: MetaDriveEnv[0m
[38;20m[INFO] MetaDrive version: 0.4.3[0m
[38;20m[INFO] Sensors: [lidar: Lidar(), side_detector: SideDetector(), lane_line_detector: LaneLineDetector(), main_camera: MainCamera(1200, 900), dashboard: DashBoard()][0m
[38;20m[INFO] Render Mode: onscreen[0m
[38;20m[INFO] Horizon (Max steps per agent): 1000[0m
[38;20m[INFO] Environment: MetaDriveEnv[0m
[38;20m[INFO] MetaDrive version: 0.4.3[0m
[38;20m[INFO] Sensors: [lidar: Lidar(), side_detector: SideDetector(), lane_line_detector: LaneLineDetector(), main_camera: MainCamera(1200, 900), dashboard: DashBoard()][0m
[38;20m[INFO] Render Mode: onscreen[0m
[38;20m[INFO] Horizon (Max steps per agent): 1000[0m
[38;20m[INFO] Environment: MetaDriveEnv[0m
[38;20m[INFO] MetaDrive version: 0.4.3[0m
[38;20m[INFO] Sensors: [lidar: Lidar(), side_detector: SideDetector(), lane_line_detector: LaneLineDetector(), main_camera: MainCamera(1200, 900), dashboard: DashBoard()][0m
[38;20m[INFO

In [4]:
# Checkpoint callback: save_freq of 100,000, files written to ./checkpoints/
# with the "ppo_metadrive" filename prefix.
ckpt_cb = CheckpointCallback(
    save_freq=100_000,
    save_path="./checkpoints/",
    name_prefix="ppo_metadrive",
)

In [None]:
# Train for one million timesteps, invoking the checkpoint callback along the way.
model.learn(
    total_timesteps=1_000_000,
    callback=ckpt_cb,
)


[38;20m[INFO] Assets version: 0.4.3[0m
[38;20m[INFO] Known Pipes: wglGraphicsPipe[0m
[38;20m[INFO] Start Scenario Index: 0, Num Scenarios : 1[0m


Logging to ./tb_logs\PPO_5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 434      |
|    ep_rew_mean     | 0.834    |
| time/              |          |
|    fps             | 15       |
|    iterations      | 1        |
|    time_elapsed    | 132      |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 622          |
|    ep_rew_mean          | 4.02         |
| time/                   |              |
|    fps                  | 13           |
|    iterations           | 2            |
|    time_elapsed         | 294          |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0057820645 |
|    clip_fraction        | 0.0581       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | 0.0