# PPO - 2-way Single Intersection

## Imports

In [1]:
import csv
import os
import subprocess

import pyautogui
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from sumo_rl import SumoEnvironment

from reward_functions import abs_accel

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [2]:
# Run identifier: it is interpolated into the output-CSV, saved-model,
# render-frame and video paths built by the cells below.
TEST_NUM = 2

## Create Environment and Instantiate Agent

In [10]:
# Folder for this run's CSV output; `out_csv_name` below points inside it.
csv_dir = os.path.join("outputs", "2way-single-intersection", f"test_{TEST_NUM}")
# makedirs (not mkdir) also creates missing parent folders on a fresh checkout,
# and exist_ok=True keeps the cell safe to re-run.
os.makedirs(csv_dir, exist_ok=True)

env_params = {
    "net_file": os.path.join("nets", "2way-single-intersection", "single-intersection.net.xml"),
    "route_file": os.path.join("nets", "2way-single-intersection", "single-intersection-vhvh.rou.xml"),
    "out_csv_name": os.path.join(csv_dir, "ppo"),
    "num_seconds": 100_000,  # integer literal instead of the float 1e5
    "single_agent": True,
    "reward_fn": abs_accel,
    "sumo_seed": 42,
}
env = SumoEnvironment(**env_params)

# Validate the environment against the checks in stable-baselines3's env checker.
check_env(env)

env = Monitor(env)  # wrap env to know episode reward, length, time

In [11]:
# PPO agent with an MLP policy acting on the monitored environment.
# TensorBoard event files are written under logs/2way-single-intersection.
model = PPO(
    "MlpPolicy",
    env,
    learning_rate=5e-3,
    n_steps=256,
    batch_size=256,
    n_epochs=5,
    gamma=0.95,
    gae_lambda=0.99,
    clip_range=0.3,
    ent_coef=0.09,
    vf_coef=0.05,
    max_grad_norm=0.9,
    tensorboard_log=os.path.join("logs", "2way-single-intersection"),
    verbose=1,
    # Seed the agent with the same value the simulator uses, so repeated runs
    # of this notebook are comparable instead of differing by random init.
    seed=env_params["sumo_seed"],
)

Using cuda device
Wrapping the env in a DummyVecEnv.


In [12]:
# Baseline: score the policy before any training, over one evaluation episode.
mean_untrained_reward, std_untrained_reward = evaluate_policy(
    model,
    env,
    n_eval_episodes=1,
)

# Mean on the first output line, standard deviation on the second.
print(mean_untrained_reward, std_untrained_reward, sep="\n")

-28913.718541
0.0


## Train and Evaluate Agent

In [13]:
# Train the agent. total_timesteps is given as an int (the type
# stable-baselines3 documents for it) rather than the float 1e5.
model.learn(total_timesteps=100_000)

Logging to logs\2way-single-intersection\ppo_1\PPO_2
----------------------------
| time/              |     |
|    fps             | 48  |
|    iterations      | 1   |
|    time_elapsed    | 5   |
|    total_timesteps | 256 |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 49          |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 512         |
| train/                  |             |
|    approx_kl            | 0.041538045 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.3         |
|    entropy_loss         | -1.37       |
|    explained_variance   | -4.53e-05   |
|    learning_rate        | 0.005       |
|    loss                 | 4.74e+03    |
|    n_updates            | 5           |
|    policy_gradient_loss | -0.0208     |
|    value_loss           | 9.57e+04    |
------------------------------

<stable_baselines3.ppo.ppo.PPO at 0x26dc8f57c70>

In [14]:
# Score the policy again after training, over one evaluation episode.
mean_trained_reward, std_trained_reward = evaluate_policy(
    model,
    env,
    n_eval_episodes=1,
)

# Mean on the first output line, standard deviation on the second.
print(mean_trained_reward, std_trained_reward, sep="\n")

-8924.633105
0.0


### Record Results

In [15]:
# Append one summary row for this run: network file, test number, reward
# function, and the mean reward before and after training.
reward_fn = env_params["reward_fn"]
# Store the function's name rather than the function object: the object's repr
# embeds a memory address that differs on every run. Reading it from
# env_params also avoids relying on the Monitor wrapper forwarding attributes.
reward_fn_name = getattr(reward_fn, "__name__", str(reward_fn))

data = [
    env_params["net_file"],
    TEST_NUM,
    reward_fn_name,
    mean_untrained_reward,
    mean_trained_reward,
]

# newline="" is what the csv module expects for files it writes to.
with open("test_results.csv", "a", encoding="UTF8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(data)

### Save Model

In [16]:
# Persist the trained agent under saved_models/, keyed by the test number.
saved_model_path = os.path.join(
    "saved_models",
    f"PPO_2way_single_intersection_{TEST_NUM}",
)
model.save(saved_model_path)

## Render Video

In [17]:
# GUI-enabled copy of the training configuration. Building a new dict (instead
# of env_params.update) leaves env_params untouched, so any later use of it
# does not silently inherit use_gui=True.
render_params = {**env_params, "use_gui": True}
render_env = SumoEnvironment(**render_params)

# Validate the GUI environment, then close it again before wrapping.
check_env(render_env)
render_env.close()

render_env = Monitor(render_env)  # wrap env to know episode reward, length, time

In [18]:
# Reload the agent from the file written by the save cell.
model = PPO.load(os.path.join("saved_models", f"PPO_2way_single_intersection_{TEST_NUM}"))

# Folder that receives one screenshot per step; makedirs also creates missing
# parent folders and tolerates re-runs.
folder_path = os.path.join("renders", "2way-single-intersection", f"test_{TEST_NUM}")
os.makedirs(folder_path, exist_ok=True)

VID_LENGTH = 750  # number of environment steps, i.e. frames, to capture
# Screen rectangle handed to pyautogui.screenshot(region=...). The values are
# specific to the recording machine's display; adjust for other screens.
SCREEN_REGION = (0, 0, 2560, 1542)

obs, info = render_env.reset()
try:
    for i in range(VID_LENGTH):
        actions, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = render_env.step(actions)
        # Render the GUI environment being stepped (previously this rendered
        # the training env by mistake).
        render_env.render()

        pyautogui.screenshot(
            os.path.join(folder_path, f"img{i}.jpg"),
            region=SCREEN_REGION,
        )

        # Start a new episode if this one ended before VID_LENGTH frames.
        if terminated or truncated:
            obs, info = render_env.reset()
finally:
    render_env.close()  # clean up even if a step or screenshot fails

In [19]:
# Build the paths with os.path.join, like the rest of the notebook, instead of
# hard-coded backslashes.
frames_pattern = os.path.join(
    "renders", "2way-single-intersection", f"test_{TEST_NUM}", "img%d.jpg"
)
video_dir = "videos"
os.makedirs(video_dir, exist_ok=True)  # ffmpeg needs the output folder to exist
video_path = os.path.join(video_dir, f"2way_single_intersection_{TEST_NUM}.mp4")

# check=True raises CalledProcessError if ffmpeg exits with a non-zero status,
# so a failed encode is not silently ignored.
subprocess.run(
    ["ffmpeg", "-y", "-r", "5", "-i", frames_pattern, video_path],
    check=True,
)

CompletedProcess(args=['ffmpeg', '-y', '-r', '5', '-i', 'renders\\2way-single-intersection\\test_2\\img%d.jpg', 'videos\\2way_single_intersection_2.mp4'], returncode=0)

## Clean-up

In [20]:
# Close the training environment first, then the rendering environment.
for finished_env in (env, render_env):
    finished_env.close()