# 2-way Single Intersection

In [1]:
import csv
import os
import shutil
import subprocess

import pyautogui
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from sumo_rl import SumoEnvironment

from evaluate import evaluate
from reward_functions import tyre_pm_reward

In [2]:
from stable_baselines3.common.utils import set_random_seed

TEST_NUM = 5
SEED = 0
set_random_seed(SEED)

## Create Environment and Instantiate Agent

In [3]:
csv_dir = os.path.join("outputs","2way_single_intersection",f"test_{TEST_NUM}")
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)

env_params = {
    "net_file": os.path.join("nets","2way_single_intersection","single-intersection.net.xml"),
    "route_file": os.path.join("nets","2way_single_intersection","single-intersection-vhvh.rou.xml"),
    "num_seconds": 3600,
    "single_agent": True,
    "reward_fn": tyre_pm_reward,
    "sumo_seed": 23423,
}

eval_env = SumoEnvironment(**env_params)
eval_env = Monitor(eval_env)  # wrap env to know episode reward, length, time

train_env = make_vec_env(SumoEnvironment, n_envs=8, env_kwargs=env_params)
train_env.seed(SEED)
obs = train_env.reset()

In [4]:
from hyperparams import custom

model = PPO(
    "MlpPolicy",
    train_env,
    tensorboard_log=os.path.join("logs","2way_single_intersection"),
    seed=SEED,
    verbose=1,
    **custom,
)

Using cuda device


In [5]:
# Evaluate untrained random agent
csv_path = os.path.join(csv_dir, "untrained.csv")
tb_log_dir = os.path.join("logs","2way_single_intersection","eval_untrained")

reward_untrained, _ = evaluate(model, eval_env, csv_path, tb_log_dir, n_eval_episodes=1)

print(reward_untrained)

-42523.275263926946


## Train and Evaluate Agent

In [6]:
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement

# Separate env for eval callback
callback_env = make_vec_env(SumoEnvironment, n_envs=1, env_kwargs=env_params)

stop_train_callback = StopTrainingOnNoModelImprovement(max_no_improvement_evals=3, min_evals=5, verbose=1)
eval_callback = EvalCallback(callback_env, callback_after_eval=stop_train_callback,
                             eval_freq=25000, best_model_save_path="saved_models",
                             best_model_save_name=f"PPO_2way_single_intersection_{TEST_NUM}")

model.learn(total_timesteps=1e6, callback=eval_callback)

Logging to logs\2way_single_intersection\PPO_5
-----------------------------
| time/              |      |
|    fps             | 35   |
|    iterations      | 1    |
|    time_elapsed    | 58   |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 31            |
|    iterations           | 2             |
|    time_elapsed         | 130           |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 5.0263072e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0.000124      |
|    learning_rate        | 0.00025       |
|    loss                 | 1.52e+06      |
|    n_updates            | 4             |
|    policy_gradient_loss | -0.000782     |
|    value_loss           | 2.97e+06   

<stable_baselines3.ppo.ppo.PPO at 0x1ffcc43c490>

In [6]:
# Evaluate trained agent
model = PPO.load(os.path.join("saved_models",f"PPO_2way_single_intersection_{TEST_NUM}"))

csv_path = os.path.join(csv_dir, "trained.csv")
tb_log_dir = os.path.join("logs","2way_single_intersection",f"PPO_{TEST_NUM}","eval_trained")

reward_trained, _ = evaluate(model, eval_env, csv_path, tb_log_dir, n_eval_episodes=1)

print(reward_trained)

-9490.722518205643


In [8]:
# Move untrained agent's TensorBoard log into numbered folder
original = os.path.join("logs","2way_single_intersection","eval_untrained")
target = os.path.join("logs","2way_single_intersection",f"PPO_{TEST_NUM}","eval_untrained")

shutil.move(original, target)

'logs\\2way_single_intersection\\PPO_5\\eval_untrained'

### Record Results

In [7]:
import pandas as pd

untrained_csv = pd.read_csv(os.path.join(csv_dir, "untrained.csv"))
arrived_untrained = sum(untrained_csv.arrived_num)
pm_untrained = sum(untrained_csv.tyre_pm)
final_wait_untrained = untrained_csv.waiting_time.iat[-1]

trained_csv = pd.read_csv(os.path.join(csv_dir, "trained.csv"))
arrived_trained = sum(trained_csv.arrived_num)
pm_trained = sum(trained_csv.tyre_pm)
final_wait_trained = trained_csv.waiting_time.iat[-1]

In [9]:
data = ["2way_single_intersection", TEST_NUM, env_params["reward_fn"].__name__,
        reward_untrained, arrived_untrained, pm_untrained, final_wait_untrained,
        reward_trained, arrived_trained, pm_trained, final_wait_trained]

with open("test_results.csv", "a", encoding="UTF8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(data)

## Render Video

In [12]:
env_params.update({"use_gui": True})
render_env = SumoEnvironment(**env_params)
render_env = Monitor(render_env)

In [13]:
# model = PPO.load(os.path.join("saved_models",f"PPO_2way_single_intersection_{TEST_NUM}"))

obs, info = render_env.reset()

folder_path = os.path.join("renders","2way_single_intersection",f"test_{TEST_NUM}")
if not os.path.exists(folder_path):
    os.mkdir(folder_path)

max_time = render_env.env.sim_max_time
delta_time = render_env.env.delta_time
vid_length = int(max_time/delta_time)

for i in range(vid_length):
    actions, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = render_env.step(actions)
    render_env.render()

    im = pyautogui.screenshot(
        os.path.join(folder_path,f"img{i}.jpg"),
        region=(0, 0, 2560, 1542)
    )

render_env.close()  # clean up

In [14]:
subprocess.run([
    "ffmpeg", "-y", "-r", "30", "-i",
    f"renders\\2way_single_intersection\\test_{TEST_NUM}\\img%d.jpg",
    f"videos\\2way_single_intersection_{TEST_NUM}.mp4"
])

CompletedProcess(args=['ffmpeg', '-y', '-r', '30', '-i', 'renders\\2way_single_intersection\\test_5\\img%d.jpg', 'videos\\2way_single_intersection_5.mp4'], returncode=0)