# PPO - 2-way Single Intersection

## Imports

In [19]:
import os
import subprocess

import pyautogui
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from sumo_rl import SumoEnvironment

from tyre_pm_reward import tyre_pm_reward

## Create Environment and Instantiate Agent

In [12]:
# All SUMO-RL settings for the 2-way single-intersection scenario in one place.
sumo_settings = {
    "net_file": os.path.join("nets", "2way-single-intersection", "single-intersection.net.xml"),
    "route_file": os.path.join("nets", "2way-single-intersection", "single-intersection-vhvh.rou.xml"),
    "out_csv_name": os.path.join("outputs", "2way-single-intersection", "ppo"),
    "use_gui": True,
    "num_seconds": 1e5,
    "single_agent": True,
    "reward_fn": tyre_pm_reward,  # custom reward imported from tyre_pm_reward
    "sumo_seed": 42,
}
env = SumoEnvironment(**sumo_settings)

# Run the Stable-Baselines3 environment checker on the raw env, then close it
# before wrapping (presumably to shut down whatever the check started -- confirm).
check_env(env)
env.close()

# Monitor wrapper records episode reward, length and time.
env = Monitor(env)

In [13]:
# Tunable PPO hyperparameters, kept together so they are easy to compare between runs.
ppo_hyperparams = {
    "learning_rate": 5e-3,
    "n_steps": 256,
    "batch_size": 256,
    "n_epochs": 5,
    "gamma": 0.95,
    "gae_lambda": 0.99,
    "clip_range": 0.3,
    "ent_coef": 0.09,
    "vf_coef": 0.05,
    "max_grad_norm": 0.9,
}

# MLP policy acting on the monitored environment; TensorBoard logs go under logs/.
model = PPO(
    "MlpPolicy",
    env,
    tensorboard_log=os.path.join("logs", "2way-single-intersection", "ppo"),
    verbose=1,
    **ppo_hyperparams,
)

Using cuda device
Wrapping the env in a DummyVecEnv.


In [None]:
# Baseline: score the not-yet-trained agent over a single evaluation episode
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1)

# Mean first, then standard deviation, each on its own line
print(mean_reward, std_reward, sep="\n")

## Train and Evaluate Agent

In [None]:
# Train agent.
# total_timesteps is an integer step budget, so pass an int literal rather
# than the float 1e5.
model.learn(total_timesteps=100_000)

In [None]:
# Score the agent again after training, over a single evaluation episode
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1)

# Mean on the first line, standard deviation on the second
print(f"{mean_reward}\n{std_reward}")

### Save Model

In [None]:
# Persist the trained agent under saved_models/
model_path = os.path.join("saved_models", "PPO_2way_single_intersection")
model.save(model_path)

## Render

In [23]:
# Load the checkpoint written by the "Save Model" cell, so that a fresh
# Restart & Run All renders the agent that was just trained (the previous
# path pointed at a differently named file).
loaded_model = PPO.load(os.path.join("saved_models", "PPO_2way_single_intersection"))

VID_LENGTH = 750  # number of environment steps; one screenshot is taken per step
SCREEN_REGION = (0, 0, 2560, 1542)  # screen area captured by pyautogui -- adjust to your display

# Make sure the frame directory exists before the first screenshot is written.
render_dir = os.path.join("renders", "2way-single-intersection")
os.makedirs(render_dir, exist_ok=True)

try:
    obs, info = env.reset()
    env.render()

    for i in range(VID_LENGTH):
        # Sample stochastically from the policy (deterministic=False).
        actions, _ = loaded_model.predict(obs, state=None, deterministic=False)
        obs, reward, done, truncated, info = env.step(actions)
        env.render()

        # Capture the screen (the SUMO GUI is expected to be visible) as frame i.
        pyautogui.screenshot(
            os.path.join(render_dir, f"img{i}.jpg"),
            region=SCREEN_REGION,
        )

        # Start a new episode when the current one ends; stepping a finished
        # episode without a reset is an error for the Monitor wrapper.
        if done or truncated:
            obs, info = env.reset()
finally:
    env.close()  # clean up even if rendering is interrupted

In [25]:
# Stitch the numbered screenshots into an MP4 with ffmpeg.
# Paths are built with os.path.join (as in the other cells) instead of
# hard-coded backslashes, so the cell also works outside Windows.
os.makedirs("videos", exist_ok=True)  # ffmpeg does not create the output directory

subprocess.run(
    [
        "ffmpeg",
        "-y",  # overwrite an existing output file without prompting
        "-r", "5",  # frame rate for the image sequence
        "-i", os.path.join("renders", "2way-single-intersection", "img%d.jpg"),
        os.path.join("videos", "2way_single_intersection.mp4"),
    ],
    check=True,  # raise CalledProcessError instead of silently ignoring an ffmpeg failure
)

## Clean-up

In [17]:
# Final clean-up: close the environment (the render cell also calls env.close() when it finishes).
env.close()