# Cross 1L TL

In [1]:
import os

from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.monitor import Monitor
from sumo_rl import SumoEnvironment

from evaluate import evaluate

In [2]:
# Identifier of this experiment run. It is embedded in the output, log, model,
# render and video paths built by the cells below and is written to
# test_results.csv.
# NOTE(review): the log paths assume the training run folder is named
# PPO_{TEST_NUM}; confirm against the "Logging to ..." line printed by training.
TEST_NUM = 1

## Create Environment and Instantiate Agent

In [3]:
from reward_functions import tyre_pm_reward

# The network file and the route file sit in the same scenario folder.
scenario_dir = os.path.join("nets", "simple_nets", "cross1ltl")

# Keyword arguments for SumoEnvironment. They are kept in a dict because later
# cells read values from it and use it again to build a second environment.
env_params = dict(
    net_file=os.path.join(scenario_dir, "net.net.xml"),
    route_file=os.path.join(scenario_dir, "input_routes.rou.xml"),
    num_seconds=1200,
    single_agent=True,
    reward_fn=tyre_pm_reward,
    sumo_seed=42,
)

raw_env = SumoEnvironment(**env_params)
check_env(raw_env)  # validate the bare environment before it is wrapped
env = Monitor(raw_env)  # wrap env to know episode reward, length, time

In [4]:
from helper_functions import linear_schedule

# Using hyperparams for Atari (except for n_steps) from
# https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml

model = PPO(
    "MlpPolicy",
    env,
    # Learning rate and clip range are both produced by the project's
    # linear_schedule helper from the given initial value.
    learning_rate=linear_schedule(2.5e-4),
    n_steps=256,
    batch_size=256,
    n_epochs=4,
    clip_range=linear_schedule(0.1),
    ent_coef=0.01,
    max_grad_norm=0.9,
    tensorboard_log=os.path.join("logs","cross1ltl"),
    # Seed the agent with the same value as the simulator so that re-running
    # the notebook from a fresh kernel is as repeatable as possible.
    seed=env_params["sumo_seed"],
    verbose=1,
)

Using cuda device
Wrapping the env in a DummyVecEnv.


In [5]:
# Evaluate the untrained agent to get a baseline for the trained one
csv_dir = os.path.join("outputs","cross1ltl",f"test_{TEST_NUM}")
# exist_ok makes the cell safe to re-run and avoids the check-then-create race
os.makedirs(csv_dir, exist_ok=True)

csv_path = os.path.join(csv_dir, "untrained.csv")
tb_log_dir = os.path.join("logs","cross1ltl","eval_untrained")

# evaluate() is a project helper; its result is unpacked as the mean and the
# standard deviation of the episode reward. Only one episode is evaluated here,
# so the standard deviation carries no information.
mean_reward_untrained, std_reward_untrained = evaluate(model, env, csv_path, tb_log_dir, n_eval_episodes=1)

print(mean_reward_untrained)
print(std_reward_untrained)

-5797.319856
0.0


## Train and Evaluate Agent

In [6]:
# Train agent
# total_timesteps is documented as an integer, so one million is written as an
# int literal rather than the float 1e6.
model.learn(total_timesteps=1_000_000)

Logging to logs\cross1ltl\PPO_1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 240       |
|    ep_rew_mean     | -2.65e+04 |
| time/              |           |
|    fps             | 46        |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 256       |
----------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 240           |
|    ep_rew_mean          | -2.79e+04     |
| time/                   |               |
|    fps                  | 48            |
|    iterations           | 2             |
|    time_elapsed         | 10            |
|    total_timesteps      | 512           |
| train/                  |               |
|    approx_kl            | 9.2131086e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | -1.39         |
| 

<stable_baselines3.ppo.ppo.PPO at 0x2292df52bb0>

In [7]:
# Score the agent again after training. The CSV goes next to the untrained one
# (same csv_dir) and the evaluation logs go under this run's log folder.
run_log_dir = os.path.join("logs", "cross1ltl", f"PPO_{TEST_NUM}")
csv_path = os.path.join(csv_dir, "trained.csv")
tb_log_dir = os.path.join(run_log_dir, "eval_trained")

trained_stats = evaluate(model, env, csv_path, tb_log_dir, n_eval_episodes=1)
mean_reward_trained, std_reward_trained = trained_stats

for stat in (mean_reward_trained, std_reward_trained):
    print(stat)

-5401.889468
0.0


In [8]:
# Move the eval_untrained log directory (the tb_log_dir that was passed to
# evaluate() for the untrained agent) into this run's numbered directory
import shutil

original = os.path.join("logs","cross1ltl","eval_untrained")
target = os.path.join("logs","cross1ltl",f"PPO_{TEST_NUM}","eval_untrained")

# Guard the move so the cell can be re-run: without it a second run fails
# because the source is gone, and moving onto an existing directory would nest
# the source inside it instead of replacing it.
if os.path.exists(original) and not os.path.exists(target):
    shutil.move(original, target)
else:
    print(f"Skipping move: {original!r} is missing or {target!r} already exists")

# Show where the logs live now
target

'logs\\cross1ltl\\PPO_1\\eval_untrained'

### Record Results

In [9]:
import csv

# One row per experiment: network file, test number, reward function name,
# mean reward before training, mean reward after training.
# The reward function is read from env_params, where it was configured, rather
# than through the Monitor wrapper: wrappers are not guaranteed to forward
# custom attributes of the wrapped environment (newer Gymnasium releases
# removed that forwarding).
data = [
    env_params["net_file"],
    TEST_NUM,
    env_params["reward_fn"].__name__,
    mean_reward_untrained,
    mean_reward_trained,
]

# Append, so the rows written by earlier tests are kept
with open("test_results.csv", "a", encoding="UTF8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(data)

### Save Model

In [10]:
# Persist the trained agent; the file name carries the test number
model_path = os.path.join("saved_models", f"PPO_cross1ltl_{TEST_NUM}")
model.save(model_path)

## Render Video

In [11]:
# Build the GUI settings as a copy instead of updating env_params in place, so
# env_params keeps describing the training environment and re-running the
# environment-creation cell does not unexpectedly open the GUI.
render_params = {**env_params, "use_gui": True}
render_env = SumoEnvironment(**render_params)

check_env(render_env)
# Close whatever check_env left open; the next cell calls reset() before
# stepping. NOTE(review): presumably reset() starts a fresh simulation - confirm.
render_env.close()

render_env = Monitor(render_env)  # wrap env to know episode reward, length, time

In [12]:
import pyautogui

# Screen area captured for every frame, as (left, top, width, height).
# NOTE(review): hard-coded for one particular screen layout - adjust it to
# where the simulator window appears on the machine running this cell.
SCREEN_REGION = (0, 0, 2560, 1542)

folder_path = os.path.join("renders","cross1ltl",f"test_{TEST_NUM}")
os.makedirs(folder_path, exist_ok=True)  # safe to re-run

obs, info = render_env.reset()

try:
    # Read the timing from the environment that is being rendered (not from
    # the training env), bypassing the Monitor wrapper.
    sumo_env = render_env.unwrapped
    max_time = sumo_env.sim_max_time
    delta_time = sumo_env.delta_time
    vid_length = (max_time // delta_time)  # one frame per agent step

    for i in range(vid_length):
        actions, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = render_env.step(actions)
        render_env.render()

        # Frames are numbered from 0 so ffmpeg can read them as img%d.jpg
        pyautogui.screenshot(
            os.path.join(folder_path,f"img{i}.jpg"),
            region=SCREEN_REGION
        )

        # Do not step an environment whose episode has already ended
        if terminated or truncated:
            break
finally:
    render_env.close()  # clean up even if a step or screenshot fails

In [13]:
import subprocess

# Build the paths with os.path.join, like the rest of the notebook, instead of
# hard-coding Windows separators.
frames_pattern = os.path.join("renders", "cross1ltl", f"test_{TEST_NUM}", "img%d.jpg")
video_path = os.path.join("videos", f"cross1ltl_{TEST_NUM}.mp4")

# Make sure the output folder exists before ffmpeg writes into it
os.makedirs(os.path.dirname(video_path), exist_ok=True)

# -y overwrites an existing video; -r 5 placed before -i reads the frames at
# 5 frames per second. check=True raises if ffmpeg exits with an error instead
# of silently leaving a missing or broken video.
subprocess.run(
    ["ffmpeg", "-y", "-r", "5", "-i", frames_pattern, video_path],
    check=True,
)

CompletedProcess(args=['ffmpeg', '-y', '-r', '5', '-i', 'renders\\cross1ltl\\test_1\\img%d.jpg', 'videos\\cross1ltl_1.mp4'], returncode=0)

## Clean-up

In [14]:
# Release both environments: the training one first, then the rendering one
for open_env in (env, render_env):
    open_env.close()