# Cologne8

In [1]:
import csv
import os

import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecMonitor
import supersuit as ss

from evaluate import evaluate
from helper_functions import make_parallel_env

In [2]:
from stable_baselines3.common.utils import set_random_seed

TEST_NUM = 1
NET = "cologne8" 
SEED = 23423    # default SUMO seed no.
STACK_NUM = 3   # frames to stack

set_random_seed(SEED)

## Create Environment and Instantiate Agent

In [11]:
from reward_functions import combined_reward

csv_dir = os.path.join("outputs", NET, f"test_{TEST_NUM}")
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)

env_params = {
    "net_file": os.path.join("nets", NET, "cologne8.net.xml"),
    "route_file": os.path.join("nets", NET, "cologne8.rou.xml"),
    "num_seconds": 3600,
    "reward_fn": combined_reward,
    "sumo_seed": SEED,
}
congestion_reward = combined_reward.__defaults__[0].__name__
alpha = combined_reward.__defaults__[1]  # congestion component coefficient

env = make_parallel_env(**env_params)
env.reset(seed=SEED)

env = ss.pad_action_space_v0(env)
env = ss.pad_observations_v0(env)
env = ss.frame_stack_v1(env, STACK_NUM)
env = ss.pettingzoo_env_to_vec_env_v1(env)

# Create env for evaluation
eval_env = ss.concat_vec_envs_v1(env, num_vec_envs=1, base_class="stable_baselines3")
eval_env = VecMonitor(eval_env)

# Create env for training
train_env = ss.concat_vec_envs_v1(env, num_vec_envs=8, base_class="stable_baselines3")
train_env = VecMonitor(train_env)

In [9]:
from hyperparams import custom

model = PPO(
    "MlpPolicy",
    train_env,
    tensorboard_log=os.path.join("logs", NET),
    verbose=1,
    **custom,
)

Using cuda device


## Evaluate Random Agent

In [5]:
# Evaluate untrained random agent

csv_path = os.path.join(csv_dir, "untrained.csv")
tb_log_dir = os.path.join("logs", NET, "eval_untrained")

reward_untrained, _ = evaluate(model, eval_env, csv_path, tb_log_dir, n_eval_episodes=1)

print(reward_untrained)

-47873.92051744461


In [6]:
# Record results
untrained_csv = pd.read_csv(os.path.join(csv_dir, "untrained.csv"))
arrived_untrained = sum(untrained_csv.arrived_num)
pm_untrained = sum(untrained_csv.tyre_pm)
final_wait_untrained = untrained_csv.waiting_time.iat[-1]

data = [TEST_NUM, congestion_reward, alpha, 0,
        reward_untrained, arrived_untrained, pm_untrained, final_wait_untrained]

with open(f"{NET}_results.csv", "a", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(data)

## Train Agent

In [21]:
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement

model.set_env(train_env)

# Separate env for eval callback
callback_env = ss.concat_vec_envs_v1(env, num_vec_envs=1, base_class="stable_baselines3")
callback_env = VecMonitor(callback_env)

stop_train_callback = StopTrainingOnNoModelImprovement(max_no_improvement_evals=3, min_evals=5, verbose=1)
eval_callback = EvalCallback(callback_env, callback_after_eval=stop_train_callback, eval_freq=25000,
                             best_model_save_path=os.path.join("saved_models", NET, f"test_{TEST_NUM}"))

TRAIN_STEPS = 1e6
model.learn(total_timesteps=TRAIN_STEPS, callback=eval_callback)

Logging to logs\grid4x4\PPO_10
------------------------------
| time/              |       |
|    fps             | 74    |
|    iterations      | 1     |
|    time_elapsed    | 440   |
|    total_timesteps | 32768 |
------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 70           |
|    iterations           | 2            |
|    time_elapsed         | 934          |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0006997354 |
|    clip_fraction        | 0.00144      |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.03        |
|    explained_variance   | 1.19e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 5.49e+03     |
|    n_updates            | 128          |
|    policy_gradient_loss | -0.000678    |
|    value_loss           | 1.26e+04     |
---------------------

<stable_baselines3.ppo.ppo.PPO at 0x22cebc48550>

In [22]:
# Evaluate trained agent
csv_path = os.path.join(csv_dir, "trained.csv")
tb_log_dir = os.path.join("logs", NET, f"PPO_{TEST_NUM}", "eval_trained")

reward_trained, _ = evaluate(model, eval_env, csv_path, tb_log_dir, n_eval_episodes=1)

print(reward_trained)

-127638.21738874912


In [23]:
# Record results
trained_csv = pd.read_csv(os.path.join(csv_dir, "trained.csv"))
arrived_trained = sum(trained_csv.arrived_num)
pm_trained = sum(trained_csv.tyre_pm)
final_wait_trained = trained_csv.waiting_time.iat[-1]

data = [TEST_NUM, congestion_reward, alpha, TRAIN_STEPS,
        reward_trained, arrived_trained, pm_trained, final_wait_trained]

with open(f"{NET}_results.csv", "a", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(data)

In [10]:
# Move untrained.csv into numbered directory
import shutil

original = os.path.join("logs", NET, "eval_untrained")
target = os.path.join("logs", NET, f"PPO_{TEST_NUM}","eval_untrained")

shutil.move(original, target)

'logs\\grid4x4\\PPO_9\\eval_untrained'

### Save Model

In [24]:
model.save(os.path.join("saved_models",f"PPO_{NET}_{TEST_NUM}"))

## Render Video

In [25]:
env_params.update({"use_gui": True})

render_env = make_parallel_env(**env_params)
render_env.reset(seed=SEED)
render_env = ss.pad_action_space_v0(render_env)
render_env = ss.pad_observations_v0(render_env)
render_env = ss.frame_stack_v1(render_env, STACK_NUM)
render_env = ss.pettingzoo_env_to_vec_env_v1(render_env)
render_env = VecMonitor(render_env)

In [26]:
import pyautogui

obs = render_env.reset()

folder_path = os.path.join("renders", NET, f"test_{TEST_NUM}")
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

max_time = render_env.unwrapped.par_env.unwrapped.env.sim_max_time
delta_time = render_env.unwrapped.par_env.unwrapped.env.delta_time
vid_length = int(max_time/delta_time)

for i in range(vid_length):
    actions, _ = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = render_env.step(actions)

    im = pyautogui.screenshot(
        os.path.join(folder_path,f"img{i}.jpg"),
        region=(0, 0, 2560, 1542)
    )

In [27]:
# Clean up
render_env.close()

In [28]:
import subprocess

subprocess.run([
    "ffmpeg", "-y", "-r", "30", "-i",
    f"renders\\{NET}\\test_{TEST_NUM}\\img%d.jpg",
    f"videos\\{NET}_{TEST_NUM}.mp4"
])

CompletedProcess(args=['ffmpeg', '-y', '-r', '30', '-i', 'renders\\grid4x4\\test_10\\img%d.jpg', 'videos\\grid4x4_10.mp4'], returncode=0)