#  Grid4x4

In [2]:
import os

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecMonitor
from sumo_rl import parallel_env
import supersuit as ss

from evaluate import evaluate

In [3]:
# Identifier of this experiment run. It is embedded in the output, log, saved-model,
# render and video paths used by the cells below.
# NOTE(review): the evaluation log paths assume this matches the PPO_<n> run
# directory that Stable-Baselines3 creates during training -- confirm before running.
TEST_NUM = 4

## Create Environment and Instantiate Agent

In [4]:
from reward_functions import combined_reward

# Keyword settings forwarded to sumo_rl's parallel_env: the 4x4 grid network and
# its route file, the `num_seconds` horizon and the project's combined reward.
net_dir = os.path.join("nets", "grid4x4")
env_params = dict(
    net_file=os.path.join(net_dir, "grid4x4.net.xml"),
    route_file=os.path.join(net_dir, "grid4x4_1.rou.xml"),
    num_seconds=3600,
    reward_fn=combined_reward,
    # sumo_seed=42,
)

# Wrapper chain, innermost call first: build the multi-agent env, convert it to a
# vector env, concatenate 8 copies using the Stable-Baselines3 base class, then
# wrap the result in VecMonitor.
# Maybe add frame-stacking here (before the vec-env conversion)
env = VecMonitor(
    ss.concat_vec_envs_v1(
        ss.pettingzoo_env_to_vec_env_v1(parallel_env(**env_params)),
        num_vec_envs=8,
        base_class="stable_baselines3",
    )
)

FatalTraCIError: connection closed by SUMO

In [44]:
from hyperparams import resco

# PPO agent with an MLP policy on the vectorised env. Hyperparameters come from
# the project's `resco` mapping; TensorBoard logs are written under logs/grid4x4.
ppo_log_dir = os.path.join("logs", "grid4x4")
model = PPO(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=ppo_log_dir,
    verbose=1,
    # seed=0,
    **resco,
)

Using cuda device


In [45]:
# Evaluate the untrained (randomly initialised) agent to get a baseline reward.
csv_dir = os.path.join("outputs","grid4x4",f"test_{TEST_NUM}")
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race.
os.makedirs(csv_dir, exist_ok=True)

csv_path = os.path.join(csv_dir, "untrained.csv")
tb_log_dir = os.path.join("logs","grid4x4","eval_untrained")

# A single evaluation episode, so the reported standard deviation is 0.
mean_reward_untrained, std_reward_untrained = evaluate(model, env, csv_path, tb_log_dir, n_eval_episodes=1)

# NOTE: this notebook relies on a local patch to the installed Stable-Baselines3:
# Change made to SB3 > common > vec_env > vec_monitor.py > VecMonitor
# Line 76 - added extra item to self.venv.step_wait() return
# obs, rewards, dones, infos --> obs, rewards, dones, _, infos

print(mean_reward_untrained)
print(std_reward_untrained)

-245990.18501207232
0.0


## Train and Evaluate Agent

In [46]:
# Training budget in environment timesteps. Left as a float so the value written
# to test_results.csv keeps the same format as before.
TRAIN_TIME = 1e6
# PPO.learn documents `total_timesteps` as an int, so cast explicitly at the call.
model.learn(total_timesteps=int(TRAIN_TIME))

Logging to logs\grid4x4\PPO_3
-----------------------------------
| rollout/           |            |
|    ep_len_mean     | 720        |
|    ep_rew_mean     | -10200.519 |
| time/              |            |
|    fps             | 179        |
|    iterations      | 1          |
|    time_elapsed    | 729        |
|    total_timesteps | 131072     |
-----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 720          |
|    ep_rew_mean          | -10065.127   |
| time/                   |              |
|    fps                  | 174          |
|    iterations           | 2            |
|    time_elapsed         | 1498         |
|    total_timesteps      | 262144       |
| train/                  |              |
|    approx_kl            | 0.0016639231 |
|    clip_fraction        | 0.0159       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.08        |
|    exp

<stable_baselines3.ppo.ppo.PPO at 0x28640f46ca0>

In [47]:
# Score the agent again after training, logging next to this test's PPO run.
csv_path = os.path.join(csv_dir, "trained.csv")
tb_log_dir = os.path.join("logs", "grid4x4", f"PPO_{TEST_NUM}", "eval_trained")

trained_stats = evaluate(model, env, csv_path, tb_log_dir, n_eval_episodes=1)
mean_reward_trained, std_reward_trained = trained_stats

# Mean first, then standard deviation, one value per line.
print(mean_reward_trained, std_reward_trained, sep="\n")

-143693.29015433788
0.0


In [48]:
import shutil

# Relocate the untrained-evaluation log directory into this test's PPO_<n>
# folder, so that it sits alongside eval_trained.
grid_logs = os.path.join("logs", "grid4x4")
original = os.path.join(grid_logs, "eval_untrained")
target = os.path.join(grid_logs, f"PPO_{TEST_NUM}", "eval_untrained")

shutil.move(original, target)

'logs\\grid4x4\\PPO_3\\eval_untrained'

### Save Model

In [49]:
# Persist the trained agent under a file name keyed by the test number.
saved_model_path = os.path.join("saved_models", f"PPO_grid4x4_{TEST_NUM}")
model.save(saved_model_path)

## Process Results

In [50]:
import csv

# One summary row per experiment: network file, test number, reward function
# name, untrained/trained mean reward, training budget and hyperparameter set.
data = [
    env_params["net_file"],
    TEST_NUM,
    env_params["reward_fn"].__name__,
    mean_reward_untrained,
    mean_reward_trained,
    TRAIN_TIME,
    "resco",
]

# Append mode: rows from earlier experiments are kept.
with open("test_results.csv", "a", encoding="UTF8", newline="") as results_file:
    csv.writer(results_file).writerow(data)

## Render Video

In [51]:
# Reuse the training settings, switching on the GUI and "human" render mode.
env_params.update(use_gui=True, render_mode="human")

# Single (non-concatenated) vector env for rendering, wrapped in VecMonitor.
# Maybe add frame-stacking here (before the vec-env conversion)
render_env = VecMonitor(
    ss.pettingzoo_env_to_vec_env_v1(parallel_env(**env_params))
)

In [52]:
import pyautogui

# Load the model saved for THIS test run. The path used to be hard-coded to
# PPO_grid4x4_1.zip, which did not match the TEST_NUM-based frame/video folders.
model = PPO.load(os.path.join("saved_models", f"PPO_grid4x4_{TEST_NUM}.zip"))
obs = render_env.reset()

# Directory that receives one screenshot per step; safe to re-create.
folder_path = os.path.join("renders","grid4x4",f"test_{TEST_NUM}")
os.makedirs(folder_path, exist_ok=True)

# Number of frames = sim_max_time / delta_time of the underlying SUMO env.
# NOTE(review): presumably total simulated seconds over seconds per agent step -- confirm.
sumo_env = render_env.unwrapped.par_env.unwrapped.env
max_time = sumo_env.sim_max_time
delta_time = sumo_env.delta_time
vid_length = round(max_time/delta_time)

try:
    for i in range(vid_length):
        actions, _ = model.predict(obs, deterministic=True)
        obs, rewards, dones, infos = render_env.step(actions)

        # Capture a fixed screen region; the size is hard-coded for the author's
        # display and must be adjusted on other monitors.
        pyautogui.screenshot(
            os.path.join(folder_path,f"img{i}.jpg"),
            region=(0, 0, 2560, 1542)
        )
finally:
    # Always close the rendering env, even if a step or screenshot fails.
    render_env.close()  # clean up

In [53]:
import subprocess

# Build the paths with os.path.join (as everywhere else in this notebook) instead
# of hard-coded Windows backslashes, so the cell also works on other platforms.
frames_pattern = os.path.join("renders", "grid4x4", f"test_{TEST_NUM}", "img%d.jpg")
video_path = os.path.join("videos", f"grid4x4_{TEST_NUM}.mp4")

# ffmpeg does not create the output directory itself.
os.makedirs(os.path.dirname(video_path), exist_ok=True)

# Stitch the numbered screenshots into a video at 5 frames per second,
# overwriting any existing file (-y). The CompletedProcess is the cell output.
subprocess.run([
    "ffmpeg", "-y", "-r", "5", "-i",
    frames_pattern,
    video_path,
])

CompletedProcess(args=['ffmpeg', '-y', '-r', '5', '-i', 'renders\\grid4x4\\test_3\\img%d.jpg', 'videos\\grid4x4_3.mp4'], returncode=0)

## Clean-up

In [54]:
# Release the training env first, then the rendering env.
for open_env in (env, render_env):
    open_env.close()