# Grid4x4 - PettingZoo + RLlib

In [1]:
import os
import ray

In [2]:
import random
import numpy as np
import torch

# One seed shared by Python's `random`, NumPy and PyTorch (and reused later as the
# SUMO seed) so that repeated runs of the notebook start from the same RNG state.
SEED = 23423  # default SUMO seed no.
for seed_fn in (random.seed, np.random.seed):
    seed_fn(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x1be27298ed0>

## Environment Setup

In [3]:
import supersuit as ss

from helper_functions import make_parallel_env
from observation import Grid4x4ObservationFunction
from reward_functions import combined_reward

def env_creator(args):
    """Create the frame-stacked Grid4x4 parallel environment.

    Args:
        args: Configuration object passed in by the caller. It is accepted for
            interface compatibility but not read by this function.

    Returns:
        The environment produced by ``make_parallel_env`` for the grid4x4
        network/route files, wrapped with ``ss.frame_stack_v1`` using a stack
        size of 3.
    """
    scenario_dir = os.path.join("nets", "grid4x4")
    base_env = make_parallel_env(
        net_file=os.path.join(scenario_dir, "grid4x4.net.xml"),
        route_file=os.path.join(scenario_dir, "grid4x4_1.rou.xml"),
        num_seconds=3600,
        reward_fn=combined_reward,
        sumo_seed=SEED,
        # Custom observation class is intentionally left disabled:
        # observation_class=Grid4x4ObservationFunction,
    )
    return ss.frame_stack_v1(base_env, 3)

## Training the RL Agent

In [4]:
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv  # RLlib-PZ interface
from ray.tune.registry import register_env

# `ignore_reinit_error=True` keeps this cell re-runnable: a bare `ray.init()` raises
# when Ray is already running in the current kernel.
ray.init(ignore_reinit_error=True)

# Name under which RLlib looks up the environment; the creator wraps the
# PettingZoo parallel env from `env_creator` in RLlib's multi-agent adapter.
env_name = "grid4x4"
register_env(env_name, lambda config: ParallelPettingZooEnv(env_creator(config)))

2023-05-29 12:14:59,049	INFO worker.py:1625 -- Started a local Ray instance.


In [5]:
from ray.rllib.algorithms.ppo import PPOConfig

# From https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/ppo/atari-ppo.yaml

# PPO configuration for the registered grid4x4 environment.
# The builder calls below are independent setters, so they are grouped as
# "where/how to run" first, then sampling, then optimisation.
config = (
    PPOConfig()
    .framework(framework="torch")
    .environment(env=env_name)
    # GPU count is taken from the RLLIB_NUM_GPUS environment variable (0 if unset).
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
    .debugging(seed=SEED, log_level="WARN")
    .rollouts(
        # 4 workers x 128-step fragments = 512 steps, matching train_batch_size.
        num_rollout_workers=4,
        rollout_fragment_length=128,
        # Disabled options: num_envs_per_worker=5, batch_mode="truncate_episodes"
    )
    .training(
        # Batch / SGD schedule
        train_batch_size=512,
        sgd_minibatch_size=64,
        num_sgd_iter=10,
        # Optimiser
        lr=2e-5,
        grad_clip=None,
        # Advantage estimation
        gamma=0.99,
        use_gae=True,
        lambda_=0.9,
        # Loss terms
        clip_param=0.4,
        vf_loss_coeff=0.25,
        entropy_coeff=0.1,
        # Disabled alternative values kept for reference:
        #   lambda_=0.95, kl_coeff=0.5, clip_param=0.1, vf_clip_param=10.0,
        #   entropy_coeff=0.01, train_batch_size=5000, sgd_minibatch_size=500,
        #   num_sgd_iter=10
    )
    # Evaluation settings are currently disabled:
    # .evaluation(
    #     evaluation_duration=3600,
    #     evaluation_duration_unit="timesteps",
    #     evaluation_num_workers=1,
    # )
)

In [6]:
from time import time
from ray import tune

# Unix timestamp used to give every launch its own results directory.
current_time = int(time())

# Keep the returned ExperimentAnalysis so later cells can look up trial results
# and checkpoints instead of discarding it.
analysis = tune.run(
    "PPO",
    name="PPO",
    stop={"timesteps_total": 2e4},
    checkpoint_freq=10,
    # Always save the final weights, even when the stopping iteration is not a
    # multiple of `checkpoint_freq`.
    checkpoint_at_end=True,
    local_dir=os.path.join("ray_results", env_name, str(current_time)),
    config=config.to_dict(),
)
analysis



0,1
Current time:,2023-05-29 12:37:09
Running for:,00:22:05.97
Memory:,13.3/31.7 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_grid4x4_0eff4_00000,RUNNING,127.0.0.1:19016,34,1286.96,17408,-157617,-155535,-160523,720


[2m[36m(PPO pid=19016)[0m 2023-05-29 12:15:08,491	INFO algorithm.py:527 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=23028)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 66ms, vehicles TOT 0 ACT 0 BUF 0)                     




Step #100.00 (0ms ?*RT. ?UPS, TraCI: 110ms, vehicles TOT 21 ACT 21 BUF 0)                 OT 1 ACT 1 BUF 0)                     
[2m[36m(RolloutWorker pid=7864)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 191ms, vehicles TOT 0 ACT 0 BUF 0)                    [32m [repeated 11x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m




Trial name,agent_timesteps_total,connector_metrics,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_total,training_iteration,trial_id
PPO_grid4x4_0eff4_00000,8192,{},"{'num_env_steps_sampled': 512, 'num_env_steps_trained': 512, 'num_agent_steps_sampled': 8192, 'num_agent_steps_trained': 8192}",{},2023-05-29_12-15-50,False,,{},,,,0,0,JM-M16,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 0.36689417099696586, 'cur_kl_coeff': 0.19999999999999998, 'cur_lr': 2e-05, 'total_loss': 2.270295824483037, 'policy_loss': -0.0013563843014708255, 'vf_loss': 9.91817272901535, 'vf_explained_var': -4.936009645462036e-09, 'kl': 0.0001798126565526914, 'entropy': 2.0792693788185717, 'entropy_coeff': 0.09999999999999999}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 64.0, 'num_grad_updates_lifetime': 640.5, 'diff_num_grad_updates_vs_sampler_policy': 639.5}}, 'num_env_steps_sampled': 512, 'num_env_steps_trained': 512, 'num_agent_steps_sampled': 8192, 'num_agent_steps_trained': 8192}",1,127.0.0.1,8192,8192,512,512,512,512,0,4,0,0,512,"{'cpu_util_percent': 2.3711111111111114, 'ram_util_percent': 43.9377777777778, 'gpu_util_percent0': 0.352, 'vram_util_percent0': 0.00567626953125}",19016,{},{},{},{},"{'episode_reward_max': nan, 'episode_reward_min': nan, 'episode_reward_mean': nan, 'episode_len_mean': nan, 'episode_media': {}, 'episodes_this_iter': 0, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [], 'episode_lengths': []}, 'sampler_perf': {}, 'num_faulty_episodes': 0, 'connector_metrics': {}}",34.4827,34.4827,34.4827,"{'training_iteration_time_ms': 34476.684, 'sample_time_ms': 29336.455, 'load_time_ms': 0.0, 'load_throughput': 0.0, 'learn_time_ms': 5136.341, 'learn_throughput': 99.682, 'synch_weights_time_ms': 3.887}",1685358950,512,1,0eff4_00000




Step #1900.00 (1ms ~= 1000.00*RT, ~209000.00UPS, TraCI: 181ms, vehicles TOT 824 ACT 209 BU108ms, vehicles TOT 1 ACT 1 BUF 0)     




[2m[36m(RolloutWorker pid=23028)[0m Step #2000.00 (1ms ~= 1000.00*RT, ~192000.00UPS, TraCI: 180ms, vehicles TOT 907 ACT 192 BU
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 233ms, vehicles TOT 1473 ACT 34 BUF 0)              I: 173ms, vehicles TOT 983 ACT 196 BUF 
Step #3600.00 (1ms ~= 1000.00*RT, ~35000.00UPS, TraCI: 235ms, vehicles TOT 1473 ACT 35 BUF: 118ms, vehicles TOT 45 ACT 38 BUF 0)
Step #3600.00 (1ms ~= 1000.00*RT, ~34000.00UPS, TraCI: 222ms, vehicles TOT 1473 ACT 34 BUF08ms, vehicles TOT 1 ACT 1 BUF 0)     
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 208ms, vehicles TOT 1473 ACT 34 BUF 0)              07ms, vehicles TOT 1 ACT 1 BUF 0)     




Step #2000.00 (1ms ~= 1000.00*RT, ~216000.00UPS, TraCI: 225ms, vehicles TOT 907 ACT 216 BUOT 1 ACT 1 BUF 0)                     




[2m[36m(RolloutWorker pid=6700)[0m Step #2100.00 (1ms ~= 1000.00*RT, ~194000.00UPS, TraCI: 231ms, vehicles TOT 983 ACT 194 BU




Step #2400.00 (1ms ~= 1000.00*RT, ~118000.00UPS, TraCI: 220ms, vehicles TOT 1136 ACT 118 BTOT 1 ACT 1 BUF 0)                     
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 137ms, vehicles TOT 1473 ACT 38 BUF 0)              CI: 206ms, vehicles TOT 1175 ACT 95 BUF
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 137ms, vehicles TOT 1473 ACT 32 BUF 0)              CI: 189ms, vehicles TOT 1042 ACT 153 B
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 138ms, vehicles TOT 1473 ACT 32 BUF 0)              OT 1 ACT 1 BUF 0)                     
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 130ms, vehicles TOT 1473 ACT 38 BUF 0)              OT 1 ACT 1 BUF 0)                     




Step #600.00 (1ms ~= 1000.00*RT, ~54000.00UPS, TraCI: 121ms, vehicles TOT 141 ACT 54 BUF 0OT 1 ACT 1 BUF 0)                      




Step #1600.00 (1ms ~= 1000.00*RT, ~130000.00UPS, TraCI: 152ms, vehicles TOT 551 ACT 130 BUT 1 ACT 1 BUF 0)                      




Step #1800.00 (1ms ~= 1000.00*RT, ~177000.00UPS, TraCI: 157ms, vehicles TOT 708 ACT 177 BUOT 1 ACT 1 BUF 0)                     
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 199ms, vehicles TOT 1473 ACT 34 BUF 0)              T 1 ACT 1 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~35000.00UPS, TraCI: 197ms, vehicles TOT 1473 ACT 35 BUFs TOT 164 ACT 56 BUF 0)                
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 189ms, vehicles TOT 1473 ACT 35 BUF 0)              I: 359ms, vehicles TOT 824 ACT 216 BUF
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 185ms, vehicles TOT 1473 ACT 34 BUF 0)              CI: 151ms, vehicles TOT 623 ACT 138 BU




Step #1300.00 (0ms ?*RT. ?UPS, TraCI: 123ms, vehicles TOT 386 ACT 94 BUF 0)               OT 1 ACT 1 BUF 0)                      




Step #1900.00 (1ms ~= 1000.00*RT, ~229000.00UPS, TraCI: 164ms, vehicles TOT 824 ACT 229 BUT 1 ACT 1 BUF 0)                      




Step #3200.00 (0ms ?*RT. ?UPS, TraCI: 107ms, vehicles TOT 1401 ACT 51 BUF 0)              T 1 ACT 1 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 162ms, vehicles TOT 1473 ACT 35 BUF 0)              CI: 130ms, vehicles TOT 432 ACT 99 BUF 
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 163ms, vehicles TOT 1473 ACT 38 BUF 0)              CI: 273ms, vehicles TOT 907 ACT 217 BU
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 156ms, vehicles TOT 1473 ACT 36 BUF 0)              s TOT 1419 ACT 40 BUF 0)              
Step #3600.00 (1ms ~= 1000.00*RT, ~38000.00UPS, TraCI: 148ms, vehicles TOT 1473 ACT 38 BUFT 1 ACT 1 BUF 0)                      


Step #1700.00 (2ms ~= 500.00*RT, ~76000.00UPS, TraCI: 141ms, vehicles TOT 623 ACT 152 BUF T 1 ACT 1 BUF 0)                      




Step #2600.00 (1ms ~= 1000.00*RT, ~92000.00UPS, TraCI: 122ms, vehicles TOT 1220 ACT 92 BUFI: 116ms, vehicles TOT 299 ACT 74 BUF 




Step #2700.00 (0ms ?*RT. ?UPS, TraCI: 129ms, vehicles TOT 1264 ACT 102 BUF 0)             OT 1 ACT 1 BUF 0)                      




Step #3200.00 (0ms ?*RT. ?UPS, TraCI: 106ms, vehicles TOT 1401 ACT 52 BUF 0)              CI: 145ms, vehicles TOT 708 ACT 169 BU
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 118ms, vehicles TOT 1473 ACT 43 BUF 0)              es TOT 1297 ACT 76 BUF 0)              
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 119ms, vehicles TOT 1473 ACT 34 BUF 0)              T 1 ACT 1 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 120ms, vehicles TOT 1473 ACT 33 BUF 0)              s TOT 1264 ACT 90 BUF 0)              
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 122ms, vehicles TOT 1473 ACT 34 BUF 0)              I: 109ms, vehicles TOT 1419 ACT 44 BUF




Step #1700.00 (2ms ~= 500.00*RT, ~73500.00UPS, TraCI: 145ms, vehicles TOT 623 ACT 147 BUF T 1 ACT 1 BUF 0)                      




Step #2200.00 (1ms ~= 1000.00*RT, ~167000.00UPS, TraCI: 151ms, vehicles TOT 1042 ACT 167 BOT 1 ACT 1 BUF 0)                      




Step #2300.00 (1ms ~= 1000.00*RT, ~134000.00UPS, TraCI: 139ms, vehicles TOT 1086 ACT 134 BT 1 ACT 1 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 192ms, vehicles TOT 1473 ACT 40 BUF 0)              aCI: 138ms, vehicles TOT 1086 ACT 133 B
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 192ms, vehicles TOT 1473 ACT 40 BUF 0)              CI: 156ms, vehicles TOT 708 ACT 180 BU
Step #3600.00 (1ms ~= 1000.00*RT, ~34000.00UPS, TraCI: 191ms, vehicles TOT 1473 ACT 34 BUFCI: 134ms, vehicles TOT 1136 ACT 115 B
Step #3600.00 (1ms ~= 1000.00*RT, ~35000.00UPS, TraCI: 176ms, vehicles TOT 1473 ACT 35 BUFT 1 ACT 1 BUF 0)                      




Step #1500.00 (1ms ~= 1000.00*RT, ~114000.00UPS, TraCI: 129ms, vehicles TOT 483 ACT 114 BUT 1 ACT 1 BUF 0)                      




Step #1900.00 (1ms ~= 1000.00*RT, ~230000.00UPS, TraCI: 170ms, vehicles TOT 824 ACT 230 BUT 1 ACT 1 BUF 0)                      




Step #2400.00 (0ms ?*RT. ?UPS, TraCI: 130ms, vehicles TOT 1136 ACT 118 BUF 0)             CI: 134ms, vehicles TOT 551 ACT 133 BU
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 149ms, vehicles TOT 1473 ACT 36 BUF 0)              OT 1 ACT 1 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 153ms, vehicles TOT 1473 ACT 36 BUF 0)              T 1 ACT 1 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~35000.00UPS, TraCI: 141ms, vehicles TOT 1473 ACT 35 BUFCI: 126ms, vehicles TOT 1175 ACT 101 B
Step #3600.00 (1ms ~= 1000.00*RT, ~39000.00UPS, TraCI: 138ms, vehicles TOT 1473 ACT 39 BUFI: 163ms, vehicles TOT 907 ACT 216 BUF


2023-05-29 12:40:05,537	INFO tune.py:945 -- Total run time: 1502.44 seconds (1501.42 seconds for the tuning loop).


Step #405.00 (1ms ~= 1000.00*RT, ~44000.00UPS, TraCI: 5887ms, vehicles TOT 85 ACT 44 BUF 0OT 1 ACT 1 BUF 0)                      
Step #405.00 (0ms ?*RT. ?UPS, TraCI: 5768ms, vehicles TOT 85 ACT 40 BUF 0)                T 1 ACT 1 BUF 0)                      
Step #405.00 (0ms ?*RT. ?UPS, TraCI: 5748ms, vehicles TOT 85 ACT 38 BUF 0)                T 1 ACT 1 BUF 0)                      
Step #405.00 (0ms ?*RT. ?UPS, TraCI: 5897ms, vehicles TOT 85 ACT 45 BUF 0)                T 1 ACT 1 BUF 0)                      


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x1be35983c10>

In [7]:
# Stop the Ray instance that was started with ray.init() earlier in the notebook.
ray.shutdown()

## Watching the Trained Agent

## RLlib Docs

In [None]:
# Ray was shut down in the previous cell. Environment registrations are stored in
# the running Ray instance, so start Ray again (if needed) and register the
# environment once more before building; otherwise "grid4x4" cannot be resolved.
if not ray.is_initialized():
    ray.init()
register_env(env_name, lambda env_config: ParallelPettingZooEnv(env_creator(env_config)))

# NOTE(review): this builds a new PPO instance from `config`; it does not load the
# weights produced by the tune.run() experiment above. To watch the trained agent,
# restore one of that run's checkpoints into `algo` first — confirm intended usage.
algo = config.build()

In [None]:
# The installed Ray copy was edited locally: ParallelPettingZooEnv.reset() was
# changed at lines 202-206 of
# C:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\env\wrappers\pettingzoo_env.py
# NOTE(review): the patch itself is not shown in this notebook, so on another
# machine this cell may behave differently — confirm whether it is still required.
# NOTE(review): the `.evaluation(...)` section of `config` is commented out; verify
# that evaluate() can run without dedicated evaluation workers in this Ray version.

algo.evaluate()

In [None]:
from ray.tune.logger import pretty_print

# Run a single training iteration and print its result dictionary in readable form.
result = algo.train()
print(pretty_print(result))

In [None]:
# Evaluate again after the training iteration above, for comparison with the
# evaluation run before it.
algo.evaluate()