# Setup and Installations

In [1]:
from typing import Iterator, SupportsIndex

import utils
import visualizations_and_metrics as vm
import env_manager as env_manager
import algo_trainer as algo_trainer

In [2]:
def chain_training(manager: "env_manager.EnvManager", generator: Iterator, algo_agent, running_result: list):
    """Run one training cycle, continuing from the previous cycle when there is one.

    On the first cycle (``running_result`` is empty) the agent is trained exactly
    as it is currently configured. On every later cycle the best trial of the
    most recent result is looked up, the environment is re-initialised with the
    next route file taken from ``generator``, and the agent is rebuilt from that
    trial's config before training again.

    Args:
        manager: Environment manager; ``initialize_env(rou, csv)`` is called on it.
        generator: Iterator yielding two-item ``(rou, csv)`` entries, normally the
            object returned by ``EnvManager.env_generator``.
        algo_agent: Trainer exposing a ``config`` attribute plus ``build_config()``
            and ``train()``.
        running_result: Results of the previous cycles, oldest first. Read only.

    Returns:
        Whatever ``algo_agent.train()`` returns, or ``None`` when a follow-up
        cycle is requested but ``generator`` has no entries left. Callers are
        expected to skip ``None`` results.
    """
    if running_result:
        # Take the best config from the previous training.
        best = running_result[-1].get_best_result("env_runners/episode_reward_max", "max")

        # Move the environment on to the next route file. Running out of route
        # files must not abort the whole chain (and lose the results gathered so
        # far), so exhaustion is reported as "no result" instead of letting
        # StopIteration escape.
        try:
            rou, csv = next(generator)
        except StopIteration:
            return None
        manager.initialize_env(rou, csv)

        # Continue the training with the best config.
        algo_agent.config = best.config
        algo_agent.build_config()

    return algo_agent.train()

def training(num_intersection: int, experiment_type: str, algo_config: str, env_config: str, num_training: SupportsIndex):
    """Train an agent on one intersection for a number of chained cycles.

    Args:
        num_intersection: Intersection number; used to build the
            ``intersection_<n>`` id and the route-list path under ``Nets/``.
        experiment_type: Name such as ``"PPO_SingleAgent"``. The part before the
            first underscore is passed on as the algorithm name, and the presence
            of ``"Multi"`` selects the multi-agent environment.
        algo_config: Path handed to ``ALGOTrainer`` as ``config_path``.
        env_config: Path handed to ``EnvManager`` as its second argument.
        num_training: Number of training cycles to run.

    Returns:
        List with the non-``None`` result of every cycle, in execution order.
    """
    algo_name = experiment_type.split("_")[0]
    sumo_type = "MultiAgent" if "Multi" in experiment_type else "SingleAgent"

    # Set up the environment manager and its route-file generator.
    manager = env_manager.EnvManager(
        f"{sumo_type}Environment",
        env_config,
        intersection_id=f"intersection_{num_intersection}",
    )
    generator = manager.env_generator(
        f"Nets/intersection_{num_intersection}/route_xml_path_intersection_{num_intersection}.txt",
        algo_name=algo_name,
    )

    # The first route file is loaded here; later ones are pulled per cycle.
    first_rou, first_csv = next(generator)
    manager.initialize_env(first_rou, first_csv)

    algo_agent = algo_trainer.ALGOTrainer(
        config_path=algo_config,
        env_manager=manager,
        experiment_type=experiment_type,
    )
    algo_agent.build_config()

    running_result = []
    for _ in range(num_training):
        chain_result = chain_training(
            manager=manager,
            generator=generator,
            algo_agent=algo_agent,
            running_result=running_result,
        )
        if chain_result is None:
            continue
        running_result.append(chain_result)

    return running_result

In [3]:
# Run configuration: everything a reader is expected to tune lives in this cell.
# These names are read by the training, notification and CSV-export cells below.
num_intersection_to_train = 4  # Choose which intersection you want to train

# Choose the experiment_type:
# PPO_SingleAgent | DQN_SingleAgent | DDQN_SingleAgent | PPO_MultiAgent | DQN_MultiAgent | DDQN_MultiAgent
experiment_type = "PPO_SingleAgent"  

# Number of chained training cycles passed to training() as num_training.
num_training_cycles = 1

# Environment configuration file passed to training() as env_config.
env_config_file_path = "env_config.json"

# Algorithm configuration file for PPO experiments.
ppo_config_file_path = "ppo_config.json"

# Algorithm configuration file for DQN experiments.
# NOTE(review): presumably also meant for the DDQN variants - confirm.
dqn_config_file_path = "dqn_config.json"

In [4]:
# Pick the algorithm config that matches experiment_type instead of always
# passing the PPO file, so switching experiment_type above is enough to switch
# algorithms.
# NOTE(review): DQN_* and DDQN_* are assumed to share dqn_config.json, since it
# is the only non-PPO config defined in this notebook - confirm.
algo_config_file_path = ppo_config_file_path if experiment_type.startswith("PPO") else dqn_config_file_path

results = training(
    num_intersection=num_intersection_to_train,
    experiment_type=experiment_type,
    algo_config=algo_config_file_path,
    env_config=env_config_file_path,
    num_training=num_training_cycles,
)



0,1
Current time:,2024-07-29 22:59:42
Running for:,00:01:59.28
Memory:,11.7/16.0 GiB

Trial name,status,loc,clip_param,entropy_coeff,gamma,lambda_,lr,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,num_healthy_workers,num_in_flight_async_ sample_reqs,num_remote_worker_re starts
PPO_PPO_d171f_00000,TERMINATED,127.0.0.1:84591,0.280609,0.0186549,0.925649,0.925862,0.00136161,6,128,1024,4,107.513,4096,1,0,0




[36m(RolloutWorker pid=84594)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=84594)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      
[36m(RolloutWorker pid=84599)[0m  Retrying in 1 seconds[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(RolloutWorker pid=84599)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 0 ACT 0 BUF 0)                      [32m [repeated 2x across cluster][0m


[36m(PPO pid=84591)[0m Install gputil for GPU system monitoring.


Step #3600.00 (1ms ~= 1000.00*RT, ~180000.00UPS, TraCI: 42ms, vehicles TOT 4509 ACT 180 BU, vehicles TOT 1 ACT 1 BUF 0)      
[36m(RolloutWorker pid=84594)[0m  Retrying in 1 seconds[32m [repeated 3x across cluster][0m
[36m(PPO pid=84591)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 4ms, vehicles TOT 0 ACT 0 BUF 0)                      
[36m(RolloutWorker pid=84599)[0m  Retrying in 1 seconds
Step #3600.00 (1ms ~= 1000.00*RT, ~137000.00UPS, TraCI: 35ms, vehicles TOT 4465 ACT 137 BU ACT 3 BUF 0)                      
[36m(RolloutWorker pid=84599)[0m  Retrying in 1 seconds


[36m(PPO pid=84591)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_4/saved_agent/PPO_2024-07-29_22-57-43/PPO_PPO_d171f_00000_0_clip_param=0.2806,entropy_coeff=0.0187,gamma=0.9256,lambda=0.9259,lr=0.0014,num_sgd_iter=6,sgd_minibatch_siz_2024-07-29_22-57-43/checkpoint_000000)


Step #3600.00 (1ms ~= 1000.00*RT, ~185000.00UPS, TraCI: 39ms, vehicles TOT 4613 ACT 185 BU, vehicles TOT 1 ACT 1 BUF 0)      
[36m(RolloutWorker pid=84594)[0m  Retrying in 1 seconds
Step #3600.00 (1ms ~= 1000.00*RT, ~170000.00UPS, TraCI: 44ms, vehicles TOT 4478 ACT 170 BU ACT 1 BUF 0)                      
[36m(RolloutWorker pid=84599)[0m  Retrying in 1 seconds


[36m(PPO pid=84591)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_4/saved_agent/PPO_2024-07-29_22-57-43/PPO_PPO_d171f_00000_0_clip_param=0.2806,entropy_coeff=0.0187,gamma=0.9256,lambda=0.9259,lr=0.0014,num_sgd_iter=6,sgd_minibatch_siz_2024-07-29_22-57-43/checkpoint_000001)


Step #3600.00 (1ms ~= 1000.00*RT, ~143000.00UPS, TraCI: 25ms, vehicles TOT 4528 ACT 143 BUACT 2 BUF 0)                       
[36m(RolloutWorker pid=84594)[0m  Retrying in 1 seconds
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 40ms, vehicles TOT 4456 ACT 128 BUF 497)            ACT 1 BUF 0)                       
[36m(RolloutWorker pid=84599)[0m  Retrying in 1 seconds
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 42ms, vehicles TOT 4454 ACT 170 BUF 601)            ACT 2 BUF 0)                       


[36m(PPO pid=84591)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_4/saved_agent/PPO_2024-07-29_22-57-43/PPO_PPO_d171f_00000_0_clip_param=0.2806,entropy_coeff=0.0187,gamma=0.9256,lambda=0.9259,lr=0.0014,num_sgd_iter=6,sgd_minibatch_siz_2024-07-29_22-57-43/checkpoint_000002)


Step #3600.00 (1ms ~= 1000.00*RT, ~127000.00UPS, TraCI: 40ms, vehicles TOT 4462 ACT 127 BU ACT 2 BUF 0)                      
[36m(RolloutWorker pid=84594)[0m  Retrying in 1 seconds[32m [repeated 2x across cluster][0m
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 41ms, vehicles TOT 4611 ACT 130 BUF 467)            ACT 0 BUF 0)                       


[36m(PPO pid=84591)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_4/saved_agent/PPO_2024-07-29_22-57-43/PPO_PPO_d171f_00000_0_clip_param=0.2806,entropy_coeff=0.0187,gamma=0.9256,lambda=0.9259,lr=0.0014,num_sgd_iter=6,sgd_minibatch_siz_2024-07-29_22-57-43/checkpoint_000003)
2024-07-29 22:59:42,656	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_4/saved_agent/PPO_2024-07-29_22-57-43' in 0.0114s.


Step #2485.00 (0ms ?*RT. ?UPS, TraCI: 857ms, vehicles TOT 3151 ACT 183 BUF 328)            vehicles TOT 3 ACT 3 BUF 0)       
[36m(RolloutWorker pid=84599)[0m  Retrying in 1 seconds


2024-07-29 22:59:52,682	INFO tune.py:1041 -- Total run time: 129.34 seconds (119.27 seconds for the tuning loop).
[36m(RolloutWorker pid=84599)[0m Error: tcpip::Socket::recvAndCheck @ recv: Connection reset by peer
[36m(RolloutWorker pid=84599)[0m Quitting (on error).


Step #2900.00 (0ms ?*RT. ?UPS, TraCI: 15ms, vehicles TOT 3797 ACT 124 BUF 333)            ACT 2 BUF 0)                       
[36m(RolloutWorker pid=84670)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=84670)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      
[36m(RolloutWorker pid=84676)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=84679)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=84676)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      
[36m(RolloutWorker pid=84679)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      
[36m(RolloutWorker pid=84684)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=84684)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=84684)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~183000.00UPS, TraCI: 36ms, vehicles TOT 4637 ACT 

In [5]:
from ray.rllib.algorithms.algorithm import Algorithm

# Number of evaluation episodes to run; define as many as you want.
NUM_EVALUATION_EPISODES = 2

# Evaluate the agent produced by the last training cycle.
result = results[-1]

best_result = result.get_best_result("env_runners/episode_reward_max", "max")
checkpoint_path = best_result.checkpoint.path
print(f'Best checkpoint path: {checkpoint_path}')

# Load the Algorithm from the checkpoint exactly once. Restoring it a second
# time only to re-assign the config built a whole extra algorithm instance
# (the recorded output shows its start-up messages twice) that was never
# stopped afterwards.
algo = Algorithm.from_checkpoint(checkpoint_path)

# Override the evaluation length on a copy of the restored configuration and
# attach that copy to the restored algorithm.
new_config = algo.config.copy()
new_config["evaluation_duration"] = NUM_EVALUATION_EPISODES
algo.config = new_config

# Evaluate the Algorithm
eval_results = algo.evaluate()
print(eval_results)

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))


Best checkpoint path: /Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_4/saved_agent/PPO_2024-07-29_22-57-43/PPO_PPO_d171f_00000_0_clip_param=0.2806,entropy_coeff=0.0187,gamma=0.9256,lambda=0.9259,lr=0.0014,num_sgd_iter=6,sgd_minibatch_siz_2024-07-29_22-57-43/checkpoint_000003
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      
 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      


`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))


 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      
 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      
{'env_runners': {'episode_reward_max': -16.73, 'episode_reward_min': -21.12, 'episode_reward_mean': -18.925, 'episode_len_mean': 720.0, 'episode_media': {}, 'episodes_timesteps_total': 1440, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {'4_tls_total_waiting_time_mean': 310.0, '4_tls_total_waiting_time_min': 125.0, '4_tls_total_waiting_time_max': 495.0, '4_tls_lane_count_mean': 15.0, '4_tls_lane_count_min': 15, '4_tls_lane_count_max': 15, '4_tls_episode_total_waiting_time_mean': 310.0, '4_tls_episode_total_waiting_time_min': 125.0, '4_tls_episode_total_waiting_time_max': 495.0}, 'hist_stats': {'episode_reward': [-21.12, -16.73], 'episode_lengths': [720, 720]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.6401943707210465, 'mean_inference_ms': 0.4818916155117744, 'mean_action_processing_ms': 0.0489270800430928, 'mean_env_wait_ms': 20.037911316559

In [7]:
# Notify when done
# Import datetime directly rather than reaching for it through env_manager's
# namespace, which only works as long as that module happens to import it.
# NOTE(review): assumes env_manager.datetime was the standard-library
# datetime.datetime class - confirm.
from datetime import datetime

# The timestamp is taken when this cell runs, i.e. when training has finished.
experiment_date = datetime.now().strftime("%m.%d-%H:%M:%S")
message = f'Training for intersection {num_intersection_to_train} with {experiment_type} from {experiment_date} is done!'  # Replace with your message
recipient = 'matand94@icloud.com'  # Replace with your iCloud email
utils.send_imessage(message, recipient)

In [None]:
# Save the results to a CSV file
# Passes every training result plus the intersection number and experiment type
# to the project helper.
# NOTE(review): cycle_index is hard-coded to 2 while num_training_cycles is 1 in
# the configuration cell - confirm this is the intended cycle.
utils.save_custom_metrics_to_csv(results, num_intersection_to_train, experiment_type, cycle_index=2)
