# Setup and Installations

In [1]:
from datetime import datetime
from typing import Iterator, SupportsIndex

# Project-local modules (original relative import order preserved)
import utils
import env_manager as env_manager
import algo_trainer as algo_trainer

In [2]:
def chain_training(manager: "env_manager.EnvManager", generator: Iterator, algo_agent, running_result_grid: list):
    """Run one training round, chaining it onto the previous round when there is one.

    When ``running_result_grid`` is empty the agent is trained exactly as it is
    currently configured. Otherwise the best result of the most recent entry is
    selected (maximum ``env_runners/episode_reward_mean``), the environment is
    re-initialized with the next ``(rou, csv)`` pair from ``generator``, and the
    agent's config is replaced by the best config before training.

    Args:
        manager: Environment manager; only ``initialize_env(rou, csv)`` is called on it.
        generator: Iterator yielding ``(rou, csv)`` pairs (the next route file and
            its companion csv value).
        algo_agent: Trainer object exposing ``config``, ``from_dict``,
            ``build_config`` and ``train``.
        running_result_grid: Results of the previous rounds, oldest first. It is
            only read here, never modified.

    Returns:
        Whatever ``algo_agent.train()`` returns, or ``None`` when a chained round
        was requested but ``generator`` has no more items. ``training`` already
        skips ``None`` results, so earlier results are kept instead of being lost
        to an unhandled ``StopIteration``.
    """
    if running_result_grid:
        # take the best config from the previous training
        best = running_result_grid[-1].get_best_result(metric="env_runners/episode_reward_mean", mode="max")

        # Initialize the environment manager with new route file
        try:
            rou, csv = next(generator)
        except StopIteration:
            # Nothing left to chain onto: report it and let the caller carry on.
            print("No more route files available from the generator; skipping this training round.")
            return None
        manager.initialize_env(rou, csv)

        # continue the training with the best config
        algo_agent.config = algo_agent.from_dict(best.config)
        # NOTE(review): meaning of flag=True is defined by the trainer — confirm.
        algo_agent.build_config(flag=True)

    return algo_agent.train()

def training(num_intersection: int, experiment_type: str, algo_config: str, env_config: str, num_training: SupportsIndex):
    """Train an agent on one intersection for ``num_training`` chained rounds.

    Args:
        num_intersection: Intersection number; used to build the
            ``intersection_<n>`` id and the route-list path under ``Nets/``.
        experiment_type: Experiment name such as ``"DQN_SingleAgent"``. The text
            before the first underscore is passed on as the algorithm name, and a
            ``"Multi"`` substring selects the multi-agent environment.
        algo_config: Path of the algorithm config file handed to the trainer.
        env_config: Path of the environment config file handed to the manager.
        num_training: Number of rounds to run (passed to ``range``).

    Returns:
        List with the result of every round whose ``chain_training`` call did not
        return ``None``, in execution order.
    """
    # A "Multi" substring anywhere in the experiment type picks the multi-agent env.
    sumo_type = "MultiAgent" if "Multi" in experiment_type else "SingleAgent"
    algo_name = experiment_type.split("_")[0]

    # Initialize the environment manager
    manager = env_manager.EnvManager(
        f"{sumo_type}Environment",
        env_config,
        intersection_id=f"intersection_{num_intersection}",
    )
    generator = manager.env_generator(
        f"Nets/intersection_{num_intersection}/route_xml_path_intersection_{num_intersection}.txt",
        algo_name=algo_name,
    )

    # The first (rou, csv) pair sets up the environment before the trainer is built
    first_rou, first_csv = next(generator)
    manager.initialize_env(first_rou, first_csv)

    algo_agent = algo_trainer.ALGOTrainer(config_path=algo_config, env_manager=manager, experiment_type=experiment_type)
    algo_agent.build_config()

    collected_results = []
    for _ in range(num_training):
        round_result = chain_training(
            manager=manager,
            generator=generator,
            algo_agent=algo_agent,
            running_result_grid=collected_results,
        )
        if round_result is None:
            continue
        collected_results.append(round_result)

    return collected_results

In [4]:
# ---- Experiment selection ----

# Number of the intersection to train on
num_intersection_to_train = 1

# Experiment type, one of:
#   PPO_SingleAgent | DQN_SingleAgent | DDQN_SingleAgent
#   PPO_MultiAgent  | DQN_MultiAgent  | DDQN_MultiAgent
experiment_type = "DQN_SingleAgent"

# Number of training cycles to run
num_training_cycles = 1

# ---- Configuration files ----

# Algorithm configs
ppo_config_file_path = "Config/ppo_config.json"
dqn_config_file_path = "Config/dqn_config.json"

# Environment config
env_config_file_path = "Config/env_config.json"

In [5]:
# Pick the algorithm config that matches the chosen experiment type, so switching
# experiment_type to a PPO variant does not silently train with the DQN config.
# NOTE(review): DDQN experiments are assumed to share the DQN config file — confirm.
algo_config_file_path = ppo_config_file_path if experiment_type.startswith("PPO") else dqn_config_file_path

# Keep the returned list of per-round results: the evaluation cell further down
# reads `results[-1]`, which is undefined on a fresh kernel if this value is dropped.
results = training(
    num_intersection=num_intersection_to_train,
    experiment_type=experiment_type,
    algo_config=algo_config_file_path,
    env_config=env_config_file_path,
    num_training=num_training_cycles,
)

print(f"Finished training for intersection: {num_intersection_to_train} with {num_training_cycles} training rounds")



0,1
Current time:,2024-08-11 12:56:59
Running for:,00:01:43.44
Memory:,12.2/16.0 GiB

Trial name,status,loc,adam_epsilon,gamma,hiddens,lr,n_step,target_network_update_freq,train_batch_size,iter,total time (s),ts,num_healthy_workers,num_in_flight_async_sample_reqs,num_remote_worker_restarts
DQN_DQN_cf0fb_00000,TERMINATED,127.0.0.1:29860,4.08338e-08,0.982825,"[32, 32]",0.000353671,5,500,32,3,85.0144,2160,1,0,0
DQN_DQN_cf0fb_00001,TERMINATED,127.0.0.1:29861,1.86674e-07,0.907901,"[32, 32]",0.000539771,3,500,16,3,92.7497,2160,1,0,0
DQN_DQN_cf0fb_00002,TERMINATED,127.0.0.1:29862,3.53537e-10,0.933803,"[128, 128]",0.00345357,3,500,32,3,94.6659,2160,1,0,0




[36m(RolloutWorker pid=29863)[0m  Retrying in 1 seconds
[36m(RolloutWorker pid=29863)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 0 ACT 0 BUF 0)                      


[36m(DQN pid=29862)[0m Install gputil for GPU system monitoring.


Step #3600.00 (3ms ~= 333.33*RT, ~55000.00UPS, TraCI: 27ms, vehicles TOT 4065 ACT 165 BUF , vehicles TOT 4 ACT 4 BUF 8)      
[36m(RolloutWorker pid=29864)[0m  Retrying in 1 seconds[32m [repeated 9x across cluster][0m
[36m(DQN pid=29860)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      [32m [repeated 5x across cluster][0m


[36m(DQN pid=29860)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00000_0_adam_epsilon=0.0000,gamma=0.9828,hiddens=32_32,lr=0.0004,n_step=5,target_network_update_freq=500,train_batch_2024-08-11_12-55-15/checkpoint_000000)
[36m(DQN pid=29861)[0m Install gputil for GPU system monitoring.[32m [repeated 2x across cluster][0m


Step #3600.00 (1ms ~= 1000.00*RT, ~254000.00UPS, TraCI: 42ms, vehicles TOT 3805 ACT 254 BU, vehicles TOT 4 ACT 4 BUF 8)      


[36m(DQN pid=29862)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00002_2_adam_epsilon=0.0000,gamma=0.9338,hiddens=128_128,lr=0.0035,n_step=3,target_network_update_freq=500,train_bat_2024-08-11_12-55-15/checkpoint_000000)


Step #3600.00 (1ms ~= 1000.00*RT, ~241000.00UPS, TraCI: 41ms, vehicles TOT 3652 ACT 241 BU ACT 4 BUF 8)                      
[36m(RolloutWorker pid=29865)[0m  Retrying in 1 seconds[32m [repeated 2x across cluster][0m


[36m(DQN pid=29861)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00001_1_adam_epsilon=0.0000,gamma=0.9079,hiddens=32_32,lr=0.0005,n_step=3,target_network_update_freq=500,train_batch_2024-08-11_12-55-15/checkpoint_000000)


Step #3600.00 (1ms ~= 1000.00*RT, ~245000.00UPS, TraCI: 25ms, vehicles TOT 3950 ACT 245 BU, vehicles TOT 4 ACT 4 BUF 8)      
[36m(RolloutWorker pid=29864)[0m  Retrying in 1 seconds


[36m(DQN pid=29860)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00000_0_adam_epsilon=0.0000,gamma=0.9828,hiddens=32_32,lr=0.0004,n_step=5,target_network_update_freq=500,train_batch_2024-08-11_12-55-15/checkpoint_000001)


Step #3600.00 (1ms ~= 1000.00*RT, ~249000.00UPS, TraCI: 29ms, vehicles TOT 3835 ACT 249 BU, vehicles TOT 4 ACT 4 BUF 8)      
[36m(RolloutWorker pid=29863)[0m  Retrying in 1 seconds
Step #3600.00 (1ms ~= 1000.00*RT, ~238000.00UPS, TraCI: 26ms, vehicles TOT 3819 ACT 238 BU ACT 4 BUF 8)                      


[36m(DQN pid=29862)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00002_2_adam_epsilon=0.0000,gamma=0.9338,hiddens=128_128,lr=0.0035,n_step=3,target_network_update_freq=500,train_bat_2024-08-11_12-55-15/checkpoint_000001)


Step #3600.00 (1ms ~= 1000.00*RT, ~184000.00UPS, TraCI: 24ms, vehicles TOT 4206 ACT 184 BU, vehicles TOT 4 ACT 4 BUF 8)      
[36m(RolloutWorker pid=29864)[0m  Retrying in 1 seconds[32m [repeated 2x across cluster][0m


[36m(DQN pid=29860)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00000_0_adam_epsilon=0.0000,gamma=0.9828,hiddens=32_32,lr=0.0004,n_step=5,target_network_update_freq=500,train_batch_2024-08-11_12-55-15/checkpoint_000002)[32m [repeated 2x across cluster][0m


Step #5.00 (0ms ?*RT. ?UPS, TraCI: 582ms, vehicles TOT 8 ACT 8 BUF 4)                     , vehicles TOT 4 ACT 4 BUF 8)      
Step #3600.00 (1ms ~= 1000.00*RT, ~190000.00UPS, TraCI: 24ms, vehicles TOT 4194 ACT 190 BU ACT 4 BUF 8)                      
[36m(RolloutWorker pid=29865)[0m  Retrying in 1 seconds


[36m(DQN pid=29861)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00001_1_adam_epsilon=0.0000,gamma=0.9079,hiddens=32_32,lr=0.0005,n_step=3,target_network_update_freq=500,train_batch_2024-08-11_12-55-15/checkpoint_000002)


Step #5.00 (0ms ?*RT. ?UPS, TraCI: 703ms, vehicles TOT 8 ACT 8 BUF 4)                      vehicles TOT 4 ACT 4 BUF 8)       
Step #3600.00 (1ms ~= 1000.00*RT, ~224000.00UPS, TraCI: 18ms, vehicles TOT 4339 ACT 224 BU vehicles TOT 4 ACT 4 BUF 8)       
[36m(RolloutWorker pid=29863)[0m  Retrying in 1 seconds


[36m(DQN pid=29862)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15/DQN_DQN_cf0fb_00002_2_adam_epsilon=0.0000,gamma=0.9338,hiddens=128_128,lr=0.0035,n_step=3,target_network_update_freq=500,train_bat_2024-08-11_12-55-15/checkpoint_000002)
2024-08-11 12:56:59,043	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/md/Desktop/Traffic_Tune_Project/Outputs/Training/intersection_1/saved_agent/DQN_2024-08-11_12-55-15' in 0.0125s.
2024-08-11 12:56:59,172	INFO tune.py:1041 -- Total run time: 103.61 seconds (103.43 seconds for the tuning loop).


Finished training for intersection: 1 with 1 training rounds
Step #5.00 (0ms ?*RT. ?UPS, TraCI: 180ms, vehicles TOT 8 ACT 8 BUF 4)                      ACT 4 BUF 8)                      


In [None]:
# Notify when done
# The timestamp is taken from the standard-library `datetime` imported in the first
# cell, instead of reaching into `env_manager`'s namespace for it.
experiment_date = datetime.now().strftime("%m.%d-%H:%M:%S")
message = f'Training for intersection {num_intersection_to_train} with {experiment_type} and {num_training_cycles} from {experiment_date} is done!'  # Replace with your message
recipient = 'eviatar109@icloud.com'  # Replace with your iCloud email
utils.send_imessage(message, recipient)

In [None]:
from ray.rllib.algorithms.algorithm import Algorithm

# `results` must hold the list returned by `training(...)`; evaluate the latest round.
result = results[-1]

best_result = result.get_best_result("env_runners/episode_reward_max", "max")
checkpoint_path = best_result.checkpoint.path
print(f'Best checkpoint path: {checkpoint_path}')

# Load the Algorithm from the checkpoint once. Restoring it a second time only
# repeated the same load and left the first instance behind without stopping it.
algo = Algorithm.from_checkpoint(checkpoint_path)

# Work on a copy of the restored configuration and override the evaluation length
new_config = algo.config.copy()
new_config["evaluation_duration"] = 2 # Define as many evaluation episodes as you want
algo.config = new_config

# Evaluate the Algorithm
eval_results = algo.evaluate()
print(eval_results)

In [None]:
# Project helper from `utils`, called with no arguments.
# NOTE(review): presumably extracts the run's parameters and writes them out —
# its inputs and output location are not visible here; confirm in utils.
utils.extract_and_write_all_params()