In [None]:
import numpy as np
import os
import torch
from torch.nn.functional import normalize
from vissim import Vissim
from ddpg import DDPG
import time
import matplotlib.pyplot as plt

In [None]:
# --- DDPG hyperparameters (passed to the DDPG constructor below) ---
alpha = 1e-4
beta = 3e-4
gamma = 0.6
tau = 0.005
n_agents = 1
nodes = 3

# --- Simulation parameters ---
num_episodes = 100
sim_time = 4500
# Simulation seconds at which a new action is requested: every 60 s from 900 s
# up to and including sim_time.
decision_point = list(range(900, sim_time + 1, 60))

In [None]:
# Render matplotlib figures at 150 DPI.
plt.rcParams['figure.dpi'] = 150
# Build the DDPG agent from the hyperparameters defined in the configuration cell.
# NOTE(review): chkpt_dir is a hard-coded, machine-specific absolute path.
ddpg_agent = DDPG(alpha=alpha, beta=beta, gamma=gamma, tau=tau, chkpt_dir='/Users/chhuang/ddpg_model/model/')

# NOTE(review): the second argument to os.path.join is an absolute Windows path,
# so on Windows the joined result is that path alone and `Path` (the current
# working directory) is discarded -- confirm this is the intent.
Path = os.getcwd()
Filename = os.path.join(Path, r"C:\Users\chhuang\ddpg_model\vissim_network\multi-3.inpx")
# Create the Vissim environment wrapper for the network file above.
env = Vissim(Filename)

In [None]:
def input_transform(car_D, scooter_D, car_S, scooter_S):
    """Turn the four raw state arrays into normalized, batched tensors.

    Each argument is converted to a float tensor, moved to CUDA when it is
    available (CPU otherwise), passed through
    ``torch.nn.functional.normalize`` along dimension 0, and finally wrapped
    in a new leading dimension of size 1.

    Returns:
        A 4-tuple ``(car_D, scooter_D, car_S, scooter_S)`` of the transformed
        tensors, in the same order as the arguments.
    """
    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _prepare(raw):
        # tensor -> device -> normalize over dim 0 -> add leading batch dim
        as_tensor = torch.tensor(raw, dtype=torch.float).to(target)
        return torch.stack([normalize(as_tensor, dim=0)])

    return tuple(_prepare(raw) for raw in (car_D, scooter_D, car_S, scooter_S))

In [None]:
# Containers filled by the evaluation loop, one entry (or one row) per episode.

# Average cumulative reward obtained in each episode (per agent / global sum).
avg_rewards_per_episode, avg_global_rewards_per_episode = [], []

# Average phase times of each episode.
avg_phase_time_1_per_episode, avg_phase_time_2_per_episode = [], []

# Average offset of each episode.
avg_offset_per_episode = []

# Travel-time data: one row per episode, 36 columns.
avg_travtime_car_per_episode = np.zeros(shape=(num_episodes, 36))
avg_travtime_scooter_per_episode = np.zeros(shape=(num_episodes, 36))

# Queue-length data: one row per episode, 12 columns.
avg_queue_length_per_episode = np.zeros(shape=(num_episodes, 12))

# Average number of stops.
avg_num_stops_car, avg_num_stops_scooter = [], []

# Average delay.
avg_delay_car, avg_delay_scooter = [], []

# Average total travel time.
avg_travtmtot_car, avg_travtmtot_scooter = [], []

# Average total number of stops.
avg_stopstot_car, avg_stopstot_scooter = [], []

# Average total delay time.
avg_delaytot_car, avg_delaytot_scooter = [], []

# Wall-clock time spent on each episode.
spent_times = []

In [None]:
# Load the saved model before running the evaluation episodes.
ddpg_agent.load_checkpoint()

np.set_printoptions(precision=4, threshold=10000)
total_step = 0

for episode in range(1, num_episodes + 1):
    start = time.time()
    print('******** start episode %s ********' % episode)
    env.stop_simulation()
    env.del_pre_simulation()
    env.set_randseed(episode)  # the random seed follows the episode number
    # env.random_veh_input_each_15_min(episode)
    env.reset()
    env.set_signal_program(3)

    # Per-episode accumulators: summed reward per agent, and summed phase
    # times / offsets per node.
    rewards = np.zeros(n_agents)
    phase_1 = np.zeros(nodes)
    phase_2 = np.zeros(nodes)
    offset = np.zeros(nodes)

    # Warm up for 840 s.
    env.quickmode(1)
    env.warm_up(840)
    env.break_time(900)

    # Fetch the state at 900 s.
    car_D, scooter_D, car_S, scooter_S = env.get_all_states()
    car_D, scooter_D, car_S, scooter_S = input_transform(car_D, scooter_D, car_S, scooter_S)

    # Number of recorded steps in this episode; used as the averaging divisor.
    samples = 0

    while True:
        if env.time in decision_point:
            # NOTE(review): the trailing 0 is presumably an exploration/noise
            # setting for evaluation -- confirm against DDPG.choose_actions.
            splits, first_greens = ddpg_agent.choose_actions(car_D, scooter_D, car_S, scooter_S, 0)
            splits_for_vissim = splits.squeeze(0).cpu().numpy()
            first_greens_for_vissim = first_greens.squeeze(0).cpu().numpy()

            # Update the timing plans with the chosen action.
            env.update_timing_plans(splits_for_vissim, first_greens_for_vissim)

        # Print and record the timing plans currently in effect.
        new_timing_plans = env.get_timing_plans()
        updated_offsets = env.get_offsets()
        print('new_timing_plans:', new_timing_plans)
        print('updated_offsets:', updated_offsets)
        # NOTE(review): this iterates over n_agents while phase_1/phase_2/offset
        # are sized by `nodes`; with n_agents < nodes the remaining nodes stay
        # at zero. Confirm whether range(nodes) is intended here.
        for i in range(n_agents):
            phase_1[i] += new_timing_plans['%s' % (i+1)][0]
            phase_2[i] += new_timing_plans['%s' % (i+1)][1]
            offset[i] += updated_offsets[i]

        # Execute the new timing plans.
        env.execute_new_timing_plans(env.time)

        # Fetch the new state.
        car_D_, scooter_D_, car_S_, scooter_S_ = env.get_all_states()
        car_D_, scooter_D_, car_S_, scooter_S_ = input_transform(car_D_, scooter_D_, car_S_, scooter_S_)

        # ========================================== reward computation ==========================================
        # Reward table for this single action.
        reward = np.zeros(n_agents)

        # Pull the measurements from VISSIM.
        throughput_car, throughput_scooter = env.get_total_throughput()
        queue_length = env.get_total_queue_length()

        for i in range(n_agents):
            # Reward obtained by each intersection.
            # Terms considered: vehicles passing the intersection and queue length.
            reward[i] = throughput_car[i] + 0.3 * throughput_scooter[i] - 0.44 * queue_length[i]
            rewards[i] += reward[i]

        print('reward:', reward)
        samples += 1
        total_step += 1
        print('###### have got %s sample(s) ######' % samples)

        if env.time >= sim_time:
            break
        # Not finished yet: the new state becomes the current state.
        car_D, scooter_D, car_S, scooter_S = car_D_, scooter_D_, car_S_, scooter_S_

    # Per-episode averages. The divisor is the number of steps actually
    # recorded (the previous hard-coded 60 was only correct when the episode
    # produced exactly 60 steps).
    avg_rewards = rewards / samples
    avg_phase_1 = phase_1 / samples
    avg_phase_2 = phase_2 / samples
    avg_offset = offset / samples
    avg_rewards_per_episode.append(avg_rewards)
    avg_global_rewards_per_episode.append(float(sum(avg_rewards)))
    avg_phase_time_1_per_episode.append(avg_phase_1)
    avg_phase_time_2_per_episode.append(avg_phase_2)
    avg_offset_per_episode.append(avg_offset)

    # Store travel-time data.
    travtime_car, travtime_scooter = env.get_travel_time()
    avg_travtime_car_per_episode[episode - 1] += travtime_car
    avg_travtime_scooter_per_episode[episode - 1] += travtime_scooter

    # Store queue-length data.
    each_q_len = env.get_each_queue_length()
    avg_queue_length_per_episode[episode - 1] += each_q_len

    # Store number of stops.
    num_stops_car, num_stops_scooter = env.get_avg_num_stops()
    avg_num_stops_car.append(num_stops_car)
    avg_num_stops_scooter.append(num_stops_scooter)

    # Store delay.
    delay_car, delay_scooter = env.get_avg_delay()
    avg_delay_car.append(delay_car)
    avg_delay_scooter.append(delay_scooter)

    # Store total travel time.
    travtmtot_car, travtmtot_scooter = env.get_total_travel_time()
    avg_travtmtot_car.append(travtmtot_car)
    avg_travtmtot_scooter.append(travtmtot_scooter)

    # Store total number of stops.
    stopstot_car, stopstot_scooter = env.get_total_num_stops()
    avg_stopstot_car.append(stopstot_car)
    avg_stopstot_scooter.append(stopstot_scooter)

    # Store total delay.
    delaytot_car, delaytot_scooter = env.get_total_delay()
    avg_delaytot_car.append(delaytot_car)
    avg_delaytot_scooter.append(delaytot_scooter)

    end = time.time()
    spent_time = end - start
    spent_times.append(spent_time)
    print('avg. rewards in this episode:', avg_rewards)
    # print('avg. global rewards in this episode:', avg_global_rewards_per_episode[episode-1])
    print('******** end episode %s ********' % episode)
    print('spent %.2f (s) for episode %s' % (spent_time, episode))
    # No manual `episode += 1`: the for statement rebinds `episode` each pass.

In [None]:
def _write_series(path, values):
    """Write `values` to `path` as a single bracketed, comma-separated line.

    Each value is rendered with ``str``; an empty sequence yields ``[]``.
    The parent directory of `path` is created if it does not exist yet.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w') as f:
        f.write('[' + ','.join(str(value) for value in values) + ']')


def _column(rows, index):
    """Return element `index` of every row in `rows` as a list."""
    return [row[index] for row in rows]


# ========================================== run-time summary ==========================================
max_time_per_episode = max(spent_times)
min_time_per_episode = min(spent_times)
avg_time_per_episode = sum(spent_times) / len(spent_times)

summary_lines = [
    '====== testing %s episodes ======' % len(spent_times),
    'max_time_per_episode: %.2f s' % max_time_per_episode,
    'min_time_per_episode: %.2f s' % min_time_per_episode,
    'avg_time_per_episode: %.2f s' % avg_time_per_episode,
]
for line in summary_lines:
    print(line)

os.makedirs('test_results', exist_ok=True)
with open('test_results/log.txt', 'w') as f:
    f.write('\n'.join(summary_lines))


# ========================================== rewards ==========================================
# Per-episode average reward of the first (index 0) agent.
global_agent = _column(avg_rewards_per_episode, 0)

_write_series('test_results/reward.txt', global_agent)
_write_series('test_results/global_reward.txt', avg_global_rewards_per_episode)


# ========================================== travel times ==========================================
# One list per detector column, each holding that column's value for every episode.
# Column layout:
#   0-11:  111, 112, 113, ... , 141, 142, 143
#   12-23: 211, 212, 213, ... , 241, 242, 243
#   24-35: 311, 312, 313, ... , 341, 342, 343
travtime_car_for_each_detectors = [
    _column(avg_travtime_car_per_episode, detector) for detector in range(36)
]
travtime_scooter_for_each_detectors = [
    _column(avg_travtime_scooter_per_episode, detector) for detector in range(36)
]

for detector in range(36):
    _write_series('test_results/traveltime/car travel time_%s.txt' % (detector + 1),
                  travtime_car_for_each_detectors[detector])
    _write_series('test_results/traveltime/scooter travel time_%s.txt' % (detector + 1),
                  travtime_scooter_for_each_detectors[detector])


# ========================================== phase times ==========================================
node_1_phase_1 = _column(avg_phase_time_1_per_episode, 0)
node_2_phase_1 = _column(avg_phase_time_1_per_episode, 1)
node_3_phase_1 = _column(avg_phase_time_1_per_episode, 2)

node_1_phase_2 = _column(avg_phase_time_2_per_episode, 0)
node_2_phase_2 = _column(avg_phase_time_2_per_episode, 1)
node_3_phase_2 = _column(avg_phase_time_2_per_episode, 2)

for series_name, series in (
    ('node_1_phase_1', node_1_phase_1),
    ('node_1_phase_2', node_1_phase_2),
    ('node_2_phase_1', node_2_phase_1),
    ('node_2_phase_2', node_2_phase_2),
    ('node_3_phase_1', node_3_phase_1),
    ('node_3_phase_2', node_3_phase_2),
):
    _write_series('test_results/phase/%s.txt' % series_name, series)


# ========================================== offsets ==========================================
node_1_offset = _column(avg_offset_per_episode, 0)
node_2_offset = _column(avg_offset_per_episode, 1)
node_3_offset = _column(avg_offset_per_episode, 2)

# Offsets are stored next to the phase files, under test_results/phase/.
for series_name, series in (
    ('node_1_offset', node_1_offset),
    ('node_2_offset', node_2_offset),
    ('node_3_offset', node_3_offset),
):
    _write_series('test_results/phase/%s.txt' % series_name, series)


# ========================================== queue lengths ==========================================
q_len_for_each_approach = [
    _column(avg_queue_length_per_episode, approach) for approach in range(12)
]

# Files are numbered with the odd numbers 1, 3, ..., 23.
for approach in range(1, 13):
    _write_series('test_results/queue/queue_length_%s.txt' % (approach * 2 - 1),
                  q_len_for_each_approach[approach - 1])


# ========================================== per-episode scalar metrics ==========================================
# Average stops, average delay, total travel time, total stops and total delay,
# each for cars and scooters.
for file_name, series in (
    ('number_of_stops_car.txt', avg_num_stops_car),
    ('number_of_stops_scooter.txt', avg_num_stops_scooter),
    ('delay_car.txt', avg_delay_car),
    ('delay_scooter.txt', avg_delay_scooter),
    ('TravTmTot_car.txt', avg_travtmtot_car),
    ('TravTmTot_scooter.txt', avg_travtmtot_scooter),
    ('StopsTot_car.txt', avg_stopstot_car),
    ('StopsTot_scooter.txt', avg_stopstot_scooter),
    ('DelayTot_car.txt', avg_delaytot_car),
    ('DelayTot_scooter.txt', avg_delaytot_scooter),
):
    _write_series('test_results/' + file_name, series)

In [None]:
# After the last episode, issue the same stop / del_pre_simulation calls that
# open every episode of the evaluation loop.
env.stop_simulation()
env.del_pre_simulation()

In [None]:
# Close the Vissim environment once all results have been collected.
env.close()