In [3]:
import pandas as pd 
import numpy as np
import pickle
from collections import defaultdict
import tensorflow as tf

def read_file(filename):
    """Load and return the object pickled in ``filename``.

    Parameters
    ----------
    filename : str or path-like
        Path of a binary pickle file.

    Returns
    -------
    object
        Whatever object was stored in the pickle (the rest of this notebook
        indexes it like a dict with ``'data'``, ``'link_flow'`` and
        ``'path_flow'`` entries).
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only use this on dataset files produced by a trusted pipeline.
    # The ``with`` block closes the file on exit, so no explicit close() is needed.
    with open(filename, "rb") as file:
        return pickle.load(file)

def get_origin_path(stat):
    """Tabulate every OD pair with its demand and its three candidate paths.

    Parameters
    ----------
    stat : dict
        Must provide ``stat['data']['paths_link']`` (OD key -> sequence holding
        at least three paths, each an iterable of link ids) and
        ``stat['data']['demand']`` (mapping whose values are the demands).

    Returns
    -------
    pandas.DataFrame
        Columns ``od``, ``demand``, ``path1``, ``path2``, ``path3``; each path
        is stored as a tuple of link ids.
    """
    paths_by_od = stat['data']['paths_link']
    od_pairs = list(paths_by_od)
    # ranked[k] holds the (k+1)-th path of every OD pair, in dict order
    ranked = [
        [tuple(candidates[rank]) for candidates in paths_by_od.values()]
        for rank in range(3)
    ]

    # Demands are taken in dict iteration order; this assumes the demand dict
    # is ordered like paths_link -- TODO confirm against the data generator.
    demands = list(stat["data"]["demand"].values())

    return pd.DataFrame({
        "od": od_pairs,
        "demand": demands,
        "path1": ranked[0],
        "path2": ranked[1],
        "path3": ranked[2],
    })

def get_UE_link_cost(stat):
    """Return the network link table extended with stored flows and link costs.

    A copy of ``stat['data']['network']`` is taken, so the table inside
    ``stat`` is not modified. Two columns are added:

    * ``link_flow`` -- the values of ``stat['link_flow']``
    * ``link_cost`` -- ``free_flow_time * (1 + b * (link_flow / capacity) ** 1)``
      rounded to 2 decimals.
    """
    network = stat['data']['network'].copy()
    network['link_flow'] = stat['link_flow']

    # Volume/capacity ratio; the exponent is 1, i.e. cost is linear in flow.
    utilisation = (network['link_flow'] / network['capacity']) ** 1
    raw_cost = network['free_flow_time'] * (1 + network['b'] * utilisation)
    network['link_cost'] = round(raw_cost, 2)
    return network

# Calculate path travel time for each od pair
def calculate_path_cost(row, link_df):
    """Sum the ``link_cost`` of every link on one path.

    Parameters
    ----------
    row : iterable
        Index labels of ``link_df`` identifying the links of the path.
    link_df : pandas.DataFrame
        Table with a ``link_cost`` column, looked up by index label.

    Returns
    -------
    Total cost of the path rounded to 2 decimals (0 for an empty path).
    """
    total = sum(link_df.at[link_id, 'link_cost'] for link_id in row)
    return round(total, 2)

# calculate each link flow based on path flow
def extract_link_flow(path_link, flows):
    """Aggregate path flows into per-link flows.

    Parameters
    ----------
    path_link : dict
        ``{od pair: [path, ...]}`` where each path is an iterable of link ids.
    flows : iterable
        One flow sequence per OD pair, in the iteration order of ``path_link``;
        ``flows[k][m]`` is the flow on the m-th path of the k-th OD pair.

    Returns
    -------
    dict
        ``{link id: total flow}`` (floats) over every link that appears on at
        least one path.

    Notes
    -----
    Flows are accumulated directly per link. Going through an intermediate
    ``{path tuple: flow}`` mapping would silently overwrite the flow of an
    earlier path whenever the same link sequence occurs more than once.
    """
    link_flow = defaultdict(float)
    for path_set, flow_set in zip(path_link.values(), flows):
        for path, flow in zip(path_set, flow_set):
            for link in path:
                link_flow[link] += flow
    return dict(link_flow)

In [26]:
# Check UE of the original dataset: compare the cost of the three candidate
# paths of each OD pair with the flow stored for each of them.
filename = '../Output/5by5_Data1800'
stat = read_file(filename)
path_link_df = get_origin_path(stat)
UE_link = get_UE_link_cost(stat)

# Cost of every candidate path under the stored link flows
for path_col in ('path1', 'path2', 'path3'):
    path_link_df[f'{path_col}_cost'] = path_link_df[path_col].apply(
        lambda x: calculate_path_cost(x, UE_link)
    )

# Stored flow on each of the three paths
flows = stat['path_flow']
for rank in range(3):
    path_link_df[f'flow{rank + 1}'] = [f[rank] for f in flows]

path_link_df.head()

Unnamed: 0,od,demand,path1,path2,path3,path1_cost,path2_cost,path3_cost,flow1,flow2,flow3
0,"(6, 22)",1878,"(18, 24, 42, 60)","(20, 36, 42, 60)","(20, 38, 54, 60)",4.05,4.05,4.03,0,0,1878
1,"(23, 24)",976,"(76,)","(65, 62, 68)","(75, 61, 58, 62, 68)",1.0,2.88,4.66,976,0,0
2,"(1, 24)",614,"(0, 4, 8, 14, 32, 50, 68)","(2, 18, 22, 26, 32, 50, 68)","(0, 6, 22, 26, 32, 50, 68)",7.18,6.98,7.07,0,614,0
3,"(6, 12)",871,"(18, 24)","(20, 36)","(18, 22, 28, 41)",1.97,1.97,3.98,0,871,0
4,"(1, 4)",1478,"(0, 4, 8)","(2, 18, 7, 4, 8)","(0, 6, 22, 11, 8)",3.38,4.96,5.05,1478,0,0


Read predicted output

In [4]:
def load_from_file(filename, n_paths=3, n_cells=625):
    """Read a whitespace-separated text dump and split it into float32 tensors.

    Parameters
    ----------
    filename : str
        Text file readable by ``numpy.loadtxt``.
    n_paths : int, optional
        First dimension of every tensor (default 3).
    n_cells : int, optional
        Second dimension of every tensor (default 625).

    Returns
    -------
    list of tf.Tensor
        One ``(n_paths, n_cells)`` float32 tensor per complete block of
        ``n_paths * n_cells`` values found in the file.

    Raises
    ------
    ValueError
        If the number of values in the file is not a multiple of
        ``n_paths * n_cells``.
    """
    with open(filename, 'r') as f:
        data = np.loadtxt(f)

    block_size = n_paths * n_cells
    # Fail with an explicit message instead of an opaque reshape error when the
    # file holds a trailing partial block.
    if data.size % block_size != 0:
        raise ValueError(
            f"{filename} holds {data.size} values, which is not a multiple of "
            f"{n_paths} * {n_cells} = {block_size}"
        )

    num_tensors = data.size // block_size
    reshaped_data = data.reshape((num_tensors, n_paths, n_cells))
    return [tf.convert_to_tensor(block, dtype=tf.float32) for block in reshaped_data]

# Load every predicted tensor from the text dump and report how many were read.
predicted_values = load_from_file('../predicted_values.txt')
print(len(predicted_values))

10


In [23]:
def extract_flow(tensor):
    """Collect the predicted flows of every matrix cell across all sub-matrices.

    Parameters
    ----------
    tensor : array-like
        Iterable of 2-D sub-matrices exposing ``.shape`` and ``[i, j]``
        indexing (the caller passes a tensor reshaped to (3, 25, 25)).

    Returns
    -------
    dict
        ``{(row + 1, col + 1): [flow in sub-matrix 0, flow in sub-matrix 1, ...]}``
        with each flow truncated by ``int()``. Cells whose flows are 0 in every
        sub-matrix are dropped.
    """
    flows_by_cell = {}
    for layer_idx, layer in enumerate(tensor):
        for r in range(layer.shape[0]):
            for c in range(layer.shape[1]):
                cell = (r + 1, c + 1)  # keys are 1-based
                if cell not in flows_by_cell:
                    flows_by_cell[cell] = [None] * tensor.shape[0]
                flows_by_cell[cell][layer_idx] = int(layer[r, c])

    # Keep only cells that carry a non-zero flow in at least one sub-matrix
    return {
        cell: cell_flows
        for cell, cell_flows in flows_by_cell.items()
        if any(value != 0 for value in cell_flows)
    }

def create_pred_df(tensor, stat):
    """Join the predicted path flows onto the OD/path table of one dataset.

    Parameters
    ----------
    tensor : array-like
        Predicted flows, forwarded to ``extract_flow``.
    stat : dict
        Dataset record; ``stat['path_flow']`` is used for the OD-pair count and
        ``get_origin_path(stat)`` supplies the OD/demand/path columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``od, demand, path1, path2, path3, pred_f1, pred_f2, pred_f3``.
        OD pairs with no predicted flow get 0 in the ``pred_f*`` columns.

    Prints the number of predicted and original OD pairs and the count and
    percentage of original OD pairs that had no prediction.
    """
    predicted = extract_flow(tensor)
    print("Number of OD pairs predicted: ", len(predicted))
    print("Number of origin OD pairs: ", len(stat['path_flow']))

    flow_df = (
        pd.DataFrame.from_dict(predicted, orient='index', columns=['pred_f1', 'pred_f2', 'pred_f3'])
        .reset_index()
        .rename(columns={'index': 'od'})
    )
    origin_df = get_origin_path(stat)[['od', 'demand', 'path1', 'path2', 'path3']]
    merged = origin_df.merge(flow_df, how='left', on='od')

    # Original OD pairs that received no prediction show up as NaN after the left join
    missing = merged['pred_f1'].isna().sum()
    print("Nan values: ", missing, " -- ", round(missing/len(stat['path_flow'])*100,2), "%")
    return merged.fillna(0)

# Calculate link flow from pred path flow
def sum_pred_link_flow(pred_df, stat):
    """Turn predicted path flows into a per-link flow and cost table.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Must contain ``pred_f1``, ``pred_f2`` and ``pred_f3``; rows are matched
        positionally with the entries of ``stat['data']['paths_link']``.
    stat : dict
        Dataset record providing ``stat['data']['paths_link']`` and the link
        table ``stat['data']['network']``.

    Returns
    -------
    pandas.DataFrame
        Columns ``link_id, capacity, free_flow_time, b, pred_link_flow,
        link_cost``. Links that carry no predicted flow get 0, and
        ``link_cost = free_flow_time * (1 + b * (pred_link_flow / capacity) ** 1)``
        rounded to 2 decimals.
    """
    path_flows = pred_df[['pred_f1', 'pred_f2', 'pred_f3']].values.tolist()
    flow_by_link = extract_link_flow(stat['data']['paths_link'], path_flows)

    flow_df = (
        pd.DataFrame.from_dict(flow_by_link, orient='index', columns=['pred_link_flow'])
        .sort_index(ascending=True)
        .reset_index()
        .rename(columns={'index': 'link_id'})
    )

    network = stat['data']['network'].copy()[['link_id', 'capacity', 'free_flow_time', 'b']]
    # Left join keeps every network link; links without flow become 0
    output = network.merge(flow_df, how='left', on='link_id').fillna(0)

    utilisation = (output['pred_link_flow'] / output['capacity']) ** 1
    output['link_cost'] = round(output['free_flow_time'] * (1 + output['b'] * utilisation), 2)
    return output

def calculate_delay(pred_df, pred_link_flow):
    """Compute the demand-weighted average excess path cost of a prediction.

    For every OD pair the cost of each of its three paths is evaluated on
    ``pred_link_flow``; the delay of a path is its predicted flow times the
    gap between its cost and the cheapest of the three paths.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Needs ``path1..3``, ``pred_f1..3`` and ``demand``. It is modified in
        place: ``path1_cost``, ``path2_cost``, ``path3_cost``,
        ``min_path_cost`` and ``delay`` columns are added.
    pred_link_flow : pandas.DataFrame
        Link table with a ``link_cost`` column, as used by
        ``calculate_path_cost``.

    Returns
    -------
    Total delay divided by total demand, multiplied by 60 (the original note
    describes this as minutes, so costs are presumably in hours -- confirm).
    """
    ranks = (1, 2, 3)
    for k in ranks:
        pred_df[f'path{k}_cost'] = pred_df[f'path{k}'].apply(
            lambda x: calculate_path_cost(x, pred_link_flow)
        )
    pred_df['min_path_cost'] = pred_df[['path1_cost', 'path2_cost', 'path3_cost']].min(axis=1)

    # Accumulate flow * (path cost - cheapest cost), path by path
    total_delay = None
    for k in ranks:
        excess = pred_df[f'path{k}_cost'] - pred_df['min_path_cost']
        term = pred_df[f'pred_f{k}'] * excess
        total_delay = term if total_delay is None else total_delay + term
    pred_df['delay'] = total_delay

    avg_delay = pred_df['delay'].sum()/pred_df['demand'].sum()
    # Scale by 60 to report the average delay in minutes
    return avg_delay*60

def single_avg_delay(pred_tensor, filename):
    """Evaluate one predicted tensor against the dataset stored in ``filename``.

    The tensor is reshaped to (3, 25, 25), joined with the dataset's OD/path
    table, converted to link flows and costs, and scored with
    ``calculate_delay``.

    Returns the value produced by ``calculate_delay`` for this prediction.
    """
    stat = read_file(filename)
    flows_by_path = tf.reshape(pred_tensor, (3, 25, 25))
    pred_df = create_pred_df(flows_by_path, stat)
    link_costs = sum_pred_link_flow(pred_df, stat)
    return calculate_delay(pred_df, link_costs)

In [30]:
%run ../parameters.py
p = Params()

# Check number of OD pair in origin dataset 
start_from=1800
files = []
for i in range(10):
    file_name = ''.join([p.base_path, str(start_from+i)])
    files.append(file_name)
    

In [31]:
# Score every predicted tensor against its matching dataset file and report
# the mean of the per-file average delays.
sum_delay = 0
n_evaluated = 0
for pred_tensor, filename in zip(predicted_values, files):
    avg_delay = single_avg_delay(pred_tensor, filename)
    sum_delay += avg_delay
    n_evaluated += 1
    print(f"Average delay: {round(avg_delay,3)} mins")
    print("-----------------------------")

# zip() stops at the shorter of the two lists, so average over the number of
# pairs actually evaluated rather than over len(predicted_values).
if n_evaluated:
    print("avg delay of prediction: ", sum_delay/n_evaluated)
else:
    print("No prediction/file pairs to evaluate")

Number of OD pairs predicted:  322
Number of origin OD pairs:  127
Nan values:  54  --  42.52 %
Average delay: 3.817 mins
-----------------------------
Number of OD pairs predicted:  317
Number of origin OD pairs:  127
Nan values:  54  --  42.52 %
Average delay: 4.133 mins
-----------------------------
Number of OD pairs predicted:  311
Number of origin OD pairs:  127
Nan values:  51  --  40.16 %
Average delay: 1.65 mins
-----------------------------
Number of OD pairs predicted:  315
Number of origin OD pairs:  127
Nan values:  54  --  42.52 %
Average delay: 3.999 mins
-----------------------------
Number of OD pairs predicted:  318
Number of origin OD pairs:  127
Nan values:  58  --  45.67 %
Average delay: 3.455 mins
-----------------------------
Number of OD pairs predicted:  317
Number of origin OD pairs:  127
Nan values:  60  --  47.24 %
Average delay: 3.42 mins
-----------------------------
Number of OD pairs predicted:  322
Number of origin OD pairs:  127
Nan values:  43  --  33