In [22]:
import json
import xml.etree.ElementTree as ET
from pathlib import Path

def load_experiment_metadata(input_path: str):

    config = json.load(open(input_path + "/config.json"))
    config["NETWORK"] = Path(config["NETWORK"])
    config["SUMO"] = Path(config["SUMO"])
    # [CO] updated simulation setup to use OD scenario #4: 42-dimensional  + low congestion.
    #sim_setup = json.load(open(input_path + "simulation_setups_co_gt.json"))
    sim_setup = json.load(open(input_path + "simulation_setups_lowCongestion_5minDemand.json"))

    return config, sim_setup



In [23]:
input_path = '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/metadata/'
config, sim_setup = load_experiment_metadata(input_path)

sim_setup

{'objective': 'counts',
 'net': 'SFO.net.xml',
 'route2od': 'trips24h_smoothed.rou.xml',
 'taz': 'taz.xml',
 'add': 'additional.add.xml',
 'start_sim_sec': 0,
 'end_sim_sec': 3600,
 'current_od': 'current_lowCongestion_5minDemand_od.xml',
 'init_od': 'initOD_lowCongestion_5minDemand.xml',
 'gt_od': 'initOD_lowCongestion_5minDemand.xml',
 'n_sumo_replicate': 1,
 'prefix_gt': 'gt',
 'prefix_sim_run': 'lowCongestion_5minDemand'}

In [24]:
import numpy as np

import xml.etree.ElementTree as ET
from pathlib import Path

def generate_od_xml(x, config, sim_setup):

    init_od_path = f"{config['NETWORK']}/{sim_setup['init_od']}"

    if Path(init_od_path).is_file():
        print("Reading:",init_od_path)
        tree = ET.parse(init_od_path)
        root = tree.getroot()

        for i,x in enumerate(x):
            root[0][i].attrib["count"] = str(np.round(x,4))
            
    file_name = f"{config['NETWORK']}/{sim_setup['current_od']}"
    print('Saving: '+file_name)
    tree.write(file_name)

### Find upper/lower bounds based on distribution of init

In [25]:
import pandas as pd

def transform_od_xml_to_pandas(file_path_xml):

    tree = ET.parse(file_path_xml)
    root = tree.getroot()

    od_mat = []
    for taz_elem in root.iter('tazRelation'):
        od_mat.append([taz_elem.get('from'), taz_elem.get('to'), int(taz_elem.get('count'))])
    
    df_od = pd.DataFrame(od_mat, columns=['tazFrom', 'tazTo', 'tazCount'])

    return df_od

In [26]:
file_path_xml = str(config['NETWORK']) + "/" + sim_setup['gt_od']
df_gt = transform_od_xml_to_pandas(file_path_xml)

In [27]:
import torch

device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
dtype = torch.double

### Declare search space
# dimensionality of input space

dim_od = df_gt.shape[0]

bounds = torch.tensor([
    [ df_gt[ 'tazCount'].min() - 1 for _ in range(dim_od)],
    [ df_gt[ 'tazCount'].max() + 1 for _ in range(dim_od)]
], device=device, dtype=dtype) 

bounds


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.],
        [4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
         4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
         4., 4., 4., 4., 4., 4.]], dtype=torch.float64)

In [28]:
# Sample according to Sobol
from torch.quasirandom import SobolEngine
from botorch.utils.transforms import unnormalize

n0 = 2
sobol = SobolEngine(dim_od, scramble=True)
x_0 = sobol.draw(n0).to(dtype=dtype)

# map the normalized into the original parameter space
train_X0 = unnormalize(x_0, bounds)
train_X0

tensor([[2.9367, 3.7742, 2.5638, 0.6628, 0.0340, 0.9893, 1.3211, 2.2440, 1.8314,
         0.1908, 3.2780, 2.4457, 1.3153, 2.9470, 1.8286, 3.4148, 1.5895, 1.4882,
         2.9343, 0.4655, 3.5241, 1.3192, 1.4800, 0.0538, 3.0220, 1.8021, 3.8500,
         0.3535, 0.1370, 3.5102, 3.2129, 2.5796, 1.6473, 1.9427, 3.7112, 1.8400,
         1.9732, 0.0596, 0.9513, 3.2730, 2.6288, 1.7996],
        [0.3731, 0.7778, 0.1070, 3.0692, 3.3205, 2.7221, 2.2477, 1.2081, 3.1072,
         2.2342, 1.5109, 0.8405, 2.7829, 0.5560, 2.9194, 0.4542, 2.5379, 2.0532,
         1.8406, 3.2293, 1.5047, 2.8383, 2.2440, 2.3685, 1.5270, 2.4982, 1.0354,
         3.3493, 3.8999, 0.8262, 1.5903, 1.6505, 3.9909, 2.9596, 0.3146, 3.0141,
         2.1815, 2.1907, 3.9846, 0.5715, 0.2115, 2.0838]], dtype=torch.float64)

In [29]:

x_i = train_X0.cpu().detach().numpy().tolist()
generate_od_xml(x_i[0], config, sim_setup)

Reading: /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/initOD_lowCongestion_5minDemand.xml
Saving: /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_lowCongestion_5minDemand_od.xml


## Run simulations

In [30]:
import os

def run_sumo(config, sim_setup, prefix_input, prefix_output):
    od2trips_cmd = (
        #f"od2trips --no-step-log  --spread.uniform "
        f"od2trips  --spread.uniform "
        #Loads TAZ (districts)
        f"--taz-files {config['NETWORK']}/{sim_setup['taz']} " 
        # Loads O/D-matrix in tazRelation format fromFILE(s)
        f"--tazrelation-files {config['NETWORK']}/{sim_setup[f'{prefix_input}_od']} "
        # Writes trip definitions into FILE
        f"-o {config['NETWORK']}/{prefix_output}_{sim_setup['prefix_sim_run']}_od_trips.trips.xml " 
    )

    # Run SUMO to generate outputs
    sumo_run = (
        # Prefix which is applied to all output files. 
        f"sumo --output-prefix {prefix_output}_{sim_setup['prefix_sim_run']}_ " 
        # Do not check whether routes are connected
        f"--ignore-route-errors=true "
        # Load road network description from FILE
        f"--net-file={config['NETWORK']/sim_setup['net']} "
        # Load routes descriptions from FILE(s)
        f"--routes={config['NETWORK']}/{prefix_output}_{sim_setup['prefix_sim_run']}_od_trips.trips.xml "
        #  -b Defines the begin time in seconds; The simulation starts at this time
        # -e Defines the end time in seconds; The simulation ends at this time
        f"-b {sim_setup['start_sim_sec']} -e {sim_setup['end_sim_sec']} "
        # Load further descriptions from FILE(s)
        f"--additional-files {config['NETWORK']/sim_setup['add']} "
        f"--duration-log.statistics "
        f"--xml-validation never "
        # Save single vehicle route info into FILE
        f"--vehroutes {config['NETWORK']}/routes.vehroutes.xml "
        f"--verbose "
        # Disables output of warnings
        f"--no-warnings "
        # Faster simulation (i.e. less detailed)
        f"--mesosim true "
    
    )
        # f"--seed {seed}"

    try:
        print(od2trips_cmd)
        os.system(od2trips_cmd)
    except:
        print("Unable to create trips file")
    else:
        print("###### Running SUMO #######")
        print(sumo_run)
        os.system(sumo_run)


In [31]:


# Evaluate batch
train_X0_numpy = train_X0.cpu().detach().numpy().tolist()
len(train_X0_numpy)


2

In [81]:
prefix_input = "gt"
prefix_output = "gt"
run_sumo(config, sim_setup, prefix_input, prefix_output)

od2trips  --spread.uniform --taz-files /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/taz.xml --tazrelation-files /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/initOD_lowCongestion_5minDemand.xml -o /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/gt_lowCongestion_5minDemand_od_trips.trips.xml 
Success.time 3000.00
###### Running SUMO #######
Loading net-file from '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/SFO.net.xml' ... 
done (33166ms).
Loading additional-files from '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/additional.add.xml' ... done (315ms).
Loading done.
Simulation version 1.16.0 started with time: 0.00.
Step #3600.00 (62ms ~= 16.13*RT, ~16.13UPS, vehicles TOT 47 ACT 1 BUF 0)                  
Simulation ended at time: 3600.00
Reason: The final simulation step has been reached.
Performance: 
 Duration: 20.51s
 Real time factor: 175.498
 UPS: 989.713840
Vehicles: 
 Inserted: 47
 Running: 1
 Waiting: 0
Statistic

In [75]:
prefix_input = "current"
prefix_output = "current"
run_sumo(config, sim_setup, prefix_input, prefix_output)

od2trips  --spread.uniform --taz-files /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/taz.xml --tazrelation-files /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_lowCongestion_5minDemand_od.xml -o /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_lowCongestion_5minDemand_od_trips.trips.xml 
Success.time 3000.00
###### Running SUMO #######
Loading net-file from '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/SFO.net.xml' ... 
done (32956ms).
Loading additional-files from '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/additional.add.xml' ... done (291ms).
Loading done.
Simulation version 1.16.0 started with time: 0.00.
Step #3600.00 (114ms ~= 8.77*RT, ~61.40UPS, vehicles TOT 85 ACT 7 BUF 0)                  
Simulation ended at time: 3600.00
Reason: The final simulation step has been reached.
Performance: 
 Duration: 21.74s
 Real time factor: 165.631
 UPS: 1695.238095
Vehicles: 
 Inserted: 85
 Running: 7
 Waiting: 0

In [32]:
# excluding warmup period of 15min
# so you need to simulate for at least 15min now. 
warm_up_sec = 15*60 # duration in seconds

# duration, in seconds, of each edge simulation output statistics
# this value should be consistent with what is defined in additional.add.xml
edge_stats_freq = 300

In [33]:
import os
import pandas as pd
from pandas import DataFrame
import numpy as np

def parse_loop_data_xml_to_pandas(config: dict,loop_file: dict) -> DataFrame: 
    """Read the Loop Detectors Data: Each SUMO run produces a file with the
    traffic counts. This function reads the corresponding traffic counts file
    averages across simulation replications
    
    """

    output_file =(config["NETWORK"] / "loopOutputs.csv")
    ## See output explanation:
    # https://sumo.dlr.de/docs/Simulation/Output/Lane-_or_Edge-based_Traffic_Measures.html#generated_output

    data2csv = (
        f"python {config['SUMO']}/tools/xml/xml2csv.py "
        f"{loop_file} "
        f"-o {output_file}"
        )
    
    os.system(data2csv)
    
    df_trips = pd.read_csv(output_file, sep=";", header=0)

    # edge flow in vehicles per hour
    ## edge speed is given in m/s
    ## edge_density is given in no. of vehicles/km

    df_trips['interval_nVehContrib'] = df_trips['edge_arrived'] + df_trips['edge_left']
    #df_trips['interval_nVehContrib'] = 3.6*df_trips['edge_speed']*df_trips['edge_density']


    #df_trips['EdgeID'] = df_trips['edge_id']

    # edge speed is given in m/s
    # computed only for edges that have departed flow
    df_trips['interval_harmonicMeanSpeed'] = df_trips[df_trips['interval_nVehContrib']>0]['edge_speed']
    
    # exclude warm-up period
    df_trips = df_trips[df_trips['interval_begin']>warm_up_sec]

    # aggregate the rest of the time intervals
    df_trips = df_trips[df_trips['interval_begin']>warm_up_sec]
    df_agg = df_trips.groupby(by=['edge_id'], as_index=False).agg(
        {'interval_nVehContrib':np.sum, 'interval_harmonicMeanSpeed':np.mean})


    return df_agg, df_trips

In [34]:
loop_stats_df_gt, _ = parse_loop_data_xml_to_pandas(
    config,
    loop_file = f"/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/gt_{sim_setup['prefix_sim_run']}_edge_data_SFO.xml"
)


loop_stats_df_current, _ = parse_loop_data_xml_to_pandas(
    config,
    loop_file = f"/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_{sim_setup['prefix_sim_run']}_edge_data_SFO.xml"
)


In [35]:
import numpy as np


def compute_nrmse_counts_one_edge(df_true, df_simulated,GT_edge_id):
    # Merge simulated output with ground truth
    df1 = df_true\
        .merge(df_simulated, on=['edge_id'],
        suffixes=('_GT', '_sim'), how='left')
    
    # only consider GT_edge_id of interest
    df1 = df1[df1['edge_id']==GT_edge_id]

    df1['interval_nVehContrib_sim'] = df1['interval_nVehContrib_sim'].fillna(0)
        
    df1['diff_square'] = (
        df1['interval_nVehContrib_GT'] - df1['interval_nVehContrib_sim']
        )**2
    
    n = df1.shape[0]
    print(n)
    print(df_true.shape[0])
    print(df_simulated.shape[0])
    RMSN = np.sqrt(n*(df1['diff_square'].sum()))/df1['interval_nVehContrib_GT'].sum()

    return RMSN


def compute_nrmse_counts_all_edges(df_true, df_simulated):
    # Merge simulated output with ground truth
    df1 = df_true\
        .merge(df_simulated, on=['edge_id'],
        suffixes=('_GT', '_sim'), how='left')
    
    df1['interval_nVehContrib_sim'] = df1['interval_nVehContrib_sim'].fillna(0)
        
    df1['diff_square'] = (
        df1['interval_nVehContrib_GT'] - df1['interval_nVehContrib_sim']
        )**2
    
    n = df1.shape[0]
    print(n)
    print(df_true.shape[0])
    print(df_simulated.shape[0])
    RMSN = np.sqrt(n*(df1['diff_square'].sum()))/df1['interval_nVehContrib_GT'].sum()

    return RMSN



In [36]:
y_i = compute_nrmse_counts_all_edges(loop_stats_df_gt, loop_stats_df_current)

1238
1238
1477


In [37]:
y_i

0.833699363478993

In [38]:
# Sample according to Sobol
from torch.quasirandom import SobolEngine
from botorch.utils.transforms import unnormalize

n0 = 50
sobol = SobolEngine(dim_od, scramble=True)
x_0 = sobol.draw(n0).to(dtype=dtype)

# map the normalized into the original parameter space
train_X0 = unnormalize(x_0, bounds)
train_X0

tensor([[3.5959, 0.6227, 2.1713,  ..., 0.5682, 3.2522, 2.3849],
        [1.5626, 3.0795, 1.5042,  ..., 2.8088, 1.0908, 1.6330],
        [0.4941, 1.2709, 3.0409,  ..., 1.9516, 2.6147, 0.8581],
        ...,
        [3.7542, 3.9914, 2.7665,  ..., 3.7279, 0.8292, 3.5369],
        [3.9130, 0.2735, 0.5269,  ..., 2.7090, 3.2001, 0.7288],
        [1.8797, 3.8016, 3.1725,  ..., 0.9218, 1.3929, 3.4769]],
       dtype=torch.float64)

In [39]:
prefix_input = "current"
prefix_output = "current"

loop_file = f"/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/{prefix_output}_{sim_setup['prefix_sim_run']}_edge_data_SFO.xml"

train_x = train_X0.cpu().detach().numpy().tolist()
loss = []
for i, x_i in enumerate(train_x):

    print(f"########### Sampling location={i+1} ###########")

    # Generate OD
    generate_od_xml(x_i, config, sim_setup)

    # Query simulator
    run_sumo(config, sim_setup, prefix_input, prefix_output)

    # Compute metrics
    loop_stats_df_current, _ = parse_loop_data_xml_to_pandas(config,loop_file)

    # Compute loss with respect to gt
    y_i = compute_nrmse_counts_all_edges(loop_stats_df_gt, loop_stats_df_current)

    # append loss
    loss.append(y_i)


train_x = train_X0.cpu().detach().numpy().tolist()
df_train = pd.DataFrame(train_x,
            columns = [f"x_{i+1}" for i in range(dim_od)])
df_train['loss'] = loss

df_train.to_csv(f"./{sim_setup['prefix_sim_run']}_initial_data_bo.csv",index=False)

########### Sampling location=1 ###########
Reading: /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/initOD_lowCongestion_5minDemand.xml
Saving: /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_lowCongestion_5minDemand_od.xml
od2trips  --spread.uniform --taz-files /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/taz.xml --tazrelation-files /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_lowCongestion_5minDemand_od.xml -o /Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/current_lowCongestion_5minDemand_od_trips.trips.xml 
Success.time 3150.00
###### Running SUMO #######
Loading net-file from '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/SFO.net.xml' ... 
done (29795ms).
Loading additional-files from '/Users/rodrse/Downloads/calibrate_ods_carolina/network/SFO/additional.add.xml' ... done (283ms).
Loading done.
Simulation version 1.16.0 started with time: 0.00.
Step #3600.00 (96ms ~= 10.42*RT, ~72.92UPS, veh