This notebook uses a vanilla (basic) Bayesian optimization algorithm to tackle an urban travel demand (i.e., origin-destination, OD) calibration problem. The traffic simulations are based on the SUMO simulator.


# SUMO configuration


Mount GDrive


If you are working w/ colab rather than a jupyterlab notebook this drive mounting and sumo installation will need to be done every time you restart the runtime.


In [1]:
# from google.colab import drive
# drive.mount('/content/gdrive')

Install SUMO


In [2]:
# %sudo add-apt-repository -y ppa:sumo/stable
# %sudo apt-get update
# %sudo apt-get -y install sumo sumo-tools sumo-doc &

Set sumo env vars


In [3]:
# Set environment variable
import os
import sys
os.environ['SUMO_HOME'] = '/usr/share/sumo'
os.environ['LIBSUMO_AS_TRACI'] = '1' #Optional: for a huge performance boost (~8x) with Libsumo (No GUI)

if "SUMO_HOME" in os.environ:
    tools = os.path.join(os.environ["SUMO_HOME"], "tools")
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
#import traci


# Macros / utils


In [4]:
base_path = "/home/bench/Gitsrcs/origin_destination_bayes_opt"

# if base_path has a space in it, the sumo code will not work
if ' ' in base_path:
    raise ValueError("base_path should not contain spaces")

os.chdir(base_path)

In [5]:
# install missing packages
%pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
import os
import numpy as np
import pandas as pd
import pprint
import matplotlib.pyplot as plt
from pathlib import Path

import torch
from torch.quasirandom import SobolEngine

from botorch import fit_gpytorch_mll
from botorch.acquisition import qLogExpectedImprovement
from botorch.models import SingleTaskGP
from botorch.models.transforms import Standardize
from botorch.optim import optimize_acqf
from botorch.sampling.stochastic_samplers import StochasticSampler
from botorch.utils.transforms import unnormalize, normalize

from gpytorch.constraints import Interval
from gpytorch.kernels import MaternKernel, ScaleKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood


from bayesian_optimization.helpers import (load_kwargs_config, 
                    compute_nrmse_counts_all_edges, 
                    parse_loop_data_xml_to_pandas, 
                    create_taz_xml,
                    simulate_od,
                    od_xml_to_df,
                    )

In [7]:
config = load_kwargs_config(base_path, "gridsearch")
Path(config["simulation_run_path"]).mkdir(parents=True, exist_ok=True)
pprint.pprint(dict(config))

{'BATCH_SIZE': 1,
 'EDGE_OUT_STR': 'edge_data.xml',
 'NITER': 100,
 'NUM_RESTARTS': 5,
 'RAW_SAMPLES': 32,
 'SAMPLE_SHAPE': 128,
 'SUMO_PATH': '/usr/share/sumo',
 'TRIPS2ODS_OUT_STR': 'trips.xml',
 'additional_xml': PosixPath('/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/additional.xml'),
 'edge_selection': PosixPath('/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/edge_selection.txt'),
 'file_gt_od': PosixPath('/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/od.xml'),
 'fixed_routes': PosixPath('/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/routes.csv'),
 'model_name': 'gridsearch',
 'n_init_search': 30,
 'net_xml': PosixPath('/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/net.xml'),
 'network_name': 'quickstart_underdetermined',
 'network_path': PosixPath('network/quickstart_underdetermined'),
 'od_du

# Create GT (ground truth) scenario


In [8]:
# Get Ground Truth OD + fixed routes
print(f"Reading: {config['file_gt_od']}")
gt_od_df = od_xml_to_df(config["file_gt_od"])

print(f"Reading: {config['fixed_routes']}")
routes_df = pd.read_csv(config["fixed_routes"], index_col=0)

# if config["edge_selection"] exists
if "edge_selection" in config:
    if not os.path.exists(config["edge_selection"]):
        edge_selection = None
    else:
        print(f"Reading: {config['edge_selection']}")
        edge_selection = pd.read_csv(config["edge_selection"], header=None)
        edge_selection.columns = ["edge_id"]
        edge_selection = edge_selection["edge_id"].tolist()
else:
    edge_selection = None


Reading: /home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/od.xml
total GT demand:  1400.0
Reading: /home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/routes.csv
Reading: /home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/edge_selection.txt


Simulate the GT scenario to obtain the GT traffic statistics


In [9]:
simulation_gt_run_path =f'{config["simulation_run_path"]}/ground_truth'
prefix_output_gt = f'{simulation_gt_run_path}/sim'
sim_edge_out_gt = f'{prefix_output_gt}_{config["EDGE_OUT_STR"]}'
new_od_xml = f'{simulation_gt_run_path}/od.xml'

Path(simulation_gt_run_path).mkdir(parents=True, exist_ok=True)

base_od = gt_od_df.copy()
gt_od_vals = gt_od_df['count'].astype(float).to_numpy()
curr_od = gt_od_vals.copy()
base_od['count'] = curr_od
base_od = base_od.rename(columns={'fromTaz':'from', 'toTaz':'to'})        
create_taz_xml(new_od_xml, base_od, config["od_duration_sec"], base_path)
print(base_od)

# Run simulation
simulate_od(new_od_xml, 
            prefix_output_gt, 
            base_path, 
            config["net_xml"], 
            config["taz2edge_xml"], 
            config["additional_xml"],
            routes_df,
            config["sim_end_time"],
            config["TRIPS2ODS_OUT_STR"])



Created  /home/bench/Gitsrcs/origin_destination_bayes_opt/output/quickstart_underdetermined_gridsearch/ground_truth/od.xml
    from     to  count  data
0  taz91  taz93  500.0  None
1  taz91  taz94  900.0  None
od2trips  --spread.uniform --taz-files /home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/taz.xml --tazrelation-files output/quickstart_underdetermined_gridsearch/ground_truth/od.xml -o /home/bench/Gitsrcs/origin_destination_bayes_opt/output/quickstart_underdetermined_gridsearch/ground_truth/sim_trips_beforeRteUpdates.xml
Success.time 299.17
###### Running SUMO #######
Seed 0
Loading net-file from '/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/net.xml' ... done (2ms).
Loading additional-files from '/home/bench/Gitsrcs/origin_destination_bayes_opt/network/quickstart_underdetermined/additional.xml' ... done (1ms).
Loading done.
Simulation version 1.12.0 started with time: 0.00
Step #57600.00 (0ms ?*RT. ?UPS, veh

Read and process the GT simulation outputs


In [10]:
df_edge_gt, _, _ = parse_loop_data_xml_to_pandas(base_path, sim_edge_out_gt, prefix_output_gt, config["SUMO_PATH"], edge_list=edge_selection)
# picking at edges as GT edges
num_gt_edges = df_edge_gt.shape[0]
print("Number of GT edges:",num_gt_edges)
gt_edge_data = df_edge_gt\
    .sort_values(by=['interval_nVehContrib'], ascending=False)\
    .iloc[:num_gt_edges]

print(sim_edge_out_gt)
print(gt_edge_data.head())

Filtering edges to [['D2', 'D4', 'D7', 'D5']]
Number of GT edges: 3
output/quickstart_underdetermined_gridsearch/ground_truth/sim_edge_data.xml
  edge_id  interval_nVehContrib  interval_harmonicMeanSpeed
0      D2                1400.0                    8.591667
2      D7                 900.0                   11.520000
1      D5                 500.0                   11.076667


# Optimization

Bayesian optimization utils / helpers


In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
dtype = torch.double

dim_od = gt_od_df.shape[0]
print(dim_od)

bounds = torch.tensor([
    [ 0 for _ in range(dim_od)],
    [ 2000 for _ in range(dim_od)]
], device=device, dtype=dtype) 

cuda:0
2


## Create and simulate a full-grid search.


In [12]:
# full grid search
n_full_search = 201
candidates = []

# print(dim_od)
for i in range(dim_od):
    candidates.append(torch.linspace(0,1,n_full_search))

search_space = torch.meshgrid(candidates,indexing="ij")
search_space = torch.stack(search_space , 0)
search_space.shape
search_space = search_space.view(dim_od, -1)
search_space = search_space.transpose(0,1)
print(f"search_space shape = {search_space.shape}")
search_space = search_space.to(device)

# map the normalized into the original parameter space
train_X0 = unnormalize(search_space, bounds)
train_X0 = train_X0[1:,:]
train_X0

search_space shape = torch.Size([40401, 2])


tensor([[   0.0000,   10.0000],
        [   0.0000,   20.0000],
        [   0.0000,   30.0000],
        ...,
        [2000.0000, 1980.0000],
        [2000.0000, 1990.0000],
        [2000.0000, 2000.0000]], device='cuda:0', dtype=torch.float64)

In [24]:
ods_epsilon = []
loss_all = []
batch_data_i = torch.ones((search_space.shape[0],dim_od)).to(train_X0.device) * np.nan
batch_data_i[:,0:dim_od] = train_X0

# Base OD which we will update their count entries
base_od = gt_od_df.copy()
gt_od_vals = gt_od_df['count'].astype(float).to_numpy()

for i , x in enumerate(train_X0.tolist()):
      print(f"########### OD: {i} ###########")
      print(x)
      
      simulation_run_path_grid =f'{config["simulation_run_path"]}/full_search'
      Path(simulation_run_path_grid).mkdir(parents=True, exist_ok=True)
      
      # new_od_xml = f"{simulation_run_path_grid}/grid_od_{config['network_name']}_{i}.xml"
      # prefix_output_grid = f'{simulation_run_path_grid}/grid_{i}'

      new_od_xml = f"{simulation_run_path_grid}/grid_od_{config['network_name']}_temp.xml"
      prefix_output_grid = f'{simulation_run_path_grid}/grid_temp'

      # Generate OD
      #curr_od = gt_od_vals.copy()
      curr_od = np.array(x)

      print(f'total expected GT demand: {np.sum(curr_od)}')

      ###
      # create OD xml file 
      ###
      base_od['count'] = curr_od
      # round to 1 decimal point
      base_od['count'] = [round(elem, 1) for elem in base_od['count']]     
      base_od = base_od.rename(columns={'fromTaz':'from', 'toTaz':'to'})        
      create_taz_xml(new_od_xml, base_od, config["od_duration_sec"], base_path)
      ods_epsilon.append(curr_od)

      # simulate gridial search
      simulate_od(new_od_xml, 
                  prefix_output_grid, 
                  base_path, 
                  config["net_xml"], 
                  config["taz2edge_xml"], 
                  config["additional_xml"],
                  routes_df,
                  config["sim_end_time"],
                  config["TRIPS2ODS_OUT_STR"])

      ## Compute loss
      #prefix_output = f'full_search/sobol_{i}'
      sim_edge_out = f'{base_path}/{prefix_output_grid}_{config["EDGE_OUT_STR"]}'
      print(sim_edge_out)
      curr_loop_stats, _, _ = parse_loop_data_xml_to_pandas(base_path, sim_edge_out,prefix_output_grid,config["SUMO_PATH"], edge_list=edge_selection)
      curr_loss = compute_nrmse_counts_all_edges(gt_edge_data, curr_loop_stats)

      loss_all.append(curr_loss)
      print(f"############## loss: {curr_loss} ##############")

      # Parse training data
      # df_curr = pd.DataFrame(curr_od.reshape(1,dim_od),
                        # columns = [f"x_{i+1}" for i in range(dim_od)])
      # df_curr['loss'] = curr_loss
      # batch_data_i.append(df_curr)
      batch_data_i[i,0:dim_od] = curr_od
      batch_data_i[i,dim_od] = curr_loss

      # Save numpy to csv
      np.savetxt(f"{simulation_run_path_grid}/data_set_ods_0_2000.csv", batch_data_i, delimiter=",")

RuntimeError: The expanded size of the tensor (40401) must match the existing size (40400) at non-singleton dimension 0.  Target sizes: [40401, 2].  Tensor sizes: [40400, 2]

In [23]:
batch_data_i

array([[nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       ...,
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan]])