# Impedance Calibration Test Run

**Overview:**
1. Network Preparation
2. Import Train and Test Sets
3. Specify Calibration Parameters
    - Link Impedance Function
    - Turn Impedance Function
    - Objective/Loss Function
        - First Preference Recovery
        - Exact Overlap
        - Buffer Overlap (in development)
        - Fréchet Distance/Area (in development)
4. Run Calibration
    - Particle Swarm Optimization (constrained & non-probabilistic)
    - Maximum Likelihood Estimation (unconstrained & probabilistic, in development)
5. Export and Run Post Calibration


In [None]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
import stochastic_optimization
from tqdm import tqdm
import similaritymeasures
import random

from shapely.ops import LineString, MultiLineString

import sys
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
config = file_structure_setup.filepaths()

from network.src import modeling_turns
import speedfactor

# Network Import

In [None]:
# Import the calibration network: the pickle unpacks into `links` and `turns`.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
network_pkl = config['calibration_fp'] / "calibration_network.pkl"
with network_pkl.open('rb') as fh:
    links, turns = pickle.load(fh)

# Build the turn graph from `turns`.
turn_G = modeling_turns.make_turn_graph(turns)

In [None]:
# Read the "edges" layer of the final network GeoPackage.
network_gpkg = config['network_fp'] / "final_network.gpkg"
all_links = gpd.read_file(network_gpkg, layer="edges")

In [None]:
# Lookup tables keyed by linkid for fast access to link attributes.
link_ids = all_links['linkid']
# linkid -> length in feet (needed by the loss function)
length_dict = dict(zip(link_ids, all_links['length_ft']))
# linkid -> geometry
geo_dict = dict(zip(link_ids, all_links['geometry']))

# Remove bike facilities that were installed after the data collection
In the future, this will be done dynamically.

In [None]:
# Display the column names of `links`.
links.columns

In [None]:
# Keep a copy of the current facility values for later comparison.
links['future_facility'] = links['facility_fwd']

# Links whose year is known and later than 2016.
after_2016 = (links['year'] > 2016) & links['year'].notna()

# Road links: clear the on-street bike facility when it is dated after 2016.
on_road = links['link_type'] == 'road'
links.loc[after_2016 & on_road, 'facility_fwd'] = None

# Bike links (cycletracks/mups) dated after 2016 are only counted here;
# actually dropping them (and filtering the turns) is currently disabled.
cond = after_2016 & (links['link_type'] == 'bike')
removed = links[cond]
print(removed.shape[0],'cycletracks/mup removed')
# links = links[cond==False]
# new = set(links['linkid'].tolist())
# turns = turns[turns['source_linkid'].isin(new) ]

# Facilities without a year: clear bike lanes and cycletracks.
undated_facility_types = ['bike lane','bufferred bike lane','cycletrack']
undated = links['facility_fwd'].isin(undated_facility_types) & links['year'].isna()
links.loc[undated, 'facility_fwd'] = None

In [None]:
# the removed links look correct
# removed.explore()

In [None]:
# # removed facilities
# links.loc[(links['future_facility']!=links['facility_fwd']) & \
#           links['future_facility'].notna(),
#           ['future_facility','facility_fwd','geometry']].explore()

In [None]:
# #remaining infra
# links.loc[links['facility_fwd'].notna(),['facility_fwd','year','geometry']].explore()

# Specify Link Impedance Functions

In [None]:
# Display the column names of `links` (now including `future_facility`, if the cell above was run).
links.columns

In [None]:
# Display the rows of `links` that have a non-null `facility_fwd`.
links[links['facility_fwd'].notna()]

In [None]:
# 0/1 indicator: the link's facility is a multi use path or a cycletrack.
is_mup = links['facility_fwd'].isin(['multi use path','cycletrack'])
links['multi use path'] = is_mup.astype(int)
# Those links get a lane count of 0.
links.loc[is_mup, 'lanes'] = 0

# 0/1 indicator: the link's facility is a bike lane or buffered bike lane.
is_bike_lane = links['facility_fwd'].isin(['bike lane','bufferred bike lane'])
links['bike lane'] = is_bike_lane.astype(int)
#links.loc[links['bike lane']==True,'']

In [None]:
# Map each coefficient's position in the beta vector to the name of the
# variable it applies to. Link variables take the first positions and the
# turn variables continue the numbering.
link_variables = ['multi use path', 'bike lane', 'lanes', 'above_4']
turn_variables = ['unsig_major_road_crossing']

betas_links = dict(enumerate(link_variables))
betas_turns = dict(enumerate(turn_variables, start=len(link_variables)))

# #this was only .14 overlap
# betas_links = {
#     0 : 'multi use path',
#     1 : 'bike lane',
#     2 : 'AADT',
#     3 : 'above_4'
# } 

# betas_turns = {
#     4 : 'unsig_major_road_crossing'
# }



# #have position of beta next to name of variable
# #NOTE: keys must be in the correct order used
# betas_links = {
#     0 : 'mixed_traffic_no_facil',
#     1 : 'mixed_traffic_w_facil',
#     #0 : 'major_road_w_class_2',
#     # 1 : 'minor_road_w_class_2',
#     # 2 : 'major_road_no_facil',
#     # 3 : 'minor_road_no_facil',
#     2 : 'above_4'
#     #1 : 'motorized'
#     #1 : 'ascent_grade_%'
# } 

# betas_turns = {
#     3 : 'unsig_major_road_crossing'
#     #1 : 'left',
#     #2 : 'right',
#     #3 : 'signalized'
# }


# #have position of beta next to name of variable
# #NOTE: keys must be in the correct order used
# #TODO have this be named tuples or something similar
# # (name=var_name,type,position=position,bounds=[0,3])
# betas_links = {
#     0 : 'AADT',
#     1 : 'lanes',
#     2 : 'here_speed',
#     3 : 'above_4'
# } 

# betas_turns = {
#     4 : 'unsig_major_road_crossing',
#     5 : 'signalized'
# }



# Import Training Set

In [None]:
# Load the trips used for calibration. NOTE: this reads 'full_set.pkl'
# (not 'train_set.pkl'), so `train_set` holds whatever that file contains.
full_set_fp = config['calibration_fp'] / 'full_set.pkl'
with full_set_fp.open('rb') as fh:
    train_set = pickle.load(fh)

# Derive the ODs for these trips with the project helper.
train_ods = stochastic_optimization.match_results_to_ods(train_set)

In [None]:
# with (config['calibration_fp']/'test_set.pkl').open('rb') as fh:
#     test_set = pickle.load(fh)
# with (config['calibration_fp']/'train_set.pkl').open('rb') as fh:
#     train_set = pickle.load(fh)

# # match the ods to the network
# train_ods = stochastic_optimization.match_results_to_ods(train_set)
# test_ods = stochastic_optimization.match_results_to_ods(test_set)

In [None]:
# both_ods = list(set.union(set(train_ods),set(test_ods)))
# html = ""
# nodes = gpd.read_file(config['network_fp']/'final_network.gpkg',layer='nodes')
# nodes.to_crs('epsg:4236',inplace=True)
# nodes['lon'] = nodes.geometry.x
# nodes['lat'] = nodes.geometry.y
# latlon = tuple(zip(nodes['lon'],nodes['lat']))
# nodes = dict(zip(nodes['N'],latlon))
# nodes.get(68196100,0)
# htmls = []
# for od in both_ods:
#     start = od[0]
#     end = od[1]
#     start_lonlat = nodes.get(start,0)
#     end_lonlat = nodes.get(end,0)
#     html = f"https://brouter.damsy.net/latest/#map=12/33.7522/-84.3892/standard&lonlats={start_lonlat[1]},{start_lonlat[0]};{end_lonlat[1]},{end_lonlat[0]}&profile=safety"
#     htmls.append(html)
# with (config['calibration_fp']/"brouter_links.txt").open('w') as fh:
#     for html in htmls:
#         fh.write(f"{html}\n")
# with (config['calibration_fp']/"brouter_ods.txt").open('w') as fh:
#     for od in both_ods:
#         fh.write(f"{od}")

In [None]:
# gdf = train_set[random_trip]['matched_edges']
# gdf['geometry'] = gdf['linkid'].map(geo_dict)
# gdf = gpd.GeoDataFrame(gdf,crs=config['projected_crs_epsg'])
# gdf.explore()

# Calibration Settings

In [None]:
# Name of the links column used as the base impedance.
base_impedance_col = "travel_time_min"

# Objective (loss) function minimized by the calibration and its keyword arguments.
loss_function = stochastic_optimization.jaccard_index
loss_function_kwargs = {'length_dict':length_dict}#,'overlap_threshold':0.80}

# Alternative objective:
# loss_function = stochastic_optimization.buffer_overlap
# loss_function_kwargs = {'geo_dict':geo_dict,'buffer_ft':100,'standardize':True}

# link coefficients control the % increase in link travel time (units don't matter)
# turn coefficients control the amount of seconds added from the turn (units matter)
# One [lower, upper] pair per entry of betas_links, in position order.
link_bounds = [[-1,0],[-1,0],[0,4],[0,4]]
#[[-1, 2] for _ in range(0, len(betas_links))]
turn_bounds = [[0, 4] for _ in betas_turns]

# The link bounds are hard-coded, so fail loudly if they no longer line up
# with the link variables instead of silently searching the wrong dimensions.
if betas_links and len(link_bounds) != len(betas_links):
    raise ValueError(
        f"link_bounds has {len(link_bounds)} entries but betas_links has "
        f"{len(betas_links)}; provide one [lower, upper] pair per link variable"
    )

# Plain booleans: use `and`/truthiness rather than the bitwise `&`.
if betas_links and betas_turns:
    bounds = np.vstack([link_bounds,turn_bounds])
elif betas_links:
    bounds = link_bounds
elif betas_turns:
    bounds = turn_bounds
else:
    # Previously `bounds` was simply left undefined in this case.
    raise ValueError("betas_links and betas_turns are both empty; nothing to calibrate")

In [None]:
# Lists handed to the objective function; they are read after the run to find
# the best betas and their objective value.
# NOTE(review): presumably the objective appends to these on every evaluation — confirm.
past_betas = []
past_vals = []
# Extra positional arguments passed via `args=` to `minimize`, which is called
# with stochastic_optimization.impedance_calibration as the objective.
# The order must match that function's signature (not visible in this notebook).
args = (
    past_betas,
    past_vals,
    betas_links,betas_turns,
    train_ods,train_set,
    stochastic_optimization.link_impedance_function,
    base_impedance_col,
    stochastic_optimization.turn_impedance_function,
    links,turns,turn_G,
    loss_function,
    loss_function_kwargs,
    True #whether to print the results of each iteration
)

# Impedance Calibration

In [None]:
#TODO save the list of tripids and save the iterations of the sampled impedances

In [None]:
from importlib import reload

# Re-import the module so edits made since the first import are picked up.
# NOTE(review): `args` and `loss_function` were bound before this reload, so
# they presumably still reference the pre-reload function objects — confirm.
reload(stochastic_optimization)

column_header = list(betas_links.values()) + list(betas_turns.values()) + ['objective_function']
pso_options = {'maxiter':50,"popsize":5}

# Time the particle swarm run and report the duration in hours.
start = time.time()
print(column_header)
x = minimize(
    stochastic_optimization.impedance_calibration,
    bounds,
    args=args,
    method='pso',
    options=pso_options,
)
end = time.time()
print(f'Took {(end-start)/60/60:.2f} hours')

In [None]:
# Display the result object returned by `minimize`.
x

In [None]:
# Print the variable names, then the betas with the lowest objective value
# together with that value (rounded to 3 decimals).
objective_values = np.array(past_vals)
print(list(betas_links.values())+list(betas_turns.values())+['objective_function'])
print(past_betas[objective_values.argmin()],objective_values.min().round(3))

In [None]:
#distribution of loss function values

In [None]:
# One lookup of position -> variable name covering both links and turns.
combined_betas = dict(betas_links)
combined_betas.update(betas_turns)
calibration_result = {}

# Betas of the evaluation with the lowest objective value.
best_idx = np.array(past_vals).argmin()
best_coefs = past_betas[best_idx]
best_coefs

In [None]:
# Store each best coefficient under the name of its variable.
calibration_result.update(
    {name: best_coefs[position] for position, name in combined_betas.items()}
)

# Record the lowest objective value and the variable layout that produced it.
calibration_result['loss'] = np.array(past_vals).min()
calibration_result['beta_links'] = betas_links
calibration_result['beta_turns'] = betas_turns
calibration_result

In [None]:
# Export coefficients: start from the previously saved results (if the file
# exists) and append the result of this run.
results_fp = config['calibration_fp'] / "calibration_results.pkl"
calibration_results = []
if results_fp.exists():
    with results_fp.open('rb') as fh:
        calibration_results = pickle.load(fh)
calibration_results.append(calibration_result)

In [None]:
# Display the accumulated calibration results before they are written to disk.
calibration_results

In [None]:
#del calibration_results[-2]

In [None]:
# Write the list of calibration results back to disk, then display it.
results_out_fp = config['calibration_fp'] / "calibration_results.pkl"
with results_out_fp.open('wb') as fh:
    pickle.dump(calibration_results, fh)
calibration_results

# Create GIFs

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import imageio
from io import BytesIO

# Function to plot a GeoSeries and save the plot
def plot_geoseries(geoseries,other_geoseries,i,past_val):
    """Draw two GeoSeries on one figure and return the image as PNG bytes.

    Parameters
    ----------
    geoseries
        Series drawn last, in red.
    other_geoseries
        Series drawn first, in blue with a line width of 2.
    i
        Value shown after "Iter:" in the title.
    past_val
        Value shown after "Overlap Function:" in the title.

    Returns
    -------
    bytes
        The rendered figure encoded as PNG.
    """
    fig, ax = plt.subplots(figsize=(20, 20))
    try:
        #cx.add_basemap(ax)
        # `style_kwds` is a keyword of `.explore()`; `.plot()` forwards extra
        # keywords straight to matplotlib, so the line width is passed directly.
        other_geoseries.plot(ax=ax,color='blue',linewidth=2)
        geoseries.plot(ax=ax,color='red')
        ax.set_title(f"Iter:{i} Overlap Function:{past_val}")
        ax.set_axis_off()
        img_bytes = BytesIO()
        fig.savefig(img_bytes, format='png', bbox_inches='tight')
    finally:
        # Close this specific figure even if plotting fails, so repeated calls
        # (one per frame) do not accumulate open figures.
        plt.close(fig)
    return img_bytes.getvalue()

In [None]:
# num_trips = 10

# for z in range(0,num_trips):

#     #choose a random tripid
#     tripid = random.choice(list(train_set.keys()))
#     start_node = train_set[tripid]['start_node']
#     end_node = train_set[tripid]['end_node']

#     matched_edges = train_set[tripid]['matched_edges']
#     matched_edges = np.array(matched_edges)
#     matched_line = MultiLineString([geo_dict[linkid] for linkid, reverse_link in matched_edges])
#     matched_line = gpd.GeoSeries(matched_line,crs='epsg:2240')
#     matched_line = matched_line.to_crs('epsg:4326')

#     modeled_lines = []

#     for betas in past_betas:
#         #update network with the correct impedances
#         stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
#                                 link_impedance_function,
#                                 turn_impedance_function,
#                                 links,turns,turn_G)
#         #find shortest path
#         modeled_edges = stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node)['edge_list']
#         modeled_line = MultiLineString([geo_dict[linkid] for linkid, reverse_link in modeled_edges])
#         modeled_line = gpd.GeoSeries(modeled_line,crs='epsg:2240')
#         modeled_line = modeled_line.to_crs('epsg:4326')
#         modeled_lines.append(modeled_line)

#     # List of GeoSeries (Replace this with your own GeoSeries list)
#     geoseries_list = modeled_lines

#     # Loop through the list of GeoSeries, plot each one, and save the plot
#     images = []
#     for i, geoseries in enumerate(geoseries_list):
#         past_val = past_vals[i]
#         image_bytes = plot_geoseries(geoseries,matched_line,i,past_val)
#         images.append(imageio.imread(BytesIO(image_bytes)))

#     # Path for saving the GIF
#     gif_path = f"animations/stress_animation_{z}.gif"

#     # Save the images as a GIF
#     imageio.mimsave(Path.cwd()/gif_path, images, format='gif', duration=2)


# Validation

In [None]:
from importlib import reload

# Re-import the module so recent edits are picked up.
reload(stochastic_optimization)

# Read back the saved calibration results.
saved_results_fp = config['calibration_fp'] / "calibration_results.pkl"
with saved_results_fp.open('rb') as fh:
    calibration_results = pickle.load(fh)

In [None]:
# #link_impedance_col = "adj_travel_time_min"
# stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)

# #update impedances
# betas = past_betas[np.array(past_vals).argmin()]#x.x
# print(betas)
# stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
#                           link_impedance_function,
#                           base_impedance_col,
#                           turn_impedance_function,
#                           links,turns,turn_G)

# #find shortest path
# results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in test_ods}

# #calulate objective function
# val_to_minimize = loss_function(test_set,results_dict,**loss_function_kwargs)
# val_to_minimize.mean().round(2)

In [None]:
#link_impedance_col = "adj_travel_time_min"
base_impedance_col = "travel_time_min"
# NOTE(review): presumably restores the base impedance on the network before
# the calibrated betas are applied — confirm against stochastic_optimization.
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)

# Update impedances with the betas that gave the lowest objective value.
betas = past_betas[np.array(past_vals).argmin()]#x.x
print(betas)
# The impedance functions live in `stochastic_optimization` (as used when
# building `args`); the bare names are not defined in this notebook and
# raised a NameError.
stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
                          stochastic_optimization.link_impedance_function,
                          base_impedance_col,
                          stochastic_optimization.turn_impedance_function,
                          links,turns,turn_G)

# Find the shortest path for every training OD pair.
results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in train_ods}

# Calculate the objective function on the training set and show its mean.
val_to_minimize = loss_function(train_set,results_dict,**loss_function_kwargs)
val_to_minimize.mean().round(2)

## Visualize random trip

These trips were modeled well:

In [None]:
# First preference recovery on the training trips with a 0.7 overlap threshold.
# NOTE(review): the result is sampled with random.choice and used as a trip id
# below, so it is presumably a list of trip ids — confirm.
fpr_results = stochastic_optimization.first_preference_recovery(
    train_set,
    results_dict,
    length_dict=length_dict,
    overlap_threshold=0.7,
)
fpr_results

In [None]:
import random

# Pick one of the trips returned by first preference recovery.
tripid = random.choice(fpr_results)
trip = train_set[tripid]

# Chosen (matched) and shortest paths as lists of tuples, one per edge row.
chosen = list(map(tuple, trip['matched_edges'].to_numpy()))
shortest = list(map(tuple, trip['shortest_edges'].to_numpy()))

# Modeled path for this trip's origin/destination pair.
start_node = trip['origin_node']
end_node = trip['destination_node']
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

# Look up geometries by the first element of each edge (non-directional).
chosen_geo = [geo_dict[edge[0]] for edge in chosen]
shortest_geo = [geo_dict[edge[0]] for edge in shortest]
modeled_geo = [geo_dict[edge[0]] for edge in modeled_edges]

chosen_lines = gpd.GeoSeries(chosen_geo,crs='epsg:2240')
shortest_lines = gpd.GeoSeries(shortest_geo,crs='epsg:2240')
modeled_lines = gpd.GeoSeries(modeled_geo,crs='epsg:2240')

# Draw the three paths together.
stochastic_optimization.visualize_three_no_legend(chosen_lines,shortest_lines,modeled_lines)

And these trips were not modeled as well:

In [None]:
import random

# Trips that first preference recovery did not return. `fpr_results` and
# `results_dict` were computed from `train_set`, and `test_set` is never
# loaded in this notebook (its load is commented out), so the difference is
# taken against the training trips.
not_good = list(set(train_set) - set(fpr_results))

In [None]:
# Pick one of the trips in `not_good`.
tripid = random.choice(not_good)
# `results_dict` only holds the training ODs and `test_set` is never loaded in
# this notebook, so the trip is looked up in `train_set`.
trip = train_set[tripid]

#retrieve chosen path linkids and convert them to tuple
chosen = [tuple(row) for row in trip['matched_edges'].to_numpy()]
shortest = [tuple(row) for row in trip['shortest_edges'].to_numpy()]

#retrieve modeled path linkids
start_node = trip['origin_node']
end_node = trip['destination_node']
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

#get geos (non-directional)
chosen_geo = [geo_dict[linkid[0]] for linkid in chosen]
shortest_geo = [geo_dict[linkid[0]] for linkid in shortest]
modeled_geo = [geo_dict[linkid[0]] for linkid in modeled_edges]

chosen_lines = gpd.GeoSeries(chosen_geo,crs='epsg:2240')
shortest_lines = gpd.GeoSeries(shortest_geo,crs='epsg:2240')
modeled_lines = gpd.GeoSeries(modeled_geo,crs='epsg:2240')

# Draw the three paths together.
stochastic_optimization.visualize_three_no_legend(chosen_lines,shortest_lines,modeled_lines)