# Impedance Calibration Test Run

### Overview:
1. Network Preperation
1. Import Matched Trace Data
2. Specify Calibration Parameters
    - Link Impedance Function
    - Turn Impedance Function
    - Objective Function
        - Exact Overlap
        - Buffer Overlap (in progress)
        - Frechet Distance (in progress)
3. Run Calibration (in progress)
4. Export Results to Examine

In [3]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
import stochastic_optimization
from tqdm import tqdm
import similaritymeasures
import random

from shapely.ops import LineString, MultiLineString

import sys
sys.path.insert(0,str(Path.cwd().parent))
from network.src import modeling_turns
import speedfactor

In [4]:
import json
config = json.load((Path.cwd().parent / 'config.json').open('rb'))
calibration_fp = Path(config['project_directory']) / 'Calibration'
cycleatl_fp = Path(config['project_directory']) / 'CycleAtlanta'
matching_fp = Path(config['project_directory']) / 'Map_Matching'
network_fp = Path(config['project_directory']) / 'Network'
if calibration_fp.exists() == False:
    calibration_fp.mkdir()

# Network Preperation

In [5]:
turns = pd.read_parquet(network_fp/'turns_df.parquet')
links = gpd.read_file(network_fp/'final_network.gpkg',layer='edges')
directed_links = pd.read_parquet(network_fp/'directed_edges.parquet')

In [11]:
links.columns

Index(['A', 'B', 'linkid', 'link_type', 'osmid', 'timestamp', 'version',
       'type', 'highway', 'oneway', 'name', 'bridge', 'tunnel', 'cycleway',
       'service', 'footway', 'sidewalk', 'bicycle', 'foot', 'access', 'area',
       'all_tags', 'geom_type', 'facility_fwd', 'facility_rev', 'year', 'lts',
       'reverse_geometry', 'ascent_m', 'ascent_grade_%', 'descent_m',
       'descent_grade_%', 'length_ft', 'geometry'],
      dtype='object')

In [10]:
directed_links.columns

Index(['source', 'target', 'linkid', 'reverse_link', 'azimuth', 'facility_fwd',
       'facility_rev', 'ascent_m', 'ascent_grade_%', 'descent_m',
       'descent_grade_%'],
      dtype='object')

In [5]:
#add highway into turns
highway_dict = dict(zip(links['linkid'],links['link_type']))
turns['source_link_type'] = turns['source_linkid'].map(highway_dict)
turns['target_link_type'] = turns['target_linkid'].map(highway_dict)
del highway_dict

In [None]:
#remove these types of links from routing
link_types_allowed = ['bike','pedestrian','road']
links['link_type'].unique()
links = links[links['link_type'].isin(link_types_allowed)]
turns = turns[turns['source_link_type'].isin(link_types_allowed) & turns['target_link_type'].isin(link_types_allowed)]

## Only count left, right turns from road to road


In [7]:
turns.loc[(turns['source_link_type']!='road') & (turns['target_link_type']!='road'),'turn_type'] = None

In [8]:
turn_G = modeling_turns.make_turn_graph(turns)

In [9]:
geo_dict = dict(zip(links['linkid'],links['geometry']))

## added a major/minor road classification, everything else is just left as "road"


In [10]:
major_road = ['primary','secondary']
major_road = major_road + [item + '_link' for item in major_road]
minor_road = ['tertiary','unclassified','residential','service','trunk','living_street']
major_road = major_road + [item + '_link' for item in minor_road]
links.loc[links['highway'].isin(major_road),'link_type_new'] = 'major_road'
links.loc[links['highway'].isin(minor_road),'link_type_new'] = 'minor_road'
links.loc[links['link_type_new'].isna(),'link_type_new'] = links.loc[links['link_type_new'].isna(),'link_type']

In [11]:
#links['link_type_new'].unique()
links['high_traffic_stress'] = links['link_type_new'] == 'major_road'

In [12]:
links.columns

Index(['A', 'B', 'linkid', 'link_type', 'osmid', 'timestamp', 'version',
       'type', 'highway', 'oneway', 'name', 'bridge', 'tunnel', 'cycleway',
       'service', 'footway', 'sidewalk', 'bicycle', 'foot', 'access', 'area',
       'all_tags', 'geom_type', 'facility_fwd', 'facility_rev', 'year', 'lts',
       'reverse_geometry', 'ascent_m', 'ascent_grade_%', 'descent_m',
       'descent_grade_%', 'length_ft', 'geometry', 'link_type_new',
       'high_traffic_stress'],
      dtype='object')

In [1]:
# Format elevation variables

Format variables (in progress)
HERE variables have error because of the conflation process

In [13]:
# above_30 = links['speedlimit_range_mph'].isin(['31-40 MPH','41-54 MPH','55-64 MPH'])
# more_than_1_lpd = links['lanes_per_direction'].isin(['2-3','> 4'])
# no_bike_infra = links['bike_facility_type'].isna()
# links['NACTO'] = 1
# links.loc[(above_30 | more_than_1_lpd) & no_bike_infra,'NACTO'] = 0
# links_geo = links['linkid'].map(geo_dict)
# links.reset_index(drop=True,inplace=True)
# links = gpd.GeoDataFrame(links,geometry=links_geo,crs='epsg:2240')
# links[links['NACTO']==0].explore()

Format turn variables (in progress)

In [15]:
turns['left'] = turns['turn_type'] == 'left'
turns['right'] = turns['turn_type'] == 'right'

## Directed network

In [None]:
link_cols_drop = ['facility_fwd','facility_rev','reverse_geometry','ascent_m', 'ascent_grade_%', 'descent_m', 'descent_grade_%']
links.drop(columns=link_cols_drop,inplace=True)

In [14]:
links['ascent_grade_%'] = links['ascent_grade_%'].fillna(0)

In [16]:
speedfactor.calculate_adjusted_speed(links,9)

In [None]:
#remove certain links from the routing process
links

# Specify Link Impedance Functions

## BicyclingPlus Demo Impedance Functions

Turn + Stress Impedance

In [17]:
#TODO allow for certain impedance functions to be left out

In [18]:
#have position of beta next to name of variable
#NOTE: keys must be in the currect order used
betas_links = {
    0 : 'high_traffic_stress',
    1 : 'ascent_grade_%'
} 
betas_turns = {
    2 : 'left',
    3 : 'right',
    4 : 'signalized'
}

'''
Currently works with binary and numeric variables. Categoricals will have to be
cast into a different format for now.

Link impedance is weighted by the length of the link, turns are just the impedance associated
'''

#customize this function to change impedance formula
#TODO streamline process of trying out new impedance functions
def link_impedance_function(betas,beta_links,links):
    #prevent mutating the original links gdf
    links = links.copy()
    
    multiplier = np.zeros(links.shape[0])
    
    #assumes that these effects are additive
    for key, item in beta_links.items():
        multiplier = multiplier + (betas[key] * links[item].values)
    
    links['link_cost'] = links['adj_travel_time_min'] * (1+multiplier)

    return links

def turn_impedance_function(betas,beta_turns,turns):
    #use beta coefficient to calculate turn cost
    base_turn_cost = 30 # from Lowry et al 2016 DOI: http://dx.doi.org/10.1016/j.tra.2016.02.003
    # turn_costs = {
    #     'left': betas[1] * base_turn_cost,
    #     'right': betas[1] * base_turn_cost,
    #     'straight': betas[1] * base_turn_cost
    # }
    #turns['turn_cost'] = turns['turn_type'].map(turn_costs)

    turns = turns.copy()

    turns['turn_cost'] = 0
    #instance impedance
    for key, item in beta_turns.items():
        turns['turn_cost'] = turns['turn_cost'] + (betas[key] * turns[item])

    turns['turn_cost'] = turns['turn_cost'].astype(float)

    return turns

# Import Training Set

In [19]:
with (calibration_fp/'test_set.pkl').open('rb') as fh:
    test_set = pickle.load(fh)
with (calibration_fp/'train_set.pkl').open('rb') as fh:
    train_set = pickle.load(fh)

#match the ods to the network
train_ods = stochastic_optimization.match_results_to_ods(train_set)
test_ods = stochastic_optimization.match_results_to_ods(test_set)

In [20]:
import stochastic_optimization
from importlib import reload
reload(stochastic_optimization)

<module 'stochastic_optimization' from 'c:\\Users\\tpassmore6\\Documents\\GitHub\\BikewaySimDev\\impedance_calibration\\stochastic_optimization.py'>

In [None]:
#TODO put the loss/objective functions in a class for documentation purposes

In [21]:
objective_function = stochastic_optimization.first_preference_recovery
length_dict = dict(zip(links['linkid'],links['length_ft'])) # need this for objective function
objective_function_kwargs = {'length_dict':length_dict,'overlap_threshold':0.80}

# objective_function = stochastic_optimization.buffer_overlap
# objective_function_kwargs = {'geo_dict':geo_dict,'buffer_ft':100,'standardize':True}

#not really sure how to best set boundary conditions yet
num_of_coefs = len(betas_links) + len(betas_turns)
bounds = [[0, 5] for _ in range(0, num_of_coefs)]
#TODO turn to dict instead

In [22]:
past_betas = []
past_vals = []
args = (
    past_betas,
    past_vals,
    betas_links,betas_turns,
    ods,train_set,
    link_impedance_function,
    turn_impedance_function,
    links,turns,turn_G,
    objective_function,
    objective_function_kwargs
)

In [23]:
from importlib import reload
reload(stochastic_optimization)

<module 'stochastic_optimization' from 'c:\\Users\\tpassmore6\\Documents\\GitHub\\BikewaySimDev\\impedance_calibration\\stochastic_optimization.py'>

In [24]:
start = time.time()
# args = (df_edges,turns,turn_G,matched_traces,False)
print('high stress,','ascent grade %,','left,','right,','signalized,','val')
x = minimize(stochastic_optimization.impedance_calibration, bounds, args=args, method='pso', options={'maxiter':5})
end = time.time()
print(f'Took {(end-start)/60/60} hours')

high stress, ascent grade %, left, right, signalized, val
[2.3 3.1 1.1 8.4 1.3] -0.004
[5.2 4.2 2.1 9.4 5.2] -0.002
[0.3 5.2 4.1 7.1 2.4] -0.012
[8.2 8.2 8.2 0.4 8. ] -0.002
[7.3 1.4 7.1 6.2 4.1] -0.008
[6.2 7.3 9.3 4.  0.3] -0.002
[1.5 9.4 5.4 1.5 3.3] -0.006
[3.  0.4 6.1 5.2 9.2] -0.002
[9.3 2.2 0.3 3.1 7.5] -0.006
[4.5 6.1 3.  2.3 6.2] -0.002
Negative Impedance Coefficient Detected
[0.3 5.4 4.9 7.  4.1] -0.008
[0.3 5.2 4.1 7.1 2.4] -0.012
[5.6 5.3 3.2 7.9 6.3] -0.002
[0.9 4.2 2.9 7.5 2.1] -0.008
Negative Impedance Coefficient Detected
[0.1 5.1 4.8 3.6 2.5] -0.014
[2.4 1.  4.2 3.5 1.6] -0.01
[6.2 2.7 2.1 3.8 5.4] -0.004
[4.2 6.  3.2 3.1 4.9] -0.002
[1.4 3.6 5.1 4.8 2.4] -0.008
Negative Impedance Coefficient Detected
[0.1 5.  4.5 4.7 2.4] -0.014
Negative Impedance Coefficient Detected
Negative Impedance Coefficient Detected
[2.1 5.4 5.3 4.  2.1] -0.004
Negative Impedance Coefficient Detected
Negative Impedance Coefficient Detected
[5.7 2.9 1.9 5.5 3.3] -0.004
Negative Impedance Coeffi

In [25]:
x

     fun: -0.014
 message: 'maximum number of iterations is reached'
    nfev: 50
     nit: 5
  status: -1
 success: False
       x: array([0.05424486, 5.04365132, 4.54454701, 4.72097365, 2.44650265])

In [26]:
np.array(past_vals).min()

-0.014

In [27]:
past_betas[np.array(past_vals).argmin()]

(0.1, 5.1, 4.8, 3.6, 2.5)

Create GIFs

In [28]:
import geopandas as gpd
import matplotlib.pyplot as plt
import imageio
from io import BytesIO

# Function to plot a GeoSeries and save the plot
def plot_geoseries(geoseries,other_geoseries,i,past_val):
    fig, ax = plt.subplots(figsize=(20, 20))
    #cx.add_basemap(ax)
    other_geoseries.plot(ax=ax,color='blue',style_kwds={'linewidth':2})
    geoseries.plot(ax=ax,color='red')
    ax.set_title(f"Iter:{i} Overlap Function:{past_val}")
    ax.set_axis_off()
    img_bytes = BytesIO()
    plt.savefig(img_bytes, format='png', bbox_inches='tight')
    plt.close()
    return img_bytes.getvalue()

In [29]:
# num_trips = 10

# for z in range(0,num_trips):

#     #choose a random tripid
#     tripid = random.choice(list(train_set.keys()))
#     start_node = train_set[tripid]['start_node']
#     end_node = train_set[tripid]['end_node']

#     matched_edges = train_set[tripid]['matched_edges']
#     matched_edges = np.array(matched_edges)
#     matched_line = MultiLineString([geo_dict[linkid] for linkid, reverse_link in matched_edges])
#     matched_line = gpd.GeoSeries(matched_line,crs='epsg:2240')
#     matched_line = matched_line.to_crs('epsg:4326')

#     modeled_lines = []

#     for betas in past_betas:
#         #update network with the correct impedances
#         stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
#                                 link_impedance_function,
#                                 turn_impedance_function,
#                                 links,turns,turn_G)
#         #find shortest path
#         modeled_edges = stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node)['edge_list']
#         modeled_line = MultiLineString([geo_dict[linkid] for linkid, reverse_link in modeled_edges])
#         modeled_line = gpd.GeoSeries(modeled_line,crs='epsg:2240')
#         modeled_line = modeled_line.to_crs('epsg:4326')
#         modeled_lines.append(modeled_line)

#     # List of GeoSeries (Replace this with your own GeoSeries list)
#     geoseries_list = modeled_lines

#     # Loop through the list of GeoSeries, plot each one, and save the plot
#     images = []
#     for i, geoseries in enumerate(geoseries_list):
#         past_val = past_vals[i]
#         image_bytes = plot_geoseries(geoseries,matched_line,i,past_val)
#         images.append(imageio.imread(BytesIO(image_bytes)))

#     # Path for saving the GIF
#     gif_path = f"animations/stress_animation_{z}.gif"

#     # Save the images as a GIF
#     imageio.mimsave(Path.cwd()/gif_path, images, format='gif', duration=2)


#### Calculate overlap for test set
Need to use best set of betas to 

In [75]:
link_impedance_col = "adj_travel_time_min"

#update impedances
betas = past_betas[np.array(past_vals).argmin()]#x.x
print(betas)
stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
                          link_impedance_function,
                          turn_impedance_function,
                          links,turns,turn_G)

#find shortest path
results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in test_ods}

# #calulate objective function
# val_to_minimize = objective_function(test_set,results_dict,**objective_function_kwargs)
# np.round(val_to_minimize,2)

In [76]:
def first_preference_recovery(match_results,results_dict,**kwargs):
    '''
    Seen in Meister et al. 2024: https://doi.org/10.1016/j.jcmr.2024.100018

    "FPR is the percentage of correct predictions assuming that the predicted choice
    is the one with the highest choice probability"

    This has been modified to look at the similarity between the modeled route
    and the chosen route instead of choice probabilities. A correct modeled route will
    contain all or most of the links included in the map matched trip. An overlap threshold
    controls what percentage of intersection between the chosen and modeled route is
    needed to be considered a match. A 100% overlap threshold means that the modeled
    route contains all of the links included in the chosen route. A 0% overlap threshold
    means that the modeled route doesn't need to contain any of links in the chosen route
    to count. Length is used to weight the overlap appropriately (i.e., missing short links
    isn't as big of a deal as long ones).

    This function returns a 1 or a 0 depending on the overlap threshold set. The average is
    taken across all the trips, hence it will be between 0 and 1.

    '''

    result = []
    
    for tripid, item in match_results.items():

        start_node = item['origin_node']
        end_node = item['destination_node']

        #retrieve linkids in (linkid:int,reverse_link:boolean) format
        chosen = [tuple(row) for row in match_results[tripid]['matched_edges'].to_numpy()]
        modeled_edges = results_dict[(start_node,end_node)]['edge_list']

        #get lengths (non-directional)
        chosen_length = np.sum([kwargs['length_dict'][linkid[0]] for linkid in chosen])
        #modeled_length = np.sum([kwargs['length_dict'][linkid[0]] for linkid in modeled_edges])

        #convert to sets
        chosen = set(chosen)
        modeled_edges = set(modeled_edges)

        #find intersection of sets
        shared = list(set.intersection(chosen,modeled_edges))

        #find intersection length
        intersection_length = np.sum([kwargs['length_dict'][linkid[0]] for linkid in shared])

        # result.append((intersection_length,chosen_length))
        #result.append((intersection_length,chosen_length))

        overlap_calc = intersection_length / chosen_length

        if overlap_calc >= kwargs['overlap_threshold']:
            result.append(tripid)
    
    #TODO another interpretation could be percentage of all currect?
    result = np.array(result)
    
    #result = len(result) / len(match_results)

    # if kwargs['standardize']:
    #     #average intersect over chosen length
    #     result = np.mean(result[:,0] / result[:,1])
    # else:
    #     #total intersect over total chosen length
    #     result = np.sum(result[:,0]) / np.sum(result)
    
    #return negative result because we want to minimize
    return result

In [77]:
#calculate for each one
vals_to_minimize = first_preference_recovery(test_set,results_dict,**objective_function_kwargs)

In [36]:
test_set.keys()

dict_keys([80, 104, 107, 118, 120, 133, 134, 136, 142, 155, 167, 168, 177, 188, 197, 212, 231, 243, 261, 267, 274, 281, 282, 293, 304, 306, 309, 322, 324, 356, 369, 370, 371, 381, 382, 395, 403, 406, 409, 411, 423, 428, 441, 458, 462, 468, 469, 483, 487, 488, 501, 503, 504, 521, 547, 557, 559, 562, 571, 588, 594, 597, 607, 615, 663, 666, 677, 678, 691, 704, 707, 715, 735, 782, 784, 812, 821, 831, 837, 841, 846, 861, 911, 933, 939, 2053, 2068, 2069, 2104, 2110, 2127, 2128, 2130, 2164, 2168, 2173, 2193, 2200, 2208, 2212, 2232, 2233, 2235, 2311, 2323, 2379, 2398, 2424, 2453, 2469, 2477, 2484, 2505, 2508, 2512, 2548, 2551, 2559, 2573, 2582, 2587, 2589, 2591, 2594, 2621, 2660, 2699, 2753, 2820, 2822, 2870, 2879, 2914, 2937, 2947, 2953, 2963, 2982, 2986, 4109, 4113, 4137, 4143, 4169, 4220, 4221, 4269, 4291, 4292, 4296, 4318, 4335, 4410, 4425, 4489, 4492, 4533, 4535, 4640, 4657, 4670, 4674, 4691, 4750, 4755, 4759, 4768, 4773, 4792, 4794, 4808, 4812, 4865, 4881, 4886, 4892, 4902, 4921, 4938, 4

In [37]:
vals_to_minimize

array([  735,  8881,  9022, 14744])

# Compare against shortest path results (training set)

Calculate overlap for shortest path

In [53]:
# link_impedance_col = "adj_travel_time_min"

# #update
# stochastic_optimization.back_to_base_impedance(link_impedance_col,links,turns,turn_G)

# #find shortest path
# results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in ods}

# #calulate objective function
# val_to_minimize = objective_function(train_set,results_dict,**objective_function_kwargs)
# print(val_to_minimize)

-0.052


Calculate for shortest path (no elevation adjustment)

In [64]:
# link_impedance_col = "travel_time_min"

# #update
# stochastic_optimization.back_to_base_impedance(link_impedance_col,links,turns,turn_G)

# #find shortest path
# results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in ods}

# #calulate objective function
# val_to_minimize = objective_function(train_set,results_dict,**objective_function_kwargs)
# print(val_to_minimize)

KeyError: (69531971, 5416216875)

## Visualize random trip

In [65]:
vals_to_minimize

array([  735,  8881,  9022, 14744])

These did well

In [68]:
import random
tripid = random.choice(vals_to_minimize)
tripid
#retrieve chosen path linkids and convert them to tuple
chosen = [tuple(row) for row in test_set[tripid]['matched_edges'].to_numpy()]
shortest = [tuple(row) for row in test_set[tripid]['shortest_edges'].to_numpy()]

#retrieve modeled path linkids
start_node = test_set[tripid]['origin_node']
end_node = test_set[tripid]['destination_node']
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

#get geos (non-directional)
chosen_geo = [geo_dict[linkid[0]] for linkid in chosen]
shortest_geo = [geo_dict[linkid[0]] for linkid in shortest]
modeled_geo = [geo_dict[linkid[0]] for linkid in modeled_edges]

chosen_lines = gpd.GeoSeries(chosen_geo,crs='epsg:2240')
shortest_lines = gpd.GeoSeries(shortest_geo,crs='epsg:2240')
modeled_lines = gpd.GeoSeries(modeled_geo,crs='epsg:2240')

stochastic_optimization.visualize_three_no_legend(chosen_lines,shortest_lines,modeled_lines)

and these not so much

In [78]:
import random

not_good = list(set(test_set.keys()) - set(vals_to_minimize))

In [79]:
tripid = random.choice(not_good)
tripid
#retrieve chosen path linkids and convert them to tuple
chosen = [tuple(row) for row in test_set[tripid]['matched_edges'].to_numpy()]
shortest = [tuple(row) for row in test_set[tripid]['shortest_edges'].to_numpy()]

#retrieve modeled path linkids
start_node = test_set[tripid]['origin_node']
end_node = test_set[tripid]['destination_node']
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

#get geos (non-directional)
chosen_geo = [geo_dict[linkid[0]] for linkid in chosen]
shortest_geo = [geo_dict[linkid[0]] for linkid in shortest]
modeled_geo = [geo_dict[linkid[0]] for linkid in modeled_edges]

chosen_lines = gpd.GeoSeries(chosen_geo,crs='epsg:2240')
shortest_lines = gpd.GeoSeries(shortest_geo,crs='epsg:2240')
modeled_lines = gpd.GeoSeries(modeled_geo,crs='epsg:2240')

stochastic_optimization.visualize_three_no_legend(chosen_lines,shortest_lines,modeled_lines)