# Impedance Calibration Testing

### Overview:
1. Network Preparation
1. Import Matched Trace Data
2. Specify Calibration Parameters
    - Link Impedance Function
    - Turn Impedance Function
    - Objective Function
        - Exact Overlap
        - Buffer Overlap (in progress)
        - Frechet Distance (in progress)
3. Run Calibration (in progress)
4. Export Results to Examine

In [None]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
import stochastic_optimization
from tqdm import tqdm
import similaritymeasures

from shapely.ops import LineString, MultiLineString

import sys
sys.path.insert(0,str(Path.cwd().parent))
from network.src import modeling_turns

In [None]:
import json
# Read the project configuration. The context manager closes the file handle
# once the JSON has been parsed (the one-line version left it open).
with (Path.cwd().parent / 'config.json').open('rb') as fh:
    config = json.load(fh)

# Every working directory is a sub-folder of the configured project directory
project_dir = Path(config['project_directory'])
network_fp = project_dir / "Map_Matching"
traces_fp = project_dir / "CycleAtlanta"
export_fp = project_dir / "Calibration"

In [None]:
# #pyproj is pointing to a postgis installation?
# import pyproj
# #pyproj.datadir.get_data_dir()
# #https://pyproj4.github.io/pyproj/stable/api/datadir.html#pyproj.datadir.get_data_dir
# #https://stackoverflow.com/questions/69630630/on-fresh-conda-installation-of-pyproj-pyproj-unable-to-set-database-path-pypr
# proj_dir = r'C:\Users\tpassmore6\Anaconda3\envs\geo-env\Library\share\proj'
# pyproj.datadir.set_data_dir(proj_dir)

# Network Preparation

In [None]:
# Load the pickled tuple from 'chosen.pkl' and unpack it into links, turns and turn_G.
# NOTE(review): within this file `fp` is first assigned in a later cell, so running the
# notebook top-to-bottom raises NameError here -- presumably `network_fp` (or the later
# `fp`) is the intended directory; confirm before relying on this cell.
with (fp / 'chosen.pkl').open('rb') as fh:
    links,turns,turn_G = pickle.load(fh)
# The unpickled graph is discarded immediately; only links and turns are kept
del turn_G

In [None]:
turn_G = modeling_turns.make_turn_graph(turns)

In [None]:
#recalculate length and add to network
fp = Path.home() / "Documents/BikewaySimData/Projects/gdot"
edges = gpd.read_file(fp/'networks/elevation_added.gpkg',layer="links")
edges.to_crs('epsg:2240',inplace=True)
geo_dict = dict(zip(edges['linkid'],edges['geometry']))
length_dict = dict(zip(edges['linkid'],edges.length))
del edges

links['length_ft'] = links['linkid'].map(length_dict)

In [None]:
# #shouldnt need this anymore
# multi_edges = df_edges.loc[df_edges[['source','target']].duplicated(keep=False),['source','target']]
# src = turns.merge(multi_edges,left_on=['source_A','source_B'],right_on=['source','target']).index.tolist()
# trgt = turns.merge(multi_edges,left_on=['target_A','target_B'],right_on=['source','target']).index.tolist()
# multi_edges = list(set(src + trgt))

Format variables (in progress)

In [None]:
links['high_traffic_stress'] = links['highway'] == 'primary'
# df_edges['bike_facility_type'].value_counts()
# df_edges['high_traffic_stress'] = df_edges['bike_facility_type'].isna() & (df_edges['highway'].map(levels) > 4 | df_edges['speed limit'] > 30)

Format turn variables (in progress)

In [None]:
turns['left'] = turns['turn_type'] == 'left'

# Specify Link Impedance Functions

In [None]:
#have position of beta next to name of variable
betas_links = {
    0 : 'ascent_grade',
    1 : 'high_traffic_stress',
}

betas_turns = {
    2 : 'signalized',
    3 : 'unsignalized',
    4 : 'left',
}

'''
Currently works with binary and numeric variables. Categoricals will have to be
cast into a different format for now.

Link impedance is weighted by the length of the link, turns are just the impedance associated
'''

#customize this function to change impedance formula
#TODO streamline process of trying out new impedance functions
def link_impedance_function(betas,beta_links,links):
    '''
    Calculate a length-weighted cost for every link.

    Parameters:
        betas: sequence of coefficients, indexed by the keys of beta_links
        beta_links: dict mapping a position in betas to a column name in links
        links: dataframe with a 'length_ft' column and one column per attribute
            named in beta_links

    Returns:
        A copy of links with two added columns: 'attr_multiplier' (the sum of
        beta * attribute) and 'link_cost' = length_ft * (1 + attr_multiplier).
        The dataframe passed in is left untouched.
    '''
    #work on a copy so the caller's dataframe is never modified
    result = links.copy()

    #accumulate beta * attribute; stays 0 when beta_links is empty
    multiplier = 0
    for position, column in beta_links.items():
        multiplier = multiplier + (betas[position] * result[column])
    result['attr_multiplier'] = multiplier

    #scale the link length by one plus the attribute multiplier
    result['link_cost'] = result['length_ft'] * (1 + result['attr_multiplier'])

    return result

def turn_impedance_function(betas,beta_turns,turns):
    '''
    Calculate the cost of every turn as a sum of beta * attribute terms.

    Parameters:
        betas: sequence of coefficients, indexed by the keys of beta_turns
        beta_turns: dict mapping a position in betas to a column name in turns
        turns: dataframe with one column per attribute named in beta_turns

    Returns:
        A copy of turns with a float 'turn_cost' column added. The dataframe
        passed in is left untouched. No length weighting is applied here.
    '''
    #NOTE: Lowry et al 2016 (DOI: http://dx.doi.org/10.1016/j.tra.2016.02.003) give a base
    #turn cost of 30. It used to be assigned here but was never applied to the result,
    #so the unused variable (and the commented-out turn_type mapping) were removed.

    #prevent mutating the original turns dataframe
    turns = turns.copy()

    #start from zero so an empty beta_turns still produces a turn_cost column
    turns['turn_cost'] = 0

    for key, item in beta_turns.items():
        turns['turn_cost'] = turns['turn_cost'] + (betas[key] * turns[item])

    #integer betas on boolean/integer attributes leave an integer column, so force float
    turns['turn_cost'] = turns['turn_cost'].astype(float)

    return turns

Test impedance calculation

In [None]:
betas = [3,4,20,20,50]

#round betas to nearest hundredths
betas = np.round(betas,2)

# #prevent negative link weights
# if (betas < 0).any():
#     val = 0
#     return val

#use provided link impedance function and update edge costs
links = link_impedance_function(betas, betas_links, links)
cost_dict = dict(zip(links['linkid'],links['link_cost']))
turns['source_link_cost'] = turns['source_linkid'].map(cost_dict)
turns['target_link_cost'] = turns['target_linkid'].map(cost_dict)

#use provided turn impedance function and update turn costs
turns = turn_impedance_function(betas, betas_turns, turns)

#calculate new total cost and round to the tenths place
turns['total_cost'] = (turns['source_link_cost'] + turns['target_link_cost'] + turns['turn_cost']).round(1)

#round the rest too
turns['source_link_cost'] = turns['source_link_cost'].round(1)
turns['target_link_cost'] = turns['target_link_cost'].round(1)
turns['turn_cost'] = turns['turn_cost'].round(1)


update edge weights

In [None]:
cols = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link','total_cost']

In [None]:
turn_G.get_edge_data(u=(2,False),v=(0,True))

In [None]:
inputs = [betas,betas_links,betas_turns,link_impedance_function,turn_impedance_function,links,turns,turn_G]
stochastic_optimization.impedance_update(*inputs)

In [None]:
turn_G.get_edge_data(u=(2,False),v=(0,True))

In [None]:
turn_G.get_edge_data(u=(2,False),v=(0,True))

In [None]:
updated_edge_costs = {((row[0],row[1]),(row[2],row[3])):row[4] for row in turns[cols].itertuples(index=False)}

In [None]:
nx.set_edge_attributes(turn_G,values=updated_edge_costs,name='weight')

# Import Matched Trace Data
Dictionary Containing Origins and Destinations, Matches, Shortest Paths

In [None]:
#export for impedance calibration
with (Path('D:/matched_traces')/'ready_for_calibration.pkl').open('rb') as fh:
    match_results = pickle.load(fh)
# with (Path('D:/matched_traces')/'ready4calibration.pkl').open('rb') as fh:
#     matched_traces = pickle.load(fh)

#retrieve all ods?

Import trips table and knock out excessively long trips, primarily exercise trips etc.

## Split the data into a training set and a testing set

In [None]:
import random

def get_random_key(dictionary):
    '''
    Return a random key of dictionary whose value is not a string.

    Every key with a non-string value is equally likely to be returned.
    NOTE(review): string values are presumably placeholders for trips that
    failed to match -- confirm against the code that builds the dictionary.

    Parameters:
        dictionary: dict to draw a key from

    Returns:
        One key of dictionary whose value is not a str

    Raises:
        IndexError: if dictionary is empty or all of its values are strings
    '''
    #filter first and draw once; the previous version re-drew recursively, which
    #ended in a RecursionError whenever every value was a string
    candidates = [key for key, item in dictionary.items() if not isinstance(item, str)]
    return random.choice(candidates)

In [None]:
#draw 100 keys with replacement; repeated draws collapse in the set, so at most 100 remain
random_keys = {get_random_key(match_results) for _ in range(100)}
#keep only the sampled entries, preserving the original ordering of match_results
match_results = {k: v for k, v in match_results.items() if k in random_keys}

In [None]:
from importlib import reload
reload(stochastic_optimization)

In [None]:
ods = stochastic_optimization.match_results_to_ods(match_results)

# objective_function = stochastic_optimization.exact_overlap
# objective_function_args = {'length_dict':length_dict,'standardize':True}

objective_function = stochastic_optimization.buffer_overlap
objective_function_args = {'geo_dict':geo_dict,'buffer_ft':50,'standardize':True}


In [None]:
test_vals = stochastic_optimization.impedance_calibration(ods,match_results,
                                  betas,betas_links,betas_turns,
                                  link_impedance_function,
                                  turn_impedance_function,
                                  links,turns,turn_G,
                                  objective_function,
                                  **objective_function_args
                                  )
test_vals

Create dataframe of ods

In [None]:
ods = [(key,item['start_node'],item['end_node']) for key, item in match_results.items()]
ods = pd.DataFrame(ods,columns=['tripid','start_node','end_node'])

In [None]:
#simplify ods for shortest path calculations
ods0 = [tuple(row) for row in ods[['start_node','end_node']].drop_duplicates().to_numpy()]

Test version

In [None]:
# o = 68168815
# d = 7502147722
# turn_G, virtual_starts, virtual_ends = modeling_turns.add_virtual_links_new(turns,turn_G,[o],[d])
# length, edge_list = nx.single_source_dijkstra(turn_G,source=o,target=d,weight='weight')
# turn_G = modeling_turns.remove_virtual_links_new(turn_G,virtual_starts,virtual_ends)

In [None]:
# Define a function to calculate Dijkstra's shortest paths for a single source
def impedance_path(turns,turn_G,o,d):
    '''
    Find the least-cost path between an origin and a destination on the turn graph.

    Virtual links are added for the origin and destination before the search and
    removed again afterwards.

    Parameters:
        turns: turns table, passed through to modeling_turns.add_virtual_links_new
        turn_G: networkx graph whose edges carry a 'weight' attribute
        o: origin node id (cast to int)
        d: destination node id (cast to int)

    Returns:
        dict with 'length' (path cost rounded to one decimal) and 'edge_list'
        (the path with its first and last entries, the virtual nodes, removed)

    Raises:
        Whatever nx.single_source_dijkstra raises (e.g. when no path exists); the
        virtual links are still removed in that case.
    '''
    #NOTE: without these it'll throw a 'the result is ambiguous error'
    o = int(o)
    d = int(d)

    turn_G, virtual_starts, virtual_ends = modeling_turns.add_virtual_links_new(turns,turn_G,[o],[d])
    try:
        length, edge_list = nx.single_source_dijkstra(turn_G,source=o,target=d,weight='weight')
    finally:
        #always undo the virtual links, even if the search fails; otherwise they would
        #stay in the graph for later searches
        #NOTE(review): assumes add/remove_virtual_links_new modify turn_G in place, as the
        #original code discarded the graph returned by the removal -- confirm
        modeling_turns.remove_virtual_links_new(turn_G,virtual_starts,virtual_ends)

    edge_list = edge_list[1:-1] #chop off the virtual nodes added
    return {'length':np.round(length,1), 'edge_list':edge_list}

results_dict = {(start_node,end_node):impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in ods0}
# #takes 11 minutes for 3000 trips

# Select Random Trip for Examples

In [None]:
ods

In [None]:
row = ods.sample(1).squeeze()
tripid = row['tripid']
start_node = row['start_node']
end_node = row['end_node']
print(tripid,start_node,end_node)

## Visualize random trip

In [None]:
#retrieve chosen path linkids and convert them to tuple
chosen = [tuple(row) for row in match_results[tripid]['matched_edges'].to_numpy()]
shortest = [tuple(row) for row in match_results[tripid]['shortest_edges'].to_numpy()]
#retrieve modeled path linkids
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

#get geos (non-directional)
chosen_geo = [geo_dict[linkid[0]] for linkid in chosen]
shortest_geo = [geo_dict[linkid[0]] for linkid in shortest]
modeled_geo = [geo_dict[linkid[0]] for linkid in modeled_edges]

chosen_lines = gpd.GeoSeries(chosen_geo,crs='epsg:2240')
shortest_lines = gpd.GeoSeries(shortest_geo,crs='epsg:2240')
modeled_lines = gpd.GeoSeries(modeled_geo,crs='epsg:2240')

stochastic_optimization.visualize_three_no_legend(chosen_lines,shortest_lines,modeled_lines)

# Objective Function
This function calculates the success of a modeled trip when compared against the matched one. There are several available but only one can be used in the impedance calibration.


## Method 1 Exact Overlap
This objective function looks at whether the edges taken in the modeled trip matches the ones taken in the matched trip.

Two ways to calculate:
- By trip and then average (weight equally on short and long trips)
- Total intersected length divided by total chosen length (longer trips have more weight but perhaps a more complete picture)

In [None]:
#retrieve chosen path linkids and convert them to tuple
chosen = [tuple(row) for row in match_results[tripid]['matched_edges'].to_numpy()]
#retrieve modeled path linkids
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

#get lengths (non-directional)
chosen_length = np.sum([length_dict[linkid[0]] for linkid in chosen])
modeled_length = np.sum([length_dict[linkid[0]] for linkid in modeled_edges])

#convert to sets
chosen = set(chosen)
modeled_edges = set(modeled_edges)

#find intersection of sets
shared = list(set.intersection(chosen,modeled_edges))

#find intersection length
intersection_length = np.sum([length_dict[linkid[0]] for linkid in shared])

#here we can have an if condition that determines which method to do

# calculate exact overlap (chosen length will never be greater than modeled length)
exact_overlap = intersection_length / chosen_length
exact_overlap

## Method 2: Buffer Overlap
Having the modeled trip have to match the chosen trip exactly might be unrealistic since our impedance function isn't able to account for everything. Instead it may be better to see if the impedance function is generally producing results that are similar to the chosen ones. In this objective function, both the chosen and modeled routes are buffered and intersected. This intersection is divided by the union of the two buffered geometries to get a percentage. The more overlap between the two, the closer to one this intersection will be. 

In [None]:
# set buffer dist
buffer_ft = 500

In [None]:
#retrieve chosen path linkids and convert them to tuple
chosen = [tuple(row) for row in match_results[tripid]['matched_edges'].to_numpy()]
#shortest = [tuple(row) for row in match_results[linkid]['shortest_edges'].to_numpy()]
#retrieve modeled path linkids
modeled_edges = results_dict[(start_node,end_node)]['edge_list']

#get geos (non-directional)
chosen_geo = [geo_dict[linkid[0]] for linkid in chosen]
#shortest_geo = [geo_dict[linkid[0]] for linkid in shortest]
modeled_geo = [geo_dict[linkid[0]] for linkid in modeled_edges]

#turn into linestring and then buffer
chosen_geo = MultiLineString(chosen_geo).buffer(buffer_ft)
modeled_geo = MultiLineString(modeled_geo).buffer(buffer_ft)

chosen_area = chosen_geo.area
modeled_area = modeled_geo.area
intersection_area = chosen_geo.intersection(modeled_geo).area

# intersection area divided by the union area (chosen + modeled - intersection)
# the closer to one the better
buffer_overlap = intersection_area / (chosen_area+modeled_area-intersection_area)
buffer_overlap

#take the median?

## Method 3: Frechet Distance
This third method uses Frechet Distance, which can be used as a measure of similarity between two curves. With this method, link direction is also accounted for. The higher the Frechet distance, the less similar the results. We should probably divide this number by the length of the chosen/modeled route to standardize it somewhat across trips.

In [None]:
rdp_ft = 50

In [None]:
#retrieve tuples of the format (linkid:int,reverse_link:boolean)
chosen = [tuple(row) for row in match_results[tripid]['matched_edges'].to_numpy()]
#shortest = [tuple(row) for row in match_results[linkid]['shortest_edges'].to_numpy()]
modeled = results_dict[(start_node,end_node)]['edge_list']

#retrieve coordinates, reversing coordinate sequence if necessary
def retrieve_coordinates(link,geo_dict):
    '''
    Return the coordinates of a link's geometry as a numpy array.

    Parameters:
        link: indexable pair where link[0] is the key into geo_dict and link[1]
            flags whether the coordinate sequence should be reversed
        geo_dict: dict mapping link[0] values to geometries exposing .coords

    Returns:
        numpy array of coordinates, flipped end-to-start when link[1] equals True
    '''
    coords = np.array(geo_dict[link[0]].coords)
    is_reversed = link[1] == True
    return coords[::-1] if is_reversed else coords

chosen_geo = [retrieve_coordinates(link,geo_dict) for link in chosen]
modeled_geo = [retrieve_coordinates(link,geo_dict) for link in modeled]

#turn to a single line
chosen_geo = LineString(np.vstack(chosen_geo))
modeled_geo = LineString(np.vstack(modeled_geo))

#simplify with rdp
chosen_coords = np.array(chosen_geo.simplify(rdp_ft).coords)
modeled_coords = np.array(modeled_geo.simplify(rdp_ft).coords)

#find frechet distance
frechet_distance = similaritymeasures.frechet_dist(chosen_coords,modeled_coords)
frechet_distance

#can minimize total frechet distance or an average value

# Visualization (in progress)
For comparing results? Come back to this later

In [None]:
test.columns

test['percent_detour'] = (((test['length_ft']-test['shortest_length_ft'])/test['shortest_length_ft'])*100).round(1)


In [None]:
import pandas as pd
trip_and_user = pd.read_pickle(export_fp/'trip_and_user.pkl')

test_merge = test.merge(trip_and_user,on='tripid')

In [None]:
tripid = test.loc[test['overlap']<0.2,'tripid'].sample(1).item()
tripid

In [None]:
row['starttime']

In [None]:
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster, PolyLineTextPath
from folium.map import FeatureGroup
def visualize(test_merge,tripid):
     '''
     Build a folium map comparing the chosen, shortest and modeled paths of one trip.

     Parameters:
         test_merge: geo-enabled table with one row per trip. The function reads the
             columns 'tripid', 'geometry', 'shortest_geo', 'shortest_intersect_geo',
             'geometry_modeled', 'start', 'end' and every column shown in the legend
             ('userid', 'starttime', 'percent_detour', 'shortest_buffer_overlap',
             'overlap', 'trip_type', 'length_ft', 'age', 'gender', 'income',
             'ethnicity', 'cyclingfreq', 'rider_history', 'rider_type').
         tripid: value of 'tripid' selecting the trip to draw; a single matching row
             is expected because the lookups use .item()

     Returns:
         folium.Map with one toggleable layer per path, start/end markers and an
         HTML legend holding the trip statistics

     NOTE: `nodes` is not a parameter; it is read from the enclosing scope and must
     provide 'N' and 'geometry' columns.
     '''


     #work on a copy so the caller's table keeps its geometry column and CRS settings
     gdf = test_merge.copy()

     gdf.set_geometry("geometry",inplace=True)
     gdf.set_crs("epsg:2240",inplace=True)

     # One single-geometry frame per layer, each tagged with the CRS set above
     chosen_path = gdf.loc[gdf['tripid']==tripid,['tripid','geometry']]
     shortest_path = gdf.loc[gdf['tripid']==tripid,['tripid','shortest_geo']].set_geometry('shortest_geo').set_crs(gdf.crs)
     intersection = gdf.loc[gdf['tripid']==tripid,['tripid','shortest_intersect_geo']].set_geometry('shortest_intersect_geo').set_crs(gdf.crs)
     modeled_path = gdf.loc[gdf['tripid']==tripid,['tripid','geometry_modeled']].set_geometry('geometry_modeled').set_crs(gdf.crs)

     #start point (looked up in the global nodes table by node id)
     start_N = gdf.loc[gdf['tripid']==tripid,'start'].item()
     start_pt = nodes.to_crs('epsg:4326').loc[nodes['N']==start_N,'geometry'].item()

     #end point (looked up in the global nodes table by node id)
     end_N = gdf.loc[gdf['tripid']==tripid,'end'].item()
     end_pt = nodes.to_crs('epsg:4326').loc[nodes['N']==end_N,'geometry'].item()

     # reproject the chosen path to lon/lat and take its centroid
     x_mean = chosen_path.to_crs(epsg='4326').geometry.item().centroid.x
     y_mean = chosen_path.to_crs(epsg='4326').geometry.item().centroid.y

     # Create a Folium map centered on the chosen path's centroid, shifted 0.04 degrees
     # in x (NOTE(review): presumably so the route is not hidden behind the legend -- confirm)
     center = [y_mean,x_mean-.04]
     mymap = folium.Map(location=center, zoom_start=13)

     # Convert GeoDataFrames to GeoJSON
     chosen_path_geojson = chosen_path.to_crs(epsg='4326').to_json()
     shortest_path_geojson = shortest_path.to_crs(epsg='4326').to_json()
     intersection_geojson = intersection.to_crs(epsg='4326').to_json()
     modeled_path_geojson = modeled_path.to_crs(epsg='4326').to_json()

     # Create FeatureGroups for each GeoDataFrame (the buffer intersection starts hidden)
     chosen_path_fg = FeatureGroup(name='Chosen Path')
     shortest_path_fg = FeatureGroup(name='Shortest Path')
     intersection_fg = FeatureGroup(name='Buffer Intersection',show=False)
     modeled_path_fg = FeatureGroup(name='Modeled Path')

     # Add GeoJSON data to FeatureGroups; the colors match the legend swatches below
     folium.GeoJson(chosen_path_geojson, name='Chosen Path',
                    style_function=lambda x: {'color': '#fc8d62', 'weight': 12}).add_to(chosen_path_fg)
     folium.GeoJson(shortest_path_geojson, name='Shortest Path',
                    style_function=lambda x: {'color': '#66c2a5', 'weight': 8}).add_to(shortest_path_fg)
     folium.GeoJson(intersection_geojson, name='Buffer Intersection',
                    style_function=lambda x: {'color':"gray",'fillColor':"#ffff00","fillOpacity": 0.75}).add_to(intersection_fg)
     folium.GeoJson(modeled_path_geojson, name='Modeled Path',
                    style_function=lambda x: {'color': '#8da0cb','weight': 8}).add_to(modeled_path_fg)

     # Add FeatureGroups to the map
     chosen_path_fg.add_to(mymap)
     shortest_path_fg.add_to(mymap)
     intersection_fg.add_to(mymap)
     modeled_path_fg.add_to(mymap)

     # Add start and end points with play and stop buttons
     start_icon = folium.Icon(color='green',icon='play',prefix='fa')
     end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
     folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
     folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

     # Add layer control to toggle layers on/off
     folium.LayerControl().add_to(mymap)

     #retrieve the trip's row; its columns feed the legend below
     # exact_overlap = gdf.loc[gdf['tripid']==tripid,'shortest_exact_overlap_prop'].item()
     # buffer_overlap = gdf.loc[gdf['tripid']==tripid,'shortest_buffer_overlap'].item()
     row = gdf.loc[gdf['tripid']==tripid].squeeze()

     # Add legend with statistics
     #TODO what happened to duration
     legend_html = f'''
          <div style="position: fixed; 
                    bottom: 5px; left: 5px; width: 300px; height: 400px; 
                    border:2px solid grey; z-index:9999; font-size:14px;
                    background-color: white;
                    opacity: 0.9;">
          &nbsp; <b>Trip ID: {tripid}, User ID: {row['userid']}</b> <br>
          &nbsp; <b> Date: {row['starttime']} </b> <br>
          &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i>,
          End Point &nbsp; <i class="fa fa-stop" style="color:red"></i> <br>
          
          &nbsp; Chosen Path &nbsp; <div style="width: 20px; height: 5px; background-color: #fc8d62; display: inline-block;"></div> <br>
          &nbsp; Shortest Path &nbsp; <div style="width: 20px; height: 5px; background-color: #66c2a5; display: inline-block;"></div> <br>
          &nbsp; Modeled Path &nbsp; <div style="width: 20px; height: 5px; background-color: #8da0cb; display: inline-block;"></div> <br>
          &nbsp; Buffer Overlap &nbsp; <div style="width: 20px; height: 10px; background-color: #ffff00; display: inline-block;"></div> <br>

          &nbsp; Percent Detour: {row['percent_detour']:.0f}% <br>
          &nbsp; Shortest Path Overlap: {row['shortest_buffer_overlap']*100:.0f}% <br>
          &nbsp; Modeled Path Overlap: {row['overlap']*100:.0f}% <br>
          &nbsp; Trip Type: {row['trip_type']} <br>
          &nbsp; Length (mi): {row['length_ft']/5280:.0f} <br>
          &nbsp; Age: {row['age']} <br>
          &nbsp; Gender: {row['gender']} <br>
          &nbsp; Income: {row['income']} <br>
          &nbsp; Ethnicity: {row['ethnicity']} <br>
          &nbsp; Cycling Frequency: {row['cyclingfreq']} <br>
          &nbsp; Rider History: {row['rider_history']} <br>
          &nbsp; Rider Type: {row['rider_type']} <br><br>

          </div>
          '''
     #attach the legend to the page as raw HTML
     mymap.get_root().html.add_child(folium.Element(legend_html))

     # Saving to an HTML file is disabled; the map object is returned instead
     #mymap.save('map.html')
     # mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
     return mymap  # the caller decides whether to display or save it

     #TODO add in the legend with trip info and then we're golden


# Trial Calibration Run

# Excess Code

In [None]:
test_merge.tripid

In [None]:

#get geometry from edges
modeled_edges = links.set_index(['source','target']).loc[edge_list]

# modeled_edges = links.merge(linkids.loc[edge_list],on=['linkid','reverse_link'],how='inner')
# modeled_edges = gpd.GeoDataFrame(modeled_edges,geometry='geometry')

shortest_paths[(source,target)] = {
        'edges': set(modeled_edges['linkid'].tolist()),
        'geometry':MultiLineString(modeled_edges['geometry'].tolist()),#modeled_edges.dissolve()['geometry'].item(),
        'length':MultiLineString(modeled_edges['geometry'].tolist()).length
        }



In [None]:
#turn shortest paths dict to dataframe
shortest_paths = pd.DataFrame.from_dict(shortest_paths,orient='index')
shortest_paths.reset_index(inplace=True)
shortest_paths.columns = ['start','end','linkids','geometry','length']
#shortest_paths[['start','end']] = shortest_paths['index'].apply(lambda x: pd.Series(x))
#shortest_paths.drop(columns=['index'],inplace=True)

#add modeled paths to matched_traces dataframe
merged = matched_traces.merge(shortest_paths,on=['start','end'],suffixes=(None,'_modeled'))

In [None]:
def exact_overlap(modeled_linkids,chosen_linkids):
    '''
    Print the share of total trip length that is covered by links present in both
    the modeled and the chosen path.

    NOTE: neither parameter is used. The function reads the module-level `merged`
    (columns 'length', 'linkids', 'linkids_modeled') and `links` (columns 'linkid',
    'length_ft') instead, prints the result and returns None.
    NOTE(review): the * 5280 presumably converts merged['length'] from miles to
    feet so it matches 'length_ft' -- confirm the units of that column.
    '''

    
    
    #denominator: summed 'length' over all trips, scaled by 5280
    sum_all = merged['length'].sum() * 5280
    all_overlap = 0

    for idx, row in merged.iterrows():
        #find shared edges (set intersection of modeled and chosen link ids)
        chosen_and_shortest = row['linkids_modeled'] & row['linkids']
        #get the lengths of those links (links is re-indexed on every iteration)
        overlap_length = links.set_index('linkid').loc[list(chosen_and_shortest)]['length_ft'].sum()
        #overlap_length = np.sum([link_lengths.get(link_tup,'error') for link_tup in chosen_and_shortest])
        all_overlap += overlap_length

    #calculate objective function value
    val = all_overlap / sum_all
    print('Exact overlap percent is:',np.round(val*100,1),'%')



if exact:
    

#calculate approximate overlap (new approach)
else:
    #buffer and dissolve generated route and matched route
    buffer_ft = 500

    merged.set_geometry('geometry',inplace=True)
    merged['buffered_geometry'] = merged.buffer(buffer_ft)
    merged.set_geometry('buffered_geometry',inplace=True)
    merged['area'] = merged.area

    merged.set_geometry('geometry_modeled',inplace=True)
    merged['buffered_geometry_modeled'] = merged.buffer(buffer_ft)
    merged.set_geometry('buffered_geometry_modeled',inplace=True)
    merged['area_modeled'] = merged.area

    #for each row find intersection between buffered features
    merged['intersection'] = merged.apply(lambda row: row['buffered_geometry'].intersection(row['buffered_geometry_modeled']), axis=1)

    # merged['intersection'] = merged.apply(
    #     lambda row: shapely.intersection(row['buffered_geometry'],row['buffered_geometry_modeled']))
    merged.set_geometry('intersection',inplace=True)
    merged['intersection_area'] = merged.area

    #find the overlap with the total area (not including intersections)
    #if the modeled/chosen links are different, then overlap decreases
    #punishes circuitous modeled routes that utilize every link in the chosen one but include extraneous ones
    merged['overlap'] = merged['intersection_area'] / (merged['area_modeled'] + merged['area'] - merged['intersection_area'])

    #find average overlap (using median to reduce impact of outliers?)
    val = merged['overlap'].median()
    print('Median overlap percent is:',np.round(val*100,1),'%')

if follow_up:
    return merged

return -val#, merged

In [None]:
betas = [2,3,4,5,6]
#link_impedance_function(betas,beta_links,df_edges)
#(turn_impedance_function(betas,beta_turns,turns)['turn_cost'] > 0).sum()

In [None]:
kwargs = {
    'beta_links': beta_links,
    'beta_turns': beta_turns,
    'links': df_edges,
    'pseudo_links': turns,
    'turn_G': turn_G,
    'matched_traces': matched_traces,
    'link_impedance_function': link_impedance_function,
    'turn_impedance_function': turn_impedance_function,
    'exact': False,
    'follow_up': False
}
args = tuple(v for k, v in kwargs.items())
len(args)

In [None]:
bounds = [[0, 5] for _ in range(0, 5)]
bounds

In [None]:
import stochastic_optimization
from importlib import reload
reload(stochastic_optimization)

start = time.time()
# args = (df_edges,turns,turn_G,matched_traces,False)
x = minimize(stochastic_optimization.objective_function, bounds, args=args, method='pso', options={'maxiter':5})
end = time.time()
print(f'Took {(end-start)/60/60} hours')
#results[segment_filepath] = (x.x,x.fun)

In [None]:
turns

## Impedance Function 2
- Link Specific:
    - Average Grade (%grade)
    - Vehicle Separation from OSM/ARC Inventory (1 = None, 2 = Bike Lane, 3 = MUP/Curb protected bike lanes)
    - Number of lanes from HERE ()
- Turn Specific
    - Unsignalized left/straight across roads with higher than tertiary classification (0 or 1)
    - Signalized intersection left/straight (0 or 1)

## Applying Link Costs
---
Dict keys must correspond to column names in the links GeoDataFrame. Multiple dicts can be passed to test the impacts of changing impedances. The link cost function has this format:
$$ C_e = \frac{l_e \cdot 60^2}{s \cdot 5280} \cdot \left(1 + \sum_i \beta_i x_{i,e}\right) $$

where:
- $e$ is an edge/link in network graph $G$ with V vertices/nodes and E edges/links
- $l_e$ is the length of the link in feet
- $\beta$ is the impedance coefficient for attribute $i$
- $x_{i,e}$ is the value of attribute $i$ for link $e$
- $s$ is the assumed average speed of the cyclist in mph

Notes:
- Negative attributes **decrease** impedance  
- Positive attributes **increase** impedance
- **Negative link costs are not allowed**
- Time to traverse a link has already been calculated in the prepare_network function

In [None]:
# #%% prepare link dataframe
# links['bike'] = links['bl'] + links['pbl'] + links['mu']
# links['bike'] = links['bike'] >= 1

# cost_columns = ['linkid','bike','length_ft']#,'up-grade','down-grade','length_ft']
# df_edges = df_edges.merge(links[cost_columns],on='linkid')

# # df_edges['grade'] = np.nan
# # df_edges.loc[df_edges['reverse_link'],'grade'] = df_edges['down-grade']
# # df_edges.loc[~df_edges['reverse_link'],'grade'] = df_edges['up-grade']
# # #ignore downs
# # df_edges.loc[df_edges['grade']<0,'grade'] = 0
# # df_edges.drop(columns=['up-grade','down-grade','bearing'],inplace=True)

In [None]:
# #fix set
# import ast
# matched_traces['linkids'] = matched_traces['linkids'].apply(lambda x: eval(x))

In [None]:
#drop loops
matched_traces = matched_traces.loc[matched_traces['start']!=matched_traces['end']]

In [None]:
# with (fp / 'impedance_calibration.pkl').open('rb') as fh:
#     (df_edges,turns,turn_G) = pickle.load(fh)
args = (df_edges,turns,turn_G,matched_traces,False)

In [None]:
import stochastic_optimization
from importlib import reload
reload(stochastic_optimization)

In [None]:
# source = 68294161
# target = 2400730083

# turn_G, virtual_edges = modeling_turns.add_virtual_links(turns,turn_G,source,[target])   

# virtual_edges

# turn_G.out_edges(target)
# #turn_G.in_edges((5416154182, 2400730083))

# list(turn_G.in_edges(target))[0]

# test = nx.ego_graph(turn_G,source,4)
# test.edges()

# import networkx as nx
# test_target = (5318092552,5416166514)

# length, node_list = nx.single_source_dijkstra(turn_G,source,test_target,weight='weight')
# node_list

# turn_G = modeling_turns.remove_virtual_edges(turn_G,virtual_edges)

# import stochastic_optimization
# from importlib import reload
# reload(stochastic_optimization)
# reload(modeling_turns)

# betas = [1.14593853, 0.60739776]
# val, merged = stochastic_optimization.objective_function(betas,*args)

# merged[1].set_geometry('geometry_modeled').set_crs('epsg:2240').explore()

Need to re-create routes using the coefficients so we can do visualization

In [None]:
import stochastic_optimization
from importlib import reload
reload(stochastic_optimization)

betas = np.array([0.09231109, 2.35131751])
args = (df_edges,turns,turn_G,matched_traces,False,True)
test = stochastic_optimization.objective_function(betas,*args)

In [None]:
#tripid = 891#30000
tripid = 7257#9806#891
mymap = visualize(test_merge,tripid)
mymap

In [None]:
# for linkid, start_node, end_node in tqdm(ods.itertuples(index=False),total=ods.shape[0]):
#     turn_G, virtual_starts, virtual_ends = modeling_turns.add_virtual_links_new(turns,turn_G,[start_node],[end_node])
#     length, edge_list = nx.single_source_dijkstra(turn_G,source=start_node,target=end_node,weight='weight')
#     turn_G = modeling_turns.remove_virtual_links_new(turn_G,virtual_starts,virtual_ends)
# #takes 
# import networkx as nx
# import concurrent.futures

# results_dict = {}

# # Define a function to calculate Dijkstra's shortest paths for a single source
# def impedance_path(turns, turn_G, tripid, o, d):
#     turn_G, virtual_starts, virtual_ends = modeling_turns.add_virtual_links_new(turns, turn_G, [o], [d])
#     length, edge_list = nx.single_source_dijkstra(turn_G, source=o, target=d, weight='weight')
#     turn_G = modeling_turns.remove_virtual_links_new(turn_G, virtual_starts, virtual_ends)
#     return tripid, {'length': length, 'edge_list': edge_list}

# # Define the number of concurrent workers based on your system's capability
# # You may need to experiment to find the optimal number
# num_workers = 4  # For example

# # Initialize ThreadPoolExecutor or ProcessPoolExecutor
# with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
#     # Submit the tasks for each origin destination pair
#     futures = [executor.submit(impedance_path, turns, turn_G, tripid, origin, destination) 
#                for tripid, origin, destination in ods.itertuples(index=False)]

#     # Wait for all tasks to complete
#     concurrent.futures.wait(futures)

#     # Retrieve results
#     for future in futures:
#         tripid, result_dict = future.result()
#         results_dict[tripid] = result_dict
# import concurrent.futures
# from tqdm import tqdm

# results_dict = {}

# # Define a function to calculate Dijkstra's shortest paths for a single source
# def impedance_path(turns, turn_G, tripid, o, d):
#     turn_G, virtual_starts, virtual_ends = modeling_turns.add_virtual_links_new(turns, turn_G, [o], [d])
#     length, edge_list = nx.single_source_dijkstra(turn_G, source=o, target=d, weight='weight')
#     turn_G = modeling_turns.remove_virtual_links_new(turn_G, virtual_starts, virtual_ends)
#     results_dict[tripid] = {'length': length, 'edge_list': edge_list}
#     #return tripid, {'length': length, 'edge_list': edge_list}


# # Define the number of concurrent workers based on your system's capability
# # You may need to experiment to find the optimal number
# num_workers = 20  # For example

# # Initialize ThreadPoolExecutor or ProcessPoolExecutor
# with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
#     # Submit the tasks for each origin destination pair
#     futures = [executor.submit(impedance_path, turns, turn_G, tripid, origin, destination) 
#                for tripid, origin, destination in ods.itertuples(index=False)]
    
#     # Wrap the futures with tqdm to create a progress bar
#     with tqdm(total=len(futures)) as pbar:
#         for future in futures:
#             future.add_done_callback(lambda p: pbar.update())

#     # # Retrieve results
#     # for future in futures:
#     #     tripid, result_dict = future.result()
#     #     results_dict[tripid] = result_dict


# # Assuming you have a graph called G and a list of origins and destinations
# # Define your graph G and the list of origins and destinations here
# results_dict = {}

# # Define a function to calculate Dijkstra's shortest paths for a single source
# def impedance_path(turns,turn_G,tripid,o,d,results_dict):
#     turn_G, virtual_starts, virtual_ends = modeling_turns.add_virtual_links_new(turns,turn_G,[o],[d])
#     length, edge_list = nx.single_source_dijkstra(turn_G,source=o,target=d,weight='weight')
#     turn_G = modeling_turns.remove_virtual_links_new(turn_G,virtual_starts,virtual_ends)
#     return length, edge_list

# # Define the number of concurrent workers based on your system's capability
# # You may need to experiment to find the optimal number
# num_workers = 4  # For example

# # Initialize ThreadPoolExecutor or ProcessPoolExecutor
# with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
#     # Submit the tasks for each origin destination pair
#     futures = [executor.submit(impedance_path,[turns,turn_G,tripid,origin,destination,results_dict]) for tripid, origin, destination, in ods.itertuples(index=False)]

#     # Wait for all tasks to complete
#     concurrent.futures.wait(futures)

#     # Retrieve results
#     results = [future.result() for future in futures]

# # Process the results as needed
# for result in results:
#     # Do something with the result
#     print(result)

#Os = list(set([item['start_node'] for key, item in match_results.items()]))
#Ds = list(set([item['end_node'] for key, item in match_results.items()]))