# Preparing Matched Traces for Calibration
- Determine which links should be included for routing (+ include all links found during map matching)
- 
Before calibration, we need to decide which traces make the cut using the match_ratio. Then we need to attach the start and end nodes and examine how well the shortest path (with/without turns) explains the routing behavior, as a baseline to compare against the optimization.

In [None]:
import pickle
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
from shapely.ops import MultiLineString, LineString
import geopandas as gpd

from bikewaysim.paths import config
from bikewaysim.impedance_calibration import speedfactor, stochastic_optimization
from bikewaysim import map_match
from bikewaysim.network import prepare_network, modeling_turns

# Filter the map matching results

In [None]:
# #NOTE temporary for GDOT report, replace when done
# # with (config['matching_fp'] /'match_dict_full.pkl').open('rb') as fh:
# #     match_dict = pickle.load(fh)
# with (Path('D:\PROJECTS\GDOT\GDOT\Map_Matching\matched_0.pkl')).open('rb') as fh:
#     match_dict = pickle.load(fh)
    
# cutoff = 0.90 # set pct of points that need to be matched

# total = len(match_dict)
# match_ratios = {tripid:item['match_ratio'] for tripid, item in match_dict.items() if isinstance(item,str)==False}
# failed_matches = total - len(match_ratios)
# match_ratios = pd.Series(match_ratios)
# above_threshold = match_ratios[match_ratios > 0.90].index.tolist()
# match_dict = {key:item for key, item in match_dict.items() if key in above_threshold}
# below_threshold = total - failed_matches - len(above_threshold)

# print(len(match_dict),'/',total,'successful matches')
# print(failed_matches,'failed to match')
# print(below_threshold,'partial match')


In [None]:
# Show which map-matching result files exist (file stems only)
print([pkl_fp.stem for pkl_fp in config['matching_fp'].glob('match_dict_full_*.pkl')])

# Numeric suffix of the match_dict_full_<index>.pkl file that gets loaded next
matching_index = 5

In [None]:
#NOTE temporary delete after GDOT
# gdot_trips = set(list(match_dict.keys()))

# Share of GPS points that must be matched for a trip to be kept
cutoff = 0.90

# Load the map-matching results selected by matching_index
with (config['matching_fp'] / f'match_dict_full_{matching_index}.pkl').open('rb') as fh:
    match_dict = pickle.load(fh)

# Split the results by match quality and keep only the trips above the cutoff
above_threshold, below_threshold, failed_matches, match_ratios = map_match.mapmatch_results(match_dict, cutoff)
match_dict = {
    tripid: match
    for tripid, match in match_dict.items()
    if tripid in above_threshold
}

#NOTE filter to just trips used for GDOT
# match_dict = {tripid:item for tripid,item in match_dict.items() if (tripid in gdot_trips) & (isinstance(item,str)==False)}

In [None]:
# Load the trip and user attribute tables
trips = pd.read_pickle(config['cycleatl_fp'] / 'trips_4.pkl')
users = pd.read_pickle(config['cycleatl_fp'] / 'users_4.pkl')

# Keep only the trips that survived the match filter, and the users who made them
trips = trips[trips['tripid'].isin(list(match_dict))]
users = users[users['userid'].isin(set(trips['userid'].tolist()))]

print(len(trips), 'trips')
print(len(users), 'users')

In [None]:
# Collect every (linkid, reverse_link) pair that appears in a matched trace so
# these links can be kept in the calibration network
map_matching_links = set()
for items in match_dict.values():
    map_matching_links.update(map(tuple, items['edges'].values))

# Create calibration network
Create dummy variables and make any other changes that weren't done in the final network export step.

In [None]:
# links, turns, length_dict, geo_dict, turn_G = stochastic_optimization.import_calibration_network(config)

In [None]:
# Undirected network layers from the final network geopackage
links = gpd.read_file(config['network_fp'] / 'final_network.gpkg', layer='edges')  # has the non-directional variables
nodes = gpd.read_file(config['network_fp'] / 'final_network.gpkg', layer='nodes')
# Collapse the node layer into a {node id: geometry} lookup
nodes = dict(zip(nodes['N'], nodes.geometry))

# Directed edge attributes and the turn table
directed_links = pd.read_parquet(config['network_fp'] / 'directed_edges.parquet')  # has the directional variables
turns = pd.read_parquet(config['network_fp'] / 'turns_df.parquet')

In [None]:
# Replace the undirected copies of the direction-dependent columns with the
# per-direction values from directed_links (one row per linkid/direction)
link_cols_drop = [
    'A', 'B',
    'ascent_ft', 'ascent_grade_cat',
    'descent_ft', 'descent_grade_cat',
    'facility_fwd', 'facility_rev',
]
directed_cols_to_add = [
    'linkid', 'source', 'target', 'reverse_link',
    'ascent_ft', 'ascent_grade_cat', 'facility_fwd',
]
links = links.drop(columns=link_cols_drop)
links = pd.merge(links, directed_links[directed_cols_to_add], on='linkid')
# del directed_links
# The directed source/target nodes become the A/B nodes of each link
links = links.rename(columns={'source': 'A', 'target': 'B'})

In [None]:
# Remove wrong-way travel (the reverse direction of a one-way link) from the
# turns and the links, but keep any (linkid, reverse_link) pair that a
# map-matched trip actually used.

# Attach the one-way flag of each turn's source and target link
oneway_dict = dict(zip(links['linkid'],links['oneway']))
turns['source_oneway'] = turns['source_linkid'].map(oneway_dict)
turns['target_oneway'] = turns['target_linkid'].map(oneway_dict)
del oneway_dict

# True where the turn's source/target (linkid, reverse_link) is in a matched trace
source_exception = [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in turns[['source_linkid','source_reverse_link']].values]
target_exception = [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in turns[['target_linkid','target_reverse_link']].values]
# source_wrongway = ((turns['source_oneway'] == True) & (turns['source_reverse_link'] == True)) == False
# target_wrongway = ((turns['target_oneway'] == True) & (turns['target_reverse_link'] == True)) == False

# NOTE: despite their names, these masks are True for the turns to KEEP
# (the link is not a wrong-way link, or it is a matched exception).
# The exception lists are wrapped in index-aligned boolean Series because logical
# ops between a Series and a plain list are deprecated in pandas >= 2.1.
source_wrongway = ~(turns[['source_oneway','source_reverse_link']] == True).all(axis=1) | pd.Series(source_exception,index=turns.index,dtype=bool)
target_wrongway = ~(turns[['target_oneway','target_reverse_link']] == True).all(axis=1) | pd.Series(target_exception,index=turns.index,dtype=bool)
turns = turns[source_wrongway & target_wrongway]

#remove wrongway links
#TODO did we remove these in the export network step too?
exception = [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in links[['linkid','reverse_link']].values]
# .copy() so the column assignment below writes to an independent frame and
# does not raise SettingWithCopyWarning
links = links.loc[~(links[['oneway','reverse_link']]==True).all(axis=1) | pd.Series(exception,index=links.index,dtype=bool)].copy()

#TODO post GDOT
#add elevation adjusted travel times based on assumed speed on flat ground
# speedfactor.calculate_adjusted_speed(links,9)
assumed_speed_mph = 9
# NOTE(review): dividing by 5280 assumes the geometry length is in feet - confirm the CRS
links['travel_time_min'] = links.length / 5280 / assumed_speed_mph * 60

In [None]:
#create dummy variables for modeling (0/1 integers)
links['2lpd'] = (links['lanes'] == 2).astype(int)
# "3+" means three or more lanes, so compare with >= (== 3 coded 4+ lane links as 0)
links['3+lpd'] = (links['lanes'] >= 3).astype(int)
links['(30,40] mph'] = (links['speed']=='(30,40]').astype(int)
links['(40,inf) mph'] = (links['speed']=='(40,inf)').astype(int)
links['[4k,10k) aadt'] = (links['AADT']=='[4k,10k)').astype(int)
links['[10k,inf) aadt'] = (links['AADT']=='[10k,inf)').astype(int)
links['[4,6) grade'] = (links['ascent_grade_cat']=='[4,6)').astype(int)
links['[6,inf) grade'] = (links['ascent_grade_cat']=='[6,inf)').astype(int)
links['bike lane'] = links['facility_fwd'].isin(['bike lane','bufferred bike lane']).astype(int)
links['cycletrack'] = links['facility_fwd'].isin(['cycletrack']).astype(int)
links['multi use path'] = links['facility_fwd'].isin(['multi use path']).astype(int)

#condensed variables
# NOTE: .any(axis=1) yields booleans here, unlike the 0/1 integer dummies above
links['(30,inf) mph'] = (links[['(30,40] mph','(40,inf) mph']] == 1).any(axis=1)
links['multi use path and cycletrack'] = (links[['cycletrack','multi use path']] == 1).any(axis=1)

#TODO add sidepath variables here

# the report variables: an alternative coding of the same attributes, built on a
# copy and merged back below
links0 = links.copy()
links0['multi use path'] = links0['facility_fwd'].isin(['multi use path','cycletrack']).astype(int)
links0['bike lane'] = links0['facility_fwd'].isin(['bike lane','bufferred bike lane']).astype(int)
# lanes is zeroed for multi use paths / cycletracks and for bike, pedestrian and sidewalk links
links0.loc[(links0['multi use path']==True) | (links0['link_type'].isin(['bike','pedestrian','sidewalk'])),'lanes'] = 0
links0['above_4'] = links0['ascent_grade_cat'].isin(['[4,6)','[6,inf)'])
links0 = links0[['linkid','reverse_link','multi use path','bike lane','lanes','above_4']]
# columns that already exist in links get the '_original' suffix; 'above_4' is new
# and keeps its name
links = pd.merge(links,links0,suffixes=('','_original'),on=['linkid','reverse_link'])

#create layer of unsignalized crossings for examining
unsig_major_road_crossing = set(turns.loc[turns['unsig_major_road_crossing']==True,'source_B'].tolist())
# NOTE: this rebinds `nodes` to a filtered GeoDataFrame read fresh from the geopackage
nodes = gpd.read_file(config['network_fp']/'final_network.gpkg',layer='nodes')
nodes = nodes[nodes['N'].isin(unsig_major_road_crossing)]
nodes.to_file(config['calibration_fp']/'unsig_major_road_crossing.gpkg')

In [None]:
# only allow these types for routing, plus any link/direction used by a matched trip
link_types_allowed = ['bike','pedestrian','road']
exception = [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in links[['linkid','reverse_link']].values]
# wrap the list in an index-aligned boolean Series: logical ops between a Series
# and a plain list are deprecated in pandas >= 2.1
links = links[links['link_type'].isin(link_types_allowed) | pd.Series(exception,index=links.index,dtype=bool)]


In [None]:
# Debug: number of rows for link 35062.0 in the forward (reverse_link == False) direction
((links['linkid'] == 35062.0) & (links['reverse_link'] == False)).sum()

In [None]:
# Snapshot the links before isolates are removed, and flag which of them are
# (linkid, reverse_link) pairs used by a matched trip
before = links.copy()
exception = [
    link_pair in map_matching_links
    for link_pair in before[['linkid', 'reverse_link']].itertuples(index=False, name=None)
]

In [None]:
# remove isolated links
links, turns = prepare_network.remove_isolates(links,turns)

# Series.append was removed in pandas 2.0; pd.concat stacks the A and B node
# columns so the distinct node ids can be counted
print(links.shape[0],'links and',len(set(pd.concat([links['A'],links['B']]).tolist())),'nodes')
print(turns.shape[0],'turns')

#export calibration network
with (config['calibration_fp']/"calibration_network.pkl").open('wb') as fh:
    pickle.dump((links,turns),fh)

In [None]:
# Remove duplicate links and export


In [None]:
# Write the calibration links to the 'final' layer of calibration_network.gpkg
links.to_file(config['calibration_fp']/'calibration_network.gpkg',layer='final')

In [None]:
# without_isolates = set([tuple([x,y]) for x,y in links[['linkid','reverse_link']].values])
# missing = [tuple([x,y]) not in without_isolates for x,y in before[['linkid','reverse_link']].values]
# before[np.array(exception) & np.array(missing)].explore()#.to_file(Path.home()/'Downloads/')
# ((links[['linkid','reverse_link']]==(35062.0,False)).all(axis=1)).sum()

# Assemble the match data for shortest path routing and calibration

In [None]:
# Column used as the base link impedance
base_impedance_col = "travel_time_min"

# Load the calibration network objects through the project helper
links, turns, length_dict, geo_dict, turn_G = stochastic_optimization.import_calibration_network(config)

# NOTE(review): presumably resets link/turn costs in turn_G to base_impedance_col -
# confirm in stochastic_optimization
stochastic_optimization.back_to_base_impedance(base_impedance_col, links, turns, turn_G)

In [None]:
# Build, for every matched trip, the origin/destination nodes plus the matched
# and shortest-path edge lists, then export everything for calibration.

# Index by (linkid, reverse_link) so a directed link can be looked up by tuple;
# drop=False keeps both columns available as regular columns too
links.set_index(['linkid','reverse_link'],inplace=True,drop=False)
match_results = {}
#shortest_results = {}
# trip ids skipped because their first/last matched link is not in `links`
failed_shortest_path = []

for tripid, items in tqdm(match_dict.items()):

    matched_edges = items['edges']

    #get start and end linkid as (linkid, reverse_link) tuples
    start_link = tuple(matched_edges.iloc[0,:].values)
    end_link = tuple(matched_edges.iloc[-1,:].values)

    #get start and end node for shortest and impedance routing
    #TODO change this to be live so we don't run into errors when the matching network is different
    try:
        start = links.loc[start_link,'A']
        end = links.loc[end_link,'B']
    except KeyError:
        # the matched link does not exist in the calibration network; record the
        # trip and move on instead of aborting the whole export
        failed_shortest_path.append(tripid)
        continue

    # start = (round(nodes.get(start).x,2),round(nodes.get(start).y,2))
    # end = (round(nodes.get(end).x,2),round(nodes.get(end).y,2))

    # shortest path between the same origin and destination nodes
    shortest_edge_list = stochastic_optimization.impedance_path(turns,turn_G,links,start,end)['edge_list']

    match_results[tripid] = {
    'origin_node': start,
    'destination_node': end,
    # NOTE(review): stores only the .year of the value at row 0, column 2 of the
    # trace (presumably the first timestamp) - confirm this is what is wanted
    'trip_start_time': items['trace'].iloc[0,2].year,
    'match_ratio': items['match_ratio'],
    'matched_edges': matched_edges,
    'shortest_edges': pd.DataFrame(shortest_edge_list,columns=['linkid','reverse_link'])
    }

if failed_shortest_path:
    print(len(failed_shortest_path),'trips skipped because a matched link is missing from the calibration network')

# trip_ods = pd.DataFrame.from_dict(match_results,orient='index')
# trip_ods.reset_index(inplace=True)
# trip_ods.rename(columns={'index':'tripid'},inplace=True)
#export for impedance calibration
with (config['calibration_fp']/'ready_for_calibration.pkl').open('wb') as fh:
    pickle.dump(match_results,fh)
# links.reset_index(inplace=True)

In [None]:
# Debug: show the trip id most recently bound by a `for tripid, ...` loop
tripid

In [None]:
# Debug: show the map-matching result stored for that trip id
match_dict[tripid]

In [None]:
# Debug: show the current value of `start` left over from the trip-assembly loop
start

In [None]:
# TODO add this to the export network section
# # add this for later
# link_types = dict(zip(links['linkid'],links['link_type']))
# turns['source_link_type'] = turns['source_linkid'].map(link_types)
# turns['target_link_type'] = turns['source_linkid'].map(link_types)

# # #unit conversions
# links['length_mi'] = (links['length_ft'] / 5280).round(2)
# links['ascent_ft'] = (links['ascent_m'] * 3.28084).round(0)
# #links.drop(columns=['length_ft','ascent_m'],inplace=True)

# #get node degree
# degree = links['A'].append(links['B']).value_counts()
# links['A_deg'] = links['A'].map(degree)
# links['B_deg'] = links['B'].map(degree)
# #remove excess dead end pedestrian links
# dead_ends = (links['link_type']=='pedestrian')&((links['A_deg']==1)|(links['B_deg']==1))
# links = links[dead_ends==False]
# #unique scenario but there's an expressway tag that needs to be removed
# import ast
# john_lewis_freedom_pkwy = links['all_tags'].apply(lambda x: ast.literal_eval(x).get('expressway',0)=='yes')
# links = links[john_lewis_freedom_pkwy==False]
# surfaces = ['dirt','unpaved','gravel','fine_gravel','dirt/sand','ground']
# unpaved = links['all_tags'].apply(lambda x: ast.literal_eval(x).get('surface',0) in surfaces)
# #links[unpaved].explore(tooltip=False)
# links = links[unpaved==False]
# #unpaved.unique()

# Move on after this

In [None]:

#     # #euclidean distance between points
#     # snode = nodes.loc[nodes['N']==start,'geometry'].item()
#     # dnode = nodes.loc[nodes['N']==end,'geometry'].item()
    
#     # #add geo features
#     # edge_geo = pd.merge(match_dict[tripid]['edges'],edges[['linkid','geometry']],on=['linkid'],how='left')
#     # edge_geo = gpd.GeoDataFrame(edge_geo,geometry='geometry')
#     # edge_geo_dissolved = MultiLineString(edge_geo['geometry'].tolist())
#     # linkids = set(edge_geo['linkid'].tolist())

    
    
    

#     #TODO use .array version to get rid of errors

#     forward = pd.merge(edge_df,edges[['source','target','linkid','geometry']],on=['source','target'])[['linkid','geometry']]
#     reverse = pd.merge(edge_df,edges[['source','target','linkid','geometry']],left_on=['target','source'],right_on=['source','target'])[['linkid','geometry']]
#     shortest_path = pd.concat([forward,reverse],ignore_index=True)
#     shortest_linkids = set(shortest_path['linkid'].tolist())
#     shortest_geo = gpd.GeoDataFrame(shortest_path)
#     shortest_geo_dissolved = MultiLineString(shortest_geo['geometry'].tolist())

#     #exact overlap
#     chosen_and_shortest = linkids & shortest_linkids
#     overlap_length = edges.set_index('linkid').loc[list(chosen_and_shortest)]['length_ft'].sum()
#     exact_overlap = overlap_length / edge_geo.length.sum()

#     #buffer overlap
#     buffer_ft = 500
#     chosen = edge_geo_dissolved.buffer(buffer_ft)
#     shortest = shortest_geo_dissolved.buffer(buffer_ft)
#     intersection = chosen.intersection(shortest)
#     buffer_overlap = intersection.area / (chosen.area + shortest.area - intersection.area)

#     #collapse to multilinestring with length
#     #add length
#     matched_trips[tripid] = {'start':start,
#                             'end':end,
#                             'start_end_dist_ft': snode.distance(dnode),
#                             'match_ratio': match_dict[tripid]['match_ratio'], 
#                             'linkids':str(linkids),
#                             'geometry':edge_geo_dissolved,
#                             'length_ft':edge_geo.length.sum(),
#                             'shortest_length_ft': impedance,
#                             'shortest_linkids': shortest_linkids,
#                             'shortest_geo': shortest_geo_dissolved,
#                             'shortest_exact_overlap_length': overlap_length,
#                             'shortest_exact_overlap_prop': exact_overlap,
#                             'shortest_buffer_overlap': buffer_overlap,
#                             'shortest_intersect_geo': intersection
#                             }
# # matched_trips = {}
# # loop_trips = []

# # for tripid,items in tqdm(match_dict.items()):

# #     #failed matches will be str type
# #     if isinstance(items,dict):

# #         #get start and end linkid
# #         start = match_dict[tripid]['edges'].iloc[0,:]
# #         end = match_dict[tripid]['edges'].iloc[-1,:]
        
# #         #get start and end node
# #         start_a_b = edges.loc[(edges['linkid']==start['linkid']) & (edges['reverse_link']==start['reverse_link']),['source','target']]
# #         end_a_b = edges.loc[(edges['linkid']==end['linkid']) & (edges['reverse_link']==end['reverse_link']),['source','target']]

# #         if start['reverse_link']:
# #             start = start_a_b['source'].item()
# #         else:
# #             start = start_a_b['target'].item()

# #         if end['reverse_link']:
# #             end = end_a_b['target'].item()
# #         else:
# #             end = end_a_b['source'].item()

# #         #euclidean distance between points
# #         snode = nodes.loc[nodes['N']==start,'geometry'].item()
# #         dnode = nodes.loc[nodes['N']==end,'geometry'].item()
        
# #         #add geo features
# #         edge_geo = pd.merge(match_dict[tripid]['edges'],edges[['linkid','geometry']],on=['linkid'],how='left')
# #         edge_geo = gpd.GeoDataFrame(edge_geo,geometry='geometry')
# #         edge_geo_dissolved = MultiLineString(edge_geo['geometry'].tolist())
# #         linkids = set(edge_geo['linkid'].tolist())

# #         #shortest path routing here
# #         impedance, path = nx.single_source_dijkstra(MDG,start,end,weight="length_ft")
        
# #         if len(path) < 2:
# #             loop_trips.append(tripid)
# #             continue

# #         #turn to edge list
# #         edge_list = [(path[i],path[i+1]) for i in range(len(path)-1)]
# #         edge_df = pd.DataFrame(edge_list,columns=['source','target'])

# #         #TODO use .array version to get rid of errors

# #         forward = pd.merge(edge_df,edges[['source','target','linkid','geometry']],on=['source','target'])[['linkid','geometry']]
# #         reverse = pd.merge(edge_df,edges[['source','target','linkid','geometry']],left_on=['target','source'],right_on=['source','target'])[['linkid','geometry']]
# #         shortest_path = pd.concat([forward,reverse],ignore_index=True)
# #         shortest_linkids = set(shortest_path['linkid'].tolist())
# #         shortest_geo = gpd.GeoDataFrame(shortest_path)
# #         shortest_geo_dissolved = MultiLineString(shortest_geo['geometry'].tolist())

# #         #exact overlap
# #         chosen_and_shortest = linkids & shortest_linkids
# #         overlap_length = edges.set_index('linkid').loc[list(chosen_and_shortest)]['length_ft'].sum()
# #         exact_overlap = overlap_length / edge_geo.length.sum()

# #         #buffer overlap
# #         buffer_ft = 500
# #         chosen = edge_geo_dissolved.buffer(buffer_ft)
# #         shortest = shortest_geo_dissolved.buffer(buffer_ft)
# #         intersection = chosen.intersection(shortest)
# #         buffer_overlap = intersection.area / (chosen.area + shortest.area - intersection.area)

# #         #collapse to multilinestring with length
# #         #add length
# #         matched_trips[tripid] = {'start':start,
# #                               'end':end,
# #                               'start_end_dist_ft': snode.distance(dnode),
# #                               'match_ratio': match_dict[tripid]['match_ratio'], 
# #                               'linkids':str(linkids),
# #                               'geometry':edge_geo_dissolved,
# #                               'length_ft':edge_geo.length.sum(),
# #                               'shortest_length_ft': impedance,
# #                               'shortest_linkids': shortest_linkids,
# #                               'shortest_geo': shortest_geo_dissolved,
# #                               'shortest_exact_overlap_length': overlap_length,
# #                               'shortest_exact_overlap_prop': exact_overlap,
# #                               'shortest_buffer_overlap': buffer_overlap,
# #                               'shortest_intersect_geo': intersection
# #                               }
# Want to display when a trip goes through a signalized intersection and also how many times it does so. Need to take the list of edges from the matched_traces_dict and construct a list of turns from it. This list of turns can then be used to get the right node ids. Later turn this into a function.
# tripid = 4100
# edges = match_dict[tripid]['edges']

# # make list of edges and turns
# list_of_edges = list(zip(edges['linkid'],edges['reverse_link']))
# df_edges['tup'] = list(zip(df_edges['linkid'],df_edges['reverse_link']))
# chosen_links = df_edges.set_index('tup').loc[list_of_edges]
# list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]
# df_of_turns = pd.DataFrame(list_of_turns,columns=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])
# df_of_turns
# subset = pseudo_df.merge(df_of_turns,on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])

# # get list of nodes
# signals = subset.loc[subset['signalized']==True,'source_B'].value_counts()
# two_way_stops = subset.loc[subset['unsignalized']==True,'source_B'].value_counts()

# #get node coordinates
# #nodes.merge(signals,left_on='N',right_index=True)

# test = nodes.merge(signals,left_on='N',right_index=True)
# test.columns = ['N','geometry','num_times']


# # now value counts 
# #two_way_stops.value_counts().head(20)
# ## Use linkids to add network summaries 

# #turn into dataframe
# df = pd.DataFrame.from_dict(matched_trips,orient='index')
# #into geodataframe
# gdf = gpd.GeoDataFrame(df,geometry='geometry',crs='epsg:2240')

# gdf.reset_index(inplace=True)
# gdf.rename(columns={'index':'tripid'},inplace=True)

# test_merge = pd.read_csv(config['network_fp'].parent/'all_attrs.csv')
# prev = gdf.copy()
# gdf = gdf.merge(test_merge,on='tripid')
# gdf
# def visualize(tripid,gdf,nodes):

#    '''
#    This function displays the matched vs shortest route for a particular trip
#    It also displays the trip characteristics side by side and plots any signalized
#    intersections and stressful turns passed through.
#    '''

#    #gdf contains all the trips and the trip geometries as multilinestrings
#    gdf = gdf.copy()

#    # Your GeoDataFrames
#    chosen_path = gdf.loc[gdf['tripid']==tripid,['tripid','geometry']]
#    shortest_path = gdf.loc[gdf['tripid']==tripid,['tripid','shortest_geo']].set_geometry('shortest_geo').set_crs(gdf.crs)
#    intersection = gdf.loc[gdf['tripid']==tripid,['tripid','shortest_intersect_geo']].set_geometry('shortest_intersect_geo').set_crs(gdf.crs)

#    #from these we want to get the locations and number of signalized intersections and stressful crossings passed through
#    edges = match_dict[tripid]['edges']
#    list_of_edges = list(zip(edges['linkid'],edges['reverse_link']))
#    list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]
#    df_of_turns = pd.DataFrame(list_of_turns,columns=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])
#    subset = pseudo_df.merge(df_of_turns,on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])

#    # from this subset we can get the right node ids
#    #TODO turns should be by edges probably?
#    #turns = subset[['source_B','turn_type']]
#    signals = subset.loc[subset['signalized']==True,'source_B'].value_counts()
#    two_way_stops = subset.loc[subset['unsignalized']==True,'source_B'].value_counts()

#    #and then get the correct rows of the gdf
#    #turns = nodes.merge(signals,left_on='N',right_on='')
#    signals = nodes.merge(signals,left_on='N',right_index=True)
#    signals.columns = ['N','geometry','num_times']
#    two_way_stops = nodes.merge(two_way_stops,left_on='N',right_index=True)
#    two_way_stops.columns = ['N','geometry','num_times']

#    # get the start and end point for plotting
#    start_N = gdf.loc[gdf['tripid']==tripid,'start'].item()
#    start_pt = nodes.to_crs('epsg:4326').loc[nodes['N']==start_N,'geometry'].item()
#    end_N = gdf.loc[gdf['tripid']==tripid,'end'].item()
#    end_pt = nodes.to_crs('epsg:4326').loc[nodes['N']==end_N,'geometry'].item()

#    # Create a Folium map centered around the mean of the chosen route
#    x_mean = chosen_path.to_crs(epsg='4326').geometry.item().centroid.x
#    y_mean = chosen_path.to_crs(epsg='4326').geometry.item().centroid.y
#    center = [y_mean,x_mean]
#    mymap = folium.Map(location=center, zoom_start=14)

#    # Convert GeoDataFrames to GeoJSON
#    chosen_path_geojson = chosen_path.to_crs(epsg='4326').to_json()
#    shortest_path_geojson = shortest_path.to_crs(epsg='4326').to_json()
#    intersection_geojson = intersection.to_crs(epsg='4326').to_json()

#    # Create FeatureGroups for each GeoDataFrame
#    chosen_path_fg = FeatureGroup(name='Chosen Path')
#    shortest_path_fg = FeatureGroup(name='Shortest Path',show=False)
#    intersection_fg = FeatureGroup(name='Buffer Intersection',show=False)

#    # Add GeoJSON data to FeatureGroups
#    folium.GeoJson(chosen_path_geojson, name='Chosen Path', style_function=lambda x: {'color': 'red'}).add_to(chosen_path_fg)
#    folium.GeoJson(shortest_path_geojson, name='Shortest Path', style_function=lambda x: {'color': 'blue'}).add_to(shortest_path_fg)
#    folium.GeoJson(intersection_geojson, name='Buffer Intersection', style_function=lambda x: {'color': 'yellow'}).add_to(intersection_fg)

#    # Add FeatureGroups to the map
#    chosen_path_fg.add_to(mymap)
#    shortest_path_fg.add_to(mymap)
#    intersection_fg.add_to(mymap)

#    if signals.shape[0] > 0:
#       signals_geojson = signals.to_crs(epsg='4326').to_json()
#       signals_fg = FeatureGroup(name='Signals')

#       folium.GeoJson(
#       signals_geojson,
#       name="Traffic Signal Turn Movement",
#       marker=folium.Circle(radius=20, fill_color="red", fill_opacity=.5, color="black", weight=1),
#       tooltip=folium.GeoJsonTooltip(fields=['N','num_times']),
#       popup=folium.GeoJsonPopup(fields=['N','num_times']),
#       #    style_function= lambda feature: {
#       #        'fillColor': colormap(feature['properties']['speed_mph']),
#       #    },
#       highlight_function=lambda feature: {"color":"yellow","weight":3}
#       ).add_to(signals_fg)
#       signals_fg.add_to(mymap)

#    if two_way_stops.shape[0] > 0:
#       two_way_stops_geojson = two_way_stops.to_crs(epsg='4326').to_json()
#       two_way_stops_fg = FeatureGroup(name='Two Way Stop (chosen)')

#       folium.GeoJson(
#       two_way_stops_geojson,
#       name="Two Way Stop with High Stress Cross Street",
#       marker=folium.Circle(radius=20, fill_color="yellow", fill_opacity=.5, color="black", weight=1),
#       tooltip=folium.GeoJsonTooltip(fields=['N','num_times']),
#       popup=folium.GeoJsonPopup(fields=['N','num_times']),
#       #    style_function= lambda feature: {
#       #        'fillColor': colormap(feature['properties']['speed_mph']),
#       #    },
#       highlight_function=lambda feature: {"color":"yellow","weight":3}
#       ).add_to(two_way_stops_fg)

#       two_way_stops_fg.add_to(mymap)


#    # Add start and end points with play and stop buttons
#    start_icon = folium.Icon(color='green',icon='play',prefix='fa')
#    end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
#    folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
#    folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

#    #autofit content not in this version?
#    #folium.FitOverlays().add_to(mymap)

#    # Add layer control to toggle layers on/off
#    folium.LayerControl().add_to(mymap)

#    #retrive overlap
#    exact_overlap = gdf.loc[gdf['tripid']==tripid,'shortest_exact_overlap_prop'].item()
#    buffer_overlap = gdf.loc[gdf['tripid']==tripid,'shortest_buffer_overlap'].item()

#    attr = gdf.loc[gdf['tripid']==tripid].squeeze()

#    # Add legend with statistics
#    legend_html = f'''
#    <div style="position: fixed; 
#             bottom: 5px; left: 5px; width: 300px; height: 500px; 
#             border:2px solid grey; z-index:9999; font-size:14px;
#             background-color: white;
#             opacity: 0.9;">
#    &nbsp; <b>Tripid: {tripid}</b> <br>
#    &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i><br>
#    &nbsp; End Point &nbsp; <i class="fa fa-stop" style="color:red"></i><br>
#    &nbsp; Exact Overlap: {exact_overlap*100:.2f}% <br>
#    &nbsp; Buffer Overlap: {buffer_overlap*100:.2f}% <br>

#    &nbsp; Trip Type: {attr['trip_type']} <br>
#    &nbsp; Length (mi): {attr['length_ft']/5280:.0f} <br>
#    &nbsp; Age: {attr['age']} <br>
#    &nbsp; Gender: {attr['gender']} <br>
#    &nbsp; Income: {attr['income']} <br>
#    &nbsp; Ethnicity: {attr['ethnicity']} <br>
#    &nbsp; Cycling Frequency: {attr['cyclingfreq']} <br>
#    &nbsp; Rider History: {attr['rider_history']} <br>
#    &nbsp; Rider Type: {attr['rider_type']} <br><br>

#    &nbsp; Residential %: {attr['highway.residential']*100:.2f}% <br>
#    &nbsp; Secondary %: {attr['highway.secondary']*100:.2f}% <br>
#    &nbsp; Tertiary %: {attr['highway.tertiary']*100:.2f}% <br>

#    &nbsp; # of bridges: {int(attr['bridge'])} <br>
#    &nbsp; # of left turns: {int(attr['left'])} <br>
#    &nbsp; # of straight turns: {int(attr['straight'])} <br>
#    &nbsp; # of right turns: {int(attr['right'])} <br>
#    &nbsp; # of stressful turns: {int(attr['unsignalized'])} <br>
#    &nbsp; # of signalized turns: {int(attr['signalized'])} <br>

#    </div>
#    '''

#    mymap.get_root().html.add_child(folium.Element(legend_html))

#    # Save the map to an HTML file or display it in a Jupyter notebook
#    #mymap.save('map.html')
#    # mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
#    return mymap  # Uncomment if you are using Jupyter notebook

#    #TODO add in the legend with trip info and then we're golden

# gdf
# examined = []
# #TODO add dots for signals and unsignalized
# #have slides on turns
# gdf.head()
# tripid = gdf['tripid'].sample(1).item()
# tripid = 2499
# examined.append(tripid)
# visualize(tripid,gdf,nodes)
# with (export_fp/'ready4calibration.pkl').open('wb') as fh:
#     pickle.dump(gdf,fh)
# with (export_fp/'ready4calibration.pkl').open('wb') as fh:
#     pickle.dump(gdf,fh)
# #viz version (used for optimization too)
