# Preparing Matched Traces for Calibration
Before calibration, we need to decide which traces make the cut using the match_ratio. Then we attach the start and end nodes to each trace and examine how well the shortest path (with/without turns) explains the routing behavior, so that it can be compared against the optimization.

In [1]:
import pickle
from pathlib import Path
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
from shapely.ops import MultiLineString
import networkx as nx

import folium
import geopandas as gpd
from folium.plugins import MarkerCluster, PolyLineTextPath
from folium.map import FeatureGroup

In [2]:
import json

# Read the project configuration stored one level above the current working
# directory. The context manager guarantees the file handle is closed again.
with (Path.cwd().parent / 'config.json').open('rb') as fh:
    config = json.load(fh)

# Make sure the Map_Matching folder exists. exist_ok=True replaces the
# separate exists() check and avoids the check-then-create race.
export_fp = Path(config['project_directory']) / 'Map_Matching'
export_fp.mkdir(exist_ok=True)

In [3]:
#file paths
project_dir = Path(config['project_directory'])
matched_fp = project_dir / "Map_Matching"
network_fp = project_dir / "Network"
traces_fp = project_dir / "CycleAtlanta"
export_fp = project_dir / "Calibration"

# export_fp now points at the Calibration folder, which nothing else in this
# notebook creates; make sure it exists so the final pickle export cannot fail
# with FileNotFoundError after the long-running routing loop.
export_fp.mkdir(exist_ok=True)

# Import network

In [4]:
# Load the directed edge table and the 'edges' layer of the final network.
directed_edges_fp = network_fp / 'directed_edges.parquet'
final_network_fp = network_fp / 'final_network.gpkg'

directed_edges = pd.read_parquet(directed_edges_fp)
edges_w_attr = gpd.read_file(final_network_fp, layer='edges')

In [5]:
edges_w_attr.columns

Index(['A', 'B', 'linkid', 'link_type', 'osmid', 'timestamp', 'version',
       'type', 'highway', 'oneway', 'name', 'bridge', 'tunnel', 'cycleway',
       'service', 'footway', 'sidewalk', 'bicycle', 'foot', 'access', 'area',
       'all_tags', 'geom_type', 'facility_fwd', 'facility_rev', 'year', 'lts',
       'reverse_geometry', 'ascent_m', 'ascent_grade_%', 'descent_m',
       'descent_grade_%', 'length_ft', 'geometry'],
      dtype='object')

In [6]:
# Keep only bike, pedestrian and road links, then drop every directed edge
# whose linkid no longer appears in the filtered link table.
link_types_allowed = ['bike','pedestrian','road']

is_allowed_type = edges_w_attr['link_type'].isin(link_types_allowed)
edges_w_attr = edges_w_attr.loc[is_allowed_type]

has_kept_link = directed_edges['linkid'].isin(edges_w_attr['linkid'])
directed_edges = directed_edges.loc[has_kept_link]

In [7]:
# Attach the one-way flag and the link length to every directed edge
# (left join on linkid keeps all directed edges).
link_attrs = edges_w_attr[['linkid','oneway','length_ft']]
directed_edges = pd.merge(directed_edges, link_attrs, on='linkid', how='left')

# The attributed link table is not needed after this point.
del edges_w_attr, link_attrs

In [8]:
# Remove directed edges that are the reverse direction of a one-way link.
# NOTE: despite its name, `wrongway` is True for the rows that are KEPT,
# i.e. rows that are not (reverse_link AND oneway).
against_oneway = directed_edges['reverse_link'] & directed_edges['oneway']
wrongway = against_oneway == False
directed_edges = directed_edges.loc[wrongway]

Index(['source', 'target', 'reverse_link', 'azimuth', 'linkid', 'osmid',
       'link_type', 'name', 'oneway', 'length_ft', 'bridge', 'tunnel',
       'link_type', 'highway', 'speedlimit_range_mph', 'lanes_per_direction',
       'bike_facility_type', 'ascent_m', 'ascent_grade', '(0,2]_ascent',
       '(2,4]_ascent', '(4,6]_ascent', '(6,10]_ascent', '(10,15]_ascent',
       '(15,inf]_ascent'],

In [9]:
# with (network_fp.parent / 'chosen.pkl').open('rb') as fh:
#     df_edges,pseudo_df,pseudo_G = pickle.load(fh)
# df_edges.columns
# #recalculate length and add to network
# fp = Path.home() / "Documents/BikewaySimData/Projects/gdot"
# edges = gpd.read_file(fp/'networks/elevation_added.gpkg',layer="links")
# edges.to_crs('epsg:2240',inplace=True)
# geo_dict = dict(zip(edges['linkid'],edges['geometry']))
# length_dict = dict(zip(edges['linkid'],edges.length))

# df_edges['length_ft'] = df_edges['linkid'].map(length_dict)

# # df_edges = df_edges.merge(edges[['linkid','geometry']],on=['linkid'])
# # df_edges = gpd.GeoDataFrame(df_edges,geometry='geometry',crs=edges.crs)
# # df_edges = df_edges.loc[:,~df_edges.columns.duplicated()].copy()
# # df_edges.reset_index(drop=True,inplace=True)
# # df_edges.columns

In [10]:
# For length-based shortest paths only the shortest of several parallel edges
# matters, so keep a single row per (source, target): the minimum-length one.
lengths_by_node_pair = directed_edges.groupby(['source','target'])['length_ft']
keep_min = lengths_by_node_pair.idxmin()
no_multi_edges = directed_edges.loc[keep_min]

# Build the directed routing graph; each edge stores its length (rounded to
# two decimals) under the 'length_ft' attribute.
weighted_edges = zip(
    no_multi_edges['source'],
    no_multi_edges['target'],
    no_multi_edges['length_ft'].round(2),
)
G = nx.DiGraph()
G.add_weighted_edges_from(weighted_edges, weight='length_ft')

## import matched traces


In [11]:
# Load the map-matching results (a dict keyed by trip id).
# NOTE: pickle.load can execute arbitrary code, so only load files that were
# produced by this project.
matched_pickle_fp = matched_fp / 'matched_0.pkl'
with open(matched_pickle_fp, 'rb') as fh:
    match_dict = pickle.load(fh)

In [12]:
# Collect the match ratio of every trip that has a match result; entries
# stored as plain strings (failed matches) are skipped.
match_ratios = pd.Series({
    tripid: item['match_ratio']
    for tripid, item in match_dict.items()
    if not isinstance(item, str)
})
print((match_ratios > 0.90).sum(),'/',len(match_ratios))

1803 / 2407


In [13]:
# Trip ids whose match ratio is strictly greater than 0.90.
passes_threshold = match_ratios > 0.90
above_threshold = match_ratios[passes_threshold].index.tolist()

In [14]:
# Keep only the trips that passed the match-ratio threshold. The ids are put
# in a set first so each membership test is O(1) instead of a scan of the list.
above_threshold_ids = set(above_threshold)
match_dict = {key:item for key, item in match_dict.items() if key in above_threshold_ids}

In [15]:
# with (network_fp/'matched_trips_df.pkl').open('rb') as fh:
#     trips_df_export = pickle.load(fh)

In [16]:
# match_dict = {key:item for key, item in match_dict.items() if key in set(trips_df_export['tripid'].tolist())}

In [17]:
# ratio_threshold = 0.75 # 75% of the points in the trace were matched
# total_matches = len(match_dict.keys())
# match_dict = {key:item for key, item in match_dict.items() if item['match_ratio'] >= ratio_threshold}
# print(len(match_dict.keys()),'/',total_matches,'trips had at least',ratio_threshold*100,'% of coordinates matched')

In [18]:
# import random

# def get_random_key(dictionary):
#     random_key =  random.choice(list(dictionary.keys()))
#     #recursion?
#     if isinstance(dictionary.get(random_key),str):
#         random_key = get_random_key(dictionary)
#     return random_key

# match_dict[get_random_key(match_dict)].keys()


# Calculate Shortest Path

In [19]:
# For every matched trip: resolve the origin/destination network nodes from
# the first/last matched edge, route the length-based shortest path between
# them, and store both edge sequences for calibration.
match_results = {}
#shortest_results = {}
# trips that could not be routed (no matched edges, unresolved end nodes, no
# path in the graph, or a path without edges) are collected here instead of
# aborting the whole loop
failed_shortest_path = []

for tripid, items in tqdm(match_dict.items()):

    #failed matches will be str type
    if isinstance(items,str):
        continue

    matched_edges = items['edges']

    #nothing to route if the match produced no edges
    if len(matched_edges) == 0:
        failed_shortest_path.append(tripid)
        continue

    #get first and last matched edge (rows holding 'linkid' and 'reverse_link')
    first_edge = matched_edges.iloc[0,:]
    last_edge = matched_edges.iloc[-1,:]

    #get start and end node for shortest and impedance routing
    start_a_b = directed_edges.loc[(directed_edges['linkid']==first_edge['linkid']) & (directed_edges['reverse_link']==first_edge['reverse_link']),['source','target']]
    end_a_b = directed_edges.loc[(directed_edges['linkid']==last_edge['linkid']) & (directed_edges['reverse_link']==last_edge['reverse_link']),['source','target']]

    #.item() below needs exactly one matching row; a matched edge can be missing
    #from the filtered network, so record the trip instead of raising ValueError
    if len(start_a_b) != 1 or len(end_a_b) != 1:
        failed_shortest_path.append(tripid)
        continue

    # NOTE(review): which column is picked depends on reverse_link and differs
    # between origin and destination - confirm this matches how source/target
    # are oriented for reverse links in directed_edges.
    if first_edge['reverse_link']:
        start = start_a_b['source'].item()
    else:
        start = start_a_b['target'].item()

    if last_edge['reverse_link']:
        end = end_a_b['target'].item()
    else:
        end = end_a_b['source'].item()

    #find shortest path; networkx raises when a node is not in the graph or the
    #destination cannot be reached, which is exactly what this list should record
    try:
        impedance, path = nx.single_source_dijkstra(G,start,end,weight="length_ft")
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        failed_shortest_path.append(tripid)
        continue

    #origin == destination gives a single-node path with no edges
    if len(path) < 2:
        failed_shortest_path.append(tripid)
        continue

    #turn to edge list
    edge_list = list(zip(path[:-1],path[1:]))
    edge_df = pd.DataFrame(edge_list,columns=['source','target'])

    #convert to linkid and reverse link column to match format
    edge_df = pd.merge(edge_df,no_multi_edges[['source','target','linkid','reverse_link']],on=['source','target'])[['linkid','reverse_link']]
    #shortest_results[tripid] = edge_df

    match_results[tripid] = {
    'origin_node': start,
    'destination_node': end,
    #despite the key name only the year is stored (taken from the first row,
    #third column of the trace)
    'trip_start_time': items['trace'].iloc[0,2].year,
    'match_ratio': items['match_ratio'],
    'matched_edges': matched_edges,
    'shortest_edges': edge_df
    }


  0%|          | 0/1803 [00:00<?, ?it/s]

100%|██████████| 1803/1803 [02:15<00:00, 13.29it/s]


In [20]:
# One row per trip; the dict key (trip id) becomes a regular 'tripid' column.
trip_ods = (
    pd.DataFrame.from_dict(match_results, orient='index')
    .reset_index()
    .rename(columns={'index': 'tripid'})
)

In [21]:
trip_ods

Unnamed: 0,tripid,origin_node,destination_node,trip_start_time,match_ratio,matched_edges,shortest_edges
0,71,3742629144,69229262,2012,1.0,linkid reverse_link 0 1.125732e...,linkid reverse_link 0 1125732394 ...
1,80,3486712871,3486632542,2012,1.0,linkid reverse_link 0 1.125725e+0...,linkid reverse_link 0 1125725087 ...
2,104,5411893060,69147502,2012,1.0,linkid reverse_link 0 1.125731e...,linkid reverse_link 0 1125731425 ...
3,107,5558883423,6702910685,2012,1.0,linkid reverse_link 0 1.125617e...,linkid reverse_link 0 1125616876 ...
4,110,1998904177,69536974,2012,1.0,linkid reverse_link 0 1.125731e...,linkid reverse_link 0 1125731262 ...
...,...,...,...,...,...,...,...
1798,34187,69282165,69463289,2016,1.0,linkid reverse_link 0 1.125734e...,linkid reverse_link 0 1125733784 ...
1799,34289,5674541876,9656766356,2016,1.0,linkid reverse_link 0 1.125688e+0...,linkid reverse_link 0 1125687766 ...
1800,34313,7495990151,7865851372,2016,1.0,linkid reverse_link 0 1.125681e+0...,linkid reverse_link 0 1125680907 ...
1801,34340,68228587,68382805,2016,1.0,linkid reverse_link 0 1.125694e+0...,linkid reverse_link 0 1125694134 ...


In [22]:
# Report how many trips the routing loop recorded in failed_shortest_path.
print('Could not find shortest path for',len(failed_shortest_path),'trips')

Could not find shortest path for 0 trips


# Process match results
- Summarize the successful matches (use the match_ratio parameter to refine) in a dataframe with columns for tripid, starting network node, ending network node, and the match ratio.
- Find the shortest path (using link distance) between the two network nodes and calculate similarity metrics.

In [23]:
trip_ods

Unnamed: 0,tripid,origin_node,destination_node,trip_start_time,match_ratio,matched_edges,shortest_edges
0,71,3742629144,69229262,2012,1.0,linkid reverse_link 0 1.125732e...,linkid reverse_link 0 1125732394 ...
1,80,3486712871,3486632542,2012,1.0,linkid reverse_link 0 1.125725e+0...,linkid reverse_link 0 1125725087 ...
2,104,5411893060,69147502,2012,1.0,linkid reverse_link 0 1.125731e...,linkid reverse_link 0 1125731425 ...
3,107,5558883423,6702910685,2012,1.0,linkid reverse_link 0 1.125617e...,linkid reverse_link 0 1125616876 ...
4,110,1998904177,69536974,2012,1.0,linkid reverse_link 0 1.125731e...,linkid reverse_link 0 1125731262 ...
...,...,...,...,...,...,...,...
1798,34187,69282165,69463289,2016,1.0,linkid reverse_link 0 1.125734e...,linkid reverse_link 0 1125733784 ...
1799,34289,5674541876,9656766356,2016,1.0,linkid reverse_link 0 1.125688e+0...,linkid reverse_link 0 1125687766 ...
1800,34313,7495990151,7865851372,2016,1.0,linkid reverse_link 0 1.125681e+0...,linkid reverse_link 0 1125680907 ...
1801,34340,68228587,68382805,2016,1.0,linkid reverse_link 0 1.125694e+0...,linkid reverse_link 0 1125694134 ...


In [24]:
# Persist the per-trip results (keyed by trip id) for the impedance
# calibration step.
calibration_pickle_fp = export_fp / 'ready_for_calibration.pkl'
with open(calibration_pickle_fp, 'wb') as fh:
    pickle.dump(match_results, fh)

# Move on after this

# Comparison Metrics
Experiment with various ways of comparing the matched route to the shortest route here.

In [25]:

#     # #euclidean distance between points
#     # snode = nodes.loc[nodes['N']==start,'geometry'].item()
#     # dnode = nodes.loc[nodes['N']==end,'geometry'].item()
    
#     # #add geo features
#     # edge_geo = pd.merge(match_dict[tripid]['edges'],edges[['linkid','geometry']],on=['linkid'],how='left')
#     # edge_geo = gpd.GeoDataFrame(edge_geo,geometry='geometry')
#     # edge_geo_dissolved = MultiLineString(edge_geo['geometry'].tolist())
#     # linkids = set(edge_geo['linkid'].tolist())

    
    
    

#     #TODO use .array version to get rid of errors

#     forward = pd.merge(edge_df,edges[['source','target','linkid','geometry']],on=['source','target'])[['linkid','geometry']]
#     reverse = pd.merge(edge_df,edges[['source','target','linkid','geometry']],left_on=['target','source'],right_on=['source','target'])[['linkid','geometry']]
#     shortest_path = pd.concat([forward,reverse],ignore_index=True)
#     shortest_linkids = set(shortest_path['linkid'].tolist())
#     shortest_geo = gpd.GeoDataFrame(shortest_path)
#     shortest_geo_dissolved = MultiLineString(shortest_geo['geometry'].tolist())

#     #exact overlap
#     chosen_and_shortest = linkids & shortest_linkids
#     overlap_length = edges.set_index('linkid').loc[list(chosen_and_shortest)]['length_ft'].sum()
#     exact_overlap = overlap_length / edge_geo.length.sum()

#     #buffer overlap
#     buffer_ft = 500
#     chosen = edge_geo_dissolved.buffer(buffer_ft)
#     shortest = shortest_geo_dissolved.buffer(buffer_ft)
#     intersection = chosen.intersection(shortest)
#     buffer_overlap = intersection.area / (chosen.area + shortest.area - intersection.area)

#     #collapse to multilinestring with length
#     #add length
#     matched_trips[tripid] = {'start':start,
#                             'end':end,
#                             'start_end_dist_ft': snode.distance(dnode),
#                             'match_ratio': match_dict[tripid]['match_ratio'], 
#                             'linkids':str(linkids),
#                             'geometry':edge_geo_dissolved,
#                             'length_ft':edge_geo.length.sum(),
#                             'shortest_length_ft': impedance,
#                             'shortest_linkids': shortest_linkids,
#                             'shortest_geo': shortest_geo_dissolved,
#                             'shortest_exact_overlap_length': overlap_length,
#                             'shortest_exact_overlap_prop': exact_overlap,
#                             'shortest_buffer_overlap': buffer_overlap,
#                             'shortest_intersect_geo': intersection
#                             }
# # matched_trips = {}
# # loop_trips = []

# # for tripid,items in tqdm(match_dict.items()):

# #     #failed matches will be str type
# #     if isinstance(items,dict):

# #         #get start and end linkid
# #         start = match_dict[tripid]['edges'].iloc[0,:]
# #         end = match_dict[tripid]['edges'].iloc[-1,:]
        
# #         #get start and end node
# #         start_a_b = edges.loc[(edges['linkid']==start['linkid']) & (edges['reverse_link']==start['reverse_link']),['source','target']]
# #         end_a_b = edges.loc[(edges['linkid']==end['linkid']) & (edges['reverse_link']==end['reverse_link']),['source','target']]

# #         if start['reverse_link']:
# #             start = start_a_b['source'].item()
# #         else:
# #             start = start_a_b['target'].item()

# #         if end['reverse_link']:
# #             end = end_a_b['target'].item()
# #         else:
# #             end = end_a_b['source'].item()

# #         #euclidean distance between points
# #         snode = nodes.loc[nodes['N']==start,'geometry'].item()
# #         dnode = nodes.loc[nodes['N']==end,'geometry'].item()
        
# #         #add geo features
# #         edge_geo = pd.merge(match_dict[tripid]['edges'],edges[['linkid','geometry']],on=['linkid'],how='left')
# #         edge_geo = gpd.GeoDataFrame(edge_geo,geometry='geometry')
# #         edge_geo_dissolved = MultiLineString(edge_geo['geometry'].tolist())
# #         linkids = set(edge_geo['linkid'].tolist())

# #         #shortest path routing here
# #         impedance, path = nx.single_source_dijkstra(MDG,start,end,weight="length_ft")
        
# #         if len(path) < 2:
# #             loop_trips.append(tripid)
# #             continue

# #         #turn to edge list
# #         edge_list = [(path[i],path[i+1]) for i in range(len(path)-1)]
# #         edge_df = pd.DataFrame(edge_list,columns=['source','target'])

# #         #TODO use .array version to get rid of errors

# #         forward = pd.merge(edge_df,edges[['source','target','linkid','geometry']],on=['source','target'])[['linkid','geometry']]
# #         reverse = pd.merge(edge_df,edges[['source','target','linkid','geometry']],left_on=['target','source'],right_on=['source','target'])[['linkid','geometry']]
# #         shortest_path = pd.concat([forward,reverse],ignore_index=True)
# #         shortest_linkids = set(shortest_path['linkid'].tolist())
# #         shortest_geo = gpd.GeoDataFrame(shortest_path)
# #         shortest_geo_dissolved = MultiLineString(shortest_geo['geometry'].tolist())

# #         #exact overlap
# #         chosen_and_shortest = linkids & shortest_linkids
# #         overlap_length = edges.set_index('linkid').loc[list(chosen_and_shortest)]['length_ft'].sum()
# #         exact_overlap = overlap_length / edge_geo.length.sum()

# #         #buffer overlap
# #         buffer_ft = 500
# #         chosen = edge_geo_dissolved.buffer(buffer_ft)
# #         shortest = shortest_geo_dissolved.buffer(buffer_ft)
# #         intersection = chosen.intersection(shortest)
# #         buffer_overlap = intersection.area / (chosen.area + shortest.area - intersection.area)

# #         #collapse to multilinestring with length
# #         #add length
# #         matched_trips[tripid] = {'start':start,
# #                               'end':end,
# #                               'start_end_dist_ft': snode.distance(dnode),
# #                               'match_ratio': match_dict[tripid]['match_ratio'], 
# #                               'linkids':str(linkids),
# #                               'geometry':edge_geo_dissolved,
# #                               'length_ft':edge_geo.length.sum(),
# #                               'shortest_length_ft': impedance,
# #                               'shortest_linkids': shortest_linkids,
# #                               'shortest_geo': shortest_geo_dissolved,
# #                               'shortest_exact_overlap_length': overlap_length,
# #                               'shortest_exact_overlap_prop': exact_overlap,
# #                               'shortest_buffer_overlap': buffer_overlap,
# #                               'shortest_intersect_geo': intersection
# #                               }
# Want to display when a trip goes through a signalized intersection and also how many times it does so. Need to take the list of edges from the matched_traces_dict and construct a list of turns from it. This list of turns can then be used to get the right node ids. Later turn this into a function.
# tripid = 4100
# edges = match_dict[tripid]['edges']

# # make list of edges and turns
# list_of_edges = list(zip(edges['linkid'],edges['reverse_link']))
# df_edges['tup'] = list(zip(df_edges['linkid'],df_edges['reverse_link']))
# chosen_links = df_edges.set_index('tup').loc[list_of_edges]
# list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]
# df_of_turns = pd.DataFrame(list_of_turns,columns=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])
# df_of_turns
# subset = pseudo_df.merge(df_of_turns,on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])

# # get list of nodes
# signals = subset.loc[subset['signalized']==True,'source_B'].value_counts()
# two_way_stops = subset.loc[subset['unsignalized']==True,'source_B'].value_counts()

# #get node coordinates
# #nodes.merge(signals,left_on='N',right_index=True)

# test = nodes.merge(signals,left_on='N',right_index=True)
# test.columns = ['N','geometry','num_times']


# # now value counts 
# #two_way_stops.value_counts().head(20)
# ## Use linkids to add network summaries 

# #turn into dataframe
# df = pd.DataFrame.from_dict(matched_trips,orient='index')
# #into geodataframe
# gdf = gpd.GeoDataFrame(df,geometry='geometry',crs='epsg:2240')

# gdf.reset_index(inplace=True)
# gdf.rename(columns={'index':'tripid'},inplace=True)

# test_merge = pd.read_csv(network_fp.parent/'all_attrs.csv')
# prev = gdf.copy()
# gdf = gdf.merge(test_merge,on='tripid')
# gdf
# def visualize(tripid,gdf,nodes):

#    '''
#    This function displays the matched vs shortest route for a particular trip
#    It also displays the trip characteristics side be side and plots the any signalized
#    intersections and stressful turns passed through.
#    '''

#    #gdf contains all the trips and the trip gemometries as mutlilinestrings
#    gdf = gdf.copy()

#    # Your GeoDataFrames
#    chosen_path = gdf.loc[gdf['tripid']==tripid,['tripid','geometry']]
#    shortest_path = gdf.loc[gdf['tripid']==tripid,['tripid','shortest_geo']].set_geometry('shortest_geo').set_crs(gdf.crs)
#    intersection = gdf.loc[gdf['tripid']==tripid,['tripid','shortest_intersect_geo']].set_geometry('shortest_intersect_geo').set_crs(gdf.crs)

#    #from these we want to get the locations and number of singalized intersections and stressful crossing passed through
#    edges = match_dict[tripid]['edges']
#    list_of_edges = list(zip(edges['linkid'],edges['reverse_link']))
#    list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]
#    df_of_turns = pd.DataFrame(list_of_turns,columns=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])
#    subset = pseudo_df.merge(df_of_turns,on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])

#    # from this subset we can get the right node ids
#    #TODO turns should be by edges probably?
#    #turns = subset[['source_B','turn_type']]
#    signals = subset.loc[subset['signalized']==True,'source_B'].value_counts()
#    two_way_stops = subset.loc[subset['unsignalized']==True,'source_B'].value_counts()

#    #and then get the correct rows of the gdf
#    #turns = nodes.merge(signals,left_on='N',right_on='')
#    signals = nodes.merge(signals,left_on='N',right_index=True)
#    signals.columns = ['N','geometry','num_times']
#    two_way_stops = nodes.merge(two_way_stops,left_on='N',right_index=True)
#    two_way_stops.columns = ['N','geometry','num_times']

#    # get the start and end point for plotting
#    start_N = gdf.loc[gdf['tripid']==tripid,'start'].item()
#    start_pt = nodes.to_crs('epsg:4326').loc[nodes['N']==start_N,'geometry'].item()
#    end_N = gdf.loc[gdf['tripid']==tripid,'end'].item()
#    end_pt = nodes.to_crs('epsg:4326').loc[nodes['N']==end_N,'geometry'].item()

#    # Create a Folium map centered around the mean of the chosen route
#    x_mean = chosen_path.to_crs(epsg='4326').geometry.item().centroid.x
#    y_mean = chosen_path.to_crs(epsg='4326').geometry.item().centroid.y
#    center = [y_mean,x_mean]
#    mymap = folium.Map(location=center, zoom_start=14)

#    # Convert GeoDataFrames to GeoJSON
#    chosen_path_geojson = chosen_path.to_crs(epsg='4326').to_json()
#    shortest_path_geojson = shortest_path.to_crs(epsg='4326').to_json()
#    intersection_geojson = intersection.to_crs(epsg='4326').to_json()

#    # Create FeatureGroups for each GeoDataFrame
#    chosen_path_fg = FeatureGroup(name='Chosen Path')
#    shortest_path_fg = FeatureGroup(name='Shortest Path',show=False)
#    intersection_fg = FeatureGroup(name='Buffer Intersection',show=False)

#    # Add GeoJSON data to FeatureGroups
#    folium.GeoJson(chosen_path_geojson, name='Chosen Path', style_function=lambda x: {'color': 'red'}).add_to(chosen_path_fg)
#    folium.GeoJson(shortest_path_geojson, name='Shortest Path', style_function=lambda x: {'color': 'blue'}).add_to(shortest_path_fg)
#    folium.GeoJson(intersection_geojson, name='Buffer Intersection', style_function=lambda x: {'color': 'yellow'}).add_to(intersection_fg)

#    # Add FeatureGroups to the map
#    chosen_path_fg.add_to(mymap)
#    shortest_path_fg.add_to(mymap)
#    intersection_fg.add_to(mymap)

#    if signals.shape[0] > 0:
#       signals_geojson = signals.to_crs(epsg='4326').to_json()
#       signals_fg = FeatureGroup(name='Signals')

#       folium.GeoJson(
#       signals_geojson,
#       name="Traffic Signal Turn Movement",
#       marker=folium.Circle(radius=20, fill_color="red", fill_opacity=.5, color="black", weight=1),
#       tooltip=folium.GeoJsonTooltip(fields=['N','num_times']),
#       popup=folium.GeoJsonPopup(fields=['N','num_times']),
#       #    style_function= lambda feature: {
#       #        'fillColor': colormap(feature['properties']['speed_mph']),
#       #    },
#       highlight_function=lambda feature: {"color":"yellow","weight":3}
#       ).add_to(signals_fg)
#       signals_fg.add_to(mymap)

#    if two_way_stops.shape[0] > 0:
#       two_way_stops_geojson = two_way_stops.to_crs(epsg='4326').to_json()
#       two_way_stops_fg = FeatureGroup(name='Two Way Stop (chosen)')

#       folium.GeoJson(
#       two_way_stops_geojson,
#       name="Two Way Stop with High Stress Cross Street",
#       marker=folium.Circle(radius=20, fill_color="yellow", fill_opacity=.5, color="black", weight=1),
#       tooltip=folium.GeoJsonTooltip(fields=['N','num_times']),
#       popup=folium.GeoJsonPopup(fields=['N','num_times']),
#       #    style_function= lambda feature: {
#       #        'fillColor': colormap(feature['properties']['speed_mph']),
#       #    },
#       highlight_function=lambda feature: {"color":"yellow","weight":3}
#       ).add_to(two_way_stops_fg)

#       two_way_stops_fg.add_to(mymap)


#    # Add start and end points with play and stop buttons
#    start_icon = folium.Icon(color='green',icon='play',prefix='fa')
#    end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
#    folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
#    folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

#    #autofit content not in this version?
#    #folium.FitOverlays().add_to(mymap)

#    # Add layer control to toggle layers on/off
#    folium.LayerControl().add_to(mymap)

#    #retrive overlap
#    exact_overlap = gdf.loc[gdf['tripid']==tripid,'shortest_exact_overlap_prop'].item()
#    buffer_overlap = gdf.loc[gdf['tripid']==tripid,'shortest_buffer_overlap'].item()

#    attr = gdf.loc[gdf['tripid']==tripid].squeeze()

#    # Add legend with statistics
#    legend_html = f'''
#    <div style="position: fixed; 
#             bottom: 5px; left: 5px; width: 300px; height: 500px; 
#             border:2px solid grey; z-index:9999; font-size:14px;
#             background-color: white;
#             opacity: 0.9;">
#    &nbsp; <b>Tripid: {tripid}</b> <br>
#    &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i><br>
#    &nbsp; End Point &nbsp; <i class="fa fa-stop" style="color:red"></i><br>
#    &nbsp; Exact Overlap: {exact_overlap*100:.2f}% <br>
#    &nbsp; Buffer Overlap: {buffer_overlap*100:.2f}% <br>

#    &nbsp; Trip Type: {attr['trip_type']} <br>
#    &nbsp; Length (mi): {attr['length_ft']/5280:.0f} <br>
#    &nbsp; Age: {attr['age']} <br>
#    &nbsp; Gender: {attr['gender']} <br>
#    &nbsp; Income: {attr['income']} <br>
#    &nbsp; Ethnicity: {attr['ethnicity']} <br>
#    &nbsp; Cycling Frequency: {attr['cyclingfreq']} <br>
#    &nbsp; Rider History: {attr['rider_history']} <br>
#    &nbsp; Rider Type: {attr['rider_type']} <br><br>

#    &nbsp; Residential %: {attr['highway.residential']*100:.2f}% <br>
#    &nbsp; Secondary %: {attr['highway.secondary']*100:.2f}% <br>
#    &nbsp; Tertiary %: {attr['highway.tertiary']*100:.2f}% <br>

#    &nbsp; # of bridges: {int(attr['bridge'])} <br>
#    &nbsp; # of left turns: {int(attr['left'])} <br>
#    &nbsp; # of straight turns: {int(attr['straight'])} <br>
#    &nbsp; # of right turns: {int(attr['right'])} <br>
#    &nbsp; # of stressful turns: {int(attr['unsignalized'])} <br>
#    &nbsp; # of signalized turns: {int(attr['signalized'])} <br>

#    </div>
#    '''

#    mymap.get_root().html.add_child(folium.Element(legend_html))

#    # Save the map to an HTML file or display it in a Jupyter notebook
#    #mymap.save('map.html')
#    # mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
#    return mymap  # Uncomment if you are using Jupyter notebook

#    #TODO add in the legend with trip info and then we're golden

# gdf
# examined = []
# #TODO add dots for signals and unsignalized
# #have slides on turns
# gdf.head()
# tripid = gdf['tripid'].sample(1).item()
# tripid = 2499
# examined.append(tripid)
# visualize(tripid,gdf,nodes)
# with (export_fp/'ready4calibration.pkl').open('wb') as fh:
#     pickle.dump(gdf,fh)
# with (export_fp/'ready4calibration.pkl').open('wb') as fh:
#     pickle.dump(gdf,fh)
# #viz version (used for optimization too)
