# Map Matching

## TODO (in order of priority):
1. Make sure that the cleanup code works (come back to this)
    1. Want to get rid of the really small out and backs
    1. Have method for doing this, but worried that it will remove valid out and backing
    1. Frechet distance could be a way to tell if the cleaning removes too much out and backing? Or some sort of overlap metric?
1. Use geo_dict to assemble edge for calculated frechet distance
    1. Frechet distance will give a decent measure of how close the match is to the trace
    1. Will need to reverse link geometry if passing the other way otherwise it won't work
1. Match all the traces and export for impedance calibration
    1. The process flow into this step is still uncertain


In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
#from leuvenmapmatching import visualization as mmviz
import pickle
import time
import datetime
from pathlib import Path

from tqdm import tqdm
from shapely.ops import Point, LineString
import matplotlib.pyplot as plt

from importlib import reload

import map_match

In [2]:
import json

# Load the project settings from config.json in the parent of the working directory.
# The context manager closes the file handle as soon as the JSON has been parsed.
with (Path.cwd().parent / 'config.json').open('rb') as config_fh:
    config = json.load(config_fh)

# Project root directory; create it (and any missing parents) if it is not there yet.
export_fp = Path(config['project_directory'])
export_fp.mkdir(parents=True, exist_ok=True)

## Set up the filepaths

In [3]:
# Resolve the project root once and derive each working directory from it.
project_dir = Path(config['project_directory'])
#network_fp = project_dir / 'Map_Matching/matching.gpkg'
network_fp = project_dir / 'Network'        # network GeoPackage is read from here
traces_fp = project_dir / 'CycleAtlanta'    # pickled traces/trips are read from here
export_fp = project_dir / 'Map_Matching'    # settings log and matches are written here

# Prepare Network
Bring in the pre-processed network and filter as needed

## Iterative Matching
Sometimes matching can be improved just by limiting what can be matched to. For this, we want to start by mapping to the most restrictive network. Then we add more links if a match hasn't been found.

1. Network without parking lot roads
1. Network without oneway restrictions
1. Full network

In [4]:
# with (network_fp / 'nodes.pkl').open('rb') as fh:
#     nodes = pickle.load(fh)
# # with (network_fp / 'directed_edges.pkl').open('rb') as fh:
# #     df_edges = pickle.load(fh)
# directed_links = pd.read_parquet(network_fp/'directed_edges.parquet')

In [5]:
# Both layers live in the same GeoPackage, so build its path once
# and read the 'edges' layer followed by the 'nodes' layer.
network_gpkg = network_fp / 'final_network.gpkg'
df_edges, nodes = (
    gpd.read_file(network_gpkg, layer=layer_name)
    for layer_name in ('edges', 'nodes')
)

Link Filtering

In [6]:
# Link types that traces are allowed to be matched to (applied as a filter in the next cell).
link_types_allowed = ['bike','pedestrian','road']
# Display every link type present in the network so the allow-list can be checked against it.
df_edges['link_type'].unique()

array(['road', 'service', 'parking_and_driveways', 'pedestrian', 'bike',
       'sidewalk_or_crossing'], dtype=object)

In [7]:
# Keep only the links whose type is on the allow-list.
allowed_type_mask = df_edges['link_type'].isin(link_types_allowed)
df_edges = df_edges.loc[allowed_type_mask]

In [8]:
# Use the source/target column names for the link end nodes instead of A/B.
df_edges = df_edges.rename(columns={'A': 'source', 'B': 'target'})

In [9]:
#only allow roads + pedestrian + bike?

In [10]:
# df_edges = df_edges.merge(edges[['linkid','geometry']])
# df_edges = gpd.GeoDataFrame(df_edges,geometry='geometry',crs=edges.crs)

In [11]:
#filter network and remove isolates

# #filter using link type
# link_types = ['bike','road','pedestrian','service']
# filtered = df_edges[df_edges['link_type'].isin(link_types)]
# #create multigraph
# import networkx as nx
# MDG = nx.Graph()

# #remove isolates
# MDG.add_edges_from(list(zip(filtered['source'],filtered['target'])))
# largest_cc = max(nx.connected_components(MDG), key=len)

#subset 

#turn to multigraph, remove isolates turn back into link dataframe
#also remove nodes
# in_largest_cc = filtered['source'].isin(largest_cc) | filtered['target'].isin(largest_cc)
# filtered['isolate'] = False
# filtered.loc[~in_largest_cc,'isolate'] = True
# matching_network_links = filtered.loc[filtered['isolate']==False]

# node_filt = set(matching_network_links['source'].append(matching_network_links['target']).tolist())
# matching_network_nodes = nodes.loc[nodes['N'].isin(node_filt)]

In [12]:
# #import network
# edges = gpd.read_file(network_fp/'final_network.gpkg',layer="final_network")
# #edges.reset_index(inplace=True)
# #edges.rename(columns={'index':'linkid'},inplace=True)

#TODO next features
# need to add the ability to remove isolates when doing this so we don't have matches to links that don't go anywhere
# allow_wrongway = 
# link_types_to_include = ['road','bike','pedestrian]

# Breaking Apart Multi-Edges
Occasionally, there will be multiple edges between two nodes. A common place for this to occur is where local roads connect to main roads and form U-shapes. Leuven Map Matching cannot handle multi-edges because the only information stored in the network graph is the nodes. Usually, it's obvious which edge should be retained, but an easy way to still include both edges for map matching is to break these edges at their centroid into smaller edges with new nodes. The cell below does this.

The centroid nodes created in this step are given a unique id. The map matching function removes these new nodes so that they don't appear in the results.

In [13]:
#TODO condense this into a function

# Largest existing ids, handed to the explode step for adding the new midpoint nodes/links.
max_nodeid = nodes['N'].max()
max_linkid = df_edges['linkid'].max()

# A (source, target) pair served by more than one distinct linkid is a multi-edge
# (the network already contains duplicate edges, hence nunique rather than a row count).
links_per_pair = df_edges.groupby(['source', 'target'])['linkid'].nunique()
multi_pairs = links_per_pair[links_per_pair > 1].reset_index(name='num_linkid')

# Every linkid that takes part in such a pair; built once and reused for both subsets.
multi_linkids = set(df_edges.merge(multi_pairs, on=['source', 'target'])['linkid'].tolist())
is_multi = df_edges['linkid'].isin(multi_linkids)
multi_edges = df_edges[is_multi]

# Everything else passes through untouched.
non_multi_edges = df_edges[~is_multi]

# Break the multi-edges apart and stitch the results back onto the rest of the network.
new_links, new_nodes = map_match.explode_network_midpoint(multi_edges, max_nodeid, max_linkid)
exploded_nodes = pd.concat([nodes, new_nodes], ignore_index=True)
exploded_links = pd.concat([non_multi_edges, new_links], ignore_index=True)

In [14]:
#create map matching graph network (no wrongway)
# NOTE(review): the third positional argument (False) presumably disables wrong-way
# travel on the links -- confirm against map_match.make_network.
map_con = map_match.make_network(exploded_links,exploded_nodes,False)

In [15]:
# Tag both exploded layers with the projected CRS from the project config.
projected_epsg = config['projected_crs_epsg']
exploded_nodes.set_crs(projected_epsg, inplace=True)
exploded_links.set_crs(projected_epsg, inplace=True)

#optional inspect exploded network
#exploded_nodes.to_file(network_fp/'matching_network.gpkg',layer='exploded_nodes')
#exploded_links.to_file(network_fp/'matching_network.gpkg',layer='exploded_links')

Unnamed: 0,source,target,linkid,link_type,osmid,timestamp,version,type,highway,oneway,...,year,lts,reverse_geometry,ascent_m,ascent_grade_%,descent_m,descent_grade_%,length_ft,geometry,source_linkid
0,6710023987,69428484,1125582092,road,50346065.0,1.608717e+09,19.0,way,primary,False,...,,4.0,1,4.5,0.96,-2.1,-0.45,1527.030230,"LINESTRING (2199028.737 1318143.985, 2198937.4...",
1,6715039076,69359601,1125582111,road,630014235.0,1.565940e+09,2.0,way,residential,False,...,,2.0,1,0.6,0.73,-0.1,-0.17,256.320965,"LINESTRING (2199301.414 1317716.546, 2199244.0...",
2,6715039076,69359606,1125582112,road,630014235.0,1.565940e+09,2.0,way,residential,False,...,,2.0,0,0.0,0.00,-1.7,-1.08,253.291057,"LINESTRING (2199301.414 1317716.546, 2199345.5...",
3,8317885804,69302632,1125582121,road,9250903.0,1.610525e+09,3.0,way,service,False,...,,3.0,0,1.6,0.68,-1.9,-0.79,660.819821,"LINESTRING (2200337.281 1317974.107, 2200336.5...",
4,5426585244,5426567807,1125582134,road,143758462.0,1.636971e+09,11.0,way,secondary,False,...,,4.0,1,0.0,0.00,-0.1,-0.06,41.016447,"LINESTRING (2202935.350 1318104.648, 2202941.7...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79872,10291023734,3468992596,1125782140,,,,,,,False,...,,,,,,,,,"LINESTRING (2219574.311 1391641.987, 2219618.6...",1.125780e+09
79873,5422336613,10291023735,1125782141,,,,,,,False,...,,,,,,,,,"LINESTRING (2223868.217 1395025.098, 2223826.0...",1.125780e+09
79874,10291023735,69405652,1125782142,,,,,,,False,...,,,,,,,,,"LINESTRING (2223826.064 1395054.445, 2223858.1...",1.125780e+09
79875,5422336613,10291023736,1125782143,,,,,,,False,...,,,,,,,,,"LINESTRING (2223868.217 1395025.098, 2223895.7...",1.125780e+09


# Trace Data
For map matching, we're using GPS traces that have been processed so that consecutive points are spaced a certain distance apart; coordinates that fall within this distance are dropped to improve computation time.

In [16]:
# Load all of the reduced-spacing traces from the pickle in the traces directory.
reduced_traces_path = traces_fp / 'reduced_spacing.pkl'
with reduced_traces_path.open('rb') as traces_fh:
    coords_dict = pickle.load(traces_fh)

In [17]:
# Load the pickled trips table; its 'tripid' column is used to select traces.
trips_path = traces_fp / 'trips_3.pkl'
with trips_path.open('rb') as trips_fh:
    trips_df = pickle.load(trips_fh)

In [18]:
# Keep only the traces whose trip id appears in trips_df.
# The ids are collected into a set once, so each membership test is O(1) instead of
# rebuilding and scanning the full id list for every trace.
kept_tripids = set(trips_df['tripid'].tolist())
coords_dict = {key: item for key, item in coords_dict.items() if key in kept_tripids}

In [19]:
# Number of traces remaining after the filter above.
len(coords_dict)

2765

The matching setting dictionary stores all of the settings used for map matching, so they can be retrieved later for study

In [20]:
# Reuse the settings log from earlier runs when one exists; otherwise start with an empty table.
settings_log_path = export_fp / 'matching_settings_df.pkl'
if not settings_log_path.exists():
    matching_settings_df = pd.DataFrame()
else:
    with settings_log_path.open('rb') as settings_fh:
        matching_settings_df = pickle.load(settings_fh)

[Leuven Map Matching documentation](https://github.com/wannesm/LeuvenMapMatching/blob/9ca9f0b73665252f2ee492fae9dd243feef2f39d/leuvenmapmatching/matcher/distance.py)

Attach matching index to the match dict instead

In [21]:
from importlib import reload
reload(map_match)  # pick up edits to map_match without restarting the kernel

# Settings used for the batch (multi) match; stored so they can be retrieved later for study.
matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
    'non_emitting_states': False, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 55, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #dist_noise for non-emitting states (if not given, set to dist_noise)
    'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False
}

# Append the current settings to the log; if that creates a duplicate row they were
# already logged, so report it and keep only the first occurrence.
row = pd.DataFrame([matching_settings])
matching_settings_df = pd.concat([matching_settings_df,row],ignore_index=True)
if matching_settings_df.duplicated().any():
    print('Settings have been used before')
matching_settings_df.drop_duplicates(inplace=True)

#check if there are existing matches, using these settings


#use this in the qaqc section to line up the ratings with the settings used
# Look up the log row that holds the current settings. The comparison is aligned on
# column labels (not position), so it still works when the stored log lists its
# columns in a different order than the dict above.
current_settings = row.iloc[0].reindex(matching_settings_df.columns)
is_current = matching_settings_df.eq(current_settings, axis='columns').all(axis=1)
# .item() raises unless exactly one row matches.
matching_index = matching_settings_df[is_current].index.item()

Settings have been used before


# Single Match Example


In [22]:
# Settings for the single-trace example; passed to map_match.leuven_match further down.
single_matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.001, #Minimum normalized probability of observations (ema)
    'non_emitting_states': True, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 50, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #dist_noise for non-emitting states (if not given, set to dist_noise)
    'restrained_ne': False, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False
}


### Notes
- 33393 has issues with the starting match since it starts from the train station, only a really low min_prob_norm helps

In [23]:
import random

# Pick an arbitrary trip id from the loaded traces to inspect.
candidate_tripids = list(coords_dict)
random_trip = random.choice(candidate_tripids)

In [24]:
# Override the random pick with trip 33393, the trip the notes above flag as having
# trouble with its starting match.
random_trip = 33393

In [25]:
# Trace (GPS points) for the selected trip.
trace = coords_dict[random_trip]

In [26]:
# try removing points until a speed above 4mph is detected
# idxmax on the boolean mask gives the label of the first True; if no point exceeds
# 4 mph it gives the first label, so nothing is dropped.
is_moving = trace['speed_mph'] > 4
first_moving_idx = is_moving.idxmax()
trace = trace.loc[first_moving_idx:, :]

In [27]:
# Match the example trace and store the result keyed by its trip id.
single_matches = {
    random_trip: map_match.leuven_match(trace, single_matching_settings, map_con, exploded_links)
}
# Display the match result.
single_matches

{33393: {'edges':        linkid  reverse_link
  0  1125635393         False,
  'last_matched': 3,
  'match_ratio': 0.022222222222222223,
  'max_lattice_width': 50,
  'trace':      sequence  tripid            datetime        lat        lon  speed_mph  \
  0           0   33393 2016-06-24 17:20:57  33.761314 -84.339436   8.032127   
  1           1   33393 2016-06-24 17:21:01  33.761309 -84.339261   9.999330   
  2           2   33393 2016-06-24 17:21:06  33.761357 -84.339023   9.530636   
  3           3   33393 2016-06-24 17:21:11  33.761350 -84.338771  10.693703   
  4           4   33393 2016-06-24 17:21:15  33.761321 -84.338569  10.851931   
  ..        ...     ...                 ...        ...        ...        ...   
  131       131   33393 2016-06-24 17:31:32  33.749136 -84.328601   0.000000   
  132       132   33393 2016-06-24 17:32:17  33.749124 -84.328600   0.000000   
  133       133   33393 2016-06-24 17:33:16  33.749074 -84.328608   0.000000   
  134       134   33393 201

In [28]:
#map_match.visualize_match(random_trip, single_matches, df_edges)

# Multi Match
Takes 3hr25min for 2,765 traces

In [29]:
# #load existing matches/if none then create a new dict
# if (export_fp/f'matched_{matching_index}.pkl').exists():
#     with (export_fp/f'matched_{matching_index}.pkl').open('rb') as fh:
#         match_dict = pickle.load(fh)
# else:
#     match_dict = {}

#     # with (export_fp/f'match_{matching_index}_{len(match_dict.keys())}_trips.pkl').open('wb') as fh:
# #     pickle.dump(match_dict,fh)

In [30]:
# Start from an empty result dict (tripid -> match result); re-running this cell discards
# any matches already held in memory.
match_dict = {}

In [31]:
# Match every trace that does not already have a result in match_dict, so the loop
# can be re-run after an interruption without redoing finished trips.
for tripid in tqdm(coords_dict):

    # already matched in an earlier (partial) run
    if tripid in match_dict:
        continue

    trace = coords_dict[tripid]
    match_dict[tripid] = map_match.leuven_match(trace,matching_settings,map_con,exploded_links)


100%|██████████| 2765/2765 [2:01:20<00:00,  2.63s/it]  


Create dataframe with the ratio of points matched, the total gps distance, the total network distance, and mean match distance

In [32]:
import time
# Scratch cell: display the current wall-clock time (seconds since the epoch).
time.time()

1717288132.006298

In [36]:
# Scratch cell: display the export directory that the results will be written to.
export_fp

WindowsPath('D:/PROJECTS/GDOT/Map_Matching')

In [37]:
# Scratch cell: display the current wall-clock time (seconds since the epoch).
time.time()

1717288197.1580367

In [38]:
# Make sure the export directory exists; exist_ok avoids the separate exists() check
# (and the window between that check and the mkdir call).
export_fp.mkdir(parents=True, exist_ok=True)

# export the matching settings tested
with (export_fp/'matching_settings_df.pkl').open('wb') as fh:
    pickle.dump(matching_settings_df,fh)

# export the matched traces (file name carries the settings index so runs can be told apart)
#TODO add date
with (export_fp/f'matched_{matching_index}.pkl').open('wb') as fh:
    pickle.dump(match_dict,fh)

# Deprecated past here

In [None]:
# ## Print matching stats
# Outputs:
# - 'edges', 
# - 'last_matched', 
# - 'match_ratio', 
# - 'max_lattice_width', 
# - 'trace', 
# - 'match_lines', 
# - 'interpolated_points', 
# - 'match_time_sec', 
# - 'gps_distance', 
# - 'time', 
# - 'settings'
# matching_index = 0
# with (export_fp/f'matched_{matching_index}.pkl').open('rb') as fh:
#     match_dict = pickle.load(fh)    
# #how many failed matches
# failed = {key:item for key, item in match_dict.items() if isinstance(item,str)}
# print(len(failed.keys()),'failed matches')
# success = {key:item for key, item in match_dict.items() if isinstance(item,str) == False}
# length_dict = {row['linkid']:row['geometry'].length for idx, row in df_edges[['linkid','geometry']].drop_duplicates().iterrows()}
# results = [[key,item['match_ratio'],item['gps_distance'],item['edges']['linkid'].map(length_dict).sum(),item['match_lines']['length'].mean()] for key, item in success.items()]
# results = pd.DataFrame(results,columns=['tripid','match_ratio','gps_distance','network_distance','mean_match_distance'])
# results
# print((results['match_ratio']>0.9).sum(),'/',results.shape[0],'trips had a quality match')
# trips_df_export = trips_df.reset_index(drop=True).merge(results,on='tripid')
# with (export_fp/'matched_trips_df.pkl').open('wb') as fh:
#     pickle.dump(trips_df_export,fh)
# # Examine matches
# import random

# def get_random_key(dictionary):
#     random_key =  random.choice(list(dictionary.keys()))
#     #recursion?
#     if isinstance(dictionary.get(random_key),str):
#         random_key = get_random_key(dictionary)
#     return random_key
# results['match_ratio'].hist()
# (results['match_ratio']>0.8).sum()
# pool = results.loc[(results['match_ratio']>.9) & (results['match_ratio']<1),'tripid'].tolist()
# #tripid = get_random_key(match_dict)
# tripid = random.choice(pool)
# map_match.visualize_match(tripid, match_dict, df_edges)

# # Post Match Cleanup (in development)
# Some trips have out-and-backing and or take the wrong link if two nodes have more than one link between them. This step goes through and cleans these trips.

# For out and backing:
# Subset network graph to only the edges between origin and destination and then use Dijkstra's algorithim to return the shortest path. Check the Frechet distance to determine if cleaned match better represents trajectory than previously.
# trip_w_out_and_backing = 550
# map_match.visualize_match(trip_w_out_and_backing, match_dict, df_edges)

# match_dict[801]['edges']
# gpd.GeoDataFrame(match_dict[801]['edges'].merge(edges,on='linkid')).explore()
# #TODO use to make network graph, then subset with trip
# import networkx as nx

# MDG = nx.MultiDiGraph()  # Create a MultiDiGraph
#     #itertuples used to maintain the type
# for idx, row in exploded_edges.iterrows():
#     #edge_data = {linkid: row[2],'reverse_link': False, 'azimuth': row[4]}
#     MDG.add_edge(int(row['A']), int(row['B']), **{'weight': row['length_ft']})#**edge_data)  # Add edge with linkid attribute
#     #add reverse link if oneway is not true
#     MDG.add_edge(int(row['B']), int(row['A']), **{'weight': row['length_ft']})
#     # if row[3] == False:
#     #     edge_data['reverse_link'] = True 
#     #     #reverse the azimuth
#     #     edge_data['azimuth'] = row[5]
#     #     MDG.add_edge(row[1], row[0], **edge_data)

# #exploded_edges, exploded_nodes
# tripid = 801

# test = match_dict[801]['edges'].merge(edges,on='linkid')
# sub_nodes = test['A'].append(test['B']).unique().tolist()
# #get start and end linkid
# start = match_dict[tripid]['edges'].iloc[0,:]
# end = match_dict[tripid]['edges'].iloc[-1,:]

# #get start and end node
# start_a_b = edges.loc[edges['linkid']==start['linkid'],['A','B']]
# end_a_b = edges.loc[edges['linkid']==end['linkid'],['A','B']]

# if start['forward']:
#     start = start_a_b['A'].item()
# else:
#     start = start_a_b['B'].item()

# if end['forward']:
#     end = end_a_b['B'].item()
# else:
#     end = end_a_b['A'].item()
# sub_nodes[0]
# start
# start in sub_nodes
# end in sub_nodes
# path
# subgraph = MDG.subgraph(sub_nodes)
# length, path = nx.single_source_dijkstra(subgraph,start,end,weight='weight')

# #turn to edge list
# edge_list = [(path[i],path[i+1]) for i in range(len(path)-1)]
# edge_df = pd.DataFrame(edge_list,columns=['A','B'])
# forward = pd.merge(edge_df,edges[['A','B','linkid','geometry']],on=['A','B'])#[['linkid','A','B','geometry']]
# forward
# reverse = pd.merge(edge_df,edges[['A','B','linkid','geometry']],left_on=['B','A'],right_on=['A','B'])[['linkid','A','B','geometry']]
# shortest_path = pd.concat([forward,reverse],ignore_index=True)
# shortest_path = shortest_path.loc[shortest_path.groupby(['A','B'])['length_ft'].idxmin()]
# gpd.GeoDataFrame(shortest_path).explore()
# #TODO deal with duplicate links
# shortest_path.explore()
# For multi-edges, buffer the 2+ edges and take the one that hits the most gps points
# import numpy as np
# exploded_edges['A_sort'] = np.sort(exploded_edges[['A','B']].to_numpy())[:,0]
# exploded_edges['B_sort'] = np.sort(exploded_edges[['A','B']].to_numpy())[:,1]
# duplicate_edges = exploded_edges.loc[exploded_edges[['A_sort','B_sort']].duplicated(keep=False),'linkid'].unique()
# gps_points = match_dict[tripid]['trace']
# # matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
# # matched_trip = gpd.GeoDataFrame(matched_trip)
# # from shapely.ops import MultiLineString
# # buffered_geo = MultiLineString(matched_trip.geometry.tolist()).buffer(100)
# # match['trace'].intersects(buffered_geo).sum()
# # # export 
# # with (export_fp/'sample_matched.pkl').open('wb') as fh:
# #     pickle.dump(match_dict,fh)
# # with (export_fp/'sample_matched.pkl').open('rb') as fh:
# #     match_dict = pickle.load(fh)

# # Visualization

# import folium
# import geopandas as gpd
# from folium.plugins import MarkerCluster, PolyLineTextPath
# from folium.map import FeatureGroup

# #tripid = 29837#7257#9806#30000#8429

# # Your GeoDataFrames
# matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
# matched_trip = gpd.GeoDataFrame(matched_trip)
# gps_points = match_dict[tripid]['trace']
# match_lines = match_dict[tripid]['match_lines']

# #get the start and end point for mapping
# start_pt = gps_points.to_crs(epsg='4326').loc[gps_points['sequence'].idxmin(),'geometry']
# end_pt = gps_points.to_crs(epsg='4326').loc[gps_points['sequence'].idxmax(),'geometry']

# # reproject and get the center of the map
# x_mean = gps_points.to_crs(epsg='4326')['geometry'].x.mean()
# y_mean = gps_points.to_crs(epsg='4326')['geometry'].y.mean()

# # Create a Folium map centered around the mean of the GPS points
# center = [y_mean,x_mean]
# mymap = folium.Map(location=center, zoom_start=14)

# # Convert GeoDataFrames to GeoJSON
# matched_trip_geojson = matched_trip[['linkid','geometry']].to_crs(epsg='4326').to_json()
# gps_points_geojson = gps_points[['sequence','geometry']].to_crs(epsg='4326').to_json()
# match_lines_geojson = match_lines[['sequence','match_lines']].to_crs(epsg='4326').to_json()

# # Create FeatureGroups for each GeoDataFrame
# matched_trip_fg = FeatureGroup(name='Matched Trip')
# gps_points_fg = FeatureGroup(name='GPS Points')
# match_lines_fg = FeatureGroup(name='Match Lines')

# # Add GeoJSON data to FeatureGroups
# folium.GeoJson(matched_trip_geojson, name='Matched Trip', style_function=lambda x: {'color': 'red'}).add_to(matched_trip_fg)

# # Add circles to the GPS Points FeatureGroup
# for idx, row in gps_points.iterrows():
#     folium.Circle(location=[row['lat'], row['lon']], radius=5, color='grey', fill=True, fill_color='grey').add_to(gps_points_fg)

# # Add GeoJSON data to Match Lines FeatureGroup with transparent and grey style
# folium.GeoJson(match_lines_geojson, name='Match Lines', style_function=lambda x: {'color': 'grey', 'opacity': 0.5}).add_to(match_lines_fg)

# # Add FeatureGroups to the map
# matched_trip_fg.add_to(mymap)
# gps_points_fg.add_to(mymap)
# match_lines_fg.add_to(mymap)

# # Add start and end points with play and stop buttons
# start_icon = folium.Icon(color='green',icon='play',prefix='fa')
# end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
# folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
# folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

# # Add layer control to toggle layers on/off
# folium.LayerControl().add_to(mymap)

# # Add legend with statistics
# #TODO what happened to duration
# legend_html = f'''
#     <div style="position: fixed; 
#             bottom: 5px; left: 5px; width: 300px; height: 250px; 
#             border:2px solid grey; z-index:9999; font-size:14px;
#             background-color: white;
#             opacity: 0.9;">
#     &nbsp; <b>Trip ID: {tripid} </b> <br>
#     &nbsp; <b> Match Date: {match_dict[tripid]['time']} </b> <br>
#     &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i>,
#     End Point &nbsp; <i class="fa fa-stop" style="color:red"></i> <br>
    
#     &nbsp; Matched Path &nbsp; <div style="width: 20px; height: 5px; background-color: red; display: inline-block;"></div> <br>
#     &nbsp; Match Lines Path &nbsp; <div style="width: 20px; height: 5px; background-color: gray; display: inline-block;"></div> <br>
 
#     &nbsp; Points Matched: {match_dict[tripid]['last_matched']}/{match_dict[tripid]['trace'].shape[0]} <br>
#     &nbsp; Match Ratio: {match_dict[tripid]['match_ratio']:.2f} <br>
#     &nbsp; GPS Distance: {match_dict[tripid]['gps_distance']:.1f} ft. <br>
#     &nbsp; Matched Trace Distance: {matched_trip.length.sum():.0f} ft. <br>
#     &nbsp; Mean Matching Distance: {match_dict[tripid]['match_lines'].length.mean():.0f} ft. 

#     </div>
#     '''
# mymap.get_root().html.add_child(folium.Element(legend_html))

# # Save the map to an HTML file or display it in a Jupyter notebook
# #mymap.save('map.html')
# # mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
# mymap  # Uncomment if you are using Jupyter notebook

# #TODO add in the legend with trip info and then we're golden

# match_dict[tripid].keys()
# match_dict[tripid]['match_ratio']
# match_dict[tripid].keys()
# help(InMemMap)
# help(DistanceMatcher)
# :param map_con: Map object to connect to map database
#         :param obs_noise: Standard deviation of noise
#         :param obs_noise_ne: Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
#         :param max_dist_init: Maximum distance from start location (if not given, uses max_dist)
#         :param max_dist: Maximum distance from path (this is a hard cut, min_prob_norm should be better)
#         :param min_prob_norm: Minimum normalized probability of observations (ema)
#         :param non_emitting_states: Allow non-emitting states. A non-emitting state is a state that is
#             not associated with an observation. Here we assume it can be associated with a location in between
#             two observations to allow for pruning. It is advised to set min_prob_norm and/or max_dist to avoid
#             visiting all possible nodes in the graph.
#         :param non_emitting_length_factor: Reduce the probability of a sequence of non-emitting states the longer it
#             is. This can be used to prefer shorter paths. This is separate from the transition probabilities because
#             transition probabilities are averaged for non-emitting states and thus the length is also averaged out.
#         :param max_lattice_width: Restrict the lattice (or possible candidate states per observation) to this value.
#             If there are more possible next states, the states with the best likelihood so far are selected.

#         :param dist_noise: Standard deviation of difference between distance between states and distance
#             between observatoins. If not given, set to obs_noise
#         :param dist_noise_ne: If not given, set to dist_noise
#         :param restrained_ne: Avoid non-emitting states if the distance between states and between
#             observations is close to each other.
#         :param avoid_goingback: If true, the probability is lowered for a transition that returns back to a
#             previous edges or returns to a position on an edge.

# # #get list of coords
# # gps_trace = list(zip(trace.geometry.y,trace.geometry.x))

# # #perform matching
# # states, last_matched = matcher.match(gps_trace)
# # only_nodes = matcher.path_pred_onlynodes

# # print("States\n------")
# # print(states)
# # print("Nodes\n------")
# # print(only_nodes)
# # print("")
# # matcher.print_lattice_stats()
# # fig, ax = plt.subplots(1, 1)
# # mmviz.plot_map(map_con, matcher=matcher,
# #                ax=ax,
# #                show_labels=True, show_matching=True, show_graph=False,
# #                filename="my_plot.png")
# # test = matcher.lattice[4]
# # m = max(test.values_all(), key=lambda m: m.logprob) # for the 4th point get the one with the highest logprob

# # m.logprob
# # import numpy as np
# # t = {x.cname.split('_')[0] + '_' + x.cname.split('_')[1]: x.logprob for x in test.values_all()}
# # check = pd.DataFrame.from_dict(t,orient='index',columns=['logprob']).sort_values('logprob',ascending=False)
# # check
# # (check.index == '5424132517_7151205661').sum()
# # testing = trace.copy()
# # testing.geometry = testing.buffer(1000)
# # intersect = gpd.overlay(edges,testing)
# # intersect[(intersect['A_B'] == '5424132517_7151205661') & (intersect['sequence'] == 4)]

# # #reduce the states size with match_nodes
# # reduced_states = list(set(edges))

# # #calculate the match ratio
# # match_ratio = last_matched / (len(gps_trace)-1)
    
# # #retreive matched edges from network
# # geos_list = [geos_dict.get(id,0) for id in reduced_states]

# # #turn into geodataframe
# # matched_trip = gpd.GeoDataFrame(data={'A_B':reduced_states,'geometry':geos_list},geometry='geometry',crs='epsg:2240')

# # #turn tuple to str
# # matched_trip['A_B'] = matched_trip['A_B'].apply(lambda row: f'{row[0]}_{row[1]}')

# # #reset index to add an edge sequence column
# # matched_trip.reset_index().rename(columns={'index':'edge_sequence'},inplace=True)

# # trace['interpolated_point'] = pd.Series([ Point(x.edge_m.pi) for x in matcher.lattice_best ])
# # trace = trace.loc[0:last_matched]
# # trace['match_lines'] = trace.apply(lambda row: LineString([row['geometry'],row['interpolated_point']]),axis=1)

# # interpolated_points = trace[['sequence','interpolated_point']]
# # interpolated_points = gpd.GeoDataFrame(interpolated_points,geometry='interpolated_point')

# # match_lines = trace[['sequence','match_lines']]
# # match_lines = gpd.GeoDataFrame(match_lines,geometry='match_lines')
# # match_lines['length'] = match_lines.length


# # interpolated_points.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='interpolated_points')
# # match_lines.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='match_lines')

# # #%%









