# Map Matching

## TODO (in order of priority):
1. Make sure that the cleanup code works (come back to this)
    1. Want to get rid of the really small out and backs
    1. Have method for doing this, but worried that it will remove valid out and backing
    1. Frechet distance could be a way to tell if the cleaning removes too much out and backing? Or some sort of overlap metric?
1. Use geo_dict to assemble edges for calculating Frechet distance
    1. Frechet distance will give a decent measure of how close the match is to the trace
    1. Will need to reverse link geometry if passing the other way otherwise it won't work
1. Match all the traces and export for impedance calibration
    1. The process flow into this step is still uncertain


In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
#from leuvenmapmatching import visualization as mmviz
import pickle
import time
import datetime
from pathlib import Path

from tqdm import tqdm
from shapely.ops import Point, LineString
import matplotlib.pyplot as plt

from importlib import reload

from bikewaysim.paths import config
from bikewaysim import map_match
from bikewaysim.network import prepare_network

# Prepare Network
Bring in the pre-processed network and filter as needed to remove links that are unlikely to be used for routing.

### Break Apart Multi-Edges
Occasionally, there will be multiple edges between two nodes. This commonly occurs where local roads connect to main roads and form u-shapes. Leuven Map Matching cannot handle multi-edges because the only information stored in the network graph is the nodes. Usually, it's obvious which edge should be retained, but an easy way to still include both edges for map matching is to break these edges at their centroid into smaller edges with new nodes. This is done by ``explode_network_midpoint`` in the ``prepare_mapmatch_network`` function. The centroid nodes created in this step are given a unique id. The map matching function removes these new nodes so that they don't appear in the results.

### Iterative Matching Approach
Sometimes matching can be improved just by limiting what can be matched to. For this, we want to start by mapping to the most restrictive network. Then we add more links if a match hasn't been found.

In [None]:
# Read the pre-processed network from the GeoPackage: the 'edges' layer
# becomes `links` and the 'nodes' layer becomes `nodes` (read in that order).
links, nodes = (
    gpd.read_file(config['network_fp']/'final_network.gpkg', layer=layer_name)
    for layer_name in ('edges', 'nodes')
)

## Decide which link types to allow for routing
Suggest starting with the most restrictive network. Just bike, pedestrian, and road links. These do not need to match up to the calibration network. Anything included in the matching network that actually gets routed on will be included in calibration network automatically.

In [None]:
# Remove bike infrastructure added AFTER 2016 so traces are not matched to it.
# A link is dropped when it is a cycletrack or multi use path, is not a road
# link, and has a known build year later than 2016.
# (presumably the GPS traces predate these facilities -- confirm the cutoff year)
after = (
    links['facility'].isin(['cycletrack', 'multi use path'])
    & (links['link_type'] != 'road')
    & links['year'].notna()
    & (links['year'] > 2016)
)
# links[after].drop(columns=['all_tags']).explore()
# Keep everything that is NOT flagged. The explicit copy makes `links` an
# independent frame, so later in-place assignments on it do not act on a
# slice of the original (avoids pandas' SettingWithCopyWarning).
links = links[~after].copy()

In [None]:
# Link types passed to prepare_mapmatch_network for the first matching round:
# bike, pedestrian and road links, plus service and connector links.
link_types_allowed = ['bike','pedestrian','road','service','connector']
# Print the link types present in the network for reference
print(links['link_type'].unique())

#TODO add the ability to go the wrongway on residential streets ONLY
# For now, every link whose 'highway' value is listed in allow_wrongway_on gets
# its 'oneway' flag set to False.
# (presumably the matching network treats oneway == False links as two-way -- TODO confirm)
allow_wrongway_on = ['residential','living_street']
links.loc[links['highway'].isin(allow_wrongway_on),'oneway'] = False

In [None]:
# Build the map-matching network from the filtered links and nodes, restricted
# to link_types_allowed. Returns the exploded links, the exploded nodes and the
# map object (map_con) that is handed to the matcher.
# NOTE(review): the meaning of the final positional argument (False) is not
# visible in this notebook -- confirm against map_match.prepare_mapmatch_network.
exploded_links, exploded_nodes, map_con = map_match.prepare_mapmatch_network(links,nodes,link_types_allowed,False)

In [None]:
# # OPTIONAL inspect exploded network
# exploded_nodes.to_file(config['network_fp']/'matching_network.gpkg',layer='exploded_nodes')
# exploded_links.to_file(config['network_fp']/'matching_network.gpkg',layer='exploded_links')

# Trace Data
For map matching, we're using GPS traces that have been processed so that consecutive points are spaced a certain distance apart; coordinates that fall within this spacing are dropped to improve computation time.

In [None]:
# Load all processed GPS traces (dict keyed by trip id)
with (config['cycleatl_fp']/'reduced_spacing.pkl').open('rb') as fh:
    coords_dict = pickle.load(fh)

# Import the trips that we want to match
with (config['cycleatl_fp']/'trips_4.pkl').open('rb') as fh:
    trips_df = pickle.load(fh)

# Subset the coords dict to just the trips we're trying to match.
# The id set is built once so each membership test is O(1) instead of
# rebuilding and scanning the full trip id list for every key.
tripids_to_match = set(trips_df['tripid'].tolist())
coords_dict = {key: item for key, item in coords_dict.items() if key in tripids_to_match}

# Drop points recorded at 1 mph or slower (if that hasn't been done already),
# filtering each trace only once, then discard traces with no points left.
coords_dict = {key: item[item['speed_mph'] > 1] for key, item in coords_dict.items()}
coords_dict = {key: item for key, item in coords_dict.items() if item.shape[0] > 0}

print('Map matching',len(coords_dict),'trips')

In [None]:
#run the privacy filter 
def privacy_distance(df,privacy_dist=500):
    """Trim the points near the start and end of a trace.

    Parameters
    ----------
    df : frame with a 'geometry' column of point geometries, ordered from the
        first to the last recorded point. The index must support ``- 1``
        (assumed to be an integer index -- TODO confirm).
    privacy_dist : buffer radius around the first and last point, in the units
        of the geometry's coordinate system.

    Returns
    -------
    The rows from the first point that leaves the buffer around the first
    point up to the row label just before the first point that falls inside
    the buffer around the last point. Returns None when every point lies
    within twice ``privacy_dist`` of the first point, or when nothing is left
    after trimming.
    """
    points = df['geometry']
    origin = points.iloc[0]
    destination = points.iloc[-1]

    # The whole trace stays within twice the privacy distance of its first
    # point: there is nothing worth keeping.
    if points.intersects(origin.buffer(privacy_dist*2)).all():
        return None

    near_origin = points.intersects(origin.buffer(privacy_dist))
    near_destination = points.intersects(destination.buffer(privacy_dist))

    # idxmin on a boolean series gives the first label that is False,
    # idxmax gives the first label that is True.
    first_cut = near_origin.idxmin()
    last_cut = near_destination.idxmax() - 1

    trimmed = df.loc[first_cut:last_cut,:]
    if trimmed.shape[0] == 0:
        return None
    return trimmed
# Apply the privacy trim to every trace. privacy_distance returns None for
# traces that are trimmed away entirely, so those trip ids stay in the dict
# with a None value.
# NOTE(review): confirm that map_match.leuven_match copes with a None trace,
# or drop the None entries before matching.
coords_dict = {key:privacy_distance(item) for key, item in coords_dict.items()}

# Single Match Example


In [None]:
# Settings used only for this single-trip example (looser than the batch settings)
single_matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1500, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.001, #Minimum normalized probability of observations (ema)
    'non_emitting_states': True, #Allow non-emitting states
    'non_emitting_length_factor': 0.001, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 20, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #dist_noise for non-emitting states (set to dist_noise if not given)
    'restrained_ne': False, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': False, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False
}
from importlib import reload
reload(map_match)
import random

# Pick one trip at random (or hard-code a trip id below) and match it
random_trip = random.choice(list(coords_dict.keys()))
# random_trip = 3079
trace = coords_dict[random_trip]
# trace = trace[trace['speed_mph']>1]
single_matches = {}
single_matches[random_trip] = map_match.leuven_match(trace,single_matching_settings,map_con,exploded_links)

# The guard must inspect the match itself, not the wrapping dict: the dict is
# never a str, so the old check always passed. (A failed match appears to be
# reported as a string instead of a result dict -- confirm in map_match.)
# `m` is reset first so that a failed match never displays a stale map left
# over from an earlier run and never raises NameError on the last line.
m = None
if not isinstance(single_matches[random_trip], str):
    m = map_match.visualize_match(random_trip, single_matches, links, config)
else:
    print('Match failed for trip', random_trip, '-', single_matches[random_trip])
# NOTE(review): .loc looks the trip up by index label; this assumes trips_df
# is indexed by trip id -- confirm.
print(trips_df.loc[random_trip,['trip_type','description','avg_speed_mph']])
m

# Map Matching Settings
Check the [Leuven documentation](https://github.com/wannesm/LeuvenMapMatching/blob/9ca9f0b73665252f2ee492fae9dd243feef2f39d/leuvenmapmatching/matcher/distance.py) for more information.

In [None]:
from importlib import reload
reload(map_match)

# matching_settings_df stores every settings combination used for map matching
# (one row per combination) so they can be retrieved later for study.
# Load the existing table if one has been saved, otherwise start an empty one.
if (config['matching_fp'] / 'matching_settings_df.pkl').exists():
    with (config['matching_fp'] / 'matching_settings_df.pkl').open('rb') as fh:
        matching_settings_df = pickle.load(fh)
else:
    matching_settings_df = pd.DataFrame()

# Settings for this batch run. The last two entries are bookkeeping values
# recorded alongside the matcher parameters.
matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
    'non_emitting_states': True, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 20, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #dist_noise for non-emitting states (set to dist_noise if not given)
    'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False,
    'link_types': str(np.sort(link_types_allowed)), # sorted so the same set of link types always yields the same string
    'allow_wrongway': False # NOTE(review): entered by hand, not derived from the network preparation -- confirm it reflects the network used
}
# Append these settings to matching_settings_df as a new row, report whether the
# same combination was already present, then drop the duplicate row. With
# pandas' default keep='first' a repeated combination keeps its earlier index.
row = pd.DataFrame([matching_settings])
matching_settings_df = pd.concat([matching_settings_df,row],ignore_index=True)
if matching_settings_df.duplicated().any():
    print('Settings have been used before')
matching_settings_df.drop_duplicates(inplace=True)

#TODO check if there are existing matches, using these settings (not implemented yet)

# Index of the row equal to the current settings; it is used in the exported
# file name and in the qaqc section to line up the ratings with the settings used.
# .item() raises unless exactly one row matches.
matching_index = matching_settings_df[(matching_settings_df == tuple(row.loc[0,:])).all(axis=1)].index.item()

In [None]:
# Match every trace with the batch settings (tqdm shows progress); match_dict
# maps each trip id to whatever map_match.leuven_match returns for it.
match_dict = {tripid:map_match.leuven_match(trace,matching_settings,map_con,exploded_links) for tripid, trace in tqdm(coords_dict.items(),total=len(coords_dict))}
# Timing note from an earlier run: 553 at 2hr:10min (presumably 553 trips -- confirm)

Create dataframe with the ratio of points matched, the total gps distance, the total network distance, and mean match distance

In [None]:
# Make sure the output folder exists. exist_ok=True makes this a no-op when the
# folder is already there, so no separate existence check is needed.
config['matching_fp'].mkdir(parents=True, exist_ok=True)

# export the matching settings tested
with (config['matching_fp']/'matching_settings_df.pkl').open('wb') as fh:
    pickle.dump(matching_settings_df,fh)

# export the matched traces; the file name carries the settings index so the
# matches can be traced back to the settings that produced them
#TODO add date
print('saving as',f'matched_{matching_index}.pkl')
with (config['matching_fp']/f'matched_{matching_index}.pkl').open('wb') as fh:
    pickle.dump(match_dict,fh)

# Round 2, add more links

In [None]:
from importlib import reload
reload(map_match)

# load matches remove bad matches
index = 2  # NOTE(review): not used anywhere in the visible cells
matching_index = 4  # settings index of the round-one matches to reload

# load the round-one matches saved under that settings index
with (config['matching_fp']/f'matched_{matching_index}.pkl').open('rb') as fh:
    match_dict = pickle.load(fh)

#load all traces
with (config['cycleatl_fp']/'reduced_spacing.pkl').open('rb') as fh:
    coords_dict = pickle.load(fh)

with (config['cycleatl_fp']/'trips_4.pkl').open('rb') as fh:
    trips_df = pickle.load(fh)
# Build the id set once instead of rebuilding the full list for every key
tripids_to_match = set(trips_df['tripid'].tolist())
coords_dict = {key:item for key, item in coords_dict.items() if key in tripids_to_match}
# NOTE(review): unlike round one, the speed filter and the privacy trim are not
# re-applied to these freshly loaded traces -- confirm this is intended.

# get bad matches
cutoff = 0.9
# NOTE(review): a later cell unpacks the 2nd and 3rd return values in the
# opposite order (substandard, failed). The union below is unaffected, but
# confirm the order against map_match.mapmatch_results.
success, failed_matches, substandard_matches, match_ratios = map_match.mapmatch_results(match_dict,cutoff)

# subset the coords dict to the trips that failed or matched below the cutoff;
# the combined id set is built once rather than once per key
rematch_ids = set(failed_matches+substandard_matches)
coords_dict = {key:item for key,item in coords_dict.items() if key in rematch_ids}
print(len(coords_dict),'to match')

In [None]:
# Load the OSM links and nodes and give their id columns the generic names
# (linkid / A / B for links, N for nodes) used by the rest of the notebook.
links = gpd.read_file(config['network_fp']/'networks.gpkg',layer='osm_links').rename(
    columns={'osm_linkid':'linkid','osm_A':'A','osm_B':'B'}
)
nodes = gpd.read_file(config['network_fp']/'networks.gpkg',layer='osm_nodes').rename(
    columns={'osm_N':'N'}
)

In [None]:
# Print the link types present in the OSM network for reference
print(links['link_type'].unique())
# Link types allowed in the round-two matching network
# (same as round one except that 'connector' is not listed)
link_types_allowed = ['bike','pedestrian','road','service']

In [None]:
# Rebuild the map-matching network from the OSM links and nodes, restricted to
# link_types_allowed. This overwrites the round-one exploded_links,
# exploded_nodes and map_con.
# NOTE(review): the meaning of the final positional argument (False) is not
# visible in this notebook -- confirm against map_match.prepare_mapmatch_network.
exploded_links, exploded_nodes, map_con = map_match.prepare_mapmatch_network(links,nodes,link_types_allowed,False)

- Try increasing the lattice width and the obs_noise

In [None]:
# Load the table of previously used settings (one row per combination) if it
# has been saved, otherwise start an empty one.
if (config['matching_fp'] / 'matching_settings_df.pkl').exists():
    with (config['matching_fp'] / 'matching_settings_df.pkl').open('rb') as fh:
        matching_settings_df = pickle.load(fh)
else:
    matching_settings_df = pd.DataFrame()

# Round-two settings: obs_noise and max_lattice_width are raised relative to
# the round-one batch settings (200 vs 50 and 50 vs 20).
matching_settings = {
    'obs_noise': 200, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
    'non_emitting_states': True, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 50, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #dist_noise for non-emitting states (set to dist_noise if not given)
    'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False,
    'link_types': str(np.sort(link_types_allowed)), # sorted so the same set of link types always yields the same string
    'allow_wrongway': False # NOTE(review): entered by hand, not derived from the network preparation -- confirm it reflects the network used
}

# Append these settings to matching_settings_df as a new row, report whether the
# same combination was already present, then drop the duplicate row. With
# pandas' default keep='first' a repeated combination keeps its earlier index.
row = pd.DataFrame([matching_settings])
matching_settings_df = pd.concat([matching_settings_df,row],ignore_index=True)
if matching_settings_df.duplicated().any():
    print('Settings have been used before')
matching_settings_df.drop_duplicates(inplace=True)

#TODO check if there are existing matches, using these settings (not implemented yet)

# Index of the row equal to the current settings; it is used in the exported
# file name and in the qaqc section to line up the ratings with the settings used.
# .item() raises unless exactly one row matches.
matching_index = matching_settings_df[(matching_settings_df == tuple(row.loc[0,:])).all(axis=1)].index.item()

In [None]:
# Re-match the remaining traces on the round-two network with the round-two
# settings. This replaces match_dict, so it now holds only the re-matched trips.
match_dict = {tripid:map_match.leuven_match(trace,matching_settings,map_con,exploded_links) for tripid, trace in tqdm(coords_dict.items(),total=len(coords_dict))}

In [None]:
# get bad matches from the round-two results
reload(map_match)
cutoff = 0.9
# NOTE(review): the earlier round-two loading cell unpacks the same call as
# (success, failed_matches, substandard_matches, match_ratios), i.e. with the
# 2nd and 3rd values swapped. One of the two orders is wrong; failed_matches is
# sampled below, so confirm the order against map_match.mapmatch_results.
success_matches, substandard_matches, failed_matches, match_ratios = map_match.mapmatch_results(match_dict,cutoff)

## Remaining Issues
- on the beltline trip 262 starts midway and because it's not near any nodes it appears to just fail outright
- should i try breaking apart certain links to have more nodes?
- trip 17722 still starts out in a parking lot need the parking lot links for that or start removing points from the origin until a match is made
- for trip 12479, McDonough Blvd is under construction now but it wasn't back then. For the network, need to add back in the few links under construction
- not sure why trip 10126 isn't matching


In [None]:
import random
from importlib import reload
reload(map_match)
# Pick one failed trip at random (or hard-code a trip id below) and plot it.
# random.choice raises IndexError when failed_matches is empty.
tripid = random.choice(failed_matches)
# tripid = 262
print(tripid)
map_match.visualize_failed_match(tripid, coords_dict, links, config)

In [None]:
# Try re-matching the selected trip with a wider lattice and more distance noise
trace = coords_dict[tripid]
# NOTE(review): these assignments modify matching_settings in place after
# matching_index was computed, so the settings row saved for this index no
# longer describes the settings used for this re-match.
matching_settings['max_lattice_width'] = 50
matching_settings['dist_noise'] = 200
match = map_match.leuven_match(trace,matching_settings,map_con,exploded_links)

In [None]:
# List the fields of the re-match result
# (presumably a failed match is a string, in which case .keys() raises AttributeError -- confirm)
match.keys()

In [None]:
# Replace this trip's entry in match_dict with the re-match result
match_dict[tripid] = match

In [None]:
from importlib import reload
reload(map_match)
# Draw the re-matched trip against the network links and display the map
html_map = map_match.visualize_match(tripid, match_dict,links,config)
html_map

In [None]:
# Make sure the output folder exists. exist_ok=True makes this a no-op when the
# folder is already there, so no separate existence check is needed.
config['matching_fp'].mkdir(parents=True, exist_ok=True)

# export the matching settings tested
with (config['matching_fp']/'matching_settings_df.pkl').open('wb') as fh:
    pickle.dump(matching_settings_df,fh)

# export the matched traces; the "_2" suffix marks the second matching round
#TODO add date for second round
with (config['matching_fp']/f'matched_{matching_index}_2.pkl').open('wb') as fh:
    pickle.dump(match_dict,fh)

# Deprecated past here





Tmw:
- Create functions to reduce clutter
- figure out why there are fewer trips now
- examine some of the new matches
- have a crossing one
- add a wrongway one 
- need to conflate bike paths with road

In [None]:
# ## Print matching stats
# Outputs:
# - 'edges', 
# - 'last_matched', 
# - 'match_ratio', 
# - 'max_lattice_width', 
# - 'trace', 
# - 'match_lines', 
# - 'interpolated_points', 
# - 'match_time_sec', 
# - 'gps_distance', 
# - 'time', 
# - 'settings'
# matching_index = 0
# with (export_fp/f'matched_{matching_index}.pkl').open('rb') as fh:
#     match_dict = pickle.load(fh)    
# #how many failed matches
# failed = {key:item for key, item in match_dict.items() if isinstance(item,str)}
# print(len(failed.keys()),'failed matches')
# success = {key:item for key, item in match_dict.items() if isinstance(item,str) == False}
# length_dict = {row['linkid']:row['geometry'].length for idx, row in df_edges[['linkid','geometry']].drop_duplicates().iterrows()}
# results = [[key,item['match_ratio'],item['gps_distance'],item['edges']['linkid'].map(length_dict).sum(),item['match_lines']['length'].mean()] for key, item in success.items()]
# results = pd.DataFrame(results,columns=['tripid','match_ratio','gps_distance','network_distance','mean_match_distance'])
# results
# print((results['match_ratio']>0.9).sum(),'/',results.shape[0],'trips had a quality match')
# trips_df_export = trips_df.reset_index(drop=True).merge(results,on='tripid')
# with (export_fp/'matched_trips_df.pkl').open('wb') as fh:
#     pickle.dump(trips_df_export,fh)
# # Examine matches
# import random

# def get_random_key(dictionary):
#     random_key =  random.choice(list(dictionary.keys()))
#     #recursion?
#     if isinstance(dictionary.get(random_key),str):
#         random_key = get_random_key(dictionary)
#     return random_key
# results['match_ratio'].hist()
# (results['match_ratio']>0.8).sum()
# pool = results.loc[(results['match_ratio']>.9) & (results['match_ratio']<1),'tripid'].tolist()
# #tripid = get_random_key(match_dict)
# tripid = random.choice(pool)
# map_match.visualize_match(tripid, match_dict, df_edges)

# # Post Match Cleanup (in development)
# Some trips have out-and-backing and or take the wrong link if two nodes have more than one link between them. This step goes through and cleans these trips.

# For out and backing:
# Subset network graph to only the edges between origin and destination and then use Dijkstra's algorithim to return the shortest path. Check the Frechet distance to determine if cleaned match better represents trajectory than previously.
# trip_w_out_and_backing = 550
# map_match.visualize_match(trip_w_out_and_backing, match_dict, df_edges)

# match_dict[801]['edges']
# gpd.GeoDataFrame(match_dict[801]['edges'].merge(edges,on='linkid')).explore()
# #TODO use to make network graph, then subset with trip
# import networkx as nx

# MDG = nx.MultiDiGraph()  # Create a MultiDiGraph
#     #itertuples used to maintain the type
# for idx, row in exploded_edges.iterrows():
#     #edge_data = {linkid: row[2],'reverse_link': False, 'azimuth': row[4]}
#     MDG.add_edge(int(row['A']), int(row['B']), **{'weight': row['length_ft']})#**edge_data)  # Add edge with linkid attribute
#     #add reverse link if oneway is not true
#     MDG.add_edge(int(row['B']), int(row['A']), **{'weight': row['length_ft']})
#     # if row[3] == False:
#     #     edge_data['reverse_link'] = True 
#     #     #reverse the azimuth
#     #     edge_data['azimuth'] = row[5]
#     #     MDG.add_edge(row[1], row[0], **edge_data)

# #exploded_edges, exploded_nodes
# tripid = 801

# test = match_dict[801]['edges'].merge(edges,on='linkid')
# sub_nodes = test['A'].append(test['B']).unique().tolist()
# #get start and end linkid
# start = match_dict[tripid]['edges'].iloc[0,:]
# end = match_dict[tripid]['edges'].iloc[-1,:]

# #get start and end node
# start_a_b = edges.loc[edges['linkid']==start['linkid'],['A','B']]
# end_a_b = edges.loc[edges['linkid']==end['linkid'],['A','B']]

# if start['forward']:
#     start = start_a_b['A'].item()
# else:
#     start = start_a_b['B'].item()

# if end['forward']:
#     end = end_a_b['B'].item()
# else:
#     end = end_a_b['A'].item()
# sub_nodes[0]
# start
# start in sub_nodes
# end in sub_nodes
# path
# subgraph = MDG.subgraph(sub_nodes)
# length, path = nx.single_source_dijkstra(subgraph,start,end,weight='weight')

# #turn to edge list
# edge_list = [(path[i],path[i+1]) for i in range(len(path)-1)]
# edge_df = pd.DataFrame(edge_list,columns=['A','B'])
# forward = pd.merge(edge_df,edges[['A','B','linkid','geometry']],on=['A','B'])#[['linkid','A','B','geometry']]
# forward
# reverse = pd.merge(edge_df,edges[['A','B','linkid','geometry']],left_on=['B','A'],right_on=['A','B'])[['linkid','A','B','geometry']]
# shortest_path = pd.concat([forward,reverse],ignore_index=True)
# shortest_path = shortest_path.loc[shortest_path.groupby(['A','B'])['length_ft'].idxmin()]
# gpd.GeoDataFrame(shortest_path).explore()
# #TODO deal with duplicate links
# shortest_path.explore()
# For multi-edges, buffer the 2+ edges and take the one that hits the most gps points
# import numpy as np
# exploded_edges['A_sort'] = np.sort(exploded_edges[['A','B']].to_numpy())[:,0]
# exploded_edges['B_sort'] = np.sort(exploded_edges[['A','B']].to_numpy())[:,1]
# duplicate_edges = exploded_edges.loc[exploded_edges[['A_sort','B_sort']].duplicated(keep=False),'linkid'].unique()
# gps_points = match_dict[tripid]['trace']
# # matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
# # matched_trip = gpd.GeoDataFrame(matched_trip)
# # from shapely.ops import MultiLineString
# # buffered_geo = MultiLineString(matched_trip.geometry.tolist()).buffer(100)
# # match['trace'].intersects(buffered_geo).sum()
# # # export 
# # with (export_fp/'sample_matched.pkl').open('wb') as fh:
# #     pickle.dump(match_dict,fh)
# # with (export_fp/'sample_matched.pkl').open('rb') as fh:
# #     match_dict = pickle.load(fh)

# # Visualization

# import folium
# import geopandas as gpd
# from folium.plugins import MarkerCluster, PolyLineTextPath
# from folium.map import FeatureGroup

# #tripid = 29837#7257#9806#30000#8429

# # Your GeoDataFrames
# matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
# matched_trip = gpd.GeoDataFrame(matched_trip)
# gps_points = match_dict[tripid]['trace']
# match_lines = match_dict[tripid]['match_lines']

# #get the start and end point for mapping
# start_pt = gps_points.to_crs(epsg='4326').loc[gps_points['sequence'].idxmin(),'geometry']
# end_pt = gps_points.to_crs(epsg='4326').loc[gps_points['sequence'].idxmax(),'geometry']

# # reproject and get the center of the map
# x_mean = gps_points.to_crs(epsg='4326')['geometry'].x.mean()
# y_mean = gps_points.to_crs(epsg='4326')['geometry'].y.mean()

# # Create a Folium map centered around the mean of the GPS points
# center = [y_mean,x_mean]
# mymap = folium.Map(location=center, zoom_start=14)

# # Convert GeoDataFrames to GeoJSON
# matched_trip_geojson = matched_trip[['linkid','geometry']].to_crs(epsg='4326').to_json()
# gps_points_geojson = gps_points[['sequence','geometry']].to_crs(epsg='4326').to_json()
# match_lines_geojson = match_lines[['sequence','match_lines']].to_crs(epsg='4326').to_json()

# # Create FeatureGroups for each GeoDataFrame
# matched_trip_fg = FeatureGroup(name='Matched Trip')
# gps_points_fg = FeatureGroup(name='GPS Points')
# match_lines_fg = FeatureGroup(name='Match Lines')

# # Add GeoJSON data to FeatureGroups
# folium.GeoJson(matched_trip_geojson, name='Matched Trip', style_function=lambda x: {'color': 'red'}).add_to(matched_trip_fg)

# # Add circles to the GPS Points FeatureGroup
# for idx, row in gps_points.iterrows():
#     folium.Circle(location=[row['lat'], row['lon']], radius=5, color='grey', fill=True, fill_color='grey').add_to(gps_points_fg)

# # Add GeoJSON data to Match Lines FeatureGroup with transparent and grey style
# folium.GeoJson(match_lines_geojson, name='Match Lines', style_function=lambda x: {'color': 'grey', 'opacity': 0.5}).add_to(match_lines_fg)

# # Add FeatureGroups to the map
# matched_trip_fg.add_to(mymap)
# gps_points_fg.add_to(mymap)
# match_lines_fg.add_to(mymap)

# # Add start and end points with play and stop buttons
# start_icon = folium.Icon(color='green',icon='play',prefix='fa')
# end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
# folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
# folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

# # Add layer control to toggle layers on/off
# folium.LayerControl().add_to(mymap)

# # Add legend with statistics
# #TODO what happened to duration
# legend_html = f'''
#     <div style="position: fixed; 
#             bottom: 5px; left: 5px; width: 300px; height: 250px; 
#             border:2px solid grey; z-index:9999; font-size:14px;
#             background-color: white;
#             opacity: 0.9;">
#     &nbsp; <b>Trip ID: {tripid} </b> <br>
#     &nbsp; <b> Match Date: {match_dict[tripid]['time']} </b> <br>
#     &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i>,
#     End Point &nbsp; <i class="fa fa-stop" style="color:red"></i> <br>
    
#     &nbsp; Matched Path &nbsp; <div style="width: 20px; height: 5px; background-color: red; display: inline-block;"></div> <br>
#     &nbsp; Match Lines Path &nbsp; <div style="width: 20px; height: 5px; background-color: gray; display: inline-block;"></div> <br>
 
#     &nbsp; Points Matched: {match_dict[tripid]['last_matched']}/{match_dict[tripid]['trace'].shape[0]} <br>
#     &nbsp; Match Ratio: {match_dict[tripid]['match_ratio']:.2f} <br>
#     &nbsp; GPS Distance: {match_dict[tripid]['gps_distance']:.1f} ft. <br>
#     &nbsp; Matched Trace Distance: {matched_trip.length.sum():.0f} ft. <br>
#     &nbsp; Mean Matching Distance: {match_dict[tripid]['match_lines'].length.mean():.0f} ft. 

#     </div>
#     '''
# mymap.get_root().html.add_child(folium.Element(legend_html))

# # Save the map to an HTML file or display it in a Jupyter notebook
# #mymap.save('map.html')
# # mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
# mymap  # Uncomment if you are using Jupyter notebook

# #TODO add in the legend with trip info and then we're golden

# match_dict[tripid].keys()
# match_dict[tripid]['match_ratio']
# match_dict[tripid].keys()
# help(InMemMap)
# help(DistanceMatcher)
# :param map_con: Map object to connect to map database
#         :param obs_noise: Standard deviation of noise
#         :param obs_noise_ne: Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
#         :param max_dist_init: Maximum distance from start location (if not given, uses max_dist)
#         :param max_dist: Maximum distance from path (this is a hard cut, min_prob_norm should be better)
#         :param min_prob_norm: Minimum normalized probability of observations (ema)
#         :param non_emitting_states: Allow non-emitting states. A non-emitting state is a state that is
#             not associated with an observation. Here we assume it can be associated with a location in between
#             two observations to allow for pruning. It is advised to set min_prob_norm and/or max_dist to avoid
#             visiting all possible nodes in the graph.
#         :param non_emitting_length_factor: Reduce the probability of a sequence of non-emitting states the longer it
#             is. This can be used to prefer shorter paths. This is separate from the transition probabilities because
#             transition probabilities are averaged for non-emitting states and thus the length is also averaged out.
#         :param max_lattice_width: Restrict the lattice (or possible candidate states per observation) to this value.
#             If there are more possible next states, the states with the best likelihood so far are selected.

#         :param dist_noise: Standard deviation of difference between distance between states and distance
#             between observatoins. If not given, set to obs_noise
#         :param dist_noise_ne: If not given, set to dist_noise
#         :param restrained_ne: Avoid non-emitting states if the distance between states and between
#             observations is close to each other.
#         :param avoid_goingback: If true, the probability is lowered for a transition that returns back to a
#             previous edges or returns to a position on an edge.

# # #get list of coords
# # gps_trace = list(zip(trace.geometry.y,trace.geometry.x))

# # #perform matching
# # states, last_matched = matcher.match(gps_trace)
# # only_nodes = matcher.path_pred_onlynodes

# # print("States\n------")
# # print(states)
# # print("Nodes\n------")
# # print(only_nodes)
# # print("")
# # matcher.print_lattice_stats()
# # fig, ax = plt.subplots(1, 1)
# # mmviz.plot_map(map_con, matcher=matcher,
# #                ax=ax,
# #                show_labels=True, show_matching=True, show_graph=False,
# #                filename="my_plot.png")
# # test = matcher.lattice[4]
# # m = max(test.values_all(), key=lambda m: m.logprob) # for the 4th point get the one with the highest logprob

# # m.logprob
# # import numpy as np
# # t = {x.cname.split('_')[0] + '_' + x.cname.split('_')[1]: x.logprob for x in test.values_all()}
# # check = pd.DataFrame.from_dict(t,orient='index',columns=['logprob']).sort_values('logprob',ascending=False)
# # check
# # (check.index == '5424132517_7151205661').sum()
# # testing = trace.copy()
# # testing.geometry = testing.buffer(1000)
# # intersect = gpd.overlay(edges,testing)
# # intersect[(intersect['A_B'] == '5424132517_7151205661') & (intersect['sequence'] == 4)]

# # #reduce the states size with match_nodes
# # reduced_states = list(set(edges))

# # #calculate the match ratio
# # match_ratio = last_matched / (len(gps_trace)-1)
    
# # #retreive matched edges from network
# # geos_list = [geos_dict.get(id,0) for id in reduced_states]

# # #turn into geodataframe
# # matched_trip = gpd.GeoDataFrame(data={'A_B':reduced_states,'geometry':geos_list},geometry='geometry',crs='epsg:2240')

# # #turn tuple to str
# # matched_trip['A_B'] = matched_trip['A_B'].apply(lambda row: f'{row[0]}_{row[1]}')

# # #reset index to add an edge sequence column
# # matched_trip.reset_index().rename(columns={'index':'edge_sequence'},inplace=True)

# # trace['interpolated_point'] = pd.Series([ Point(x.edge_m.pi) for x in matcher.lattice_best ])
# # trace = trace.loc[0:last_matched]
# # trace['match_lines'] = trace.apply(lambda row: LineString([row['geometry'],row['interpolated_point']]),axis=1)

# # interpolated_points = trace[['sequence','interpolated_point']]
# # interpolated_points = gpd.GeoDataFrame(interpolated_points,geometry='interpolated_point')

# # match_lines = trace[['sequence','match_lines']]
# # match_lines = gpd.GeoDataFrame(match_lines,geometry='match_lines')
# # match_lines['length'] = match_lines.length


# # interpolated_points.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='interpolated_points')
# # match_lines.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='match_lines')

# # #%%









