# Iterative Map Matching

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
from leuvenmapmatching import visualization as mmviz
import pickle
import time
import datetime
from pathlib import Path
from tqdm import tqdm
from shapely.ops import Point, LineString
import matplotlib.pyplot as plt

from importlib import reload

import map_match

## Set up the filepaths

In [2]:
# Input folders share one project root; outputs go to a separate drive
project_fp = Path.home() / "Documents/BikewaySimData/Projects/gdot"
network_fp = project_fp / "networks"
traces_fp = project_fp / "gps_traces"
# Matched traces, tested settings and QA/QC notes are pickled here
export_fp = Path('D:/matched_traces')

# Prepare Network

In [3]:
# chosen.pkl sits one level above the networks folder and unpickles to a 3-tuple
# NOTE: pickle.load executes arbitrary code; only open files you produced yourself
chosen_fp = network_fp.parent / 'chosen.pkl'
with chosen_fp.open('rb') as chosen_file:
    df_edges, pseudo_df, pseudo_G = pickle.load(chosen_file)

In [4]:
# Attach link geometries (reprojected to EPSG:2240) to the edge table
fp = Path.home() / "Documents/BikewaySimData/Projects/gdot"
edges = gpd.read_file(fp / 'networks/elevation_added.gpkg', layer="links").to_crs('epsg:2240')

link_geometry = edges[['linkid', 'geometry']]
df_edges = gpd.GeoDataFrame(
    df_edges.merge(link_geometry, on=['linkid']),
    geometry='geometry',
    crs=edges.crs,
)

# Keep only the first occurrence of any repeated column name, then renumber the rows
repeated_columns = df_edges.columns.duplicated()
df_edges = df_edges.loc[:, ~repeated_columns].copy()
df_edges = df_edges.reset_index(drop=True)

In [5]:
# #import network
# edges = gpd.read_file(network_fp/'final_network.gpkg',layer="final_network")
# #edges.reset_index(inplace=True)
# #edges.rename(columns={'index':'linkid'},inplace=True)

# Read the node layer and keep only the id and geometry columns
nodes = gpd.read_file(Path.home()/'Documents/BikewaySimData/projects/gdot/networks/reconciled.gpkg',layer="nodes")[['N','geometry']]

# Keep only nodes referenced by at least one edge endpoint.
# Series.append was removed in pandas 2.0, so the endpoint columns are stacked with pd.concat.
used_node_ids = set(pd.concat([df_edges['source'], df_edges['target']]).tolist())
nodes = nodes[nodes['N'].isin(used_node_ids)]

#TODO next features
# need to add the ability to remove isolates when doing this
# allow_wrongway = 
# link_types_to_include = ['road','bike','pedestrian]


In [6]:
# Largest node id and link id currently present in the network tables
max_nodeid, max_linkid = nodes['N'].max(), df_edges['linkid'].max()

In [7]:
# identify multi-edges so that we can break them apart (we already have duplicate edges)
# a multi edge will occur when there are two different linkids between the same nodes
# need to identify which linkids and then subset the data accordingly
# df_edges[['source_sort','target_sort']] = pd.DataFrame(np.sort(df_edges[['source','target']]))

# #identifies which node pairs have multiedges
# multi_edges = df_edges.groupby(['source_sort','target_sort'])['linkid'].nunique() > 1
# multi_edges = multi_edges[multi_edges]
# #subset data to get the multi-edges

# Count the distinct linkids for every directed (source, target) pair
pair_cols = ['source', 'target']
df_sorted = df_edges.sort_values(by=pair_cols)
links_per_pair = df_sorted.groupby(pair_cols)['linkid'].nunique().reset_index(name='num_linkid')

# Pairs served by more than one linkid are the multi-edges
grouped_df = links_per_pair[links_per_pair['num_linkid'] > 1]
merged = pd.merge(df_sorted, grouped_df, on=pair_cols)

# Partition the edge table into multi-edge links and everything else
multi_linkids = set(merged['linkid'].tolist())
is_multi_edge = df_edges['linkid'].isin(multi_linkids)
multi_edges = df_edges[is_multi_edge]
non_multi_edges = df_edges[~is_multi_edge]

In [8]:
# Hand the multi-edges to map_match together with the current max ids
# (presumably so newly created nodes/links get unused ids — confirm in map_match)
new_links, new_nodes = map_match.explode_network_midpoint(multi_edges, max_nodeid, max_linkid)

# Recombine the untouched links/nodes with the ones returned above
exploded_links = pd.concat([non_multi_edges, new_links], ignore_index=True)
exploded_nodes = pd.concat([nodes, new_nodes], ignore_index=True)

# Network object consumed by the matcher cells below
map_con = map_match.make_network(exploded_links, exploded_nodes, False)

# Trace Data

In [9]:
#load all traces
# NOTE: pickle.load executes arbitrary code; only open files you produced yourself
with (traces_fp/'cleaned_traces.pkl').open('rb') as fh:
    coords_dict, trips_df = pickle.load(fh)

# Hand-picked trips that are always part of the test set
tripids = [29837,7257,9806,30000,8429,10601]

# Sampling parameters: a fixed seed makes the sample (and therefore the QA/QC
# ratings keyed by tripid) reproducible across kernel restarts
SAMPLE_SIZE = 200
SAMPLE_SEED = 42

# Candidate trips are between 1 and 5 miles long (5280 ft per mile)
subset = trips_df[(trips_df['total_distance_ft'] > 5280) & (trips_df['total_distance_ft'] < 5280*5)]

# Cap the sample at the number of available trips so .sample() cannot raise
random_trips = subset['tripid'].sample(n=min(SAMPLE_SIZE, len(subset)), random_state=SAMPLE_SEED).tolist()
random_trips = list(set(random_trips + tripids))

#random_trips = trips_df.loc[trips_df['tripid'].isin(tripids),'tripid'].tolist()

# #load existing matches/if none then create a new dict
# if (export_fp/'sample_matched.pkl').exists():
#     with (export_fp/'sample_matched.pkl').open('rb') as fh:
#         match_dict = pickle.load(fh)
# else:
#     matched_traces = dict()


The matching setting dictionary stores all of the settings used for map matching, so they can be retrieved later for study

In [10]:
# Reuse the table of previously tested settings when one has been exported,
# otherwise start from an empty table
settings_fp = export_fp / 'matching_settings_df.pkl'
if not settings_fp.exists():
    matching_settings_df = pd.DataFrame()
else:
    with settings_fp.open('rb') as settings_file:
        matching_settings_df = pickle.load(settings_file)

[Leuven Map Matching documentation (`DistanceMatcher` source)](https://github.com/wannesm/LeuvenMapMatching/blob/9ca9f0b73665252f2ee492fae9dd243feef2f39d/leuvenmapmatching/matcher/distance.py)

In [11]:
from importlib import reload
# Pick up edits to map_match without restarting the kernel
reload(map_match)

matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
    'non_emitting_states': False, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 55, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #for no emitting If not given, set to dist_noise
    'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False
}

# Append the current settings to the history table; an exact repeat is reported
# and then dropped so each distinct combination keeps a single row/index
row = pd.DataFrame([matching_settings])
matching_settings_df = pd.concat([matching_settings_df,row],ignore_index=True)
if matching_settings_df.duplicated().any():
    print('Settings have been used before')
matching_settings_df.drop_duplicates(inplace=True)

#use this in the qaqc section to line up the ratings with the settings used
# Compare by column label rather than by position: the history table keeps the
# column order of the pickled frame, which need not match the dict order above
current_settings = row.iloc[0]
is_current = matching_settings_df[current_settings.index].eq(current_settings, axis='columns').all(axis=1)
matching_index = matching_settings_df[is_current].index.item()

Settings have been used before


# Single Match


In [19]:
trace

Unnamed: 0,sequence,tripid,datetime,lat,lon,hAccuracy_m,speed_mph,hAccuracy_ft,geometry,acceleration_ft/s**2,delta_time,delta_distance_ft,traversed_distance_ft,time_elapsed,calculated_speed_mph,interpolated_point
0,0,2499,2012-11-20 18:15:40,33.774591,-84.394744,8.0,6.444365,26.24672,POINT (2227277.857 1373118.309),,NaT,,,0 days 00:00:00,,POINT (2227289.3090049876 1373102.336815314)
1,1,2499,2012-11-20 18:15:43,33.774478,-84.394870,4.0,6.329287,13.12336,POINT (2227239.478 1373077.277),0.000000,0 days 00:00:01,11.829496,57.104106,0 days 00:00:03,6.223430e-07,POINT (2227244.5307499147 1373070.2298760512)
2,2,2499,2012-11-20 18:15:48,33.774338,-84.394901,4.0,7.940378,13.12336,POINT (2227229.945 1373026.356),-0.345234,0 days 00:00:01,11.785720,108.934578,0 days 00:00:08,6.200400e-07,POINT (2227234.970590103 1373026.0232248616)
3,3,2499,2012-11-20 18:15:53,33.774194,-84.394908,4.0,6.559443,13.12336,POINT (2227227.702 1372973.964),-0.690468,0 days 00:00:01,6.973375,161.426100,0 days 00:00:13,3.668652e-07,POINT (2227231.506581363 1372973.7116090218)
4,4,2499,2012-11-20 18:16:02,33.774063,-84.394866,4.0,4.142806,13.12336,POINT (2227240.359 1372926.268),-0.575390,0 days 00:00:04,18.320481,218.897938,0 days 00:00:22,2.409575e-07,POINT (2227228.417313168 1372927.0591253927)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
328,328,2499,2012-11-20 18:41:03,33.791500,-84.348499,2.0,12.313340,6.56168,POINT (2241341.114 1379242.987),-0.460311,0 days 00:00:01,18.342509,21653.287928,0 days 00:25:23,9.649889e-07,POINT (2241352.0290837586 1379267.1088956809)
329,329,2499,2012-11-20 18:41:07,33.791454,-84.348295,3.0,10.126859,9.84252,POINT (2241403.062 1379226.139),-1.150779,0 days 00:00:02,29.678348,21717.508086,0 days 00:25:27,7.806805e-07,POINT (2241405.665422315 1379244.0780363928)
330,330,2499,2012-11-20 18:41:11,33.791427,-84.348114,3.0,7.825300,9.84252,POINT (2241458.034 1379216.218),-1.496013,0 days 00:00:01,11.526538,21773.521609,0 days 00:25:31,6.064046e-07,POINT (2241456.3829110367 1379242.6057744024)
331,331,2499,2012-11-20 18:41:17,33.791415,-84.347937,3.0,5.063430,9.84252,POINT (2241511.800 1379211.756),-0.920623,0 days 00:00:02,17.260545,21828.256941,0 days 00:25:37,4.540337e-07,POINT (2241509.661705841 1379245.939036235)


In [17]:
tripid = 2499

# Match one trip's GPS trace and keep the result in a dict keyed by trip id
trace = coords_dict[tripid]
single_matches = {
    tripid: map_match.leuven_match(trace, matching_settings, map_con, exploded_links)
}

In [26]:
test_trace['speed_mph'].max()

22.785432780735505

In [25]:
# Pull the trace stored with the single-trip match result for trip 2499
test_trace = single_matches[2499]['trace']

from branca.colormap import linear

# Yellow-green ramp rescaled to the min/max of this trace's speed_mph column
colormap = linear.YlGn_09.scale(
    test_trace['speed_mph'].min(), test_trace['speed_mph'].max()
)

# Probe the colour returned for a value of 70.
# NOTE(review): the max speed_mph shown for this trace is ~22.8, so 70 lies outside
# the scaled range — confirm this probe is intentional.
print(colormap(70))
colormap

#004529ff


In [38]:
linear._colormaps

{'viridis': <branca.colormap.LinearColormap at 0x2a38ebb9f10>,
 'Pastel1_03': <branca.colormap.LinearColormap at 0x2a38ebb9e20>,
 'Pastel1_05': <branca.colormap.LinearColormap at 0x2a38ebb9df0>,
 'Pastel1_04': <branca.colormap.LinearColormap at 0x2a38ebb9dc0>,
 'Pastel1_07': <branca.colormap.LinearColormap at 0x2a38ebb9d60>,
 'YlOrRd_04': <branca.colormap.LinearColormap at 0x2a38ebb9d30>,
 'Pastel1_09': <branca.colormap.LinearColormap at 0x2a38ebb9d90>,
 'Pastel1_08': <branca.colormap.LinearColormap at 0x2a38ebb9cd0>,
 'Spectral_07': <branca.colormap.LinearColormap at 0x2a38ebb9d00>,
 'RdYlBu_05': <branca.colormap.LinearColormap at 0x2a38ebb9c70>,
 'PuBuGn_03': <branca.colormap.LinearColormap at 0x2a38ebb9ca0>,
 'Set1_08': <branca.colormap.LinearColormap at 0x2a38ebb9c10>,
 'PuBuGn_05': <branca.colormap.LinearColormap at 0x2a38ebb9be0>,
 'PuBuGn_04': <branca.colormap.LinearColormap at 0x2a38ebb9bb0>,
 'PuBuGn_07': <branca.colormap.LinearColormap at 0x2a38ebb9c40>,
 'PuBuGn_06': <branca

In [43]:
from importlib import reload
# Re-import map_match so edits to the module are picked up without a kernel restart
reload(map_match)
#TODO this is broken now and i'm not sure why, the linkid is really off
#TODO add velocity popup
# Render the match for the trip currently held in `tripid`, looked up in single_matches
map_match.visualize_match(tripid, single_matches, df_edges)

# Multi Match

In [14]:
# Match every sampled trip once; results are keyed by trip id
match_dict = {}
for tripid in tqdm(random_trips):

    # A trip that already has an entry is left untouched
    if tripid in match_dict:
        continue

    trace = coords_dict[tripid]
    match_dict[tripid] = match = map_match.leuven_match(trace, matching_settings, map_con, exploded_links)

    # may remove essential part of trace so just ignore for now
    # retrace = match['edges']
    
    # #find retraces and remove them?
    # #it doesn't seem like the thing removes much
    # #match['edges'] = retrace[-(retrace['linkid'] == retrace['linkid'].shift(1))]
    # match['edges'] = retrace[-(retrace['linkid'].duplicated(keep=False))]
    
    # if tripid in match_dict.keys():
    #     match_ratio_cond = match['match_ratio'] > match_dict[tripid]['match_ratio']
    #     match_distance_cond = match['match_lines'].length.sum() < match_dict[tripid]['match_lines'].length.sum()
    #     print(match_ratio_cond)
    #     print(match_distance_cond)
    #     if match_ratio_cond & match_distance_cond:
    #         print('success')
    #         match_dict[tripid] = match
    # else:
    #     match_dict[tripid] = match


100%|██████████| 206/206 [09:35<00:00,  2.80s/it]


In [15]:
export_fp

WindowsPath('D:/matched_traces')

In [16]:
# Create the export folder (and any missing parents) on first use
if not export_fp.exists():
    export_fp.mkdir(parents=True)

# Each object is pickled to its own file: first the settings history,
# then the matched traces
exports = {
    'matching_settings_df.pkl': matching_settings_df,
    'match_testing.pkl': match_dict,
}
for file_name, payload in exports.items():
    with (export_fp / file_name).open('wb') as out_file:
        pickle.dump(payload, out_file)
# with (export_fp/f'match_{matching_index}_{len(match_dict.keys())}_trips.pkl').open('wb') as fh:
#     pickle.dump(match_dict,fh)

# # export 
# with (export_fp/'200_sample.pkl').open('wb') as fh:
#     pickle.dump(match_dict,fh)

# Examine matches

In [None]:
export_fp

In [None]:
# Load a previously exported set of matches (the file name is hard-coded)
# NOTE: pickle.load executes arbitrary code; only open files you produced yourself
saved_matches_fp = export_fp / 'match_1_205_trips.pkl'
with saved_matches_fp.open('rb') as in_file:
    match_dict = pickle.load(in_file)

# # export the matching settings tested
# with (export_fp/'matching_settings_df.pkl').open('wb') as fh:
#     pickle.dump(matching_settings_df,fh)

In [None]:
# Resume from previously saved review notes when they exist, otherwise start empty
qaqc_fp = export_fp / 'qaqc.pkl'
if not qaqc_fp.exists():
    qaqc_dict = {}
else:
    with qaqc_fp.open('rb') as in_file:
        qaqc_dict = pickle.load(in_file)

In [None]:
# Manual reset switch. Running this cell unconditionally would discard the
# reviews loaded from qaqc.pkl, and the later save cell would then overwrite
# that file. Set RESET_QAQC = True only when you really want to start over.
RESET_QAQC = False
if RESET_QAQC:
    qaqc_dict = {}

Visualize and take notes

In [None]:
import time
from IPython.display import display, clear_output

from importlib import reload
# Pick up edits to map_match without restarting the kernel
reload(map_match)

# Accepted rating strings ("1" .. "10"); ratings stay stored as strings so
# records remain comparable with ones saved earlier
VALID_RATINGS = {str(score) for score in range(1, 11)}

for tripid in match_dict.keys():

    # Reviews are keyed by trip and by the settings row used for the match
    review_key = (tripid, matching_index)

    # A string entry in match_dict is treated as a failed match and recorded as such
    if isinstance(match_dict.get(tripid,0),str):
        qaqc_dict[review_key] = 'failed match'
        continue


#TODO get condition that won't write to dict if a trip is skipped

    # Trips that already have a review for these settings are not shown again
    if qaqc_dict.get(review_key,0) != 0:
        continue

    html_map = map_match.visualize_match(tripid, match_dict, edges)
    clear_output(wait=True)
    display(html_map)

    # Wait for user input to proceed to the next trip; surrounding whitespace is
    # ignored and anything other than 1-10, empty or 'q' is asked for again so
    # typos are never written to the review record
    user_input = input("Rate from 1-10 with 1 being no match to 10 being perfect match (press 'enter' to skip or 'q' to quit)").strip()
    while user_input != '' and user_input.lower() != 'q' and user_input not in VALID_RATINGS:
        user_input = input("Invalid rating. Rate from 1-10 (press 'enter' to skip or 'q' to quit)").strip()

    if user_input.lower() == 'q':
        break  # Exit the loop if the user enters 'q'
    if user_input == '':
        continue  # Skipped trips are not written to qaqc_dict

    notes = input("Input notes if desired and press enter")

    #save user input for that matching index
    qaqc_dict[review_key] = {
        'rating': user_input,
        'notes': notes,
        'last_reviewed': datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    }

    #prev_qaqc = qaqc_dict.get((tripid,matching_index))
    #html_map = map_match.visualize_match(tripid, match_dict, edges, {'prev_qaqc':prev_qaqc})

#TODO add the trip date to viz


# Additional cleanup or actions after examining all trips can be added here
print("Finished examining trips.")

In [None]:
match_dict[tripid].keys()

In [None]:
# Persist the review notes so a later session can resume from them
qaqc_fp = export_fp / 'qaqc.pkl'
with qaqc_fp.open('wb') as out_file:
    pickle.dump(qaqc_dict, out_file)

# Post Match Cleanup
Some trips contain out-and-back segments and/or take the wrong link when two nodes have more than one link between them. This step goes through and cleans these trips.

For out and backing:
Subset network graph to only the edges between origin and destination and then use Dijkstra

In [None]:
qaqc_dict

In [None]:
# String entries (e.g. failed-match placeholders) are left out; the remaining
# dict records become one row each, indexed by their qaqc_dict key
new_dict = {
    review_key: review
    for review_key, review in qaqc_dict.items()
    if not isinstance(review, str)
}
qaqc_df = pd.DataFrame.from_dict(new_dict, orient='index')


In [None]:
qaqc_df

Post match cleaning

In [None]:
from IPython.display import display

# Only the last expression of a cell is rendered, so the filtered reviews are
# displayed explicitly. na=False keeps rows with missing notes from breaking
# the boolean mask.
out_and_back_reviews = qaqc_df[qaqc_df['notes'].str.contains('out and back', na=False)]
display(out_and_back_reviews)

# Matched edges for one example trip
match_dict[801]['edges']

In [None]:
gpd.GeoDataFrame(match_dict[801]['edges'].merge(edges,on='linkid')).explore()

In [None]:
exploded_edges.columns

In [None]:
#TODO use to make network graph, then subset with trip
import networkx as nx

# NOTE(review): `exploded_edges` is not assigned anywhere in the visible notebook
# (the network cells create `exploded_links`) — confirm which table is meant.
MDG = nx.MultiDiGraph()  # directed multigraph weighted by link length
for _, link in exploded_edges.iterrows():
    node_a, node_b = int(link['A']), int(link['B'])
    link_length = link['length_ft']
    # Both directions are always added here; the one-way handling sketched in
    # the commented-out code below is not active
    MDG.add_edge(node_a, node_b, weight=link_length)
    MDG.add_edge(node_b, node_a, weight=link_length)
    #edge_data = {linkid: row[2],'reverse_link': False, 'azimuth': row[4]}
    # if row[3] == False:
    #     edge_data['reverse_link'] = True 
    #     #reverse the azimuth
    #     edge_data['azimuth'] = row[5]
    #     MDG.add_edge(row[1], row[0], **edge_data)

#exploded_edges, exploded_nodes

In [None]:
tripid = 801

# Join the matched edges to the network table to get their A/B end nodes
test = match_dict[tripid]['edges'].merge(edges,on='linkid')

# Unique node ids touched by the matched trip.
# Series.append was removed in pandas 2.0, so the two columns are stacked with pd.concat.
sub_nodes = pd.concat([test['A'], test['B']]).unique().tolist()

In [None]:
# First and last matched edge records of the trip
first_edge = match_dict[tripid]['edges'].iloc[0, :]
last_edge = match_dict[tripid]['edges'].iloc[-1, :]

# A/B end nodes of those two links in the network table
start_a_b = edges.loc[edges['linkid'] == first_edge['linkid'], ['A', 'B']]
end_a_b = edges.loc[edges['linkid'] == last_edge['linkid'], ['A', 'B']]

# When `forward` is truthy the trip starts at A / ends at B of its link;
# otherwise the roles of A and B are swapped
start = start_a_b['A' if first_edge['forward'] else 'B'].item()
end = end_a_b['B' if last_edge['forward'] else 'A'].item()

In [None]:
sub_nodes[0]

In [None]:
start

In [None]:
start in sub_nodes

In [None]:
end in sub_nodes

In [None]:
path

In [None]:
# Route only over the nodes touched by the matched trip, weighting by link length
subgraph = MDG.subgraph(sub_nodes)
length, path = nx.single_source_dijkstra(subgraph, source=start, target=end, weight='weight')


In [None]:
# Pair each node on the path with its successor to get (A, B) steps
edge_list = list(zip(path[:-1], path[1:]))
edge_df = pd.DataFrame(edge_list, columns=['A', 'B'])

# Links whose A -> B direction agrees with the direction of travel
link_lookup = edges[['A', 'B', 'linkid', 'geometry']]
forward = edge_df.merge(link_lookup, on=['A', 'B'])#[['linkid','A','B','geometry']]
forward

In [None]:
# Links traversed against their A -> B direction. Merging with crossed
# left_on/right_on keys leaves suffixed A_x/B_x/A_y/B_y columns, so selecting
# 'A' and 'B' afterwards fails; instead swap the A/B labels on the network side
# and merge on the shared names. A and B in the result follow the travel direction.
reversed_links = edges[['A','B','linkid','geometry']].rename(columns={'A':'B','B':'A'})
reverse = pd.merge(edge_df,reversed_links,on=['A','B'])[['linkid','A','B','geometry']]

# All candidate links for every step of the path, in either digitised direction
shortest_path = pd.concat([forward,reverse],ignore_index=True)

In [None]:
# shortest_path is assembled from A, B, linkid and geometry only, so there is no
# length_ft column to group on; derive the length from the geometry instead.
# NOTE(review): the units are those of the geometry CRS (edges were reprojected to
# EPSG:2240 when loaded) — confirm this agrees with the network's length_ft attribute.
link_lengths = gpd.GeoSeries(shortest_path['geometry']).length

# For every (A, B) step keep the candidate link with the smallest length
shortest_rows = link_lengths.groupby([shortest_path['A'], shortest_path['B']]).idxmin()
shortest_path = shortest_path.loc[shortest_rows]

In [None]:
gpd.GeoDataFrame(shortest_path).explore()

In [None]:
#TODO deal with duplicate links

In [None]:
# shortest_path comes out of pd.merge/pd.concat with a plain DataFrame on the
# left, so it has no .explore(); wrap it in a GeoDataFrame first
gpd.GeoDataFrame(shortest_path).explore()

For multi-edges, buffer the 2+ edges and take the one that hits the most gps points

In [None]:
import numpy as np

# Order each link's end nodes so that A->B and B->A share the same key
# NOTE(review): `exploded_edges` is not assigned in the visible notebook — confirm its source
sorted_ends = np.sort(exploded_edges[['A', 'B']].to_numpy())
exploded_edges['A_sort'] = sorted_ends[:, 0]
exploded_edges['B_sort'] = sorted_ends[:, 1]

# linkids whose undirected node pair appears on more than one row
shares_node_pair = exploded_edges[['A_sort', 'B_sort']].duplicated(keep=False)
duplicate_edges = exploded_edges.loc[shares_node_pair, 'linkid'].unique()

In [None]:
gps_points = match_dict[tripid]['trace']

In [None]:
# matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
# matched_trip = gpd.GeoDataFrame(matched_trip)
# from shapely.ops import MultiLineString
# buffered_geo = MultiLineString(matched_trip.geometry.tolist()).buffer(100)
# match['trace'].intersects(buffered_geo).sum()

In [None]:
# # export 
# with (export_fp/'sample_matched.pkl').open('wb') as fh:
#     pickle.dump(match_dict,fh)

In [None]:
# with (export_fp/'sample_matched.pkl').open('rb') as fh:
#     match_dict = pickle.load(fh)

# Visualization


In [None]:
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster, PolyLineTextPath
from folium.map import FeatureGroup

#tripid = 29837#7257#9806#30000#8429

# Your GeoDataFrames
trip_match = match_dict[tripid]
matched_trip = trip_match['edges'].merge(edges, on='linkid')
matched_trip = gpd.GeoDataFrame(matched_trip)
gps_points = trip_match['trace']
match_lines = trip_match['match_lines']

# Reproject the GPS points once and reuse the result for every lat/lon lookup below
gps_points_wgs84 = gps_points.to_crs(epsg='4326')

#get the start and end point for mapping (lowest / highest sequence number)
start_pt = gps_points_wgs84.loc[gps_points['sequence'].idxmin(),'geometry']
end_pt = gps_points_wgs84.loc[gps_points['sequence'].idxmax(),'geometry']

# Center of the map: mean of the reprojected point coordinates
x_mean = gps_points_wgs84['geometry'].x.mean()
y_mean = gps_points_wgs84['geometry'].y.mean()

# Create a Folium map centered around the mean of the GPS points
center = [y_mean,x_mean]
mymap = folium.Map(location=center, zoom_start=14)

# Convert GeoDataFrames to GeoJSON
matched_trip_geojson = matched_trip[['linkid','geometry']].to_crs(epsg='4326').to_json()
gps_points_geojson = gps_points_wgs84[['sequence','geometry']].to_json()
match_lines_geojson = match_lines[['sequence','match_lines']].to_crs(epsg='4326').to_json()

# Create FeatureGroups for each GeoDataFrame
matched_trip_fg = FeatureGroup(name='Matched Trip')
gps_points_fg = FeatureGroup(name='GPS Points')
match_lines_fg = FeatureGroup(name='Match Lines')

# Add GeoJSON data to FeatureGroups
folium.GeoJson(matched_trip_geojson, name='Matched Trip', style_function=lambda x: {'color': 'red'}).add_to(matched_trip_fg)

# Add circles to the GPS Points FeatureGroup, positioned from the trace's lat/lon columns
for point_lat, point_lon in zip(gps_points['lat'], gps_points['lon']):
    folium.Circle(location=[point_lat, point_lon], radius=5, color='grey', fill=True, fill_color='grey').add_to(gps_points_fg)

# Add GeoJSON data to Match Lines FeatureGroup with transparent and grey style
folium.GeoJson(match_lines_geojson, name='Match Lines', style_function=lambda x: {'color': 'grey', 'opacity': 0.5}).add_to(match_lines_fg)

# Add FeatureGroups to the map
matched_trip_fg.add_to(mymap)
gps_points_fg.add_to(mymap)
match_lines_fg.add_to(mymap)

# Add start and end points with play and stop buttons
start_icon = folium.Icon(color='green',icon='play',prefix='fa')
end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

# Add layer control to toggle layers on/off
folium.LayerControl().add_to(mymap)

# Add legend with statistics
#TODO what happened to duration
legend_html = f'''
    <div style="position: fixed; 
            bottom: 5px; left: 5px; width: 300px; height: 250px; 
            border:2px solid grey; z-index:9999; font-size:14px;
            background-color: white;
            opacity: 0.9;">
    &nbsp; <b>Trip ID: {tripid} </b> <br>
    &nbsp; <b> Match Date: {trip_match['time']} </b> <br>
    &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i>,
    End Point &nbsp; <i class="fa fa-stop" style="color:red"></i> <br>
    
    &nbsp; Matched Path &nbsp; <div style="width: 20px; height: 5px; background-color: red; display: inline-block;"></div> <br>
    &nbsp; Match Lines Path &nbsp; <div style="width: 20px; height: 5px; background-color: gray; display: inline-block;"></div> <br>
 
    &nbsp; Points Matched: {trip_match['last_matched']}/{trip_match['trace'].shape[0]} <br>
    &nbsp; Match Ratio: {trip_match['match_ratio']:.2f} <br>
    &nbsp; GPS Distance: {trip_match['gps_distance']:.1f} ft. <br>
    &nbsp; Matched Trace Distance: {matched_trip.length.sum():.0f} ft. <br>
    &nbsp; Mean Matching Distance: {trip_match['match_lines'].length.mean():.0f} ft. 

    </div>
    '''
mymap.get_root().html.add_child(folium.Element(legend_html))

# Save the map to an HTML file or display it in a Jupyter notebook
#mymap.save('map.html')
# mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
mymap  # Uncomment if you are using Jupyter notebook

#TODO add in the legend with trip info and then we're golden


In [None]:
match_dict[tripid].keys()

In [None]:
match_dict[tripid]['match_ratio']

In [None]:
# tripid

# #%% batch match
# for tripid, trace in tqdm(coords_dict.items()):
#     try:
#         matched_traces, trips_df = map_match.leuven_match(trace,tripid,matched_traces,matching_settings,edges,map_con)
#         #update trips_df
#     except:
#         if tripid in matched_traces.keys():
#             failed_match.append(tripid)
#         export_files = (matched_traces,trips_df,failed_match)
#         with (export_fp/'matched_traces.pkl').open('wb') as fh:
#             pickle.dump(export_files,fh)
        
        
# #export
# export_files = (matched_traces,trips_df,failed_match)
# with (export_fp/'matched_traces.pkl').open('wb') as fh:
#     pickle.dump(export_files,fh)
# #export filepath
# project_dir = Path.home() / 'Downloads/cleaned_trips'

# trace = gpd.read_file(project_dir/'example_trace0.gpkg',layer='example_trace0').reset_index().drop(columns=['sequence']).rename(columns={'index':'sequence'})

# #load network
# network_fp = project_dir / "networks/final_network.gpkg"
# edges = gpd.read_file(network_fp,layer="links")
# nodes = gpd.read_file(network_fp,layer="nodes")

# #turn network into dict to quickly retrieve geometries
# edges['tup'] = list(zip(edges['A'],edges['B']))
# geos_dict = dict(zip(edges['tup'],edges['geometry']))

# # create network graph needed for map matching
# map_con = InMemMap("osm", use_latlon = False)

# # create network graph needed for map matching (using a projected coordinate system so latlon false)
# map_con = InMemMap("marta_osm", use_latlon = False)

# #redo the latlon columns
# nodes['X'] = nodes.geometry.x
# nodes['Y'] = nodes.geometry.y

# #add edges and nodes to leuven graph network object (make sure latlon is in same order and crs as trace)
# for row in nodes[['N','X','Y']].itertuples(index=False):
#     map_con.add_node(row[0], (row[2], row[1]))
# for row in edges[['A','B']].itertuples(index=False):
#     map_con.add_edge(row[0], row[1])
# matcher = DistanceMatcher(map_con, # the network graph
#                      max_dist=1000,  # maximum distance for considering a link a candidate match for a GPS point
#                      min_prob_norm=0.001, # drops routes that are below a certain normalized probability  
#                      #non_emitting_length_factor=0.75, # not sure what this does, it's not in the documentation but can't be above 1
#                      non_emitting_states=False, # allow for states that don't have matching GPS points
#                      obs_noise=500, # the standard error in GPS measurement
#                      max_lattice_width=100)  # limits the number of possible routes to consider, can increment if no solution is found

In [None]:
help(InMemMap)

In [None]:
help(DistanceMatcher)

:param map_con: Map object to connect to map database
        :param obs_noise: Standard deviation of noise
        :param obs_noise_ne: Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
        :param max_dist_init: Maximum distance from start location (if not given, uses max_dist)
        :param max_dist: Maximum distance from path (this is a hard cut, min_prob_norm should be better)
        :param min_prob_norm: Minimum normalized probability of observations (ema)
        :param non_emitting_states: Allow non-emitting states. A non-emitting state is a state that is
            not associated with an observation. Here we assume it can be associated with a location in between
            two observations to allow for pruning. It is advised to set min_prob_norm and/or max_dist to avoid
            visiting all possible nodes in the graph.
        :param non_emitting_length_factor: Reduce the probability of a sequence of non-emitting states the longer it
            is. This can be used to prefer shorter paths. This is separate from the transition probabilities because
            transition probabilities are averaged for non-emitting states and thus the length is also averaged out.
        :param max_lattice_width: Restrict the lattice (or possible candidate states per observation) to this value.
            If there are more possible next states, the states with the best likelihood so far are selected.

        :param dist_noise: Standard deviation of difference between distance between states and distance
            between observatoins. If not given, set to obs_noise
        :param dist_noise_ne: If not given, set to dist_noise
        :param restrained_ne: Avoid non-emitting states if the distance between states and between
            observations is close to each other.
        :param avoid_goingback: If true, the probability is lowered for a transition that returns back to a
            previous edges or returns to a position on an edge.


In [None]:
# #get list of coords
# gps_trace = list(zip(trace.geometry.y,trace.geometry.x))

# #perform matching
# states, last_matched = matcher.match(gps_trace)
# only_nodes = matcher.path_pred_onlynodes

# print("States\n------")
# print(states)
# print("Nodes\n------")
# print(only_nodes)
# print("")
# matcher.print_lattice_stats()
# fig, ax = plt.subplots(1, 1)
# mmviz.plot_map(map_con, matcher=matcher,
#                ax=ax,
#                show_labels=True, show_matching=True, show_graph=False,
#                filename="my_plot.png")
# test = matcher.lattice[4]
# m = max(test.values_all(), key=lambda m: m.logprob) # for the 4th point get the one with the highest logprob

# m.logprob
# import numpy as np
# t = {x.cname.split('_')[0] + '_' + x.cname.split('_')[1]: x.logprob for x in test.values_all()}
# check = pd.DataFrame.from_dict(t,orient='index',columns=['logprob']).sort_values('logprob',ascending=False)
# check
# (check.index == '5424132517_7151205661').sum()
# testing = trace.copy()
# testing.geometry = testing.buffer(1000)
# intersect = gpd.overlay(edges,testing)
# intersect[(intersect['A_B'] == '5424132517_7151205661') & (intersect['sequence'] == 4)]

# #reduce the states size with match_nodes
# reduced_states = list(set(edges))

# #calculate the match ratio
# match_ratio = last_matched / (len(gps_trace)-1)
    
# #retreive matched edges from network
# geos_list = [geos_dict.get(id,0) for id in reduced_states]

# #turn into geodataframe
# matched_trip = gpd.GeoDataFrame(data={'A_B':reduced_states,'geometry':geos_list},geometry='geometry',crs='epsg:2240')

# #turn tuple to str
# matched_trip['A_B'] = matched_trip['A_B'].apply(lambda row: f'{row[0]}_{row[1]}')

# #reset index to add an edge sequence column
# matched_trip.reset_index().rename(columns={'index':'edge_sequence'},inplace=True)

# trace['interpolated_point'] = pd.Series([ Point(x.edge_m.pi) for x in matcher.lattice_best ])
# trace = trace.loc[0:last_matched]
# trace['match_lines'] = trace.apply(lambda row: LineString([row['geometry'],row['interpolated_point']]),axis=1)

# interpolated_points = trace[['sequence','interpolated_point']]
# interpolated_points = gpd.GeoDataFrame(interpolated_points,geometry='interpolated_point')

# match_lines = trace[['sequence','match_lines']]
# match_lines = gpd.GeoDataFrame(match_lines,geometry='match_lines')
# match_lines['length'] = match_lines.length


# interpolated_points.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='interpolated_points')
# match_lines.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='match_lines')

# #%%






