# Prepare Data for Map Matching

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
#from leuvenmapmatching import visualization as mmviz
import pickle
import time
import datetime
from pathlib import Path

from tqdm import tqdm
from shapely.ops import Point, LineString
import matplotlib.pyplot as plt

from importlib import reload

from bikewaysim.paths import config
from bikewaysim import map_match
from bikewaysim.network import prepare_network

In [None]:
# Read the edge and node layers of the final network from the same GeoPackage.
network_gpkg = config['network_fp']/'final_network.gpkg'
links = gpd.read_file(network_gpkg,layer='edges')
nodes = gpd.read_file(network_gpkg,layer='nodes')

In [None]:
# # remove cycletracks / multi use paths dated after 2016 so traces don't match to them
# after = links['facility'].isin(['cycletrack','multi use path']) & \
#           (links['link_type']!='road') & \
#           links['year'].notna() & \
#           (links['year']>2016)
# # links[after].drop(columns=['all_tags']).explore()
# links = links[after==False]

In [None]:
# Link types that the map matching network is allowed to contain.
link_types_allowed = ['bike','pedestrian','road','service','connector','parking_and_driveways']
# Show the link types present in the network for comparison with the list above.
print(links['link_type'].unique())

#TODO add the ability to go the wrongway on residential streets ONLY
# For now, links on these highway classes are simply marked as not one-way.
allow_wrongway_on = ['residential','living_street']
wrongway_ok = links['highway'].isin(allow_wrongway_on)
links.loc[wrongway_ok,'oneway'] = False

In [None]:
# Build the map matching inputs from the network restricted to link_types_allowed.
# Returns the exploded links, exploded nodes and a third object kept as map_con.
# NOTE(review): the meaning of the trailing positional False is not visible here
# (possibly the allow-wrongway switch, cf. matching_settings['allow_wrongway']) -- TODO confirm.
exploded_links, exploded_nodes, map_con = map_match.prepare_mapmatch_network(links,nodes,link_types_allowed,False)

# Trace Data
For map matching, we use GPS traces that have been thinned so that consecutive points are spaced a set distance apart; coordinates that fall in between are dropped to reduce computation time.

In [None]:
#load all traces
with (config['cycleatl_fp']/'reduced_spacing.pkl').open('rb') as fh:
    coords_dict = pickle.load(fh)

# import trips that we want to match
with (config['cycleatl_fp']/'trips_4.pkl').open('rb') as fh:
    trips_df = pickle.load(fh)

# subset the coords dict by just the trips we're trying to match
# (build the lookup set once instead of re-creating and scanning a list for every key)
wanted_tripids = set(trips_df['tripid'].tolist())
coords_dict = {tripid:trace for tripid, trace in coords_dict.items() if tripid in wanted_tripids}

# keep only the points recorded at more than 1 mph, computing the mask once per trace,
# then drop the traces that have no points left
moving_points = {tripid:trace[trace['speed_mph']>1] for tripid, trace in coords_dict.items()}
coords_dict = {tripid:trace for tripid, trace in moving_points.items() if trace.shape[0] > 0}

print('Map matching',len(coords_dict.keys()),'trips')

In [None]:
#run the privacy filter 
def privacy_distance(df,privacy_dist=500):
    """Trim the parts of a trace that fall inside the start and end privacy buffers.

    The trace is cut so that it begins at the first point outside the buffer
    around the first point, and ends just before the first later point that
    falls inside the buffer around the last point.

    Rows are selected by position, so the result does not depend on the index
    labels being consecutive integers. The end buffer is only searched after
    the trace has left the start buffer, so a round trip whose first point
    lies inside the end buffer is trimmed instead of discarded.

    Parameters
    ----------
    df : trace with a 'geometry' column of point geometries, in travel order
        (must support ``.intersects``, i.e. a GeoSeries). Must not be empty.
    privacy_dist : buffer radius, in the units of the geometry coordinates
        (NOTE(review): presumably a projected CRS -- confirm the unit).

    Returns
    -------
    The trimmed slice of ``df``, or ``None`` when the whole trace stays within
    twice ``privacy_dist`` of its first point or when nothing is left after
    trimming.
    """
    geometry = df['geometry']
    origin = geometry.iloc[0]
    start_zone = origin.buffer(privacy_dist)
    end_zone = geometry.iloc[-1].buffer(privacy_dist)

    # reject trips that never leave twice the privacy distance of their first point
    if geometry.intersects(origin.buffer(privacy_dist*2)).all():
        return None

    in_start_zone = geometry.intersects(start_zone).to_numpy()
    in_end_zone = geometry.intersects(end_zone).to_numpy()

    # position of the first point outside the start buffer; one always exists here
    # because at least one point lies outside the (larger) double buffer
    first_keep = int(in_start_zone.argmin())
    # position of the first point at or after first_keep that is inside the end buffer
    stop = first_keep + int(in_end_zone[first_keep:].argmax())

    if stop <= first_keep:
        return None
    return df.iloc[first_keep:stop]
# Apply the privacy filter to every trace.
# NOTE(review): traces rejected by privacy_distance stay in the dict with a value of None.
filtered_coords = {}
for tripid, trace in coords_dict.items():
    filtered_coords[tripid] = privacy_distance(trace)
coords_dict = filtered_coords

In [None]:
# Save the privacy-filtered traces for the map matching step.
coords_dict_fp = config['matching_fp'] / "coords_dict.pkl"
with coords_dict_fp.open('wb') as out:
    pickle.dump(coords_dict,out)

In [None]:
# Save the exploded network. Despite the file name, the pickle holds the
# (exploded_links, exploded_nodes) tuple and not the map_con object itself.
network_fp = config['matching_fp'] / "map_con.pkl"
with network_fp.open('wb') as out:
    pickle.dump((exploded_links,exploded_nodes),out)

# split the

# One off map matching example
Use this for testing purposes

# Prepare Map Matching Files
- Split the data into buckets of 500 each (10hrs to hrs)
- Pickle the split up dicts, the network, and the matching settings

In [None]:
# Number of entries left to match; this also counts any trips whose trace was
# set to None by the privacy filter, since those keys are kept in the dict.
len(coords_dict)

In [None]:
# match_dict = {tripid:map_match.leuven_match(trace,matching_settings,map_con,exploded_links) for tripid, trace in tqdm(coords_dict.items(),total=len(coords_dict))}

In [None]:
# Split coords_dict into consecutive buckets of at most bucket_size trips each so
# that every bucket can be pickled and map matched separately. Slicing the item
# list keeps every bucket at exactly bucket_size entries (the last one may be
# smaller) and yields no bucket at all when coords_dict is empty.
bucket_size = 500
trip_items = list(coords_dict.items())
small_coords = [
    dict(trip_items[start:start + bucket_size])
    for start in range(0, len(trip_items), bucket_size)
]
print(len(small_coords))

In [None]:
# Write each bucket of traces to its own numbered pickle file.
for bucket_no, bucket in enumerate(small_coords):
    bucket_fp = config['matching_fp']/f'coords_dict_{bucket_no}.pkl'
    with bucket_fp.open('wb') as out:
        pickle.dump(bucket,out)

In [None]:
# matching_settings_df keeps one row per set of map matching settings that has been
# used, so the settings can be looked up again later. Start from an empty table
# when no history file has been written yet.
settings_history_fp = config['matching_fp'] / 'matching_settings_df.pkl'
if not settings_history_fp.exists():
    matching_settings_df = pd.DataFrame()
else:
    with settings_history_fp.open('rb') as fh:
        matching_settings_df = pickle.load(fh)

In [None]:
#TODO pickle the matching settings so that we can still do the tracking of the different matching settings
matching_settings = {
        'obs_noise': 50, #Standard deviation of noise
        'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
        'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
        'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
        'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
        'non_emitting_states': False, #Allow non-emitting states
        'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
        'max_lattice_width': 50, #Restrict the lattice (or possible candidate states per observation) to this value.
        'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
        'dist_noise_ne': 200, #for no emitting If not given, set to dist_noise
        'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
        'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
        'increase_max_lattice_width': False,
        'export_graph': False,
        'link_types': str(np.sort(link_types_allowed)),
        'allow_wrongway': False
    }

# Append the current settings to the history table as its last row.
row = pd.DataFrame([matching_settings])
matching_settings_df = pd.concat([matching_settings_df,row],ignore_index=True)
# Keep the appended row as laid out in the combined table (same column order,
# NaN for any column the current settings do not define) before de-duplicating.
current_settings = matching_settings_df.iloc[-1]

if matching_settings_df.duplicated().any():
    print('Settings have been used before')
matching_settings_df.drop_duplicates(inplace=True)

# Find the row that holds the current settings. The comparison is aligned by
# column name rather than by position, and two missing values count as equal
# (as they do for duplicated()/drop_duplicates()), so the lookup still works when
# the stored history has a different column order or extra/missing columns.
same_value = matching_settings_df.eq(current_settings,axis='columns')
both_missing = matching_settings_df.isna() & current_settings.isna()
is_current = (same_value | both_missing).all(axis=1)
# idxmax returns the label of the first matching row, i.e. the earliest use of these settings
matching_index = int(is_current.idxmax())

In [None]:
# Save the settings for this run together with their row index in the history table.
matching_dir = config['matching_fp']
with (matching_dir/'match_settings.pkl').open('wb') as out:
    pickle.dump((matching_index,matching_settings),out)

# export the matching settings tested
with (matching_dir/'matching_settings_df.pkl').open('wb') as out:
    pickle.dump(matching_settings_df,out)