# Prepare Data for Map Matching

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
#from leuvenmapmatching import visualization as mmviz
import pickle
import time
import datetime
from pathlib import Path

from tqdm import tqdm
from shapely.ops import Point, LineString
import matplotlib.pyplot as plt

from importlib import reload

from bikewaysim.paths import config
from bikewaysim.map_matching import map_match, map_match_utils
from bikewaysim.network import prepare_network

## Create matching network
This section filters the network to remove features that traces shouldn't be matched to, and converts the network into a format that the leuvenmapmatching package can use.

In [2]:
# The edge and node layers are both read from the same GeoPackage file.
network_gpkg = config['network_fp']/'final_network.gpkg'
links = gpd.read_file(network_gpkg,layer='edges')
nodes = gpd.read_file(network_gpkg,layer='nodes')

In [3]:
# # remove infra built after 2016 so that traces don't get matched to it
# after = links['facility'].isin(['cycletrack','multi use path']) & \
#           (links['link_type']!='road') & \
#           links['year'].notna() & \
#           (links['year']>2016)
# # links[after].drop(columns=['all_tags']).explore()
# links = links[after==False]

In [4]:
# Link types that are kept in the matching network.
link_types_allowed = ['bike','pedestrian','road','service','connector','parking_and_driveways']
# Print the link types present in the network so they can be compared with the list above.
print(links['link_type'].unique())

#TODO add the ability to go the wrongway on residential streets ONLY
# Links whose highway value is in this list are flagged as not one-way.
allow_wrongway_on = ['residential','living_street']
wrongway_ok = links['highway'].isin(allow_wrongway_on)
links.loc[wrongway_ok,'oneway'] = False

['service' 'road' 'sidewalk' 'pedestrian' 'parking_and_driveways' 'bike'
 None]


In [5]:
# Build the map-matching inputs from the links, the nodes and the allowed link types.
# The call prints link/node counts before and after a connected-components step (see cell output).
# NOTE(review): the meaning of the trailing positional False is not visible in this notebook —
# confirm against map_match.prepare_mapmatch_network and consider passing it by keyword.
# map_con is presumably the leuvenmapmatching map object — TODO confirm.
exploded_links, exploded_nodes, map_con = map_match.prepare_mapmatch_network(links,nodes,link_types_allowed,False)

Before connected components: Links 22026 Nodes 18712
After connected components: Links 21312 Nodes 17869


In [6]:
# Write the exploded links and nodes to disk as one pickled tuple.
# Despite the file name, map_con itself is not part of what gets written.
network_pkl_fp = config['matching_fp'] / "map_con.pkl"
with network_pkl_fp.open('wb') as out_file:
    pickle.dump((exploded_links,exploded_nodes),out_file)

## Trace Data
For map matching, we're using GPS traces that have been processed so that consecutive points are spaced a certain distance apart; coordinates that fall within this distance are dropped to reduce computation time.

In [7]:
#load all traces (dict keyed by trip id)
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
with (config['cycleatl_fp']/'reduced_spacing.pkl').open('rb') as fh:
    coords_dict = pickle.load(fh)

# import trips that we want to match
with (config['cycleatl_fp']/'trips_4.pkl').open('rb') as fh:
    trips_df = pickle.load(fh)

# subset the coords dict by just the trips we're trying to match
# (the trip ids are collected into a set once, instead of rebuilding and
# scanning a list for every key in coords_dict)
tripids_to_match = set(trips_df['tripid'].tolist())
coords_dict = {key:item for key, item in coords_dict.items() if key in tripids_to_match}

# keep only points with speed_mph above 1, then drop trips left with no points
# (the speed mask is evaluated once per trace rather than twice)
moving_points = {key:item[item['speed_mph']>1] for key, item in coords_dict.items()}
coords_dict = {key:item for key, item in moving_points.items() if item.shape[0] > 0}

print('Map matching',len(coords_dict),'trips')

Map matching 682 trips


In [8]:
# Apply the privacy filter to every trace (removes points near the start and end of a trip).
# No distance is passed here, so whatever default privacy_distance defines is used.
coords_dict = {
    tripid: map_match_utils.privacy_distance(trace)
    for tripid, trace in coords_dict.items()
}

In [9]:
# Write the filtered traces that will be matched to disk as a pickle.
coords_pkl_fp = config['matching_fp'] / "coords_dict.pkl"
with coords_pkl_fp.open('wb') as out_file:
    pickle.dump(coords_dict,out_file)

# Set match settings

In [10]:
#TODO pickle the matching settings so that we can still do the tracking of the different matching settings
# Settings for this map-matching run, handed to the project's settings tracker below.
# NOTE(review): most keys appear to mirror leuvenmapmatching matcher options — confirm how
# each key is consumed downstream and which distance units the numeric values are in.
matching_settings = {
        'obs_noise': 50, #Standard deviation of noise
        'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
        'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
        'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
        'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
        'non_emitting_states': False, #Allow non-emitting states
        'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
        'max_lattice_width': 50, #Restrict the lattice (or possible candidate states per observation) to this value.
        'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
        'dist_noise_ne': 200, #Same as dist_noise but for non-emitting states (if not given, set to dist_noise)
        'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
        'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edge or returns to a position on an edge.
        'increase_max_lattice_width': False, #NOTE(review): meaning not visible in this notebook — confirm where this is read
        'export_graph': False, #NOTE(review): meaning not visible in this notebook — confirm where this is read
        'link_types': str(np.sort(link_types_allowed)), #string form of the sorted allowed link types
        'allow_wrongway': False #NOTE(review): an earlier cell sets oneway=False on residential/living_street links — confirm this flag is still accurate
    }
# Hand the settings to the tracker; for this run it printed "Settings have been used before".
map_match_utils.update_matching_settings(matching_settings)

Settings have been used before
