# Map Match QAQC

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import pickle
from tqdm import tqdm
from shapely.ops import Point, LineString
import random
from importlib import reload

from bikewaysim.paths import config
from bikewaysim import map_match

In [None]:
# Show the stem of every saved full match-result pickle in the matching folder
print([
    pkl_path.stem
    for pkl_path in config['matching_fp'].glob('match_dict_full_*.pkl')
])
# Numeric suffix of the match_dict_full_*.pkl result to review
matching_index = 5

In [None]:
# First-time setup: when no matches_qaqc.pkl exists yet, start a table for
# tracking the best match found so far for each trip.
# NOTE(review): `trips` stays undefined when the file already exists, and
# nothing in this cell writes matches_qaqc.pkl -- confirm that is intended.
if not (config['matching_fp'] / 'matches_qaqc.pkl').exists():
    # only the trip id column is taken from the trips file
    trips = pd.read_pickle(config['cycleatl_fp'] / 'trips_2.pkl')[['tripid']]
    # empty (NaN) columns reserved for the QAQC inputs
    trips = trips.assign(matching_index=np.nan, match_rating=np.nan)
    


In [None]:
# Column names to keep (trip id, times, coordinates, GPS point statistics,
# distances, speeds and user/trip descriptors).
keep_cols = [
    'tripid', 'start_time', 'end_time', 'start_lon', 'start_lat', 'end_lon',
    'end_lat', 'start_X', 'start_Y', 'end_X', 'end_Y', 'duration',
    'total_points', 'avg_accuracy', 'max_delta_time', 'mean_delta_time',
    'max_distance_ft', 'avg_distance_ft', 'total_distance_ft',
    'first_to_last_ft', 'max_speed_mph', 'min_speed_mph', 'avg_speed_mph',
    'userid', 'trip_type', 'description', 'old_userid',
]

In [None]:
# Show the column labels of the trips table.
# NOTE(review): `trips` is only assigned in the earlier cell when
# matches_qaqc.pkl does not exist -- confirm it is defined before running this.
trips.columns

In [None]:
# Match results for the selected matching_index
match_dict = pickle.loads(
    (config['matching_fp'] / f'match_dict_full_{matching_index}.pkl').read_bytes()
)

# GPS traces
coords_dict = pickle.loads(
    (config['cycleatl_fp'] / 'reduced_spacing.pkl').read_bytes()
)

# Table of the matching settings that were tested, kept for reference
matching_settings_df = pickle.loads(
    (config['matching_fp'] / 'matching_settings_df.pkl').read_bytes()
)

# Network used for matching: the pickle holds a (links, nodes) pair
exploded_links, exploded_nodes = pickle.loads(
    (config['matching_fp'] / 'map_con.pkl').read_bytes()
)
map_con = map_match.make_network(exploded_links, exploded_nodes)
link_types_allowed = exploded_links['link_type'].unique().tolist()

# Second coords dict, loaded because it has the GPS recording attributes
coords_dict0 = pickle.loads(
    (config['matching_fp'] / 'coords_dict.pkl').read_bytes()
)

# Network layers used for the visualizations, with the osm_* columns renamed
# to the plain linkid / A / B / N names
links = gpd.read_file(
    config['network_fp'] / 'final_network.gpkg', layer='edges'
).rename(columns={'osm_linkid': 'linkid', 'osm_A': 'A', 'osm_B': 'B'})
nodes = gpd.read_file(
    config['network_fp'] / 'final_network.gpkg', layer='nodes'
).rename(columns={'osm_N': 'N'})

In [None]:
# Display the table of tested matching settings loaded from matching_settings_df.pkl
matching_settings_df

# Map Matching Settings
Check the [Leuven Map Matching documentation](https://github.com/wannesm/LeuvenMapMatching/blob/9ca9f0b73665252f2ee492fae9dd243feef2f39d/leuvenmapmatching/matcher/distance.py) for more information.

Basic Stats

In [None]:
# Threshold handed to mapmatch_results
# (presumably the match ratio separating successful from substandard -- confirm)
cutoff = 0.9
successful, substandard, failed, match_ratios = map_match.mapmatch_results(match_dict, cutoff)

# Build the lookup sets once; concatenating the lists inside the
# comprehension re-created and linearly scanned them for every key.
matched_ids = set(successful) | set(substandard)
failed_ids = set(failed)

# remove failed matches from match_dict
match_dict = {key: item for key, item in match_dict.items() if key in matched_ids}

# only keep full traces for the failed matches
coords_dict = {key: item for key, item in coords_dict.items() if key in failed_ids}

# Examine a match

In [None]:
# Trip to inspect
tripid = 1488
# Trips present in match_dict are drawn with their match; any other trip is
# drawn from coords_dict with the failed-match view.
m = (
    map_match.visualize_match(tripid, match_dict, links, config)
    if tripid in match_dict
    else map_match.visualize_failed_match(tripid, coords_dict, links, config)
)
m

# Re-match

In [None]:
# Re-run the matcher for the current `tripid` only, with hand-tuned settings.
# The inputs and result live in their own variables so the notebook-wide
# `coords_dict` and `match_dict` (used by the cells below) are not overwritten.
# Raises KeyError right away if the trip has no trace in coords_dict0.
rematch_trace = coords_dict0[tripid]
matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
    'non_emitting_states': False, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 50, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #for no emitting If not given, set to dist_noise
    'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False,
    'link_types': str(np.sort(link_types_allowed)),
    'allow_wrongway': False,
}
# One-entry dict, in the shape visualize_match takes
rematch_dict = {
    tripid: map_match.leuven_match(rematch_trace, matching_settings, map_con, exploded_links)
}
map_match.visualize_match(tripid, rematch_dict, links, config)

# Examine failed Matches

In [None]:
import random
# Pick one of the failed trips at random
# (random.choice raises IndexError if `failed` is empty)
tripid = random.choice(failed)
# uncomment to inspect a specific trip instead of a random one
# tripid = 1637
print(tripid)
# Draw the trip from coords_dict with the failed-match view
map_match.visualize_failed_match(tripid, coords_dict, links, config)

# Examine suboptimal matches

In [None]:
# Pick one of the substandard trips at random, print its id and draw its match
tripid = random.choice(substandard)
print(tripid)
map_match.visualize_match(tripid, match_dict, links, config)

# Examine successful matches

In [None]:
# Pick one of the successful trips at random, print its id and draw its match
tripid = random.choice(successful)
print(tripid)
map_match.visualize_match(tripid, match_dict, links, config)

# Examine matches around points

In [None]:
# TODO: given an input point, retrieve all of the trips that have GPS points near it
# (useful for diagnosing failed matches); this cell has no code yet

# Examine other

In [None]:
# Display the links table loaded from the 'edges' layer of final_network.gpkg
links

In [None]:
# Count how many links of each link_type have oneway equal to True
links.loc[links['oneway']==True,'link_type'].value_counts()

In [None]:
# Inspect one specific trip: print its id and draw its match from match_dict
tripid = 3767
print(tripid)
map_match.visualize_match(tripid, match_dict, links, config)

In [None]:
#TODO find all trips with some sort of backtracking
# Current proxy: trips whose matched edges contain the same linkid more than once.
# A comprehension is used so the notebook-level `tripid` is not rebound to the
# last key of match_dict as a side effect of the scan.
backtracking = [
    trip_key
    for trip_key, trip_match in match_dict.items()
    if trip_match['edges']['linkid'].duplicated().any()
]

In [None]:
# Re-import map_match so edits to its source are picked up
reload(map_match)
# Pick one of the trips flagged in `backtracking` at random and draw its match
tripid = random.choice(backtracking)
print(tripid)
map_match.visualize_match(tripid, match_dict, links, config)

In [None]:
# Preview one trip with every repeated link removed from its matched edges
tripid = 25874
test = match_dict[tripid]['edges']
# keep=False marks every occurrence of a repeated linkid, so all copies are dropped
test = test[~test['linkid'].duplicated(keep=False)]
# Shallow-copy the trip's entry before swapping in the filtered edges;
# assigning through a reference to match_dict[tripid] would overwrite the
# real match results used by the other cells.
test0 = {tripid: {**match_dict[tripid], 'edges': test}}
print(tripid)
map_match.visualize_match(tripid, test0, links, config)

# QAQC Process
- Keyed by the tripid + match settings so that we can map match each trip according to the best settings possible
- Assign a match rating (1-5)
- Add notes (e.g., enable one way, missing links, etc.)

In [None]:
from importlib import reload
# Re-import map_match so edits to its source are picked up
reload(map_match)
# Run map_match.qaqc_mapmatches over the failed trip ids
map_match.qaqc_mapmatches(failed,match_dict,matching_index,matching_settings_df,coords_dict,links,config)

In [None]:
from importlib import reload
# Re-import map_match so edits to its source are picked up
reload(map_match)
# Run map_match.qaqc_mapmatches over the substandard trip ids
map_match.qaqc_mapmatches(substandard,match_dict,matching_index,matching_settings_df,coords_dict,links,config)

In [None]:
from importlib import reload
# Re-import map_match so edits to its source are picked up
reload(map_match)
# Run map_match.qaqc_mapmatches over the successful trip ids
map_match.qaqc_mapmatches(successful,match_dict,matching_index,matching_settings_df,coords_dict,links,config)