# Preparing Matched Traces for Calibration
- Decide which traces to keep using match_ratio
- Determine which links should be included for routing (+ include all links found during map matching)
- Format the link attributes for calibration
- Calculate shortest paths on the impedance network

In [1]:
import pickle
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
from shapely.ops import MultiLineString, LineString
import geopandas as gpd

from bikewaysim.paths import config
from bikewaysim.impedance_calibration import speedfactor, stochastic_optimization
from bikewaysim.map_matching import map_match
from bikewaysim.network import prepare_network, modeling_turns
from bikewaysim.routing import rustworkx_routing_funcs
from bikewaysim.map_matching import post_process

# Filter the map matching results

In [2]:
# Show which match dicts exist in the matching folder, then choose one by index
available_match_dicts = [fp.stem for fp in config['matching_fp'].glob('match_dict_full_*.pkl')]
print(available_match_dicts)
# Index of the match dict to use (the N in match_dict_full_N.pkl)
matching_index = 0
# matching_index = 5

['match_dict_full_0']


In [3]:
# Load the selected map matching results
match_dict_fp = config['matching_fp'] / f'match_dict_full_{matching_index}.pkl'
with match_dict_fp.open('rb') as fh:
    match_dict = pickle.load(fh)

# pct of points that need to be matched for a trace to be kept
cutoff = 0.90
above_threshold, below_threshold, failed_matches, match_ratios = post_process.mapmatch_results(match_dict,cutoff)

# keep only the trips that post_process.mapmatch_results returned in above_threshold
match_dict = {tripid: match for tripid, match in match_dict.items() if tripid in above_threshold}

597 / 682 (88%) successful matches
81 / 682 (12%) partial matches
4 / 682 (1%) failed matches


In [4]:
# Trip and user attributes, restricted to the trips kept in match_dict
matched_tripids = list(match_dict.keys())
trips = pd.read_pickle(config['cycleatl_fp']/'trips_4.pkl')
users = pd.read_pickle(config['cycleatl_fp']/'users_4.pkl')

trips = trips.loc[trips['tripid'].isin(matched_tripids)]
# only the users that made at least one of the remaining trips
matched_userids = set(trips['userid'].tolist())
users = users[users['userid'].isin(matched_userids)]

print(len(trips),'trips')
print(len(users),'users')

597 trips
259 users


# Group trips for calibration

In [5]:
# by user: one (userid, [tripids]) entry for every user
tripids_by_user = trips.groupby('userid')['tripid'].unique().reset_index()
ready_for_calibration_users = [(row[0],list(row[1])) for row in tripids_by_user.values]

In [6]:
# Build the named trip subsets ('random', 'debug', 'notfearless', 'fearless')
# and add them to ready_for_calibration_users.
import random

# select one trip from each user (seeded so the draw is reproducible)
random.seed(2)
subset = trips.groupby('userid')['tripid'].apply(lambda x: random.choice(list(x))).reset_index()

# debug: a fixed sample of 20 trips; the seed is reset so the sample does not
# depend on how many draws the per-user selection above consumed
random.seed(2)
debug = random.sample(trips['tripid'].tolist(),20)

# by rider type: split the one-trip-per-user subset on the users' rider_type.
# Users whose rider_type is anything other than 'Strong & fearless' go to
# 'notfearless'; every other userid in the subset goes to 'fearless'.
not_fearless_users = users.loc[users['rider_type']!='Strong & fearless','userid'].tolist()
# set copy so each membership test below is O(1) instead of a list scan
not_fearless_userids = set(not_fearless_users)
not_fearless = ('notfearless',[tripid for userid, tripid in subset.values if userid in not_fearless_userids])
fearless = ('fearless',[tripid for userid, tripid in subset.values if userid not in not_fearless_userids])

named_subsets = [('random',subset['tripid'].tolist()), ('debug',debug), not_fearless, fearless]

# Remove any copies left behind by an earlier run of this cell before adding the
# subsets, so re-running the cell does not store the same subset twice.
named_subset_labels = {label for label, _ in named_subsets}
ready_for_calibration_users[:] = [
    entry for entry in ready_for_calibration_users if entry[0] not in named_subset_labels
]
ready_for_calibration_users.extend(named_subsets)

In [7]:
# Save the trip subsets for the calibration step
subsets_fp = config['calibration_fp'] / 'subsets.pkl'
with subsets_fp.open('wb') as fh:
    pickle.dump(ready_for_calibration_users,fh)

## Network Exceptions
Get list of all links used in map matching to make sure these are kept in the calibration network

In [8]:
# Every row of each trip's 'edges' table, as a tuple, across all kept trips
map_matching_links = {
    tuple(edge)
    for match in match_dict.values()
    for edge in match['edges'].values
}

# Create calibration network
Create dummy variables and make any other changes that weren't done in the final network export step.

In [9]:
network_fp = config['network_fp']

# has the directional variables
directed_links = pd.read_parquet(network_fp/'directed_edges.parquet')
# non-directed
# NOTE(review): this file has a .parquet extension but is read with read_pickle -- confirm how it was written
links = pd.read_pickle(network_fp/'final_network_edges.parquet')
turns = pd.read_parquet(network_fp/'turns_df.parquet')

# lookup of node 'N' value -> node geometry
nodes_gdf = gpd.read_file(network_fp/'final_network.gpkg',layer='nodes')
nodes = dict(zip(nodes_gdf['N'],nodes_gdf.geometry))

In [10]:
# Replace the directional attributes stored on the non-directed links with the
# per-direction values from directed_links (joined on linkid)
link_cols_drop = ['A','B','ascent_ft','ascent_grade_cat','descent_ft','descent_grade_cat','facility_fwd','facility_rev'] # drop directional attributes
directed_cols_to_add = ['linkid','A','B','reverse_link','ascent_ft','ascent_grade_cat','facility_fwd']

links = links.drop(columns=link_cols_drop)
directed_attrs = directed_links[directed_cols_to_add]
links = pd.merge(links,directed_attrs,on='linkid')

In [11]:
# Remove wrongway travel: a link direction is "wrong way" when the link is
# one-way and the row is the reverse direction. Anything used by a matched trace
# (present in map_matching_links) is kept regardless.
oneway_dict = dict(zip(links['linkid'],links['oneway']))
turns['source_oneway'] = turns['source_linkid'].map(oneway_dict)
turns['target_oneway'] = turns['target_linkid'].map(oneway_dict)
del oneway_dict

# Exceptions are wrapped in a boolean Series on the frame's own index: pandas
# deprecates logical ops between a Series and a plain list, and sharing the
# index keeps the row-by-row pairing the list had.
source_exception = pd.Series(
    [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in turns[['source_linkid','source_reverse_link']].values],
    index=turns.index, dtype=bool)
target_exception = pd.Series(
    [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in turns[['target_linkid','target_reverse_link']].values],
    index=turns.index, dtype=bool)

# True where the turn's source / target link is travelled the wrong way
source_wrongway = (turns[['source_oneway','source_reverse_link']] == True).all(axis=1)
target_wrongway = (turns[['target_oneway','target_reverse_link']] == True).all(axis=1)

# keep a turn only when both of its links are allowed (not wrong way, or an exception)
keep_turn = (~source_wrongway | source_exception) & (~target_wrongway | target_exception)
# .copy() so later column assignments do not write into a slice of the original frame
turns = turns[keep_turn].copy()

#remove wrongway links
#TODO did we remove these in the export network step too?
exception = pd.Series(
    [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in links[['linkid','reverse_link']].values],
    index=links.index, dtype=bool)
wrongway_link = (links[['oneway','reverse_link']]==True).all(axis=1)
links = links.loc[~wrongway_link | exception].copy()

#TODO post GDOT
#add elevation adjusted travel times based on assumed speed on flat ground
# speedfactor.calculate_adjusted_speed(links,9)
assumed_speed_mph = 9
# length / 5280 -> miles, / mph -> hours, * 60 -> minutes
# (assumes links.length is in feet -- TODO confirm against the projected CRS)
links['travel_time_min'] = (links.length / 5280 / assumed_speed_mph * 60).round(8)

## create dummy variables for modeling


In [12]:
# show the distinct grade categories present on the links
links['ascent_grade_cat'].unique()

array(['[0,4)', '[4,6)', '[6,inf)'], dtype=object)

In [13]:
# Dummy (0/1) variables for the link and turn attributes.

# lanes per direction
links['1lpd'] = (links['lanes'] == 1).astype(int)
links['2lpd'] = (links['lanes'] == 2).astype(int)
# the label is "3+", so flag three or more lanes; with == 3 a link with more
# than three lanes would get 0 in every lane dummy
# (assumes 'lanes' is numeric, as the == 1 / == 2 comparisons imply -- TODO confirm)
links['3+lpd'] = (links['lanes'] >= 3).astype(int)

# speed
for category in ['[0,30]','(30,40]','(40,inf)']:
    links[f'{category} mph'] = (links['speed']==category).astype(int)
# combined flag: either of the two categories above 30
links['(30,inf) mph'] = (links[['(30,40] mph','(40,inf) mph']] == 1).any(axis=1).astype(int)

# aadt
for category in ['[0,4k)','[4k,10k)','[10k,inf)']:
    links[f'{category} aadt'] = (links['AADT']==category).astype(int)

# grade
for category in ['[0,4)','[4,6)','[6,inf)']:
    links[f'{category} grade'] = (links['ascent_grade_cat']==category).astype(int)

# bicycle infra
links['bike lane'] = links['facility_fwd'].isin(['bike lane','bufferred bike lane']).astype(int)
links['cycletrack'] = links['facility_fwd'].isin(['cycletrack']).astype(int)
links['multi use path'] = links['facility_fwd'].isin(['multi use path']).astype(int)
links['multi use path and cycletrack'] = (links[['cycletrack','multi use path']] == 1).any(axis=1).astype(int)

# turns
# a missing unsig_crossing value is treated as "not an unsignalized crossing"
turns.loc[turns['unsig_crossing'].isna(),'unsig_crossing'] = False
turns['unsig_crossing'] = turns['unsig_crossing'].astype(int)
# left / right turns only count when both the source and the target link are roads
road_to_road = (turns['source_link_type']=='road') & (turns['target_link_type']=='road')
turns['left_turn'] = ((turns['turn_type']=='left') & road_to_road).astype(int)
turns['right_turn'] = ((turns['turn_type']=='right') & road_to_road).astype(int)


In [14]:
# display the AADT column (values and dtype) for inspection
links['AADT']

0          [0,4k)
1          [0,4k)
2        [4k,10k)
3        [4k,10k)
4          [0,4k)
           ...   
64475      [0,4k)
64476      [0,4k)
64477      [0,4k)
64478      [0,4k)
64479      [0,4k)
Name: AADT, Length: 60756, dtype: category
Categories (3, object): ['[0,4k)' < '[4k,10k)' < '[10k,inf)']

GDOT report variables

In [15]:
# Build the GDOT report variables on a copy, then join them back onto links
report_cols = ['multi use path report','bike lane report','lanes report','above_4 report']
links0 = links.copy()

facility = links0['facility_fwd']
links0['multi use path report'] = facility.isin(['multi use path','cycletrack']).astype(int)
links0['bike lane report'] = facility.isin(['bike lane','bufferred bike lane']).astype(int)

# non road links were given lanes = 0
links0['lanes report'] = links0['lanes']
is_non_road = links0['link_type'].isin(['bike','pedestrian','sidewalk'])
is_path = links0['multi use path report'] == 1
links0.loc[is_path | is_non_road,'lanes report'] = 0

# just above 4% grade
links0['above_4 report'] = links0['ascent_grade_cat'].isin(['[4,6)','[6,inf)']).astype(int)

# merge back into links
links0 = links0[['linkid','reverse_link'] + report_cols]
links = pd.merge(links,links0,suffixes=('',' report'),on=['linkid','reverse_link'])

## GDOT Base Case
Pedestrian paths that are NOT multi-use trails and are flat

In [16]:
# gdot_base = 1 when every GDOT report variable on the link is 0
gdot_report_cols = ['multi use path report','bike lane report','lanes report','above_4 report']
links['gdot_base'] = links[gdot_report_cols].eq(0).all(axis=1).astype(int)

## New Base Case
Pedestrian paths + residential roads (1 lane per direction, no bicycle facility, < 4k aadt, < 4%, speed 40 or below)

In [17]:
# new_base = 1 when none of the non-base dummy variables is set on the link
# NOTE(review): '(30,inf) mph' must be 0 here, i.e. speeds above 30 are excluded,
# while the section text says "speed 40 or below" -- confirm which is intended
non_base_cols = [
    '2lpd','3+lpd',
    '(30,inf) mph',
    '[4k,10k) aadt','[10k,inf) aadt',
    '[4,6) grade','[6,inf) grade',
    'bike lane','cycletrack','multi use path',
]
links['new_base'] = links[non_base_cols].eq(0).all(axis=1).astype(int)

# Network Filtering

In [18]:
# only allow these types for routing unless there's an exception in the matched traces
link_types_allowed = ['bike','pedestrian','road']
# The exceptions are wrapped in a boolean Series on links' own index: pandas
# deprecates logical ops between a Series and a plain list, and sharing the
# index keeps the row-by-row pairing the list had.
exception = pd.Series(
    [(linkid,reverse_link) in map_matching_links for linkid, reverse_link in links[['linkid','reverse_link']].values],
    index=links.index, dtype=bool)
links = links[links['link_type'].isin(link_types_allowed) | exception]

In [19]:
# Snapshot of the links before isolates are removed, plus a per-row flag for
# whether the (linkid, reverse_link) pair was used by a matched trace
before = links.copy()
exception = [tuple(pair) in map_matching_links for pair in before[['linkid','reverse_link']].values]

In [20]:
# remove isolated links (and their turns) with prepare_network.remove_isolates
links, turns = prepare_network.remove_isolates(links,turns)

# distinct node ids referenced by either end of a remaining link
node_ids = set(links['A'].tolist()) | set(links['B'].tolist())
print(links.shape[0],'links and',len(node_ids),'nodes')
print(turns.shape[0],'turns')

Before connected components: Links 23919 Nodes 11586
After connected components: Links 21755 Nodes 10368
Before connected components: Turns 123976
After connected components: Turns 33289
21755 links and 10368 nodes
33289 turns


In [21]:
# # figure this out later
# # assign attributes to the sidepaths
# sidepaths = gpd.read_file(config['bicycle_facilities_fp']/'sidepaths.gpkg',layer='sidepaths',ignore_geometry=True)[['linkid','sidepath_linkid']]
# replace = sidepaths.merge(links,on='linkid')
# replace
# # this approach currenlty 

# #list of all the attributes
# all_attrs = ['2lpd', '3+lpd', '(30,40] mph', '(40,inf) mph',
#        '[4k,10k) aadt', '[10k,inf) aadt', '[4,6) grade', '[6,inf) grade',
#        'bike lane', 'cycletrack', 'multi use path', '(30,inf) mph',
#        'multi use path and cycletrack', 'multi use path report',
#        'bike lane report', 'lanes report', 'above_4 report']
# replace = replace[['sidepath_linkid']+all_attrs]
# replace = replace[replace['sidepath_linkid'].duplicated()==False]
# replace

# links = pd.merge(links,replace,left_on='linkid',right_on='sidepath_linkid',suffixes=(None,'_new'),how='left')
# for col in replace.columns:
#     if col != 'sidepath_linkid':
#         links[col] = links[f'{col}_new'].fillna(links[col])
# links.drop(columns=[x+'_new' for x in all_attrs],inplace=True)
# # replace
# #[['multi use path','sidepath']]
# # links.columns

In [22]:
# NOTE for the set inf variable
# True for every link whose link_type is not 'road'
links['not_street'] = links['link_type'].ne('road')

In [23]:
# export calibration network: links and turns pickled together as one tuple
calibration_network_fp = config['calibration_fp']/"calibration_network.pkl"
with calibration_network_fp.open('wb') as fh:
    pickle.dump((links,turns),fh)

# Export network for QGIS

In [24]:
# Put the forward and reverse row of every link side by side, joined on linkid.
# pandas appends '_x' (forward) and '_y' (reverse) to each column both sides share.
fwd_links = links[links['reverse_link']==False].set_index('linkid')
rev_links = links[links['reverse_link']==True].set_index('linkid')
merged = pd.merge(fwd_links,rev_links,left_index=True,right_index=True,how='outer')

# Base names of the suffixed columns. Only a trailing '_x' / '_y' counts as a
# merge suffix; a substring test would also pick up columns that merely contain
# '_x' or '_y' in the middle of their name, which have no '<name>_x'/'<name>_y' pair.
cols = {name[:-2] for name in merged.columns if name.endswith(('_x','_y'))}

# Collapse a pair of values into a single value
def condense_columns(col1, col2):
    """Merge two values into one.

    Returns col2 when col1 is missing, col1 when col2 is missing or when the
    two values are equal, and otherwise the string form of the list
    [col1, col2].
    """
    if pd.isna(col1):
        return col2
    if pd.isna(col2) or col1 == col2:
        return col1
    # both present and different: keep both, as text
    return str([col1, col2])
    
# Condense every '_x' / '_y' column pair of merged into a single column
new_cols = {}
for col in tqdm(cols):
    value_pairs = merged[[col+'_x',col+'_y']].values
    new_cols[col] = [condense_columns(*pair) for pair in value_pairs]

# one row per linkid, with linkid moved from the index back to a column
undirected_links = pd.DataFrame(new_cols,index=merged.index).reset_index()
undirected_links = gpd.GeoDataFrame(undirected_links,crs=config['projected_crs_epsg'])

# columns (and their order) written to the GeoPackage
order_cols = [
    'linkid', 'osmid', 'link_type', 'oneway', 'highway', 'name', 'all_tags',
    'lanes', 'speed', 'AADT', 'ascent_grade_cat', 'facility_fwd', 'year',
    '2lpd', '3+lpd',
    '(30,40] mph', '(40,inf) mph', '(30,inf) mph',
    '[4k,10k) aadt', '[10k,inf) aadt',
    '[4,6) grade', '[6,inf) grade',
    'bike lane', 'cycletrack', 'multi use path',
    'bike lane report', 'multi use path report', 'lanes report', 'above_4 report',
    'gdot_base', 'new_base',
    'travel_time_min', 'geometry',
]
qgis_fp = config['calibration_fp']/'calibration_network.gpkg'
undirected_links[order_cols].to_file(qgis_fp,layer='final')

100%|██████████| 45/45 [00:01<00:00, 39.74it/s]


In [25]:
# without_isolates = set([tuple([x,y]) for x,y in links[['linkid','reverse_link']].values])
# missing = [tuple([x,y]) not in without_isolates for x,y in before[['linkid','reverse_link']].values]
# before[np.array(exception) & np.array(missing)].explore()#.to_file(Path.home()/'Downloads/')
# ((links[['linkid','reverse_link']]==(35062.0,False)).all(axis=1)).sum()

# Assemble the match data for shortest path routing and calibration

In [26]:
# links, turns, length_dict, geo_dict, turn_G = stochastic_optimization.import_calibration_network(config)
# base_impedance_col = "travel_time_min"
# stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
# links.set_index(['linkid','reverse_link'],inplace=True,drop=False)
# match_results = {}
# #shortest_results = {}
# failed_shortest_path = []
# for tripid, items in tqdm(match_dict.items()):

#     #get start and end linkid
#     start = tuple(match_dict[tripid]['edges'].iloc[0,:].values)
#     end = tuple(match_dict[tripid]['edges'].iloc[-1,:].values)

#     #get start and end node for shortest and impedance routing
#     start = links.loc[start,'A']
#     end = links.loc[end,'B']

#     match_results[tripid] = {
#     'origin_node': start,
#     'destination_node': end,
#     'trip_start_time': items['trace'].iloc[0,2].year,
#     'match_ratio': items['match_ratio'],
#     'matched_edges': match_dict[tripid]['edges'],
#     'shortest_edges': pd.DataFrame(stochastic_optimization.impedance_path(turns,turn_G,links,start,end)['edge_list'],columns=['linkid','reverse_link'])
#     }
# # trip_ods = pd.DataFrame.from_dict(match_results,orient='index')
# # trip_ods.reset_index(inplace=True)
# # trip_ods.rename(columns={'index':'tripid'},inplace=True)
# #export for impedance calibration
# with (config['calibration_fp']/'ready_for_calibration.pkl').open('wb') as fh:
#     pickle.dump(match_results,fh)
# # links.reset_index(inplace=True)

In [27]:
# Re-import the routing module so edits made to it since the kernel started are picked up
from importlib import reload
reload(rustworkx_routing_funcs)
# Load the calibration network through the routing module; this rebinds links and turns
links, turns, length_dict, geo_dict, turn_G = rustworkx_routing_funcs.import_calibration_network(config)
# Link column passed to back_to_base_impedance as the base impedance
base_impedance_col = "travel_time_min"
rustworkx_routing_funcs.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
# Index links by (linkid, reverse_link); drop=False keeps both as regular columns too
links.set_index(['linkid','reverse_link'],inplace=True,drop=False)
# Filled in by the cells below, keyed by tripid
match_results = {}
#shortest_results = {}
failed_shortest_path = []

In [37]:
#rustworkx version
reload(post_process)
# origin / destination for every trip in match_dict
starts, ends = post_process.get_ods_from_match_dict(match_dict,links)

added_nodes = rustworkx_routing_funcs.add_virtual_edges(starts,ends,links,turns,turn_G)

import numpy as np

# Assign each trip a time period: the most recent link 'year' that is not after
# the year of the trip's first trace row.
# time_periods must keep exactly one entry per trip, in match_dict order,
# because the next cell zips it against match_dict.keys(), starts and ends.
time_periods = []
link_dates = sorted(links['year'].dropna().unique())[::-1]  # descending
link_dates_arr = np.array(link_dates)  # built once, reused for every trip
for tripid, items in match_dict.items():
    # year of the value in the third column of the first trace row
    trip_date = items['trace'].iloc[0,2].year
    cond = (trip_date < link_dates_arr)
    if cond.all():
        print('trip is before any built infrastructure')
        # keep a placeholder so the remaining trips stay aligned with their own
        # year; skipping the append would shift every later trip by one
        time_periods.append(None)
        continue
    idx = cond.argmin() # grabs the closest year in descending order
    time_periods.append(link_dates[idx])

shortest_tripid, shortest_length, shortest_edges = rustworkx_routing_funcs.rx_shortest_paths(list(zip(starts,ends)),turn_G)

In [38]:
# Assemble one record per trip for calibration.
# NOTE(review): zip stops at its shortest input, so starts, ends, time_periods and
# shortest_edges each need one entry per trip in match_dict, in the same order.
trip_inputs = zip(match_dict.keys(),starts,ends,time_periods,shortest_edges)
for tripid, start, end, year, shortest_edge in trip_inputs:
    trip_match = match_dict[tripid]
    match_results[tripid] = dict(
        origin_node=start,
        destination_node=end,
        trip_start_time=year,
        match_ratio=trip_match['match_ratio'],
        matched_edges=trip_match['edges'],
        shortest_edges=pd.DataFrame(shortest_edge,columns=['linkid','reverse_link']),
    )

In [39]:
# Save the assembled match results for impedance calibration
ready_fp = config['calibration_fp']/'ready_for_calibration.pkl'
with ready_fp.open('wb') as fh:
    pickle.dump(match_results,fh)

In [None]:
# TODO add this to the export network section
# # add this for later
# link_types = dict(zip(links['linkid'],links['link_type']))
# turns['source_link_type'] = turns['source_linkid'].map(link_types)
# turns['target_link_type'] = turns['source_linkid'].map(link_types)

# # #unit conversions
# links['length_mi'] = (links['length_ft'] / 5280).round(2)
# links['ascent_ft'] = (links['ascent_m'] * 3.28084).round(0)
# #links.drop(columns=['length_ft','ascent_m'],inplace=True)

# #get node degree
# degree = links['A'].append(links['B']).value_counts()
# links['A_deg'] = links['A'].map(degree)
# links['B_deg'] = links['B'].map(degree)
# #remove excess dead end pedestrian links
# dead_ends = (links['link_type']=='pedestrian')&((links['A_deg']==1)|(links['B_deg']==1))
# links = links[dead_ends==False]
# #unique scenario but there's an expressway tag that needs to be removed
# import ast
# john_lewis_freedom_pkwy = links['all_tags'].apply(lambda x: ast.literal_eval(x).get('expressway',0)=='yes')
# links = links[john_lewis_freedom_pkwy==False]
# surfaces = ['dirt','unpaved','gravel','fine_gravel','dirt/sand','ground']
# unpaved = links['all_tags'].apply(lambda x: ast.literal_eval(x).get('surface',0) in surfaces)
# #links[unpaved].explore(tooltip=False)
# links = links[unpaved==False]
# #unpaved.unique()