# Preparing Matched Traces for Calibration
- Decide which traces to keep using match_ratio
- Use `network/step_7_export_network.ipynb` to modify variables and determine which links should be included for routing
    - An error is returned if the network does not include every network feature used in the map matching step
- Format the link attributes for calibration
- Calculate shortest paths on the impedance network

In [1]:
import pickle
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
from shapely.ops import MultiLineString, LineString
import geopandas as gpd

from bikewaysim.paths import config
from bikewaysim.impedance_calibration import speedfactor, stochastic_optimization
from bikewaysim.map_matching import map_match
from bikewaysim.network import prepare_network, modeling_turns
from bikewaysim.routing import rustworkx_routing_funcs
from bikewaysim.map_matching import post_process

# Import network

In [None]:
# Load the CycleAtlanta trips and keep only those that have a map-matching result.
# NOTE(review): match_dict is not defined in any cell shown here; it must
# already be in the session before this cell runs.
trips = pd.read_pickle(config['cycleatl_fp'] / 'trips_4.pkl')
has_match = trips['tripid'].isin(list(match_dict))
trips = trips.loc[has_match]

# Load the users and keep only those who made at least one retained trip.
users = pd.read_pickle(config['cycleatl_fp'] / 'users_4.pkl')
made_trip = users['userid'].isin(trips['userid'].unique())
users = users[made_trip]

# Report how many trips and users remain.
print(len(trips), 'trips')
print(len(users), 'users')

597 trips
259 users


## Network Sync
Make sure all of the links used for map matching are present in the network.

In [8]:
# Collect every distinct row of each trip's 'edges' table as a tuple, pooled
# across all trips in match_dict.
map_matching_links = set()
for matched in match_dict.values():
    map_matching_links.update(map(tuple, matched['edges'].values))

In [None]:
# One boolean per row of links: True when that (linkid, reverse_link) pair
# appears among the map-matched links.
exception = [
    tuple(pair) in map_matching_links
    for pair in links[['linkid','reverse_link']].values
]

## Set the base case
The base case is used as a reference point for impedance. For links, the base case is travel time; for turns, it is zero impedance.

In [None]:
# Disabled alternative base case: pedestrian paths that are NOT multi-use trails and are flat
# links['gdot_base'] = (links[['multi use path report','bike lane report','lanes report','above_4 report']] == 0).all(axis=1).astype(int)

# Base-case flag: 1 when every attribute column listed below is 0, otherwise 0.
# Intended base case: pedestrian paths + residential roads (1 lane per
# direction, no bicycle facility, < 4k aadt, < 4% grade, speed 40 or below).
# NOTE(review): the '(30,inf) mph' column name suggests the speed cutoff is
# 30 mph rather than 40 — confirm which one is intended.
links['new_base'] = (
    links[[
        '2lpd',
        '3+lpd',
        '(30,inf) mph',
        '[4k,10k) aadt',
        '[10k,inf) aadt',
        '[4,6) grade',
        '[6,inf) grade',
        'bike lane',
        'cycletrack',
        'multi use path',
    ]]
    .eq(0)
    .all(axis=1)
    .astype(int)
)

In [25]:
# without_isolates = set([tuple([x,y]) for x,y in links[['linkid','reverse_link']].values])
# missing = [tuple([x,y]) not in without_isolates for x,y in before[['linkid','reverse_link']].values]
# before[np.array(exception) & np.array(missing)].explore()#.to_file(Path.home()/'Downloads/')
# ((links[['linkid','reverse_link']]==(35062.0,False)).all(axis=1)).sum()

# Perform shortest path routing and create the different networks for each year

In [26]:
# links, turns, length_dict, geo_dict, turn_G = stochastic_optimization.import_calibration_network(config)
# base_impedance_col = "travel_time_min"
# stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
# links.set_index(['linkid','reverse_link'],inplace=True,drop=False)
# match_results = {}
# #shortest_results = {}
# failed_shortest_path = []
# for tripid, items in tqdm(match_dict.items()):

#     #get start and end linkid
#     start = tuple(match_dict[tripid]['edges'].iloc[0,:].values)
#     end = tuple(match_dict[tripid]['edges'].iloc[-1,:].values)

#     #get start and end node for shortest and impedance routing
#     start = links.loc[start,'A']
#     end = links.loc[end,'B']

#     match_results[tripid] = {
#     'origin_node': start,
#     'destination_node': end,
#     'trip_start_time': items['trace'].iloc[0,2].year,
#     'match_ratio': items['match_ratio'],
#     'matched_edges': match_dict[tripid]['edges'],
#     'shortest_edges': pd.DataFrame(stochastic_optimization.impedance_path(turns,turn_G,links,start,end)['edge_list'],columns=['linkid','reverse_link'])
#     }
# # trip_ods = pd.DataFrame.from_dict(match_results,orient='index')
# # trip_ods.reset_index(inplace=True)
# # trip_ods.rename(columns={'index':'tripid'},inplace=True)
# #export for impedance calibration
# with (config['calibration_fp']/'ready_for_calibration.pkl').open('wb') as fh:
#     pickle.dump(match_results,fh)
# # links.reset_index(inplace=True)

In [27]:
# Set up the calibration network and the empty containers used by the routing cells.
# `reload` is bound here and reused by the next cell, so this cell must run first.
from importlib import reload
# Re-import the routing module so source edits are picked up without restarting the kernel.
reload(rustworkx_routing_funcs)
# Load the link table, turn table, two lookup dicts and the turn graph for calibration.
links, turns, length_dict, geo_dict, turn_G = rustworkx_routing_funcs.import_calibration_network(config)
# Link column used as the base-case impedance.
base_impedance_col = "travel_time_min"
# presumably resets the impedances held in links/turns/turn_G to the base case — confirm in rustworkx_routing_funcs
rustworkx_routing_funcs.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
# Index links by (linkid, reverse_link); drop=False keeps both as regular columns too.
links.set_index(['linkid','reverse_link'],inplace=True,drop=False)
# Per-trip records keyed by tripid; filled in by a later cell and then pickled.
match_results = {}
#shortest_results = {}
# NOTE(review): not referenced again in the cells shown here.
failed_shortest_path = []

In [28]:
# rustworkx version
# Derive each trip's origin/destination, assign each trip a network year, and
# route the shortest path for every origin/destination pair.
import numpy as np

reload(post_process)

# starts/ends are assumed to follow match_dict iteration order; the cell that
# builds match_results zips them against match_dict.keys().
starts, ends = post_process.get_ods_from_match_dict(match_dict,links)

# presumably attaches the trip origins/destinations to turn_G — confirm in rustworkx_routing_funcs
added_nodes = rustworkx_routing_funcs.add_virtual_edges(starts,ends,links,turns,turn_G)

# Distinct link years, most recent first.
link_dates = sorted(links['year'].dropna().unique())[::-1]
link_years = np.array(link_dates)  # built once instead of once per trip

# One entry per trip, in match_dict order. This list MUST stay the same length
# as starts/ends/shortest_edges: the next cell zips them together, and zip
# silently stops at the shortest input, so a skipped entry would shift every
# later trip onto the wrong year and drop the trailing trips.
time_periods = []
for items in match_dict.values():
    # assumes column 2 of the trace holds the point timestamp — TODO confirm
    trip_year = items['trace'].iloc[0,2].year
    is_before = trip_year < link_years
    if is_before.all():
        # No link year is <= the trip year (also the case when there are no
        # link years at all). Keep a placeholder so the lists stay aligned.
        print('trip is before any built infrastructure')
        time_periods.append(None)
        continue
    # link_years is descending, so the first False marks the most recent
    # link year that is not after the trip.
    idx = is_before.argmin()
    time_periods.append(link_dates[idx])

shortest_tripid, shortest_length, shortest_edges = rustworkx_routing_funcs.rx_shortest_paths(list(zip(starts,ends)),turn_G)

In [29]:
# Assemble one calibration record per trip. The parallel lists are consumed in
# match_dict iteration order, and zip stops at the shortest of its inputs.
for (tripid, matched), origin, destination, period, path_edges in zip(
    match_dict.items(), starts, ends, time_periods, shortest_edges
):
    shortest_df = pd.DataFrame(path_edges, columns=['linkid','reverse_link'])
    match_results[tripid] = {
        'origin_node': origin,
        'destination_node': destination,
        'trip_start_time': period,
        'match_ratio': matched['match_ratio'],
        'matched_edges': matched['edges'],
        'shortest_edges': shortest_df,
    }

In [None]:
# Persist the per-trip records for the impedance calibration step.
calibration_pkl = config['calibration_fp'] / 'ready_for_calibration.pkl'
with calibration_pkl.open('wb') as out_file:
    pickle.dump(match_results, out_file)

In [31]:
# TODO add this to the export network section
# Everything in this cell is disabled; it is kept as a sketch of link clean-up
# steps to move into the network export notebook.
# # add this for later
# link_types = dict(zip(links['linkid'],links['link_type']))
# turns['source_link_type'] = turns['source_linkid'].map(link_types)
# NOTE(review): the next line maps 'source_linkid' again for the *target* link
# type; presumably it should map the target link id column — confirm before enabling.
# turns['target_link_type'] = turns['source_linkid'].map(link_types)

# # #unit conversions
# links['length_mi'] = (links['length_ft'] / 5280).round(2)
# links['ascent_ft'] = (links['ascent_m'] * 3.28084).round(0)
# #links.drop(columns=['length_ft','ascent_m'],inplace=True)

# #get node degree
# NOTE(review): Series.append was removed in pandas 2.0; use
# pd.concat([links['A'], links['B']]) when re-enabling the next line.
# degree = links['A'].append(links['B']).value_counts()
# links['A_deg'] = links['A'].map(degree)
# links['B_deg'] = links['B'].map(degree)
# #remove excess dead end pedestrian links
# dead_ends = (links['link_type']=='pedestrian')&((links['A_deg']==1)|(links['B_deg']==1))
# links = links[dead_ends==False]
# #unique scenario but there's an expressway tag that needs to be removed
# import ast
# john_lewis_freedom_pkwy = links['all_tags'].apply(lambda x: ast.literal_eval(x).get('expressway',0)=='yes')
# links = links[john_lewis_freedom_pkwy==False]
# surfaces = ['dirt','unpaved','gravel','fine_gravel','dirt/sand','ground']
# unpaved = links['all_tags'].apply(lambda x: ast.literal_eval(x).get('surface',0) in surfaces)
# #links[unpaved].explore(tooltip=False)
# links = links[unpaved==False]
# #unpaved.unique()