# Calibration QAQC
- Visualize calibration routes and compare to the chosen and shortest routes
- Trip-specific impedance routing to see if chosen route can be found
- Test different objective functions
- Try using coordinates in case the map matched trace is incorrect

In [None]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
import src.stochastic_optimization as stochastic_optimization
import src.speedfactor as speedfactor
from tqdm import tqdm
import similaritymeasures
import random
import matplotlib.pyplot as plt
from shapely.ops import Point, MultiLineString
from importlib import reload
import datetime

np.set_printoptions(suppress=True)

from shapely.ops import LineString, MultiLineString

import sys
path = Path.cwd().parent
sys.path.insert(0,str(path))
import file_structure_setup
config = file_structure_setup.filepaths(path)

from network.src import modeling_turns
from scipy.spatial.distance import directed_hausdorff

In [None]:
#TODO add google bike layer and a sat layer
import xyzservices.providers as xyz

# Basemap settings passed to the visualization helpers: the tile URL (built
# with the MapTiler API key from config), a display name with dots replaced
# by spaces, and the provider's HTML attribution.
tile_info_dict = dict(
    tiles=xyz.MapTiler.Streets.build_url(key=config['maptilerapikey']),
    name=xyz.MapTiler.Streets.name.replace('.',' '),
    attr=xyz.MapTiler.Streets.html_attribution,
)

# Import relevant files

In [None]:
links, turns, length_dict, geo_dict, turn_G = stochastic_optimization.import_calibration_network(config)
with (config['calibration_fp']/'ready_for_calibration_stats.pkl').open('rb') as fh:
    full_set = pickle.load(fh)
full_ods = stochastic_optimization.match_results_to_ods(full_set)
# for viz parts
with (config['cycleatl_fp']/"rdp.pkl").open('rb') as fh:
    coords = pickle.load(fh)

In [None]:
# Path.glob returns a one-shot generator, but the file list is iterated more
# than once below (loading every result, then picking one by name), so
# materialize it. Sorting makes the order deterministic.
post_calibration_fps = sorted((config['calibration_fp']/'post_calibration').glob('*.pkl'))

Choose a calibration method

In [None]:
modeled_list = []
for post_calibration_fp in post_calibration_fps:
    with post_calibration_fp.open('rb') as fh:
        modeled_dict = pickle.load(fh)
    modeled_list.append((post_calibration_fp.stem,modeled_dict))

In [None]:
best_performing = [x for x in post_calibration_fps if 'calibration2' in x.stem][0]
with best_performing.open('rb') as fh:
    modeled_dict = pickle.load(fh)

In [None]:
# Trips whose modeled route has a buffer overlap below 0.3 with the chosen route.
# Reads the per-trip results loaded into modeled_dict (the name `modeled` is not
# defined at this point in the notebook).
not_well_explained = [tripid for tripid, item in modeled_dict.items() if item['modeled_buffer'] < 0.3]

Get user and trip variables

In [None]:
# Load the trip and user tables and index them by their ids
with (config['cycleatl_fp']/"trips_2.pkl").open('rb') as fh:
    trips = pickle.load(fh).set_index('tripid')
with (config['cycleatl_fp']/"users_2.pkl").open('rb') as fh:
    users = pickle.load(fh).set_index('userid')

# Keep only the trips in the calibration set, and only the users who made them
trips = trips.loc[list(full_set)]
users = users[users.index.isin(set(trips['userid']))]

# First an overview of how to use the QAQC functions

## Visualize a random trip

In [None]:
reload(stochastic_optimization)
pool = not_well_explained#list(full_set.keys())
tripid = random.choice(pool)
print(tripid)
# tripid = 861

In [None]:
reload(stochastic_optimization)
print(trips.loc[tripid,['avg_speed_mph','trip_type','description']])
print(users.loc[trips.at[tripid,'userid']])
stochastic_optimization.visualize_three(tripid,full_set,modeled_list,geo_dict,coords,config['projected_crs_epsg'],tile_info_dict)

## Recalibrate one trip

In [None]:
# # determine the output name of the calibration outputs
# calibration_name = 'calibration2'

# determine variables, impedance type, and search range
betas_tup = (
    {'col':'2lpd','type':'link','range':[0,3]},
    {'col':'3+lpd','type':'link','range':[0,3]},
    {'col':'(30,40] mph','type':'link','range':[0,3]},
    {'col':'(40,inf) mph','type':'link','range':[0,3]},
    {'col':'[4k,10k) aadt','type':'link','range':[0,3]},
    {'col':'[10k,inf) aadt','type':'link','range':[0,3]},
    {'col':'[4,6) grade','type':'link','range':[0,3]},
    {'col':'[6,inf) grade','type':'link','range':[0,3]},
    {'col':'bike lane','type':'link','range':[-1,0]},
    {'col':'cycletrack','type':'link','range':[-1,0]},
    {'col':'multi use path','type':'link','range':[-1,0]},
    # {'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

# determine the objective function to use and other settings
objective_function = stochastic_optimization.buffer_overlap
batching = False
stochastic_optimization_settings = {
    'method':'pso',
    'options': {'maxiter':100,'popsize':3}
}

# links, turns, length_dict, geo_dict, turn_G = stochastic_optimization.import_calibration_network(config)

# restrict the calibration set to the single trip being inspected
one_set = {key:item for key, item in full_set.items() if key == tripid}

args = (
    [], # empty list for storing past calibration results
    betas_tup, # tuple containing the impedance spec
    stochastic_optimization.match_results_to_ods(one_set), # list of OD network node pairs needed for shortest path routing
    one_set, # dict containing the origin/dest node and map matched edges
    stochastic_optimization.link_impedance_function, # link impedance function to use
    "travel_time_min", # column with the base impedance in travel time or distance
    stochastic_optimization.turn_impedance_function, # turn impedance function to use
    links,turns,turn_G, # network parts
    objective_function, # loss function to use
    {'length_dict':length_dict,'geo_dict':geo_dict},#,'trace_dict':traces}, # keyword arguments for loss function
    True, #whether to print the results of each iteration
    True, #whether to store calibration results
    batching # whether to batch results to help speed up computation time, if yes input the number to batch with
)

start = time.time()
print([x['col'] for x in betas_tup]+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration,
             stochastic_optimization.extract_bounds(betas_tup),
             args=args,
             **stochastic_optimization_settings)
end = time.time()
# a Timedelta prints as "D days hh:mm:ss", so don't label it as hours
print(f"Took {str(pd.Timedelta(seconds=end-start).round('s'))}")
print(f"{objective_function.__name__}: {x.fun}")
print(x)

calibration_result = {
    'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
    'settings': stochastic_optimization_settings, # contains the optimization settings
    'objective_function': objective_function.__name__, # objective function used
    'results': x, # stochastic optimization outputs
    'trips_calibrated': set(one_set), # only the trip(s) in one_set were calibrated here, not all of full_set
    'past_vals': args[0], # all of the past values/guesses
    'runtime': pd.Timedelta(seconds=end-start), # a bare float would be read as nanoseconds
    'time': datetime.datetime.now()
}
# #export but don't overwrite
# export_fp = config['calibration_fp'] / f'calibration_results/{calibration_name}.pkl'
# with stochastic_optimization.uniquify(export_fp).open('wb') as fh:
#         pickle.dump(calibration_result,fh)

In [None]:
# Re-route the selected trip using the betas stored in calibration_result
base_impedance_col = "travel_time_min"
# pull the calibrated coefficient out of each impedance spec entry
betas = [x['beta'] for x in calibration_result['betas_tup']]
print(betas)
one_od = stochastic_optimization.match_results_to_ods(one_set)
# presumably resets links/turns/turn_G to the unadjusted base impedance before
# the new betas are applied -- verify against src.stochastic_optimization
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
stochastic_optimization.impedance_update(betas,calibration_result['betas_tup'],
                        stochastic_optimization.link_impedance_function,
                        base_impedance_col,
                        stochastic_optimization.turn_impedance_function,
                        links,turns,turn_G)
# one routing result per OD pair, keyed by (start_node, end_node)
modeled_results_sp = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,links,start_node,end_node) for start_node, end_node in tqdm(one_od,total=len(one_od))}

In [None]:
# Summarize the re-routed path of each trip in one_set: its edges, its length
# (summed link lengths divided by 5280), and how it compares with the shortest
# and chosen routes.
modeled_results_dict = {}
for tripid, item in one_set.items():
    od = (item['origin_node'],item['destination_node'])
    chosen = item['matched_edges'].values
    shortest = item['shortest_edges'].values
    modeled = modeled_results_sp[od]['edge_list']

    modeled_results_dict[tripid] = dict(
        modeled_edges=pd.DataFrame(modeled,columns=['linkid','reverse_link']),
        # the first element of each edge is the key into length_dict; unknown links count as 0
        modeled_length=round(np.array([length_dict.get(edge[0],0) for edge in modeled]).sum()/5280,1),
        modeled_detour=round(stochastic_optimization.detour_factor(modeled,shortest,length_dict),2),
        modeled_jaccard=round(stochastic_optimization.jaccard_index_func(chosen,modeled,length_dict),2),
        modeled_buffer=round(stochastic_optimization.buffer_overlap(chosen,modeled,geo_dict),2),
    )

In [None]:
test = [('original',modeled_dict),('recalibrated',modeled_results_dict)]

In [None]:
reload(stochastic_optimization)
stochastic_optimization.visualize_three(tripid,full_set,test,geo_dict,coords,config['projected_crs_epsg'],tile_info_dict)

In [None]:
modeled_dict[71]

In [None]:
modeled_results_dict

In [None]:
# with (config['calibration_fp']/"calibration_results.pkl").open('rb') as fh:
#     calibration_results = pickle.load(fh)
# calibration_result = caalibration_results[-1]
# calibration_result

In [None]:
one_trip = {tripid:modeled_results[tripid]}
args = (
    [], # empty list for storing past calibration results
    calibration_result['betas_tup'], # tuple containing the impedance spec
    stochastic_optimization.match_results_to_ods(one_trip), # list of OD network node pairs needed for shortest path routing
    one_trip, # dict containing the origin/dest node and map matched edges
    stochastic_optimization.link_impedance_function, # link impedance function to use
    "travel_time_min", # column with the base the base impedance in travel time or distance
    stochastic_optimization.turn_impedance_function, # turn impedance function to use
    links,turns,turn_G, # network parts
    stochastic_optimization.jaccard_index_func, # loss function to use
    {'length_dict':length_dict,'geo_dict':geo_dict}, # keyword arguments for loss function
    True, #whether to print the results of each iteration
    False #whether to store calibration results
)

In [None]:
from importlib import reload
reload(stochastic_optimization)

start = time.time()
print([x['col'] for x in calibration_result['betas_tup']]+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration,
             stochastic_optimization.extract_bounds(calibration_result['betas_tup']),
             args=args,
             method='pso',
             options={'maxiter':20,"popsize":10})
end = time.time()
print(f'Took {(end-start)/60/60:.2f} hours')

In [None]:
x

## Reroute one trip after recalibration

In [None]:
# betas = [-0.184,-0.398,0.126,0.325,0.324]
base_impedance_col = "travel_time_min"
betas = x.x
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
stochastic_optimization.impedance_update(betas,calibration_result['betas_tup'],
                          stochastic_optimization.link_impedance_function,
                          base_impedance_col,
                          stochastic_optimization.turn_impedance_function,
                          links,turns,turn_G)
one_trip_od = stochastic_optimization.match_results_to_ods(one_trip)[0]
new_result = stochastic_optimization.impedance_path(turns,turn_G,links,*one_trip_od) #for start_node, end_node in one_trip_od}

In [None]:
modeled_results[tripid]['Recalibrated'] = pd.DataFrame(new_result['edge_list'])

# Visualize new modeled one

In [None]:
modeled_results[tripid].keys()

In [None]:
reload(stochastic_optimization)
line_dict = stochastic_optimization.construct_line_dict(['matched_edges','shortest_edges','modeled_edges','Recalibrated'],modeled_results[tripid],geo_dict)
line_dict = stochastic_optimization.add_metrics_to_tooltip(line_dict,length_dict,geo_dict)
stochastic_optimization.visualize_three(tripid,line_dict,modeled_results[tripid]['coords'],links.crs,tile_info_dict)

## Visualize the chosen, shortest, and modeled route

In [None]:
reload(stochastic_optimization)
tripid = 243
print(trips.loc[tripid,['trip_type','description']])
mymap = stochastic_optimization.basic_three_viz(tripid,modeled_results,links,tile_info_dict)
mymap.save(Path.home()/'Downloads/troubleshooting.html')

## Need to re-route or re-calibrate trips?

## If you have more than one modeled route or other routes, then use this sequence of functions

In [None]:
line_dict = stochastic_optimization.construct_line_dict(['matched_edges','shortest_edges','modeled_edges'],modeled_results[tripid],geo_dict)
line_dict = stochastic_optimization.add_metrics_to_tooltip(line_dict,links)
stochastic_optimization.visualize_three(tripid,line_dict,links,tile_info_dict)

In [None]:
def export_to_qgis(tripid,results_dict,geo_dict,config):
    '''
    Write one trip's chosen, shortest, and modeled routes to the
    'calibration_results' layer of calibration_qaqc.gpkg (under
    config['calibration_fp']) for viewing in QGIS, and return them as a
    GeoDataFrame. Each route has a hex value in the 'color' column that
    can be used for styling.
    '''
    trip_routes = results_dict[tripid]

    # one line per route, in the same order as the 'type' labels below
    route_lines = []
    for field in ('matched_edges','shortest_edges','modeled_edges'):
        line_coords = stochastic_optimization.get_route_line(trip_routes[field].values,geo_dict)
        route_lines.append(LineString(line_coords))

    attributes = {
        'type': ['chosen','shortest','modeled'],
        'color': ['#fc8d62','#66c2a5','#8da0cb'],
        'geometry': route_lines,
    }
    gdf = gpd.GeoDataFrame(data=attributes,crs=config['projected_crs_epsg'])

    out_fp = config['calibration_fp']/'calibration_qaqc.gpkg'
    gdf.to_file(out_fp,layer='calibration_results')
    return gdf

gdf = export_to_qgis(tripid,modeled_results,geo_dict,config)
gdf.total_bounds

In [None]:
links.columns

In [None]:
from shapely.geometry import box

# Bounding box of the exported routes, padded by 5280*2 CRS units
# (2 miles if the projected CRS is in feet -- TODO confirm)
bbox = box(*gdf.total_bounds).buffer(5280*2)

# Links touching the padded box, forward direction only, with the same
# link_cost copied into both cost columns
extract = links[links.geometry.intersects(bbox)].copy()
extract = extract[extract['reverse_link']==False]
extract = extract.assign(forward_cost=extract['link_cost'],backward_cost=extract['link_cost'])
extract = extract[['A','B','linkid','osmid','forward_cost','backward_cost','geometry']]

# reproject to WGS84 before writing the GeoJSON
extract = extract.to_crs('epsg:4326')
extract.to_file(Path.home()/'Downloads/test.geojson')

In [None]:
extract.plot()

## Identify trips that pass through a circle
One area with trouble is the Stone Mountain Trail because the map matched route often uses Dekalb instead. This leads to some trips having a lower than expected overlap.

In [None]:
reload(stochastic_optimization)

In [None]:
freedom_pkwy = (2237899.09,1372338.05)
freedom_pkwy = stochastic_optimization.retrieve_geos(*freedom_pkwy,modeled_results,links)
print(len(freedom_pkwy))

In [None]:
smt = (2250499.40,1369121.80)
smt = stochastic_optimization.retrieve_geos(*smt,modeled_results,links)
print(len(smt))
tenth_st = (2233722.10,1375729.08)
tenth_st = stochastic_optimization.retrieve_geos(*tenth_st,modeled_results,links)
print(len(tenth_st))
wylie_st = (2237751.33,1365098.89)
wylie_st = stochastic_optimization.retrieve_geos(*wylie_st,modeled_results,links)
print(len(wylie_st))

In [None]:
tripid = random.choice(freedom_pkwy)
reload(stochastic_optimization)
stochastic_optimization.basic_three_viz(tripid,modeled_results,links.crs,length_dict,geo_dict,tile_info_dict)

## Can calibrating by itself improve it?


In [None]:
#condense it

betas_links = {
    0 : 'multi use path',
    1 : 'bike lane',
    2 : 'lanes',
    3 : 'above_4'
} 

betas_turns = {
    4 : 'unsig_major_road_crossing'
}

with (config['calibration_fp']/'full_set.pkl').open('rb') as fh:
    train_set = pickle.load(fh)
# train_set = train_set
train_set = {tripid:train_set.get(tripid)}

In [None]:

train_ods = stochastic_optimization.match_results_to_ods(train_set)

base_impedance_col = "travel_time_min"
loss_function = stochastic_optimization.jaccard_index
# keyword arguments for the loss function; passed to the calibration (as in the
# other calibration cells of this notebook) and reused when the loss is
# re-evaluated with **loss_function_kwargs after calibration
loss_function_kwargs = {'length_dict':length_dict}#,'overlap_threshold':0.80}

# loss_function = stochastic_optimization.buffer_overlap
# loss_function_kwargs = {'geo_dict':geo_dict,'buffer_ft':100,'standardize':True}

# link coefficients control the % increase in link travel time (units don't matter)
# turn coefficients control the amount of seconds added from the turn (units matter)
link_bounds = [[-1,0],[-1,0],[0,4],[0,4]]
#[[-1, 2] for _ in range(0, len(betas_links))]
turn_bounds = [[0, 4] for _ in range(0, len(betas_turns))]
# stack link and turn bounds in coefficient order; fall back to whichever is non-empty
if betas_links and betas_turns:
    bounds = np.vstack([link_bounds,turn_bounds])
elif betas_links:
    bounds = link_bounds
elif betas_turns:
    bounds = turn_bounds

past_betas = []
past_vals = []
args = (
    past_betas,
    past_vals,
    betas_links,betas_turns,
    train_ods,train_set,
    stochastic_optimization.link_impedance_function,
    base_impedance_col,
    stochastic_optimization.turn_impedance_function,
    links,turns,turn_G,
    loss_function,
    loss_function_kwargs,
    True #whether to print the results of each iteration
)

from importlib import reload
reload(stochastic_optimization)

start = time.time()
print(list(betas_links.values())+list(betas_turns.values())+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration, bounds, args=args, method='pso', options={'maxiter':50,"popsize":5})
end = time.time()
print(f'Took {(end-start)/60/60:.2f} hours')

Retrieve the new result

In [None]:
betas = x.x

In [None]:
#link_impedance_col = "adj_travel_time_min"
base_impedance_col = "travel_time_min"
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)

#update impedances
#betas = #past_betas[np.array(past_vals).argmin()]#x.x
print(betas)
stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
                          stochastic_optimization.link_impedance_function,
                          base_impedance_col,
                          stochastic_optimization.turn_impedance_function,
                          links,turns,turn_G)


In [None]:

#find shortest path
full_results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,links,start_node,end_node) for start_node, end_node in train_ods}

#calulate objective function
loss_full = loss_function(train_set,full_results_dict,**loss_function_kwargs)
loss_full[:,1].mean()

In [None]:
links.set_index(['linkid','reverse_link'],inplace=True)

In [None]:
modeled2 = full_results_dict[(8180621166, 68253267)]['edge_list']
modeled2 = stochastic_optimization.get_route_line(modeled2,links)
#modeled2 = LineString(modeled2)

In [None]:
# from importlib import reload
# reload(stochastic_optimization)
# mymap = stochastic_optimization.visualize_three(tripid,LineString(chosen),LineString(modeled2),LineString(modeled),links,tile_info_dict,shortest_overlap,impedance_overlap)
# #mymap.save(Path.home()/'Downloads/troubleshooting.html')
# print(trips.loc[tripid,['start_time','trip_type','description']])
# mymap

I see improvement! But how does it track with other success measures?

In [None]:
old_frechet = modeled_dist
new_frechet = similaritymeasures.frechet_dist(chosen,modeled2)
print(old_frechet,new_frechet)

Huge reduction! Now what if we used frechet distance in the calibration for the objective function?

In [None]:
reload(stochastic_optimization)

In [None]:
links.reset_index(inplace=True)


In [None]:
links.set_index(['linkid','reverse_link'],inplace=True,drop=False)


In [None]:

train_ods = stochastic_optimization.match_results_to_ods(train_set)

base_impedance_col = "travel_time_min"
loss_function = stochastic_optimization.frechet_distance
loss_function_kwargs = {'length_dict':length_dict,'links':links}#,'overlap_threshold':0.80}

# loss_function = stochastic_optimization.buffer_overlap
# loss_function_kwargs = {'geo_dict':geo_dict,'buffer_ft':100,'standardize':True}

# link coefficients control the % increase in link travel time (units don't matter)
# turn coefficients control the amount of seconds added from the turn (units matter)
link_bounds = [[-1,0],[-1,0],[0,4],[0,4]]
#[[-1, 2] for _ in range(0, len(betas_links))]
turn_bounds = [[0, 4] for _ in range(0, len(betas_turns))]
if (len(betas_links) > 0) & (len(betas_turns) > 0):
    bounds = np.vstack([link_bounds,turn_bounds])
elif (len(betas_links) > 0):
    bounds = link_bounds
elif (len(betas_turns) > 0):
    bounds = turn_bounds

past_betas = []
past_vals = []
args = (
    past_betas,
    past_vals,
    betas_links,betas_turns,
    train_ods,train_set,
    stochastic_optimization.link_impedance_function,
    base_impedance_col,
    stochastic_optimization.turn_impedance_function,
    links,turns,turn_G,
    loss_function,
    loss_function_kwargs,
    True #whether to print the results of each iteration
)

from importlib import reload
reload(stochastic_optimization)

start = time.time()
print(list(betas_links.values())+list(betas_turns.values())+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration, bounds, args=args, method='pso', options={'maxiter':50,"popsize":5})
end = time.time()
print(f'Took {(end-start)/60/60:.2f} hours')

Did using a different overlap function fix the issue?

In [None]:
reload(stochastic_optimization)
single_trip = stochastic_optimization.post_calibration_routing(links,turns,turn_G,"travel_time_min",
                                                 betas,betas_links,betas_turns,train_ods)

In [None]:
#link_impedance_col = "adj_travel_time_min"
base_impedance_col = "travel_time_min"
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)

#update impedances
#betas = #past_betas[np.array(past_vals).argmin()]#x.x
print(betas)
stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
                          stochastic_optimization.link_impedance_function,
                          base_impedance_col,
                          stochastic_optimization.turn_impedance_function,
                          links,turns,turn_G)

#find shortest path
full_results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,links,start_node,end_node) for start_node, end_node in train_ods}

#calulate objective function
loss_full = loss_function(train_set,full_results_dict,**loss_function_kwargs)
loss_full[:,1].mean()

In [None]:
modeled3 = full_results_dict[(8180621166, 68253267)]['edge_list']
modeled3 = stochastic_optimization.get_route_line(modeled3,links)
#modeled2 = LineString(modeled2)

## Need something in the tooltip so we can see what the different measurements are
- jaccard (linkids + reverse)
- frechet (coords)
- maybe more

In [None]:
reload(stochastic_optimization)
stochastic_optimization.basic_three_viz(243,modeled_results,links,tile_info_dict)

In [None]:
modeled_results[122]['matched_edges']

In [None]:
line_dict = {
    'Chosen': {'coords':chosen},
    'Shortest': {'coords':shortest,'tooltip':0.5},
    'Modeled (all trips)': {'coords':modeled,'tooltip':0.5},
    'Modeled (trip only)': {'coords':modeled2,'tooltip':0.5},
    'Modeled (new objective function)': {'coords':modeled3,'tooltip':0.5},
}

In [None]:
modeled_results[122]

In [None]:
from importlib import reload
reload(stochastic_optimization)
mymap = stochastic_optimization.visualize_three(tripid,line_dict,links,tile_info_dict)
#mymap.save(Path.home()/'Downloads/troubleshooting.html')
print(trips.loc[tripid,['start_time','trip_type','description']])
mymap

I don't think so, but let's package these functions to make them easier to test

# Inspect overlap values for trip to trip impedance
(coefficients are incorrect right now)

In [None]:
with (config['calibration_fp']/"trip_specific.pkl").open('rb') as fh:
    trip_by_trip = pickle.load(fh)

new_col = {tripid:item['loss'].min() * -1 for tripid, item in trip_by_trip.items()}
new_col = pd.Series(new_col).reset_index()
new_col.columns = ['tripid','new_impedance']
test = pd.merge(full,new_col,on='tripid')
print(test['new_impedance'].mean())
(test['new_impedance'] - test['impedance']).describe()

In [None]:
#plot the overlap values

In [None]:
test[test['new_impedance']<test['impedance']]

In [None]:
from importlib import reload
reload(stochastic_optimization)
tripid = 837
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
#mymap.save(Path.home()/'Downloads/troubleshooting.html')
print(trips.loc[tripid,['start_time','trip_type','description']])
mymap

In [None]:
links2 = links.reset_index().copy()
# links2 = gpd.read_file(config['calibration_fp']/"calibration_network.gpkg",layer='links')
# links2.set_index(['linkid','reverse_link'],inplace=True)

In [None]:
differences = full['impedance'] - full['shortest']
differences.describe()

In [None]:
better = full.loc[full['shortest']<full['impedance'],'tripid'].tolist()
worse = full.loc[full['shortest']>full['impedance'],'tripid'].tolist()
print(len(better))
print(len(worse))

In [None]:
tripid

In [None]:
links.set_index(['linkid','reverse_link'],inplace=True)

# Examine Social Trips

In [None]:
from importlib import reload
reload(stochastic_optimization)
social = trips[trips['trip_type']=='Social']
tripid = random.choice(social.index.tolist())
shortest_overlap = full.loc[full['tripid']==tripid,'shortest'].item()
impedance_overlap = full.loc[full['tripid']==tripid,'impedance'].item()
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
# mymap.save(Path.home()/'Downloads/shortest_poor.html')
print(trips.loc[tripid,['start_time','trip_type','description']])
mymap

In [None]:
# na=False treats missing descriptions as non-matches directly, instead of
# relying on NaN being coerced to False when and-ed with notna()
trips.loc[trips['description'].str.contains('ride', na=False),'description'].head(50)#.index.tolist()

In [None]:


from importlib import reload
reload(stochastic_optimization)
# trips whose description mentions 'ride'; na=False skips missing descriptions
social = trips[trips['description'].str.contains('ride', na=False)].index.tolist()
tripid = random.choice(social)
shortest_overlap = full.loc[full['tripid']==tripid,'shortest'].item()
impedance_overlap = full.loc[full['tripid']==tripid,'impedance'].item()
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
# mymap.save(Path.home()/'Downloads/shortest_poor.html')
print(trips.loc[tripid,['start_time','trip_type','description']])
mymap


Export the three lines into one gpkg file with pre-defined colors, so that we can update it on the fly in QGIS?

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as colors

# Extract colors from a ColorBrewer scheme (e.g., 'Set2')
# # Convert them to HEX format if needed
colorbrewer_hex = [colors.to_hex(c) for c in plt.get_cmap('Set2').colors]

Condense the network so that it is convenient to examine in QGIS

In [None]:
links.set_index(['linkid','reverse_link'],inplace=True)


In [None]:

nonrev_cols = ['name','osmid','highway','year','geometry']
rev_cols = ['multi use path','bike lane','lanes','above_4']

idx = pd.IndexSlice
nonrev_links = links.loc[idx[:,False],idx[nonrev_cols+rev_cols]]
nonrev_links.reset_index(inplace=True)
nonrev_links.set_index('linkid',inplace=True)
nonrev_links.drop(columns=['reverse_link'],inplace=True)

rev_links = links.loc[idx[:,True],idx[rev_cols]]
rev_links.reset_index(inplace=True)
rev_links.set_index('linkid',inplace=True)
rev_links.drop(columns=['reverse_link'],inplace=True)
rev_links.columns = 'rev_' + rev_links.columns

#combine
condensed_network = pd.concat([nonrev_links,rev_links],ignore_index=False,axis=1)
condensed_network = gpd.GeoDataFrame(condensed_network,crs=config['projected_crs_epsg'])

import ast
og_links = gpd.read_file(config['osmdwnld_fp']/'osm_2023.gpkg',layer='raw',ignore_geometry=True)
og_links = dict(zip(og_links['osmid'],og_links['all_tags']))
for key, item in og_links.items():
    item = ast.literal_eval(item)
    item.pop('@way_nodes')
    item = str(item)
    og_links[key] = item
condensed_network['all_tags'] = condensed_network['osmid'].map(og_links)
condensed_network.to_file(config['calibration_fp']/'calibration_qaqc.gpkg',layer='network')

or we can try viewing it in leaflet instead? might be slow because of all the links?

In [None]:
#condense it

betas_links = {
    0 : 'multi use path',
    1 : 'bike lane',
    2 : 'lanes',
    3 : 'above_4'
} 

betas_turns = {
    4 : 'unsig_major_road_crossing'
}

with (config['calibration_fp']/'full_set.pkl').open('rb') as fh:
    train_set = pickle.load(fh)
# train_set = train_set
train_set = {tripid:train_set.get(tripid)}

In [None]:
links.reset_index(inplace=True)

train_ods = stochastic_optimization.match_results_to_ods(train_set)

base_impedance_col = "travel_time_min"
loss_function = stochastic_optimization.jaccard_index
loss_function_kwargs = {'length_dict':length_dict}#,'overlap_threshold':0.80}

# loss_function = stochastic_optimization.buffer_overlap
# loss_function_kwargs = {'geo_dict':geo_dict,'buffer_ft':100,'standardize':True}

# link coefficients control the % increase in link travel time (units don't matter)
# turn coefficients control the amount of seconds added from the turn (units matter)
link_bounds = [[-1,0],[-1,0],[0,4],[0,4]]
#[[-1, 2] for _ in range(0, len(betas_links))]
turn_bounds = [[0, 4] for _ in range(0, len(betas_turns))]
if (len(betas_links) > 0) & (len(betas_turns) > 0):
    bounds = np.vstack([link_bounds,turn_bounds])
elif (len(betas_links) > 0):
    bounds = link_bounds
elif (len(betas_turns) > 0):
    bounds = turn_bounds

past_betas = []
past_vals = []
args = (
    past_betas,
    past_vals,
    betas_links,betas_turns,
    train_ods,train_set,
    stochastic_optimization.link_impedance_function,
    base_impedance_col,
    stochastic_optimization.turn_impedance_function,
    links,turns,turn_G,
    loss_function,
    loss_function_kwargs,
    True #whether to print the results of each iteration
)

from importlib import reload
reload(stochastic_optimization)

start = time.time()
print(list(betas_links.values())+list(betas_turns.values())+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration, bounds, args=args, method='pso', options={'maxiter':50,"popsize":5})
end = time.time()
print(f'Took {(end-start)/60/60:.2f} hours')

In [None]:
betas = x.x

In [None]:
#link_impedance_col = "adj_travel_time_min"
base_impedance_col = "travel_time_min"
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)

#update impedances
#betas = #past_betas[np.array(past_vals).argmin()]#x.x
print(betas)
stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
                          stochastic_optimization.link_impedance_function,
                          base_impedance_col,
                          stochastic_optimization.turn_impedance_function,
                          links,turns,turn_G)

#find shortest path
full_results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,links,start_node,end_node) for start_node, end_node in train_ods}

#calulate objective function
loss_full = loss_function(train_set,full_results_dict,**loss_function_kwargs)
loss_full[:,1].mean()

In [None]:
#add a new modeled edges field so that we can calculate the modeled edges entry
for tripid, item in full_set.items():
    od = (item['origin_node'],item['destination_node'])
    # default to an empty dict so an OD pair that was never routed does not
    # raise AttributeError (the old default of 0 has no .get)
    modeled_edges = full_results_dict.get(od,{}).get('edge_list')
    if modeled_edges is None:
        # nothing to convert for this trip; report it and move on
        print(f"No modeled route for trip {tripid} (od={od}); skipping")
        continue
    #turn to dataframe
    item['modeled_edges'] = pd.DataFrame(modeled_edges,columns=['linkid','reverse_link'])

In [None]:
# subset_links = links[links.intersects(box(*pd.concat([chosen_line,shortest_line,modeled_line]).total_bounds))].copy()
# links.loc[idx[:,False],idx[rev_cols]]

In [None]:
shortest_path_poor = full.loc[full['impedance']<0.1,'tripid'].tolist()
tripid = random.choice(shortest_path_poor)
from importlib import reload
reload(stochastic_optimization)
shortest_overlap = full.loc[full['tripid']==tripid,'shortest'].item()
impedance_overlap = full.loc[full['tripid']==tripid,'impedance'].item()
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
# mymap.save(Path.home()/'Downloads/shortest_poor.html')
print(trips.loc[tripid,['start_time','trip_type','description']])
mymap

In [None]:
#where shortest path does poorly

from importlib import reload
reload(stochastic_optimization)
shortest_path_poor = full.loc[full['shortest']<0.3,'tripid'].tolist()
tripid = random.choice(shortest_path_poor)
shortest_overlap = full.loc[full['tripid']==tripid,'shortest'].item()
impedance_overlap = full.loc[full['tripid']==tripid,'impedance'].item()
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
mymap.save(Path.home()/'Downloads/shortest_poor.html')

In [None]:
from importlib import reload
reload(stochastic_optimization)
tripid = random.choice(better)
shortest_overlap = full.loc[full['tripid']==tripid,'shortest'].item()
impedance_overlap = full.loc[full['tripid']==tripid,'impedance'].item()
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
mymap.save(Path.home()/'Downloads/optim_results.html')

In [None]:
from importlib import reload
reload(stochastic_optimization)
tripid = random.choice(worse)
shortest_overlap = full.loc[full['tripid']==tripid,'shortest'].item()
impedance_overlap = full.loc[full['tripid']==tripid,'impedance'].item()
mymap = stochastic_optimization.visualize_three_no_legend(tripid,modeled_results,links,tile_info_dict,shortest_overlap,impedance_overlap)
mymap.save(Path.home()/'Downloads/optim_results2.html')

In [None]:
full[full['tripid']==tripid]

In [None]:
good_overlaps = [30845]
needs_work = [13190]

In [None]:
# both_ods = list(set.union(set(train_ods),set(test_ods)))
# html = ""
# nodes = gpd.read_file(config['network_fp']/'final_network.gpkg',layer='nodes')
# nodes.to_crs('epsg:4236',inplace=True)
# nodes['lon'] = nodes.geometry.x
# nodes['lat'] = nodes.geometry.y
# latlon = tuple(zip(nodes['lon'],nodes['lat']))
# nodes = dict(zip(nodes['N'],latlon))
# nodes.get(68196100,0)
# htmls = []
# for od in both_ods:
#     start = od[0]
#     end = od[1]
#     start_lonlat = nodes.get(start,0)
#     end_lonlat = nodes.get(end,0)
#     html = f"https://brouter.damsy.net/latest/#map=12/33.7522/-84.3892/standard&lonlats={start_lonlat[1]},{start_lonlat[0]};{end_lonlat[1]},{end_lonlat[0]}&profile=safety"
#     htmls.append(html)
# with (config['calibration_fp']/"brouter_links.txt").open('w') as fh:
#     for html in htmls:
#         fh.write(f"{html}\n")
# with (config['calibration_fp']/"brouter_ods.txt").open('w') as fh:
#     for od in both_ods:
#         fh.write(f"{od}")