# Feature Development
Investigate whether raw GPS traces can be used instead of map-matched traces.

# Impedance Calibration

**Overview:**
1. Network Preparation
2. Import Train and Test Sets
3. Specify Calibration Parameters
    - Link Impedance Function
    - Turn Impedance Function
    - Objective/Loss Function
        - First Preference Recovery
        - Exact Overlap
        - Buffer Overlap (in development)
        - Frechet Distance/Area (in development)
4. Run Calibration
    - Particle Swarm Optimization (constrained & non-probabilistic)
5. Export and Run Post Calibration


In [None]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
import stochastic_optimization
from tqdm import tqdm
import similaritymeasures
import random
import datetime

from shapely.ops import LineString, MultiLineString

import sys
# Put the parent of the current working directory first on the import path so
# the project-level modules imported below can be resolved from this notebook.
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
# `config` is indexed by name below (e.g. config['calibration_fp']) and the
# values are used as path objects (`/`, .open(), .exists()).
config = file_structure_setup.filepaths()

from network.src import modeling_turns
import speedfactor

# Network Import

In [None]:
# Load the calibration network, then unpack the five returned objects
# (links, turns, the length and geometry lookups, and the turn graph).
calibration_network = stochastic_optimization.import_calibration_network(config)
links, turns, length_dict, geo_dict, turn_G = calibration_network

# Import Training Set

In [None]:
# Read the pickled calibration set (keyed dict, see the commented subset line below).
# NOTE: pickle.load executes arbitrary code; only load files produced by this project.
ready_for_calibration_fp = config['calibration_fp'] / 'ready_for_calibration.pkl'
with ready_for_calibration_fp.open('rb') as fh:
    full_set = pickle.load(fh)
# subsetting for testing purposes
# full_set = {key:item for key, item in full_set.items() if key in list(full_set.keys())[0:20]}

# with (config['cycleatl_fp']/'simplified_coords.pkl').open('rb') as fh:
#     traces = pickle.load(fh)

In [None]:
#just try this on 10 right now

# Specify Link Impedance Functions
Each entry must specify the column name, the type (link or turn), and the search range.

In [None]:
# Impedance specification: one dict per coefficient to calibrate, giving the
# attribute column, whether it applies to links or turns, and the [min, max]
# search range handed to the optimizer. Order matters: position i in this tuple
# corresponds to position i of the optimizer's solution vector.
_range_0_to_2_cols = (
    '2lpd',
    '3+lpd',
    '(30,40] mph',
    '(40,inf) mph',
    '[4,6) grade',
    '[6,inf) grade',
)
_range_neg1_to_0_cols = (
    'bike lane',
    'cycletrack',
    'multi use path',
)
betas_tup = tuple(
    [{'col': col, 'type': 'link', 'range': [0, 2]} for col in _range_0_to_2_cols]
    + [{'col': col, 'type': 'link', 'range': [-1, 0]} for col in _range_neg1_to_0_cols]
    # turn entry currently disabled:
    # {'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

# Calibration Settings

In [None]:
# Positional arguments passed (via `args=`) to the optimizer together with
# stochastic_optimization.impedance_calibration. The order is significant:
# later cells read the loss function back as args[10] and the stored values as args[0].
past_vals = []  # starts empty; storage for past calibration results
ods = stochastic_optimization.match_results_to_ods(full_set)  # OD network node pairs needed for shortest path routing
loss_function_kwargs = {'length_dict':length_dict,'geo_dict':geo_dict}  # ,'trace_dict':traces}
print_results = True  # whether to print the results of each iteration
store_results = True  # whether to store calibration results

args = (
    past_vals,
    betas_tup,  # tuple containing the impedance spec
    ods,
    full_set,  # dict containing the origin/dest node and map matched edges
    stochastic_optimization.link_impedance_function,  # link impedance function to use
    "travel_time_min",  # column with the base impedance in travel time or distance
    stochastic_optimization.turn_impedance_function,  # turn impedance function to use
    links,  # network parts
    turns,
    turn_G,
    stochastic_optimization.buffer_overlap,  # loss function to use
    loss_function_kwargs,  # keyword arguments for loss function
    print_results,
    store_results,
)

In [None]:
# Keyword arguments unpacked into the optimizer call: which stochastic method
# to use and its iteration / population-size options.
stochastic_optimization_settings = dict(
    method='pso',
    options=dict(maxiter=100, popsize=3),
)

# Impedance Calibration

In [None]:
from importlib import reload
# Pick up edits made to the project modules since they were first imported.
# NOTE: reload() does not rebind function objects that were already captured
# elsewhere; `args` was built in an earlier cell, so re-run that cell after
# reloading if the impedance/loss functions themselves were edited.
reload(stochastic_optimization)
reload(modeling_turns)

start = time.time()
# header for the per-iteration printout: one column per coefficient plus the loss value
print([beta['col'] for beta in betas_tup]+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration,
             stochastic_optimization.extract_bounds(betas_tup),
             args=args,
             **stochastic_optimization_settings)
end = time.time()
# str(Timedelta) renders as "D days HH:MM:SS", so no unit suffix is appended
# (the previous "hours" label was misleading).
print(f"Took {str(pd.Timedelta(seconds=end-start).round('s'))}")
# args[10] is the loss function; x.fun is the best objective value found
print(f"{args[10].__name__}: {x.fun}")

# Record results and store for reference

In [None]:
# Bundle everything needed to inspect or reproduce this calibration run.
calibration_result = {
    # impedance spec with the calibrated coefficient attached to each entry
    'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
    'settings': stochastic_optimization_settings, # contains the optimization settings
    'objective_function': args[10].__name__, # objective function used
    'results': x, # stochastic optimization outputs
    'trips_calibrated': set(full_set.keys()), # saves which trips were calibrated
    'past_vals': args[0], # all of the past values/guesses
    # time.time() differences are in seconds; a bare number passed to
    # pd.Timedelta is interpreted as nanoseconds, so the unit must be explicit
    'runtime': pd.Timedelta(seconds=end-start),
    'time': datetime.datetime.now()
}

In [None]:
# Save this run on its own. Create the output folder first so that a finished
# calibration is not lost to a FileNotFoundError when the folder is missing.
standard_fp = config['calibration_fp']/"calibration_results/standard.pkl"
standard_fp.parent.mkdir(parents=True, exist_ok=True)
with standard_fp.open('wb') as fh:
    pickle.dump(calibration_result,fh)

In [None]:
# Append this run to the cumulative history pickle (a list of result dicts),
# starting a new list when the file has not been created yet.
history_fp = config['calibration_fp']/"calibration_results.pkl"
if not history_fp.exists():
    calibration_results = []
else:
    with history_fp.open('rb') as fh:
        calibration_results = pickle.load(fh)
calibration_results.append(calibration_result)
with history_fp.open('wb') as fh:
    pickle.dump(calibration_results,fh)

# Create smaller subset of trips for testing purposes

In [None]:
# number of entries in the calibration set
len(full_set)

In [None]:
# Load the pickled user and trip tables plus the per-trip objective function results.
cycleatl_dir = config['cycleatl_fp']
user = pd.read_pickle(cycleatl_dir/'users_4.pkl')
trip = pd.read_pickle(cycleatl_dir/'trips_4.pkl')

objective_functions_fp = config['calibration_fp']/'objective_functions.csv'
results_df = pd.read_csv(objective_functions_fp)

In [None]:
# show the result row(s) whose 'index' column equals 194
results_df.loc[results_df['index'].eq(194)]

In [None]:
# 'index' values of the 50 rows with the largest 'chosen_detour'
by_detour_desc = results_df.sort_values('chosen_detour',ascending=False)
by_detour_desc.head(50)['index'].tolist()

In [None]:
# Keep the trip rows whose 'userid' appears among the calibration set keys.
# NOTE(review): other cells treat full_set keys as trip ids ('trips_calibrated');
# confirm that matching them against 'userid' is intended.
calibration_keys = set(full_set.keys())
subset = trip[trip['userid'].isin(calibration_keys)]

Just retain one from each user

In [None]:
# inspect which fields are stored for entry 71 of the calibration set
full_set[71].keys()

In [None]:
# scratch check: duplicate tuples collapse to a single set element
{(3, 4), (3, 4)}

In [None]:
# Trip rows whose 'userid' is among the calibration set keys, then the distinct
# trip types recorded for each user.
in_calibration_set = trip['userid'].isin(set(full_set.keys()))
subset = trip[in_calibration_set]
trip_types = subset.groupby('userid')['trip_type']
groups = trip_types.unique()
groups



# Additional Calibration Runs Template

## Using speed category instead of lanes
Result is similar to using lanes

In [None]:
from importlib import reload
# Reload first: the functions placed in `args` below are captured by reference,
# so reloading after the tuple is built would leave stale function objects in it.
reload(stochastic_optimization)

# Impedance spec: column name, type (link or turn) and search range per coefficient
betas_tup = (
    {'col':'multi use path','type':'link','range':[-1,1]},
    {'col':'bike lane','type':'link','range':[-1,1]},
    {'col':'speed','type':'link','range':[0,2]},
    {'col':'above_4','type':'link','range':[0,2]},
    {'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

# Positional arguments forwarded to the calibration function (order matters;
# args[0] and args[10] are read back below).
args = (
    [], # empty list for storing past calibration results
    betas_tup, # tuple containing the impedance spec
    stochastic_optimization.match_results_to_ods(full_set), # list of OD network node pairs needed for shortest path routing
    full_set, # dict containing the origin/dest node and map matched edges
    stochastic_optimization.link_impedance_function, # link impedance function to use
    "travel_time_min", # column with the base impedance in travel time or distance
    stochastic_optimization.turn_impedance_function, # turn impedance function to use
    links,turns,turn_G, # network parts
    stochastic_optimization.buffer_overlap, # loss function to use
    {'length_dict':length_dict,'geo_dict':geo_dict}, # keyword arguments for loss function
    True, #whether to print the results of each iteration
    True #whether to store calibration results
)

stochastic_optimization_settings = {
    'method':'pso',
    'options': {'maxiter':50,'popsize':10}
}

start = time.time()
print([beta['col'] for beta in betas_tup]+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration,
             stochastic_optimization.extract_bounds(betas_tup),
             args=args,
             **stochastic_optimization_settings)
end = time.time()
# time.time() differences are seconds; a bare number passed to pd.Timedelta
# would be read as nanoseconds, so the unit is given explicitly.
runtime = pd.Timedelta(seconds=end-start)
# str(Timedelta) renders as "D days HH:MM:SS", so no unit suffix is appended
print(f"Took {str(runtime.round('s'))}")
print(f"{args[10].__name__}: {x.fun}")

calibration_result = {
    'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
    'settings': stochastic_optimization_settings, # contains the optimization settings
    'objective_function': args[10].__name__, # objective function used
    'results': x, # stochastic optimization outputs
    'trips_calibrated': set(full_set.keys()), # saves which trips were calibrated
    'past_vals': args[0], # all of the past values/guesses
    'runtime': runtime,
    'time': datetime.datetime.now()
}

# Save this run on its own, creating the output folder if it is missing
speed_fp = config['calibration_fp']/"calibration_results/speed.pkl"
speed_fp.parent.mkdir(parents=True, exist_ok=True)
with speed_fp.open('wb') as fh:
    pickle.dump(calibration_result,fh)

# Append the run to the cumulative history file
history_fp = config['calibration_fp']/"calibration_results.pkl"
if history_fp.exists():
    with history_fp.open('rb') as fh:
        calibration_results = pickle.load(fh)
else:
    calibration_results = []
calibration_results.append(calibration_result)
with history_fp.open('wb') as fh:
    pickle.dump(calibration_results,fh)

## Try a few combinations of lanes, speed, AADT


lanes

## Run it 10 times to see how the coefficients change

In [None]:
from importlib import reload
# Reload first so the functions captured in `args` below are the reloaded versions.
reload(stochastic_optimization)

# Impedance spec: column name, type (link or turn) and search range per coefficient
betas_tup = (
    {'col':'multi use path','type':'link','range':[-1,1]},
    {'col':'bike lane','type':'link','range':[-1,1]},
    {'col':'lanes','type':'link','range':[0,2]},
    {'col':'above_4','type':'link','range':[0,2]},
    {'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

stochastic_optimization_settings = {
    'method':'pso',
    'options': {'maxiter':50,'popsize':10}
}

# The OD pairs do not depend on the run, so they are computed once (as before).
ods = stochastic_optimization.match_results_to_ods(full_set) # list of OD network node pairs needed for shortest path routing

history_fp = config['calibration_fp']/"calibration_results.pkl"
run_10_times = []

# A separate loop counter is used so the optimizer result `x` does not clobber it.
for run_number in range(0,10):

    # Build `args` per run: its first element is the list stored as 'past_vals',
    # so sharing one tuple across runs would give every result the same list
    # (presumably filled by the calibration function when storing is enabled).
    args = (
        [], # empty list for storing past calibration results
        betas_tup, # tuple containing the impedance spec
        ods,
        full_set, # dict containing the origin/dest node and map matched edges
        stochastic_optimization.link_impedance_function, # link impedance function to use
        "travel_time_min", # column with the base impedance in travel time or distance
        stochastic_optimization.turn_impedance_function, # turn impedance function to use
        links,turns,turn_G, # network parts
        stochastic_optimization.buffer_overlap, # loss function to use
        {'length_dict':length_dict,'geo_dict':geo_dict}, # keyword arguments for loss function
        False, #whether to print the results of each iteration
        True #whether to store calibration results
    )

    start = time.time()
    print([beta['col'] for beta in betas_tup]+['objective_function'])
    x = minimize(stochastic_optimization.impedance_calibration,
                stochastic_optimization.extract_bounds(betas_tup),
                args=args,
                **stochastic_optimization_settings)
    end = time.time()
    # time.time() differences are seconds; a bare number passed to pd.Timedelta
    # would be read as nanoseconds, so the unit is given explicitly.
    runtime = pd.Timedelta(seconds=end-start)
    # str(Timedelta) renders as "D days HH:MM:SS", so no unit suffix is appended
    print(f"Took {str(runtime.round('s'))}")
    print(f"{args[10].__name__}: {x.fun}")

    calibration_result = {
        'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
        'settings': stochastic_optimization_settings, # contains the optimization settings
        'objective_function': args[10].__name__, # objective function used
        'results': x, # stochastic optimization outputs
        'trips_calibrated': set(full_set.keys()), # saves which trips were calibrated
        'past_vals': args[0], # all of the past values/guesses
        'runtime': runtime,
        'time': datetime.datetime.now()
    }
    run_10_times.append(calibration_result)

    # Append to the cumulative history after every run so partial progress is kept
    if history_fp.exists():
        with history_fp.open('rb') as fh:
            calibration_results = pickle.load(fh)
    else:
        calibration_results = []
    calibration_results.append(calibration_result)
    with history_fp.open('wb') as fh:
        pickle.dump(calibration_results,fh)

In [None]:
# number of calibration results collected by the repeated-run loop
len(run_10_times)

In [None]:
# display the collected calibration results
run_10_times

In [None]:
# Save the list of repeated-run results, creating the output folder if it is
# missing so the results are not lost to a FileNotFoundError.
run_10_times_fp = config['calibration_fp']/"calibration_results/run_10_times.pkl"
run_10_times_fp.parent.mkdir(parents=True, exist_ok=True)
with run_10_times_fp.open('wb') as fh:
    pickle.dump(run_10_times,fh)

## Remove the grade and turn variables to see if a simpler model is better
Slightly worse but not that much worse

In [None]:
from importlib import reload
# Reload first: the functions placed in `args` below are captured by reference,
# so reloading after the tuple is built would leave stale function objects in it.
reload(stochastic_optimization)

# Impedance spec with the grade and turn entries disabled
betas_tup = (
    {'col':'multi use path','type':'link','range':[-1,1]},
    {'col':'bike lane','type':'link','range':[-1,1]},
    {'col':'lanes','type':'link','range':[0,2]},
    #{'col':'above_4','type':'link','range':[0,2]},
    #{'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

# Positional arguments forwarded to the calibration function (order matters;
# args[0] and args[10] are read back below).
args = (
    [], # empty list for storing past calibration results
    betas_tup, # tuple containing the impedance spec
    stochastic_optimization.match_results_to_ods(full_set), # list of OD network node pairs needed for shortest path routing
    full_set, # dict containing the origin/dest node and map matched edges
    stochastic_optimization.link_impedance_function, # link impedance function to use
    "travel_time_min", # column with the base impedance in travel time or distance
    stochastic_optimization.turn_impedance_function, # turn impedance function to use
    links,turns,turn_G, # network parts
    stochastic_optimization.buffer_overlap, # loss function to use
    {'length_dict':length_dict,'geo_dict':geo_dict}, # keyword arguments for loss function
    True, #whether to print the results of each iteration
    True #whether to store calibration results
)

stochastic_optimization_settings = {
    'method':'pso',
    'options': {'maxiter':50,'popsize':10}
}

start = time.time()
print([beta['col'] for beta in betas_tup]+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration,
             stochastic_optimization.extract_bounds(betas_tup),
             args=args,
             **stochastic_optimization_settings)
end = time.time()
# time.time() differences are seconds; a bare number passed to pd.Timedelta
# would be read as nanoseconds, so the unit is given explicitly.
runtime = pd.Timedelta(seconds=end-start)
# str(Timedelta) renders as "D days HH:MM:SS", so no unit suffix is appended
print(f"Took {str(runtime.round('s'))}")
print(f"{args[10].__name__}: {x.fun}")

calibration_result = {
    'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
    'settings': stochastic_optimization_settings, # contains the optimization settings
    'objective_function': args[10].__name__, # objective function used
    'results': x, # stochastic optimization outputs
    'trips_calibrated': set(full_set.keys()), # saves which trips were calibrated
    'past_vals': args[0], # all of the past values/guesses
    'runtime': runtime,
    'time': datetime.datetime.now()
}

# Save this run on its own, creating the output folder if it is missing
fewer_variables_fp = config['calibration_fp']/"calibration_results/fewer_variables.pkl"
fewer_variables_fp.parent.mkdir(parents=True, exist_ok=True)
with fewer_variables_fp.open('wb') as fh:
    pickle.dump(calibration_result,fh)

# Append the run to the cumulative history file
history_fp = config['calibration_fp']/"calibration_results.pkl"
if history_fp.exists():
    with history_fp.open('rb') as fh:
        calibration_results = pickle.load(fh)
else:
    calibration_results = []
calibration_results.append(calibration_result)
with history_fp.open('wb') as fh:
    pickle.dump(calibration_results,fh)

## Test the impact of changing the objective/loss function

In [None]:
from importlib import reload
# Reload once, before collecting the loss functions: reloading inside the loop
# (after the list was built) would leave the list holding pre-reload functions.
reload(stochastic_optimization)

loss_functions = [stochastic_optimization.buffer_overlap,stochastic_optimization.jaccard_index_func,stochastic_optimization.frechet_distance]
loss_functions_results = []

# The impedance spec and optimizer settings are the same for every loss
# function, so they are defined once outside the loop.
betas_tup = (
    {'col':'multi use path','type':'link','range':[-1,1]},
    {'col':'bike lane','type':'link','range':[-1,1]},
    {'col':'speed','type':'link','range':[0,2]},
    {'col':'above_4','type':'link','range':[0,2]},
    {'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

stochastic_optimization_settings = {
    'method':'pso',
    'options': {'maxiter':50,'popsize':10}
}

history_fp = config['calibration_fp']/"calibration_results.pkl"

for loss_function in loss_functions:

    # Rebuilt per loss function so each run gets its own past-values list
    args = (
        [], # empty list for storing past calibration results
        betas_tup, # tuple containing the impedance spec
        stochastic_optimization.match_results_to_ods(full_set), # list of OD network node pairs needed for shortest path routing
        full_set, # dict containing the origin/dest node and map matched edges
        stochastic_optimization.link_impedance_function, # link impedance function to use
        "travel_time_min", # column with the base impedance in travel time or distance
        stochastic_optimization.turn_impedance_function, # turn impedance function to use
        links,turns,turn_G, # network parts
        loss_function, # loss function to use
        {'length_dict':length_dict,'geo_dict':geo_dict}, # keyword arguments for loss function
        False, #whether to print the results of each iteration
        True #whether to store calibration results
    )

    start = time.time()
    print([beta['col'] for beta in betas_tup]+['objective_function'])
    x = minimize(stochastic_optimization.impedance_calibration,
                stochastic_optimization.extract_bounds(betas_tup),
                args=args,
                **stochastic_optimization_settings)
    end = time.time()
    # time.time() differences are seconds; a bare number passed to pd.Timedelta
    # would be read as nanoseconds, so the unit is given explicitly.
    runtime = pd.Timedelta(seconds=end-start)
    # str(Timedelta) renders as "D days HH:MM:SS", so no unit suffix is appended
    print(f"Took {str(runtime.round('s'))}")
    print(f"{args[10].__name__}: {x.fun}")

    calibration_result = {
        'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
        'settings': stochastic_optimization_settings, # contains the optimization settings
        'objective_function': args[10].__name__, # objective function used
        'results': x, # stochastic optimization outputs
        'trips_calibrated': set(full_set.keys()), # saves which trips were calibrated
        'past_vals': args[0], # all of the past values/guesses
        'runtime': runtime,
        'time': datetime.datetime.now()
    }

    # Append to the cumulative history after every run so partial progress is kept
    if history_fp.exists():
        with history_fp.open('rb') as fh:
            calibration_results = pickle.load(fh)
    else:
        calibration_results = []
    calibration_results.append(calibration_result)
    with history_fp.open('wb') as fh:
        pickle.dump(calibration_results,fh)

    loss_functions_results.append(calibration_result)

In [None]:
# Save the per-loss-function results, creating the output folder if it is
# missing so the results are not lost to a FileNotFoundError.
loss_functions_fp = config['calibration_fp']/"calibration_results/different_loss_functions.pkl"
loss_functions_fp.parent.mkdir(parents=True, exist_ok=True)
with loss_functions_fp.open('wb') as fh:
    pickle.dump(loss_functions_results,fh)

## Try two grade categories

In [None]:
from importlib import reload
# Reload first: the functions placed in `args` below are captured by reference,
# so reloading after the tuple is built would leave stale function objects in it.
reload(stochastic_optimization)

# NOTE(review): this spec contains no grade column and is identical to the
# "fewer variables" spec; the grade categories for this experiment still
# need to be added here.
betas_tup = (
    {'col':'multi use path','type':'link','range':[-1,1]},
    {'col':'bike lane','type':'link','range':[-1,1]},
    {'col':'lanes','type':'link','range':[0,2]},
    #{'col':'above_4','type':'link','range':[0,2]},
    #{'col':'unsig_major_road_crossing','type':'turn','range':[0,2]}
)

# Positional arguments forwarded to the calibration function (order matters;
# args[0] and args[10] are read back below).
args = (
    [], # empty list for storing past calibration results
    betas_tup, # tuple containing the impedance spec
    stochastic_optimization.match_results_to_ods(full_set), # list of OD network node pairs needed for shortest path routing
    full_set, # dict containing the origin/dest node and map matched edges
    stochastic_optimization.link_impedance_function, # link impedance function to use
    "travel_time_min", # column with the base impedance in travel time or distance
    stochastic_optimization.turn_impedance_function, # turn impedance function to use
    links,turns,turn_G, # network parts
    stochastic_optimization.buffer_overlap, # loss function to use
    {'length_dict':length_dict,'geo_dict':geo_dict}, # keyword arguments for loss function
    True, #whether to print the results of each iteration
    True #whether to store calibration results
)

stochastic_optimization_settings = {
    'method':'pso',
    'options': {'maxiter':50,'popsize':10}
}

start = time.time()
print([beta['col'] for beta in betas_tup]+['objective_function'])
x = minimize(stochastic_optimization.impedance_calibration,
             stochastic_optimization.extract_bounds(betas_tup),
             args=args,
             **stochastic_optimization_settings)
end = time.time()
# time.time() differences are seconds; a bare number passed to pd.Timedelta
# would be read as nanoseconds, so the unit is given explicitly.
runtime = pd.Timedelta(seconds=end-start)
# str(Timedelta) renders as "D days HH:MM:SS", so no unit suffix is appended
print(f"Took {str(runtime.round('s'))}")
print(f"{args[10].__name__}: {x.fun}")

calibration_result = {
    'betas_tup': tuple({**item,'beta':x.x[idx].round(4)} for idx,item in enumerate(betas_tup)), # contains the betas
    'settings': stochastic_optimization_settings, # contains the optimization settings
    'objective_function': args[10].__name__, # objective function used
    'results': x, # stochastic optimization outputs
    'trips_calibrated': set(full_set.keys()), # saves which trips were calibrated
    'past_vals': args[0], # all of the past values/guesses
    'runtime': runtime,
    'time': datetime.datetime.now()
}

# Written to its own file: the copied "fewer_variables.pkl" name would overwrite
# the result of the earlier fewer-variables experiment.
two_grade_fp = config['calibration_fp']/"calibration_results/two_grade_categories.pkl"
two_grade_fp.parent.mkdir(parents=True, exist_ok=True)
with two_grade_fp.open('wb') as fh:
    pickle.dump(calibration_result,fh)

# Append the run to the cumulative history file
history_fp = config['calibration_fp']/"calibration_results.pkl"
if history_fp.exists():
    with history_fp.open('rb') as fh:
        calibration_results = pickle.load(fh)
else:
    calibration_results = []
calibration_results.append(calibration_result)
with history_fp.open('wb') as fh:
    pickle.dump(calibration_results,fh)

TODO: move this section to QAQC. Create GIFs

In [None]:
# import geopandas as gpd
# import matplotlib.pyplot as plt
# import imageio
# from io import BytesIO

# # Function to plot a GeoSeries and save the plot
# def plot_geoseries(geoseries,other_geoseries,i,past_val):
#     fig, ax = plt.subplots(figsize=(20, 20))
#     #cx.add_basemap(ax)
#     other_geoseries.plot(ax=ax,color='blue',style_kwds={'linewidth':2})
#     geoseries.plot(ax=ax,color='red')
#     ax.set_title(f"Iter:{i} Overlap Function:{past_val}")
#     ax.set_axis_off()
#     img_bytes = BytesIO()
#     plt.savefig(img_bytes, format='png', bbox_inches='tight')
#     plt.close()
#     return img_bytes.getvalue()

In [None]:
# num_trips = 10

# for z in range(0,num_trips):

#     #choose a random tripid
#     tripid = random.choice(list(full_set.keys()))
#     start_node = full_set[tripid]['start_node']
#     end_node = full_set[tripid]['end_node']

#     matched_edges = full_set[tripid]['matched_edges']
#     matched_edges = np.array(matched_edges)
#     matched_line = MultiLineString([geo_dict[linkid] for linkid, reverse_link in matched_edges])
#     matched_line = gpd.GeoSeries(matched_line,crs='epsg:2240')
#     matched_line = matched_line.to_crs('epsg:4326')

#     modeled_lines = []

#     for betas in past_betas:
#         #update network with the correct impedances
#         stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
#                                 link_impedance_function,
#                                 turn_impedance_function,
#                                 links,turns,turn_G)
#         #find shortest path
#         modeled_edges = stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node)['edge_list']
#         modeled_line = MultiLineString([geo_dict[linkid] for linkid, reverse_link in modeled_edges])
#         modeled_line = gpd.GeoSeries(modeled_line,crs='epsg:2240')
#         modeled_line = modeled_line.to_crs('epsg:4326')
#         modeled_lines.append(modeled_line)

#     # List of GeoSeries (Replace this with your own GeoSeries list)
#     geoseries_list = modeled_lines

#     # Loop through the list of GeoSeries, plot each one, and save the plot
#     images = []
#     for i, geoseries in enumerate(geoseries_list):
#         past_val = past_vals[i]
#         image_bytes = plot_geoseries(geoseries,matched_line,i,past_val)
#         images.append(imageio.imread(BytesIO(image_bytes)))

#     # Path for saving the GIF
#     gif_path = f"animations/stress_animation_{z}.gif"

#     # Save the images as a GIF
#     imageio.mimsave(Path.cwd()/gif_path, images, format='gif', duration=2)


In [None]:
# #TODO, combine these and just have the structure indicate how it works
# betas_links = {
#     0 : 'multi use path',
#     1 : 'bike lane',
#     2 : 'lanes',
#     3 : 'above_4'
# } 

# betas_turns = {
#     4 : 'unsig_major_road_crossing'
# }
# # #this was only .14 overlap
# # betas_links = {
# #     0 : 'multi use path',
# #     1 : 'bike lane',
# #     2 : 'AADT',
# #     3 : 'above_4'
# # } 

# # betas_turns = {
# #     4 : 'unsig_major_road_crossing'
# # }


# # #have position of beta next to name of variable
# # #NOTE: keys must be in the correct order used
# # betas_links = {
# #     0 : 'mixed_traffic_no_facil',
# #     1 : 'mixed_traffic_w_facil',
# #     #0 : 'major_road_w_class_2',
# #     # 1 : 'minor_road_w_class_2',
# #     # 2 : 'major_road_no_facil',
# #     # 3 : 'minor_road_no_facil',
# #     2 : 'above_4'
# #     #1 : 'motorized'
# #     #1 : 'ascent_grade_%'
# # } 

# # betas_turns = {
# #     3 : 'unsig_major_road_crossing'
# #     #1 : 'left',
# #     #2 : 'right',
# #     #3 : 'signalized'
# # }


# # #have position of beta next to name of variable
# # #NOTE: keys must be in the correct order used
# # #TODO have this be named tuples or something similar
# # # (name=var_name,type,position=position,bounds=[0,3])
# # betas_links = {
# #     0 : 'AADT',
# #     1 : 'lanes',
# #     2 : 'here_speed',
# #     3 : 'above_4'
# # } 

# # betas_turns = {
# #     4 : 'unsig_major_road_crossing',
# #     5 : 'signalized'
# # }

