# User Post Calibration

In [None]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
import stochastic_optimization
from tqdm import tqdm
import similaritymeasures
import random

import matplotlib.pyplot as plt

from shapely.ops import LineString, MultiLineString

import sys
sys.path.insert(0,str(Path.cwd().parent))
from network.src import modeling_turns
import speedfactor

In [None]:
#todo, see if there's a way to just have this auto import
import json

# read the project configuration stored one level above the current working
# directory; the with-block closes the file handle once it has been parsed
with (Path.cwd().parent / 'config.json').open('rb') as fh:
    config = json.load(fh)

# project sub-directories used by this notebook
project_dir = Path(config['project_directory'])
calibration_fp = project_dir / 'Calibration'
cycleatl_fp = project_dir / 'CycleAtlanta'
matching_fp = project_dir / 'Map_Matching'
network_fp = project_dir / 'Network'

# create the calibration directory if it is missing; exist_ok avoids the
# check-then-create race of testing exists() first
calibration_fp.mkdir(exist_ok=True)

In [None]:
#import the impedance calibration results
#NOTE: pickle can execute arbitrary code on load, only open files produced by this project
trip_specific_pkl = calibration_fp / 'trip_specific.pkl'
with open(trip_specific_pkl, 'rb') as fh:
    results = pickle.load(fh)
results

In [None]:
#have position of beta next to name of variable
#NOTE: keys must be in the correct order used
#link attributes occupy beta positions 0, 1 and 2
betas_links = dict(enumerate([
    'mixed_traffic_no_facil',
    'mixed_traffic_w_facil',
    'above_4',
]))

#turn attributes continue the numbering at position 3
betas_turns = dict(enumerate(['unsig_major_road_crossing'], start=3))

In [None]:
#import the network and perform shortest path routing using the impedance coefficients
#the pickle holds a (links, turns) pair; the turn graph is built from turns
calibration_network_pkl = calibration_fp / "calibration_network.pkl"
with open(calibration_network_pkl, 'rb') as fh:
    links, turns = pickle.load(fh)
turn_G = modeling_turns.make_turn_graph(turns)

In [None]:
#want to add an additional key for the impedance result
ready_for_calibration_pkl = calibration_fp / 'ready_for_calibration.pkl'
with open(ready_for_calibration_pkl, 'rb') as fh:
    ready_for_calibration = pickle.load(fh)
#add user id?
ready_for_calibration

In [None]:
#load the trips table and replace its index with a fresh 0..n-1 range
trips_df = pd.read_pickle(cycleatl_fp/"trips_3.pkl").reset_index(drop=True)

In [None]:
#trip ids grouped per user; looked up later with .loc[user]
tripids_by_user_pkl = calibration_fp / 'tripids_by_user.pkl'
tripids_by_user = pd.read_pickle(tripids_by_user_pkl)
tripids_by_user

In [None]:
#display the calibration results again (each entry is read for 'betas' and 'loss' elsewhere in this notebook)
results

In [None]:
#load the held-out (test) and calibration (train) trip sets
test_set_pkl = calibration_fp / 'test_set.pkl'
train_set_pkl = calibration_fp / 'train_set.pkl'
with open(test_set_pkl, 'rb') as fh:
    test_set = pickle.load(fh)
with open(train_set_pkl, 'rb') as fh:
    train_set = pickle.load(fh)

In [None]:
#retrieve impedance routes
#each user has a separate set of training trips and a separate x (betas)

users_dict = {}

#the base impedance column is the same for every user, so it is set once
base_impedance_col = "travel_time_min"

for user, item in tqdm(results.items()):

    #NOTE(review): the membership test below assumes tripids is a plain
    #container of trip ids (list/set); if .loc[user] returns a pandas Series,
    #`in` checks the index instead of the values -- confirm
    tripids = tripids_by_user.loc[user]

    #filter into a new name: rebinding train_set here shrank it to the first
    #user's trips, so every later user was matched against the wrong set
    user_train_set = {key: trip for key, trip in train_set.items() if key in tripids}
    train_ods = stochastic_optimization.match_results_to_ods(user_train_set)

    betas = item['betas']

    #users_dict is passed in so results can be collected across users
    #(presumably filled in place by post_calibration_routing -- confirm)
    stochastic_optimization.post_calibration_routing(
        links, turns, turn_G, base_impedance_col, betas, betas_links, betas_turns,
        train_ods, users_dict
    )


    # base_impedance_col = "travel_time_min"
    # stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)
    # stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
    #                         stochastic_optimization.link_impedance_function,
    #                         base_impedance_col,
    #                         stochastic_optimization.turn_impedance_function,
    #                         links,turns,turn_G)

    #find shortest path
    #results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in train_ods}

    #add to final dict?
    #users_dict.update(results_dict)



In [None]:
#NOTE(review): this cell started with an unfinished `with (calibration_fp/"")`
#statement (no file name, no body), which is a SyntaxError and prevented the
#cell from running. It was presumably meant to write users_dict to a file in
#calibration_fp -- TODO supply the file name and restore the write.
users_dict

In [None]:
#turn dict into dataframe
#one row per user: user id, loss, then one column per beta (links first, then turns)
labels = ['userid', 'Loss', *betas_links.values(), *betas_turns.values()]
records = [(userid, res['loss'], *res['betas']) for userid, res in results.items()]
loss = pd.DataFrame.from_records(records, columns=labels)
#store the loss as a magnitude
loss['Loss'] = loss['Loss'].abs()
loss = loss.set_index('userid')

In [None]:
#display the raw calibration results that the loss table was built from
results

In [None]:
# add the impedance routes to the dict

In [None]:
#link_impedance_col = "adj_travel_time_min"
base_impedance_col = "travel_time_min"
#reset the network to the base impedance before applying new coefficients
stochastic_optimization.back_to_base_impedance(base_impedance_col,links,turns,turn_G)

#update impedances
#betas = #past_betas[np.array(past_vals).argmin()]#x.x
#NOTE(review): betas is not set in this cell; it holds whatever the last
#iteration of the per-user loop assigned -- confirm this is the intended set
print(betas)
#the impedance functions live in stochastic_optimization (the bare names
#were never defined or imported in this notebook)
stochastic_optimization.impedance_update(betas,betas_links,betas_turns,
                          stochastic_optimization.link_impedance_function,
                          base_impedance_col,
                          stochastic_optimization.turn_impedance_function,
                          links,turns,turn_G)

#origin/destination pairs of the test trips, derived the same way as for the train trips
test_ods = stochastic_optimization.match_results_to_ods(test_set)

#find shortest path
results_dict = {(start_node,end_node):stochastic_optimization.impedance_path(turns,turn_G,start_node,end_node) for start_node, end_node in test_ods}

#calculate objective function
#NOTE(review): loss_function and loss_function_kwargs are not defined anywhere
#in the visible notebook -- TODO define or import them before running this cell
loss_test = loss_function(test_set,results_dict,**loss_function_kwargs)
loss_test.mean()

In [None]:
#import user characteristics
users_pkl = cycleatl_fp / "users_1.pkl"
users_df = pd.read_pickle(users_pkl)

#replace userid with just the first one
def take_first(x):
    """Return the first element when ``x`` is a list, otherwise ``x`` unchanged."""
    return x[0] if isinstance(x, list) else x
#collapse list-valued user ids to their first entry
users_df['userid'] = users_df['userid'].apply(take_first)

#combine data
#join the per-user loss table with the user characteristics on userid
merged = loss.merge(users_df, on='userid')

#merged = pd.merge(merged.drop(columns=['userid']),users_df,left_on='remapped_userid',right_on='userid')
merged.columns

In [None]:
import matplotlib.pyplot as plt
# One histogram per column of the per-user calibration table
ax = loss.hist(figsize=(12,12),bins=20,color='grey')

# Add a title for the entire figure; report the actual number of users
# instead of a hard-coded count
plt.suptitle(f'User by User Impedance Calibration (n={len(loss)})')
plt.subplots_adjust(top=0.925)

# Optional per-panel x labels / limits (currently disabled); to re-enable,
# iterate with enumerate(ax.flatten()) and index these lists with i
#x_labels = ['(Intersection of Modeled and Chosen) / (Union of Modeled and Chosen)','Distance Proportion Change','Distance Proportion Change','Added Minutes Per Instance','Added Minutes Per Instance','Added Minutes Per Instance']
#x_lims = [(0,1),(0,9),(0,9),(0,9),(0,9),(0,9)]
for sub_ax in ax.flatten():
    #sub_ax.set_xlabel(x_labels[i])
    sub_ax.set_ylabel('Frequency')
    #sub_ax.set_xlim(x_lims[i])

# Counter-intuitive

In [None]:
#flag users whose 'mixed_traffic_no_facil' coefficient is smaller than their 'mixed_traffic_w_facil' coefficient
loss['counter'] = loss['mixed_traffic_no_facil'].lt(loss['mixed_traffic_w_facil'])


In [None]:
# Create a new figure. constrained_layout already reserves room for the
# suptitle; it is incompatible with subplots_adjust, so that call is dropped.
fig, axs = plt.subplots(4, 1, figsize=(12, 12), constrained_layout=True)

# Add a title for the entire figure
fig.suptitle('User by User Impedance Calibration')

categories = [True, False]
colors = ['orange','skyblue']

# Number of users in each group, computed once and shown in the legend
counts = {category: (loss['counter'] == category).sum() for category in categories}

# Iterate over features to draw one overlaid histogram per group
# (each group is a separate hist call, so the bars overlap rather than stack)
features = ['mixed_traffic_no_facil', 'mixed_traffic_w_facil', 'above_4','unsig_major_road_crossing']
for ax, feature in zip(axs, features):
    for category, color in zip(categories, colors):
        ax.hist(
            loss[loss['counter'] == category][feature],
            bins=20,
            label=f"{category} ({counts[category]})",
            alpha=0.5,
            color=color,
            density=True
        )
    # density=True normalises each histogram, so the y axis is a density, not a count
    ax.set_ylabel('Density')
    ax.set_title(feature)
    ax.legend()

# Label the x axis of the bottom panel (plt.xlabel only ever affected the current, last axes)
axs[-1].set_xlabel('Value')
plt.show()

It seems like people with a higher impedance for roads with bike infrastructure don't differ much on the other characteristics. Next, we should check these users to see what the distribution of route attributes was like. Maybe these people still avoided major streets?

## Do the attribute values vary by the overlap amount?
Does not appear to for the coefficients selected.

In [None]:
#label each user by whether their loss is strictly above the mean loss; everything else is "Below Mean"
above_mean = loss['Loss'] > loss['Loss'].mean()
loss['loss_value'] = np.where(above_mean, "Above Mean", "Below Mean")

In [None]:
# Create a new figure. constrained_layout already reserves room for the
# suptitle; it is incompatible with subplots_adjust, so that call is dropped.
fig, axs = plt.subplots(4, 1, figsize=(12, 12), constrained_layout=True)

# Add a title for the entire figure
fig.suptitle('User by User Impedance Calibration')

categories = ['Above Mean', 'Below Mean']
colors = ['orange','skyblue']

# Iterate over features to draw one overlaid histogram per group
# (each group is a separate hist call, so the bars overlap rather than stack)
features = ['mixed_traffic_no_facil', 'mixed_traffic_w_facil', 'above_4','unsig_major_road_crossing']
for ax, feature in zip(axs, features):
    for category, color in zip(categories, colors):
        ax.hist(
            loss[loss['loss_value'] == category][feature],
            bins=20,
            label=category,
            alpha=0.5,
            color=color,
            density=True
        )
    # density=True normalises each histogram, so the y axis is a density, not a count
    ax.set_ylabel('Density')
    ax.set_title(feature)
    ax.legend()

# Label the x axis of the bottom panel (plt.xlabel only ever affected the current, last axes)
axs[-1].set_xlabel('Value')
plt.show()

In [None]:
import json
#NOTE(review): this path depends on one machine's home directory layout -- consider moving it into config.json
user_data_definitions_fp = Path.home()/'Documents/GitHub/cycleatlanta/user_data_definition.json'
#the with-block closes the file handle once the definitions are parsed
with user_data_definitions_fp.open('rb') as fh:
    user_data_definitions = json.load(fh)

#add the 55+ column
user_data_definitions['age']['6'] = '55+'

#income has too many nulls
tree_cols = ['age','gender','rider_history','rider_type','trip_type','total_distance_ft','avg_speed_mph','count']#,'count']#[,'cycling_freq'
tree_df = merged[tree_cols]

#use to detect null values (-1 or the string 'NULL' mark a missing entry)
isnull = ((tree_df == -1) | (tree_df == 'NULL'))

#TODO do cross-sectionals to see which combination results in the most retained entries

#remove rows with null values
#.copy() makes tree_df an independent frame so the column assignments made on
#it further down do not act on a slice of merged (SettingWithCopyWarning)
tree_df = tree_df[~isnull.any(axis=1)].copy()

#rows of the merged table that survived the null filter
loss_vals = merged.loc[tree_df.index]

#columns of tree_df that are re-coded through user_data_definitions below
get_factor = ['age','rider_history','rider_type']
# just fyi
# select_max_cols = ['age','income','cycling_freq']
# #select the min for these (i.e. strong and fearless over interested but...)
# select_min_cols = ['rider_type','rider_history']

#NOTE(review): the visible source ends at this loop; it may continue beyond this point
for col in get_factor:
    #invert the definition mapping for this column (value -> key)
    ivd = {v:k for k, v in user_data_definitions[col].items()}
    #replace each entry by its key in the definitions; entries with no match in ivd become NaN
    tree_df[col] = tree_df[col].map(ivd)