# Examining route attributes
Previously, I had only considered how these attributes compare across route alternatives. Instead, let's look at the distance travelled on high-stress facilities, since this gives some evidence of people's willingness to travel on them.

Look at distance rather than percent of route, because a short trip with only a brief two-block stretch on a busy road is different from someone biking several miles on a stressful road that ends up being a smaller portion of the trip. Essentially, tolerance to stress from motor vehicles should, in theory, not vary with the distance of the trip. We're trying to find user profiles by actually looking at the composition of links that they put themselves on.

However, it should be noted that distance is limited by how much of each facility type exists in the network. Also, because sidewalks exist, this approach may run into issues when someone is matched to a high-stress road but was actually riding on the sidewalk.

In [None]:
from pathlib import Path
import time
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from shapely.ops import MultiLineString
import pandas as pd
import math
import matplotlib.pyplot as plt

from bikewaysim.paths import config
from bikewaysim.impedance_calibration import summarize_route, stochastic_optimization
from bikewaysim.routing import rustworkx_routing_funcs

In [None]:
# Load the calibration-ready trips plus the matching trip and user tables.
# TODO: just make this a function
# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load pickles produced by this project's own pipeline.

with (config['calibration_fp']/'ready_for_calibration.pkl').open('rb') as fh:
    ready_for_calibration = pickle.load(fh)
print(len(ready_for_calibration),'trips')

#new pickles
with (config['cycleatl_fp']/'trips_4.pkl').open('rb') as fh:
    trips = pickle.load(fh)
trips.reset_index(drop=True,inplace=True)
# keep only the trips that are ready for calibration; .copy() makes the
# filtered table independent of the unfiltered one
trips = trips[trips['tripid'].isin(ready_for_calibration.keys())].copy()
with (config['cycleatl_fp']/'users_4.pkl').open('rb') as fh:
    users = pickle.load(fh)
# keep only the users that still have a trip; .copy() so the column assignment
# below writes to a real frame instead of a slice (avoids SettingWithCopyWarning)
users = users[users['userid'].isin(trips['userid'])].copy()

#recalculate the number trips
users['matched_trips'] = users['userid'].map(trips.groupby('userid').size())

In [None]:
# unpack the five objects returned by import_calibration_network;
# links and turns_df are used as DataFrames in the cells below
links, turns_df, length_dict, geo_dict, turn_G = rustworkx_routing_funcs.import_calibration_network(config)

# Dealing with cycletracks and multi-use path sidepaths.
The problem is that when two links are parallel and close together, the map matching algorithm may not route on the parallel bike infrastructure. This is especially true when the infrastructure doesn't have good network connectivity. In this case, we want to acknowledge that there was a bicycle facility that they could have been on.

This happens in two instances:
1. Cycletracks
1. Multi-Use Paths that are essentially wide sidewalks

Some trips may still be matched to these features, so I think it's important not only to acknowledge that the road had an adjacent cycletrack/multi-use path, but also that the adjacent cycletrack/multi-use path takes on the features of the adjacent road. That way, it won't matter how the trip was matched.

For an LTS style analysis, we want to know 

In [None]:
# read the 'sidepaths' layer as an attribute table only (geometry is skipped)
sidepaths = gpd.read_file(config['bicycle_facilities_fp']/'sidepaths.gpkg',layer='sidepaths',ignore_geometry=True)

First, add cycletrack / multi-use path attributes to streets

In [None]:
# left join on linkid: every link row is kept, sidepath columns are NaN where no match exists
links = pd.merge(left=links,right=sidepaths,how='left',on='linkid')

In [None]:
# roads that picked up a sidepath match but carry no bicycle facility of their own
cond = links['facility_fwd'].isna() & links['sidepath_linkid'].notna()

# those roads take on the sidepath's facility type
links.loc[cond,'facility_fwd'] = links.loc[cond,'sidepath']

# and the sidepath's year, wherever one is recorded
has_sidepath_year = cond & links['sidepath_year'].notna()
links.loc[has_sidepath_year,'year'] = links.loc[has_sidepath_year,'sidepath_year']

Next, add street attributes to cycletracks / multi-use paths

In [None]:
# Collect the street attributes of interest for every road that has a sidepath,
# then collapse to one row per sidepath by taking the highest value of each
# (NOTE: these sidepaths prolly need to be split up in OSM)
cols = ['AADT','speed','lanes']
road_has_sidepath = links['linkid'].isin(set(sidepaths['linkid']))
to_add = links.loc[road_has_sidepath,['sidepath_linkid',*cols]].drop_duplicates()
# highest value present per sidepath
to_add = to_add.groupby('sidepath_linkid').max()
to_add['link_type'] = 'road'

In [None]:
# Overlay the adjacent-road attributes in to_add onto the matching sidepath links.
# to_add is indexed by sidepath_linkid, so it is joined against links['linkid'];
# its columns arrive with a '_new' suffix next to the existing ones.
links = links.merge(to_add,left_on='linkid',right_index=True,how='left',suffixes=(None,'_new'))
new_cols = [f'{col}_new' for col in to_add.columns]
# take the to_add value where there is one, otherwise keep the existing links value
for col, new_col in zip(to_add.columns,new_cols):
    links[col] = links[new_col].fillna(links[col])
# drop exactly the temporary columns created by the merge (a substring match on
# '_new' would also remove any unrelated column that happens to contain it)
links.drop(columns=new_cols,inplace=True)

# Getting route attributes

In [None]:
#set index for quick retrieval
links.set_index(['linkid'],inplace=True)
turns_df.set_index(['source_linkid','source_reverse_link','target_linkid','target_reverse_link'],inplace=True)

In [None]:
# convert link length from feet to miles (5280 ft per mile)
links['length_mi'] = links['length_ft'].div(5280)

In [None]:
# label links that have no facility value as 'No facility'
links['facility_fwd'] = links['facility_fwd'].where(links['facility_fwd'].notna(),'No facility')

In [None]:
import random

# pick one calibration trip at random and pull its matched edges
tripid = random.choice(list(ready_for_calibration.keys()))
trip = ready_for_calibration[tripid]['matched_edges']
trip_links = links.loc[trip['linkid']]

# each row of the matched edges becomes one route entry
route = [tuple(row) for row in trip.values]
# pair every entry with its successor to form the turn keys
trip_turns = [(prev[0],prev[1],nxt[0],nxt[1]) for prev, nxt in zip(route,route[1:])]
# ignore pairs that stay on the same link
trip_turns = [key for key in trip_turns if key[0] != key[2]]
trip_turns = turns_df.loc[trip_turns]

In [None]:
# display the turn records looked up for the sampled trip
trip_turns

In [None]:
# Summarise the sampled trip: overall length/ascent, turn counts, mileage by
# bike facility, and mileage by road attribute where no bike facility exists.
item = ready_for_calibration[tripid]
trip = item['matched_edges']

# link records for every matched edge
trip_links = links.loc[trip['linkid']]
route = [tuple(row) for row in trip.values]

# turn keys come from consecutive route entries; pairs that stay on the same link are skipped
trip_turns = [(prev[0],prev[1],nxt[0],nxt[1]) for prev, nxt in zip(route,route[1:])]
trip_turns = [key for key in trip_turns if key[0] != key[2]]
trip_turns = turns_df.loc[trip_turns]

# general stats
total_length_mi = trip_links['length_mi'].sum().round(1)
total_ascent_ft = trip_links['ascent_ft'].sum() # ascent seems a little high
general_stats = pd.Series({'length_mi': total_length_mi, 'ascent_ft': total_ascent_ft})
general_stats.index.name = 'general stats'

# turn stats: output label -> turns_df column that is summed
# (the unsignalized crossings appear to match up to real world)
turn_columns = {
    'left_turns': 'left_turn',
    'right_turns': 'right_turn',
    'unsig_crossing': 'unsig_crossing',
    'sig_crossings': 'signalized',
}
turn_stats = pd.Series({label: trip_turns[column].sum() for label, column in turn_columns.items()})
turn_stats.index.name = 'turn stats'

# bike facilities: miles ridden on each facility type
bike_facility_types = ['multi use path', 'bike lane', 'cycletrack', 'buffered bike lane']
on_bike_facility = trip_links['facility_fwd'].isin(bike_facility_types)
bike_attrs = trip_links[on_bike_facility]
bike_attrs = bike_attrs.groupby('facility_fwd')['length_mi'].sum()

# road variables: only roads without any of the bike facilities above
is_road = trip_links['link_type']=='road'
road_attrs = trip_links[is_road & ~on_bike_facility].copy()
aadt, lanes, speed = (road_attrs.groupby(attr)['length_mi'].sum() for attr in ('AADT','lanes','speed'))

# stack the pieces, keyed by each piece's index name
parts = [general_stats,bike_attrs,aadt,lanes,speed,turn_stats]
concat = pd.concat(parts,keys=[part.index.name for part in parts])

#alternatively use a list of tuples instead

In [None]:
# show the combined summary for the sampled trip
concat

In [None]:
# show the turn counts on their own
turn_stats

In [None]:
# show the combined summary again
concat

In [None]:
# miles on road links for each AADT / lanes / speed / facility combination
# TODO: need this to be column wise
trip_links[trip_links['link_type']=='road'].groupby(['AADT','lanes','speed','facility_fwd'])['length_mi'].sum()

In [None]:
# trip totals; in a notebook cell only the last expression (ascent) is echoed
trip_links['length_mi'].sum()
trip_links['ascent_ft'].sum()

In [None]:
# list the distinct facility labels present on the links
links['facility_fwd'].unique()

Identify users with lots of mileage on "high-stress" facilities

In [None]:
# list the attributes available on the links table
links.columns

In [None]:
# list the distinct facility labels present on the links
links['facility_fwd'].unique()

## Route attributes loop

In [None]:
from tqdm import tqdm

# Build one summary Series per calibration trip (length/ascent, mileage by bike
# facility, mileage by road attribute where no bike facility exists, turn counts).
# The body stays inline rather than in a helper so that trip_links / trip_turns
# keep holding the last processed trip for the cells that inspect them afterwards.

# facility labels that count as bicycle infrastructure
bike_facility_types = ['multi use path', 'bike lane', 'cycletrack', 'buffered bike lane']

test = []
for tripid, item in tqdm(ready_for_calibration.items()):
    trip = item['matched_edges']

    # get links traversed
    trip_links = links.loc[trip['linkid']]
    route = [tuple(row) for row in trip.values]

    # get the turn movements: consecutive route entries, skipping pairs that stay on the same link
    trip_turns = [(prev[0],prev[1],nxt[0],nxt[1]) for prev, nxt in zip(route,route[1:])]
    trip_turns = [key for key in trip_turns if key[0] != key[2]]
    trip_turns = turns_df.loc[trip_turns]

    #general stats
    general_stats = pd.Series({
        'length_mi': trip_links['length_mi'].sum().round(1),
        'ascent_ft': trip_links['ascent_ft'].sum(), # ascent seems a little high
    })
    general_stats.index.name = 'general stats'
    turn_stats = pd.Series({
        'left_turns': trip_turns['left_turn'].sum(),
        'right_turns': trip_turns['right_turn'].sum(),
        'unsig_crossing': trip_turns['unsig_crossing'].sum(),  # these appear to match up to real world
        'sig_crossings': trip_turns['signalized'].sum()
    })
    turn_stats.index.name = 'turn stats'

    #bike facilities
    on_bike_facility = trip_links['facility_fwd'].isin(bike_facility_types)
    bike_attrs = trip_links[on_bike_facility].groupby('facility_fwd')['length_mi'].sum()

    # road variables (no bike facilities)
    # remove if there's a bicycle facility
    road_attrs = trip_links[(trip_links['link_type']=='road') & ~on_bike_facility].copy()
    aadt = road_attrs.groupby('AADT')['length_mi'].sum()
    lanes = road_attrs.groupby('lanes')['length_mi'].sum()
    speed = road_attrs.groupby('speed')['length_mi'].sum()

    #group these
    concat = [general_stats,bike_attrs,aadt,lanes,speed,turn_stats]
    concat = pd.concat(concat,keys=[x.index.name for x in concat])

    # label the summary with its tripid; unnamed Series would be numbered
    # 0..n-1 when concatenated column-wise, and any later join against
    # trips['tripid'] would then match on list position instead of the trip
    concat.name = tripid

    test.append(concat)

In [None]:
# spot-check the third summary in the list
test[2]

In [None]:
# preview all trip summaries side by side (one column per summary)
pd.concat(test,axis=1)

In [None]:
# total mileage of whatever trip_links currently holds
trip_links['length_mi'].sum()

In [None]:
# spot-check the first summary in the list
test[0]

In [None]:
# one row per trip summary, one column per attribute; attributes a trip never touched become 0
summaries_wide = pd.concat(test,axis=1)
route_attrs_df = summaries_wide.T.fillna(0).round(2)

In [None]:
# Collapse the per-trip attributes to one row per user, keeping each user's maximum.
# Going through .values replaces the column labels with integers; column 0 is
# route_attrs_df's former index.
x0 = pd.DataFrame(route_attrs_df.reset_index().values)
# NOTE(review): this joins trips['tripid'] to column 0, so it only pairs rows
# correctly if route_attrs_df is indexed by tripid — confirm
x0 = pd.merge(trips[['tripid','userid']],x0,left_on='tripid',right_on=0)
# drop both join keys, then take the per-user maximum of every attribute
x0 = x0.drop(columns=['tripid',0]).groupby(['userid']).max()
# restore the attribute labels that were lost by going through .values
x0.columns = route_attrs_df.columns

In [None]:
# display the per-user maxima
x0

Get plots of the mileage on various types of roads w/o bike facilities

In [None]:
# Save one histogram image per attribute column of x0.
fig, ax = plt.subplots()
export_fp = r"C:\Users\tpassmore6\OneDrive - Georgia Institute of Technology\BikewaySim\Data\test"
# shared bin edges: 0 to 5.5 in steps of 0.125
hist_bins = [edge/40 for edge in range(0,225,5)]
for idx, x in enumerate(x0.columns):
    # the same axes is reused for every column, so wipe it first; otherwise each
    # saved image would also contain all of the histograms drawn before it
    ax.clear()
    ax.hist(x0[x],color='grey',bins=hist_bins)
    # fixed limits keep the images comparable across columns (clear() resets them)
    ax.set_xlim(0,6)
    ax.set_ylim(0,700)
    ax.title.set_text(str(x))
    plt.savefig(export_fp+rf"\{idx}.png")

In [None]:
# write the per-trip attribute table to the user's Downloads folder
route_attrs_df.to_csv(Path.home()/'Downloads/route_attrs_df.csv')

In [None]:
# lookups between turn_G node labels and indices (presumably rustworkx node indices — confirm)
node_to_idx, idx_to_node = rustworkx_routing_funcs.rx_conversion_helpers(turn_G)

In [None]:
# two node labels for link 29485 that differ only in the boolean flag
# (presumably (linkid, reverse_link) — confirm), plus their graph indices
a = (29485.0, False)
b = (29485.0, True)
a_idx = node_to_idx[a]
b_idx = node_to_idx[b]

In [None]:
# check which edges, if any, connect the two nodes
turn_G.edge_indices_from_endpoints(a_idx,b_idx)

In [None]:
# Show the turn records whose source is node `a`.
x = turns_df.reset_index()
# compare each key column against its own element of `a` and AND the results;
# indexing with a boolean DataFrame would blank out non-matching cells
# instead of selecting the matching rows
x[(x['source_linkid'] == a[0]) & (x['source_reverse_link'] == a[1])]

In [None]:
# display the full turn table
turns_df

In [None]:
# display the tripid currently bound
tripid

In [None]:
# display the turn records currently bound to trip_turns
trip_turns

In [None]:
# load the chosen-route attribute summary and renumber its rows from 0
chosen_route_fp = Path.home()/'Downloads/chosen_route_attributes.gpkg'
matched_summary = gpd.read_file(chosen_route_fp).reset_index(drop=True)
# shortest_summary = gpd.read_file(Path.home()/'Downloads/shortest_route_attributes.gpkg')

In [None]:
# turn every percent-of-route column into miles: pct * trip length / 100,
# stored under the column name with everything from '_pct' onward removed
pct_cols = [name for name in matched_summary.columns if '_pct' in name]
for pct_col in pct_cols:
    miles_col = pct_col.partition('_pct')[0]
    matched_summary[miles_col] = matched_summary[pct_col].mul(matched_summary['length_mi']).div(100)

In [None]:
# summary columns treated as stressful attributes in the per-user analysis below
stress_cols = ['(30,40] mph','(40,inf) mph','2lpd','3+lpd','[10k,inf) aadt','[4k,10k) aadt']

In [None]:
# attach the userid of each trip (inner join, so trips without a match are dropped)
trip_users = trips[['tripid','userid']]
matched_summary = matched_summary.merge(trip_users,how='inner',on='tripid')

Now find the maximum distance travelled on a stressful attribute by user

In [None]:
# per user, the largest value of each stress column across that user's trips
trips_by_user = matched_summary.groupby('userid')
matched_summary_user = trips_by_user[stress_cols].max()

In [None]:
# list the columns of the per-user table
matched_summary_user.columns

In [None]:
# grid size for laying out one histogram per per-user column, two per row
# (the original referenced `x_column`, which is not defined anywhere in this
# notebook and raised NameError; the columns being plotted are used instead)
n_cols = 2
n_rows = math.ceil(len(matched_summary_user.columns) / n_cols)

# distribution of each user's maximum mileage on 30-40 mph roads
matched_summary_user.hist(['(30,40] mph'],bins=100)

In [None]:
# distribution of the per-user maximum for the '3+lpd' column
matched_summary_user.hist('3+lpd',bins=100)

In [None]:
# display the per-trip summary table
matched_summary

How many miles are people riding on stressful streets? And can that be used to differentiate people?

In [None]:
# summary statistics of the per-trip '[10k,inf) aadt' column
matched_summary['[10k,inf) aadt'].describe()

In [None]:
# per-trip distribution of the '(30,40] mph' column
matched_summary['(30,40] mph'].hist(bins=100)

In [None]:
# per-trip distribution of the '[10k,inf) aadt' column
matched_summary.hist('[10k,inf) aadt',bins=100)

In [None]:
# per-trip distribution of the '3+lpd_pct' column (the percent form, not the derived miles)
matched_summary.hist('3+lpd_pct',bins=100)