# Retrieve Route Attributes from a List of Edge IDs and the Direction Travelled
We have a list of edges or turns (depending on the settings) from Dijkstra, and we need to get the route attributes (number of turns, feet of ascent, miles of bike facility, etc.) to report the route characteristics of a trip.

In [None]:
from pathlib import Path
import time
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from shapely.ops import MultiLineString
import pandas as pd
import math
import matplotlib.pyplot as plt

import summarize_route
import stochastic_optimization

In [None]:
import sys
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
config = file_structure_setup.filepaths()

In [None]:
# #TEMP for GDOT report statistics
# with (Path('D:\PROJECTS\GDOT\GDOT\Map_Matching\matched_0.pkl')).open('rb') as fh:
#     match_dict = pickle.load(fh)

# cutoff = 0.90 # set pct of points that need to be matched

# total = len(match_dict)
# match_ratios = {tripid:item['match_ratio'] for tripid, item in match_dict.items() if isinstance(item,str)==False}
# failed_matches = total - len(match_ratios)
# match_ratios = pd.Series(match_ratios)
# above_threshold = match_ratios[match_ratios > 0.90].index.tolist()
# match_dict = {key:item for key, item in match_dict.items() if key in above_threshold}
# below_threshold = total - failed_matches - len(above_threshold)

# print(len(match_dict),'/',total,'successful matches')
# print(failed_matches,'failed to match')
# print(below_threshold,'partial match')

# trips = pd.read_pickle(config['cycleatl_fp']/'trips_4.pkl')
# trips = trips.loc[trips['tripid'].isin(list(match_dict.keys()))]#,'userid'].nunique()
# users = pd.read_pickle(config['cycleatl_fp']/'users_4.pkl')
# users = users[users['userid'].isin(set(trips['userid'].tolist()))]

# #recalculate the number trips
# users['matched_trips'] = users['userid'].map(trips.groupby('userid').size())

# print(users.shape[0],'users')

# # ready_for_calibration = {tripid:{'matched_edges':item['edges']} for tripid, item in match_dict.items()}

In [None]:
# Load the calibration-ready trips; the keys of this mapping are matched against
# trips['tripid'] below.
# NOTE: pickle can execute arbitrary code on load -- only open files produced by this project.
with (config['calibration_fp']/'ready_for_calibration.pkl').open('rb') as fh:
    ready_for_calibration = pickle.load(fh)
print(len(ready_for_calibration),'trips')

#new pickles
with (config['cycleatl_fp']/'trips_4.pkl').open('rb') as fh:
    trips = pickle.load(fh)
# keep only the trips that are ready for calibration; .copy() makes the result an
# independent frame instead of a slice of the unpickled one
trips = trips[trips['tripid'].isin(ready_for_calibration.keys())].copy()

with (config['cycleatl_fp']/'users_4.pkl').open('rb') as fh:
    users = pickle.load(fh)
print(users.shape[0],'users')
# keep only users who made at least one retained trip; .copy() so the column
# assignment below writes to this frame and does not raise SettingWithCopyWarning
users = users[users['userid'].isin(trips['userid'])].copy()
print(users.shape[0],'users')

#recalculate the number of trips per user from the retained trips
users['matched_trips'] = users['userid'].map(trips.groupby('userid').size())

# Barplot on trips per user

In [None]:
print((users['matched_trips'] > 10).sum(),'users had above 10 trips')

In [None]:
# Tally how many users share each matched-trip count, ordered by trip count
trip_counts = users['matched_trips'].value_counts().sort_index()

# One grey bar per trip count
fig, ax = plt.subplots()
ax.bar(trip_counts.index, trip_counts.values, color='grey')

# Axis labels carry the sample sizes
ax.set(
    xlabel=f'Number of Trips (N={trips.shape[0]})',
    ylabel=f'Number of Users (N={users.shape[0]})',
)

# Tick every 5 trips, starting at zero and running just past the largest count
tick_stop = trip_counts.index.max() + 5
ax.set_xticks(range(0, tick_stop, 5))

plt.show()

# User Characteristics

In [None]:
# One ethnicity value reads as a two-item list when converted to text; collapse it
# to a single category so it is counted with the others
list_like_entry = "['Hispanic / Mexican / Latino', 'Multi-racial']"
is_list_like_entry = users['ethnicity'].astype(str) == list_like_entry
users.loc[is_list_like_entry,'ethnicity'] = 'Hispanic / Mexican / Latino'

summary_cols = ['gender','age','income','ethnicity','cycling_freq','rider_history','rider_type']

# For each characteristic print the number of users per category and that number
# as a percentage of all users
n_users = users.shape[0]
for col in summary_cols:
    print(f"------- {col} -------")
    counts = users[col].value_counts().rename('counts')
    pct = (counts / n_users * 100).round(0).rename('percent')
    print(pd.concat([counts,pct],axis=1,ignore_index=False))

In [None]:
# Same breakdown as the per-user counts, but weighted by matched trips: for each
# characteristic print the trips per category and their share of all matched trips
total_matched_trips = users['matched_trips'].sum()
for col in summary_cols:
    print(f"------- {col} -------")
    counts = users.groupby(col)['matched_trips'].sum().rename('counts')
    pct = (counts / total_matched_trips * 100).round(0).rename('percent')
    print(pd.concat([counts,pct],axis=1,ignore_index=False))

# Trip Characteristics

In [None]:
trips['trip_type'].value_counts()


# Network Import

In [None]:
# Load the calibration network objects
(links, turns_df, length_dict, geo_dict, turn_G) = stochastic_optimization.import_calibration_network(config)

# Index both tables by their identifying columns so rows can be fetched quickly with .loc
link_key = ['linkid','reverse_link']
turn_key = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']
links.set_index(link_key,inplace=True)
turns_df.set_index(turn_key,inplace=True)

# Import Map Matched and Shortest Path Data (and impedance-calibrated paths in the future)
This is where we want to look at the makeup of the chosen routes to know what to include in the impedance function. Because route attributes have to be aggregated from the links, continuous variables have to be averaged or split into ordinal variables in order to be represented.

In most cases, it seems 

In [None]:
links.columns

In [None]:
# List the distinct link names that contain 'BeltLine'.
# str.contains with na=False treats missing names (None or NaN) as non-matches;
# the previous `'BeltLine' in x` test only guarded against None and raised
# TypeError on NaN. regex=False keeps this a plain substring test.
links.loc[links['name'].str.contains('BeltLine',regex=False,na=False),'name'].unique()

In [None]:
# links.loc[links['name'].isin(['Atlanta BeltLine Eastside Trail',
#        'BeltLine Eastside Trail'])].explore()
links['on_eastside_beltline'] = links['name'].isin(['Atlanta BeltLine Eastside Trail','BeltLine Eastside Trail'])

In [None]:
# Extra link columns needed for the route summaries
links['length_mi'] = links['length_ft'] / 5280
links['state_route'] = links['route_type'] == 'State Route'

# Each lowest bin is the complement of the two higher bins of the same variable:
# a link is in the lowest bin when it is flagged in neither of the other two
lowest_bin_from = {
    '1lpd': ('2lpd','3+lpd'),
    '[0,30] mph': ('(30,40] mph','(40,inf) mph'),
    '[0,4k) aadt': ('[10k,inf) aadt','[4k,10k) aadt'),
    '[0,4) grade': ('[4,6) grade','[6,inf) grade'),
}
for lowest_bin, (first_bin, second_bin) in lowest_bin_from.items():
    links[lowest_bin] = links[first_bin].eq(False) & links[second_bin].eq(False)

# Link column -> how it is summarized ('bool', 'category' or 'sum')
link_cols_to_summarize = {
    'oneway': 'bool',
    'link_type': 'category',
    # 'highway':'category',
    'state_route': 'bool',
    'ascent_ft': 'sum',
    # 'facility_fwd':'category',
    'travel_time_min': 'sum',
    '1lpd': 'bool',
    '2lpd': 'bool',
    '3+lpd': 'bool',
    '[0,30] mph': 'bool',
    '(30,40] mph': 'bool',
    '(40,inf) mph': 'bool',
    '[0,4k) aadt': 'bool',
    '[4k,10k) aadt': 'bool',
    '[10k,inf) aadt': 'bool',
    '[0,4) grade': 'bool',
    '[4,6) grade': 'bool',
    '[6,inf) grade': 'bool',
    'bike lane': 'bool',
    'cycletrack': 'bool',
    'multi use path': 'bool',
    'on_eastside_beltline': 'bool',
    'length_mi': 'sum',
}

# Columns passed to summarize_route.route_attributes as `restr_stats_to_roads`
# (presumably statistics computed over road links only -- confirm in summarize_route)
restr_stats_to_roads = [
    'state_route','oneway',
    '1lpd','2lpd','3+lpd',
    '[0,30] mph','(30,40] mph','(40,inf) mph',
    '[0,4k) aadt','[4k,10k) aadt','[10k,inf) aadt',
]

# Turn column -> how it is summarized
turn_cols_to_summarize = {
    'turn_type': 'category',
    'unsig_major_road_crossing': 'bool',
}

In [None]:
links

In [None]:
from importlib import reload
reload(summarize_route)

# Summarize every map-matched route (one record per trip), then post-process the records
matched_summary = [
    summarize_route.route_attributes(
        tripid,trip['matched_edges'],
        link_cols_to_summarize,turn_cols_to_summarize,
        links,turns_df,restr_stats_to_roads,
    )
    for tripid, trip in ready_for_calibration.items()
]
matched_summary = summarize_route.procees_summary_results(matched_summary,config['projected_crs_epsg'])

# Row order of the exported table
reorder = [
    'length_mi','travel_time_min','ascent_ft','oneway_pct','state_route_pct',
    'link_type_bike_pct','link_type_pedestrian_pct','link_type_road_pct','link_type_service_pct',
    '[0,30] mph_pct','(30,40] mph_pct','(40,inf) mph_pct',
    '1lpd_pct','2lpd_pct','3+lpd_pct',
    '[0,4k) aadt_pct','[4k,10k) aadt_pct','[10k,inf) aadt_pct',
    '[0,4) grade_pct','[4,6) grade_pct','[6,inf) grade_pct',
    'bike lane_pct','cycletrack_pct','multi use path_pct','on_eastside_beltline_pct',
]

# Descriptive statistics with one row per attribute and the count column removed
matched_stats = matched_summary.drop(columns=['tripid','geometry']).describe().round(1)
df0 = matched_stats.transpose().drop(columns='count').loc[reorder]
# every row after the first is shown as whole numbers
df0.iloc[1:,:] = df0.iloc[1:,:].round(0)
print(df0)
df0.to_csv(Path.home()/'Downloads/route_attr.csv')

In [None]:
from importlib import reload
reload(summarize_route)

# Summarize every shortest-path route (one record per trip), then post-process the records
shortest_summary = [
    summarize_route.route_attributes(
        tripid,trip['shortest_edges'],
        link_cols_to_summarize,turn_cols_to_summarize,
        links,turns_df,restr_stats_to_roads,
    )
    for tripid, trip in ready_for_calibration.items()
]
shortest_summary = summarize_route.procees_summary_results(shortest_summary,config['projected_crs_epsg'])

# Row order of the exported table
reorder = [
    'length_mi','travel_time_min','ascent_ft','oneway_pct','state_route_pct',
    'link_type_bike_pct','link_type_pedestrian_pct','link_type_road_pct','link_type_service_pct',
    '[0,30] mph_pct','(30,40] mph_pct','(40,inf) mph_pct',
    '1lpd_pct','2lpd_pct','3+lpd_pct',
    '[0,4k) aadt_pct','[4k,10k) aadt_pct','[10k,inf) aadt_pct',
    '[0,4) grade_pct','[4,6) grade_pct','[6,inf) grade_pct',
    'bike lane_pct','cycletrack_pct','multi use path_pct','on_eastside_beltline_pct',
]

# Descriptive statistics with one row per attribute and the count column removed
shortest_stats = shortest_summary.drop(columns=['tripid','geometry']).describe().round(1)
df1 = shortest_stats.transpose().drop(columns='count').loc[reorder]
# every row after the first is shown as whole numbers
df1.iloc[1:,:] = df1.iloc[1:,:].round(0)
print(df1)
df1.to_csv(Path.home()/'Downloads/route_attr_short.csv')

In [None]:
x = matched_summary.drop(columns=['geometry']).set_index('tripid')
y = shortest_summary.drop(columns=['geometry']).set_index('tripid')

In [None]:
# Descriptive statistics of the per-trip differences x - y, one row per attribute
# in `reorder` order, with the count column removed
diff_stats = (x - y).describe().round(1)
diff = diff_stats.transpose().drop(columns='count').loc[reorder]
# every row after the first is shown as whole numbers
diff.iloc[1:,:] = diff.iloc[1:,:].round(0)
diff.to_csv(Path.home()/'Downloads/diff.csv')

In [None]:
difference = df0 - df1
print(difference)
difference.to_csv(Path.home()/'Downloads/difference.csv')

In [None]:
# Histogram of the map-matched route lengths
fig, ax = plt.subplots()
ax.hist(matched_summary['length_mi'],color='grey')

# Axis labels; the y label reports the number of trips
ax.set(
    xlabel='Length in Miles',
    ylabel=f'Number of Trips (N={trips.shape[0]})',
)

# Optional: one tick per mile
#ax.set_xticks(range(0, int(matched_summary['length_mi'].max()) + 5, 1))

plt.show()


In [None]:
# shortest_summary = [summarize_route.route_attributes(key,item,'shortest_edges',cols_to_summarize,links,turns_df) for key, item in ready_for_calibration.items()]
# shortest_summary = summarize_route.procees_summary_results(shortest_summary,config['projected_crs_epsg'])
# shortest_summary.drop(columns=['tripid','geometry']).describe()

### Export

In [None]:
# Write both route summaries to the same GeoPackage, one layer each
export_fp = config['calibration_fp']/"route_attributes.gpkg"
for layer_name, summary in (("matched",matched_summary),("shortest",shortest_summary)):
    summary.to_file(export_fp,layer=layer_name)

# Visualize Chosen Attributes

In [None]:
# Display labels for the graphs, keyed by summary column name.
# Plot cells look labels up with renaming_dict.get(column, column), so a column
# without an entry is simply shown under its own name.
renaming_dict = {
    '(0,2]_prop': "0-2% grade",
    '(2,4]_prop': "2-4% grade",
    '(4,6]_prop': "4-6% grade",
    '(6,inf)_prop': "> 6% grade",
    '(4,inf)_prop': "> 4% grade",
    'ascent_ft': "Ascent (ft)",
    'descent_ft': "Descent (ft)",
    'beltline_prop': "On BeltLine",
    'facility_fwd_bike lane_prop': "Bike Lane",
    'facility_fwd_buffered bike lane_prop': "Buffered Bike Lane",
    'facility_fwd_cycletrack_prop': "Cycletrack",
    'facility_fwd_multi use path_prop': "Multi-Use Path",
    'facility_fwd_sharrow_prop': "Sharrow or Bike Route",
    'left': "Left Turn (per mi)",
    'length_mi': "Length (mi)",
    'right': "Right Turn (per mi)",
    'straight': "Straight",
    # label previously began with a stray space, unlike every other label
    'uturn': "U-Turn (per mi)",
    'vehicle_traffic_prop': "Vehicle Traffic Allowed",
    'lts_0_prop': "LTS Not Rated",
    'lts_1_prop': "LTS 1",
    'lts_2_prop': "LTS 2",
    'lts_3_prop': "LTS 3",
    'lts_4_prop': "LTS 4",
}

In [None]:
# Fixed x-axis limits (low, high) for the difference plots, keyed by summary column.
# A column without an entry gets limits computed from its data in the plot cells.
x_axes_limits = {
    # '(0,2]_prop': "0-2% grade",
    # '(2,4]_prop': "2-4% grade",
    # '(4,6]_prop': "4-6% grade",
    # '(6,inf)_prop': "> 6% grade",
    '(4,inf)_prop': (-0.2,0.2),
    'ascent_ft': (-1000,1000),
    #'descent_ft': "Descent (ft)",
    'beltline_prop': (-0.6,0.6),
    'facility_fwd_bike lane_prop': (-0.4,0.4),
    'facility_fwd_buffered bike lane_prop': (-0.1,0.1),
    'facility_fwd_cycletrack_prop': (-0.5,0.5),
    'facility_fwd_multi use path_prop': (-0.6,0.6),
    'facility_fwd_sharrow_prop': (-0.3,0.3),
    #'left': "Left Turn",
    'length_mi': (0,6),
    #'right': "Right Turn",
    #'straight': "Straight",
    #'uturn': " U-Turn",
    # The label dictionary and the column list in this notebook spell this column
    # 'vehicle_traffic_prop', so the '..._true_prop' key alone never matched it.
    # Both spellings are kept until the actual column name is confirmed.
    'vehicle_traffic_prop': (-0.75,0.75),
    'vehicle_traffic_true_prop': (-0.75,0.75),
    #'lts': "LTS"
}

In [None]:
links.columns

In [None]:
# List the public names available in summarize_route. This replaces a dangling
# `summarize_route.` (an unfinished attribute access), which is a syntax error
# and stopped the notebook from running top to bottom.
[name for name in dir(summarize_route) if not name.startswith('_')]

In [None]:
#select which differences to plot and what order to plot
# cols_to_plot = ['length_mi', 'ascent_ft',
#        '(4,inf)_prop', 'vehicle_traffic_prop', 'beltline_prop', 
#        'facility_fwd_sharrow_prop','facility_fwd_bike lane_prop',
#        'facility_fwd_buffered bike lane_prop',
#        'facility_fwd_cycletrack_prop', 'facility_fwd_multi use path_prop',
#        'lts_1_prop','lts_2_prop','lts_3_prop','lts_4_prop',
#        'uturn', 'straight', 'right', 'left','signalized'
# ]

cols_to_plot = ['facility_fwd_bike lane_prop','lanes','speed','above_4']


In [None]:
import math

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


# Define the variables to plot
variables = cols_to_plot

# Number of variables
num_vars = len(variables)

# Fixed number of columns; as many rows as are needed to hold every variable
ncols = 3
nrows = math.ceil(num_vars / ncols)

# Apply the seaborn style BEFORE creating the figure: set_style changes matplotlib
# defaults, so axes that already exist keep whatever style was active when they
# were created
sns.set_style("darkgrid")

# Create subplots
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 18), constrained_layout=True)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot the distribution of each variable over the map-matched routes, one subplot each
for ax, variable in zip(axes, variables):
    sns.kdeplot(data=matched_summary, x=variable, fill=True, ax=ax, cut=0, bw_adjust=0.5)
    # use the display label when one is defined, otherwise the column name
    ax.set_xlabel(renaming_dict.get(variable,variable))

    # vertical reference line at zero
    ax.axvline(0, color='k', linestyle='--', alpha = 0.5)

# Remove any empty subplots
for ax in axes[len(variables):]:
    fig.delaxes(ax)

# Show the plot
plt.show()


# Visualize Path Attribute Differences (Chosen - Shortest)
Re-create the plots that were in Dillon's paper.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Per-trip attribute differences (chosen - shortest); rows are paired by index.
# The identifier and geometry columns are excluded by name, as the summary tables
# earlier in this notebook do, rather than by position, so the result does not
# depend on where those two columns sit in the frame.
non_attribute_cols = ['tripid','geometry']
differences = matched_summary.drop(columns=non_attribute_cols) - shortest_summary.drop(columns=non_attribute_cols)

In [None]:
import math

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


# Define the variables to plot
variables = cols_to_plot

# Number of variables
num_vars = len(variables)

# Fixed number of columns; as many rows as are needed to hold every variable
ncols = 3
nrows = math.ceil(num_vars / ncols)

# Apply the seaborn style BEFORE creating the figure: set_style changes matplotlib
# defaults, so axes that already exist keep whatever style was active when they
# were created
sns.set_style("darkgrid")

# Create subplots
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 18), constrained_layout=True)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot the distribution of each difference on a separate subplot
for ax, variable in zip(axes, variables):
    sns.kdeplot(data=differences, x=variable, fill=True, ax=ax, cut=0, bw_adjust=0.5)
    # use the display label when one is defined, otherwise the column name
    ax.set_xlabel(renaming_dict.get(variable,variable))

    # Get the min and max values of the variable
    min_val = differences[variable].min()
    max_val = differences[variable].max()

    # Default limits are symmetric around zero so that positive and negative
    # differences are drawn on the same scale
    max_abs_val = max(abs(min_val), abs(max_val))

    # a variable-specific entry in x_axes_limits overrides the default
    x_lim = x_axes_limits.get(variable, (-max_abs_val, max_abs_val))

    ax.set_xlim(x_lim)
    # vertical reference line at zero (no difference)
    ax.axvline(0, color='k', linestyle='--', alpha = 0.5)

# Remove any empty subplots
for ax in axes[len(variables):]:
    fig.delaxes(ax)

# Show the plot
plt.show()


In [None]:
differences.rename(columns=renaming_dict).describe().round(2)

# Cross Tabs

In [None]:
# Reload the trip and user tables for the cross tabs.
# `cycleatl_fp` is not defined anywhere in this notebook; the folder comes from
# `config`, the same way the earlier load cell reads it.
# NOTE(review): this cell reads trips_3/users_0 while the earlier cell reads
# trips_4/users_4 -- confirm which versions are intended here.
# NOTE: pickle can execute arbitrary code on load -- only open files produced by this project.
with (config['cycleatl_fp']/'trips_3.pkl').open('rb') as fh:
    trips = pickle.load(fh)
with (config['cycleatl_fp']/'users_0.pkl').open('rb') as fh:
    users = pickle.load(fh)

In [None]:
trips.columns

In [None]:
users.columns

In [None]:
#pretend they're not the same person for now and then later fix the cycleatl data processing code

In [None]:
differences['tripid'] = matched_summary['tripid']

In [None]:
differences_w_info = differences.merge(pd.merge(trips,users,on='userid'),on='tripid')

In [None]:
differences_w_info['rider_type'].value_counts()

In [None]:
trips.columns

In [None]:
users.columns

In [None]:
hue = "rider_type"

In [None]:
import math

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


# Define the variables to plot
variables = cols_to_plot

# Number of variables
num_vars = len(variables)

# Fixed number of columns; as many rows as are needed to hold every variable
ncols = 3
nrows = math.ceil(num_vars / ncols)

# Apply the seaborn style BEFORE creating the figure: set_style changes matplotlib
# defaults, so axes that already exist keep whatever style was active when they
# were created
sns.set_style("darkgrid")

# Create subplots
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 20), constrained_layout=True)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot the distribution of each difference on a separate subplot, split by `hue`
for plot_idx, (ax, variable) in enumerate(zip(axes, variables)):
    # only the first subplot carries the legend; the groups are the same in all of them
    sns.kdeplot(data=differences_w_info, x=variable, fill=False, ax=ax, cut=0, bw_adjust=0.5,
                hue=hue, legend=(plot_idx == 0))
    # use the display label when one is defined, otherwise the column name
    ax.set_xlabel(renaming_dict.get(variable,variable))

    # Get the min and max values of the variable
    min_val = differences_w_info[variable].min()
    max_val = differences_w_info[variable].max()

    # Default limits are symmetric around zero so that positive and negative
    # differences are drawn on the same scale
    max_abs_val = max(abs(min_val), abs(max_val))

    # a variable-specific entry in x_axes_limits overrides the default
    x_lim = x_axes_limits.get(variable, (-max_abs_val, max_abs_val))

    ax.set_xlim(x_lim)
    # vertical reference line at zero (no difference)
    ax.axvline(0, color='k', linestyle='--', alpha = 0.5)

# Remove any empty subplots
for ax in axes[len(variables):]:
    fig.delaxes(ax)

# Show the plot
plt.show()


In [None]:
#multiple_entries = users['userid'].apply(lambda x: isinstance(x,list))

#users['rider_type'].apply()
# def list_to_single(row):
#     if isinstance(row['userid'],list) == False:
#         return row
    
#     #remove nulls
#     if isinstance(row['rider_type'],list):


#     row["rider_type"]


In [None]:
#have a bad elevation link
edges_w_attr.loc[edges_w_attr['ascent_ft'].sort_values(ascending=False).head(10).index,['name','ascent_ft','descent_ft','geometry']].explore()

In [None]:
matched_summary.loc[[matched_summary['ascent_ft'].idxmax()]].explore()

In [None]:
differences.sort_values('ascent_ft',ascending=False).head(10)

In [None]:
sns.kdeplot(data=matched_summary[link_type_new],cut=0,bw_adjust=5)

# Deprecated past here

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Work on a zero-filled copy of the summary table: missing values become 0 and
# every row is kept
df = summary_df.fillna(0)

# The target is the route length; every remaining column except the identifier
# and the geometry is used as a feature
target_col = 'length_ft'
X = df.drop(columns=['tripid', 'geometry', target_col])
y = df[target_col]

# Hold out 20% of the rows for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a decision tree on the training rows
regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Score the predictions on the held-out rows
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


In [None]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(20,10))
plot_tree(regressor, feature_names=X.columns, filled=True, rounded=True)
plt.show()


In [None]:
feature_importances = pd.Series(regressor.feature_importances_, index=X.columns).sort_values(ascending=False)
print(feature_importances)


In [None]:
import random
testtrip = random.choice(list(matched_traces_dict.keys()))
testtrip = matched_traces_dict[testtrip]

In [None]:
summary_dict = {}

#get edges
route = testtrip['edges']


In [None]:
#get route and turns
route = [tuple(x) for x in testtrip['edges'].values]
turns = [(route[i][0],route[i][1],route[i+1][0],route[i+1][1]) for i in range(0,len(route)-1)]

In [None]:
'''
Two different types of summarization:

Instance based (turns, signals, bridges, etc)

Length based on certain tag (bike facilities)

Cumulative (length,elevation)

'''

summary_attributes = {}

#get trip date for the bike facility check
trip_date_year = testtrip['trace'].iloc[0,2].year

#get route and turns
route = testtrip['edges']
turns = [(route.values[i][0],route.values[i][1],route.values[i+1][0],route.values[i+1][1]) for i in range(0,len(route.values)-1)]
turns = pd.DataFrame(turns,columns=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])


In [None]:

#retrieve attributes
route_w_attr = pd.merge(route,edges_w_attr,on='linkid')
turns_w_attr = pd.merge(turns,turns_df,on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])

#turn to gdf
route_w_attr = gpd.GeoDataFrame(route_w_attr,geometry='geometry',crs=config['projected_crs_epsg'])
summary_attributes["geometry"] = MultiLineString(route_w_attr['geometry'].tolist())

#flip relevant attributes
route_w_attr.loc[route_w_attr['reverse_link']==True,ascent_columns+descent_columns+bike_facils] = \
    route_w_attr.loc[route_w_attr['reverse_link']==True,descent_columns+ascent_columns+bike_facils[::-1]].values

#set the bike facility to na if the trip date was before the bike facility
route_w_attr.loc[route_w_attr['year'] > trip_date_year,bike_facils] = np.nan

#summary columns
summary_attributes["length_ft"] = route_w_attr['length_ft'].sum().round(0)
summary_attributes["ascent_m"] = route_w_attr['ascent_m'].sum().round(0)
summary_attributes["descent_m"] = route_w_attr['descent_m'].sum().round(0)

# average grade by category (from broach)
zero_to_two = (route_w_attr['ascent_grade_%'] > 0) & (route_w_attr['ascent_grade_%'] <= 2)
two_to_four = (route_w_attr['ascent_grade_%'] > 2) & (route_w_attr['ascent_grade_%'] <= 4)
four_to_six = (route_w_attr['ascent_grade_%'] > 4) & (route_w_attr['ascent_grade_%'] <= 6)
six_and_beyond = (route_w_attr['ascent_grade_%'] > 6)
summary_attributes["zero_to_two_%_ft"] = route_w_attr.loc[zero_to_two,'length_ft'].sum().round(0)
summary_attributes["two_to_four_%_ft"] = route_w_attr.loc[two_to_four,'length_ft'].sum().round(0)
summary_attributes["four_to_six_%_ft"] = route_w_attr.loc[four_to_six,'length_ft'].sum().round(0)
summary_attributes["six_and_beyond_%_ft"] = route_w_attr.loc[six_and_beyond,'length_ft'].sum().round(0)

#TODO add this back in the elevation step and use the same limits?
#add meters on grade segments (i.e. add all in length along x to x)
#could possibly be a more accurate represntation of steep roads

# #instance columns to summarize
# count_cols = ['bridge','tunnel']
# for count_col in count_cols:
#     summary_attributes[count_col] = (route_w_attr[count_col]==True).sum().round(0)

# length of route columns to summarize
cols = ['link_type_new','facility_fwd']#['link_type','highway']#,'speedlimit_range_mph','lanes_per_direction']
for col in cols:
    #make a summary column for every unique value in that column
    for unique_val in route_w_attr[col].unique():
        #skip if nan
        if isinstance(unique_val,str) == False:
            continue
        summary_attributes[col+'.'+unique_val+'_ft'] = route_w_attr.loc[route_w_attr[col]==unique_val,'length_ft'].sum().round(0)
        
# turns
summary_attributes.update(turns_w_attr['turn_type'].value_counts().to_dict())


In [None]:
summary_attributes

In [None]:
# Look up the chosen links by their (linkid, reverse_link) pair
df_edges['tup'] = list(zip(df_edges['linkid'],df_edges['reverse_link']))
edges_by_pair = df_edges.set_index('tup')
chosen_links = edges_by_pair.loc[list_of_edges]

# Every pair of consecutive edges forms one turn:
# (source linkid, source reverse_link, target linkid, target reverse_link)
list_of_turns = [
    (src[0],src[1],tgt[0],tgt[1])
    for src, tgt in zip(list_of_edges,list_of_edges[1:])
]
turn_key = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']
chosen_turns = pseudo_df.set_index(turn_key).loc[list_of_turns]
chosen_links.columns


In [None]:
#intialize summary dict
summary_attributes = {}


In [None]:

#trip distance
summary_attributes['trip_distance_ft'] = chosen_links['length_ft'].sum()


In [None]:

# Fill `summary_attributes` for one trip from its chosen links and turns, then
# store it in `summary_dict` under the trip id.

# feet per metre, for converting metric elevation totals
FT_PER_M = 3.28084

#instance columns to summarize (number of links flagged True)
count_cols = ['bridge','tunnel']
for count_col in count_cols:
    summary_attributes[count_col] = (chosen_links[count_col]==True).sum()

#general elevation
# Metres to feet is a multiplication. The earlier version divided by 3.28, which
# understated the ascent -- and the grade derived from it -- by about a factor of ten.
route_length_ft = chosen_links['length_ft'].sum()
total_ascent = (chosen_links['ascent_m'].sum() * FT_PER_M).round(0)
summary_attributes['ascent_ft'] = total_ascent
summary_attributes['avg_ascent_grade'] = (total_ascent / route_length_ft * 100).round(1)

#elevation broken by segment
# NOTE(review): the units of these columns are not visible here. If they are in
# metres, this division has the same metres-to-feet error that was fixed above
# and should multiply instead -- confirm before relying on the '_ft' values.
elev_cols = ['(0,2]_ascent','(2,4]_ascent', '(4,6]_ascent', '(6,10]_ascent', '(10,15]_ascent','(15,inf]_ascent']
for elev_col in elev_cols:
    total_length = (chosen_links[elev_col].sum() / 3.28).round(0)
    summary_attributes[elev_col+'_ft'] = total_length

#pct of route columns to summarize
cols = ['link_type','highway','bike_facility_type','speedlimit_range_mph','lanes_per_direction']
for col in cols:
    #make a summary column for every unique value in that column
    for unique_val in chosen_links[col].unique():
        # Skip None. (The old test also compared with `== np.nan`, which is always
        # False, so it reduced to this check.)
        # NOTE(review): a NaN value still gets an entry with a share of 0, because
        # the equality mask below never matches NaN -- confirm whether NaN should
        # be skipped or measured with isna().
        if unique_val is None:
            continue
        total_length = chosen_links[chosen_links[col] == unique_val]['length_ft'].sum()
        if not isinstance(unique_val,str):
            unique_val = str(unique_val)
        # share of the route length spent on links with this value
        summary_attributes[col+'.'+unique_val] = np.round(total_length/route_length_ft,2)

# signalized and turns
summary_attributes['signalized'] = (chosen_turns['signalized']==True).sum()
summary_attributes['unsignalized'] = (chosen_turns['unsignalized']==True).sum()
# number of turns of each turn type
turn_dict = chosen_turns['turn_type'].value_counts().to_dict()
summary_attributes.update(turn_dict)

summary_dict[tripid] = summary_attributes



In [None]:
summary_attributes

In [None]:
#turn into geodataframe
trips_df_info = pd.DataFrame.from_dict(summary_dict,orient='index')
trips_df_info.fillna(0,inplace=True)

In [None]:
trips_df_info

In [None]:
trips_df = trips_df.merge(trips_df_info,left_on='tripid',right_index=True)

In [None]:
#TODO find the visualiztion code that we had already made

In [None]:
# list_of_edges = list(zip(edges['linkid'],edges['reverse_link']))
# list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]

# chosen_links = df_edges.set_index(['linkid','reverse_link'],drop=False).loc[list_of_edges]

# chosen_links['bridge'].value_counts()

In [None]:
#add user info
trip_and_user = pd.read_pickle(fp/'gps_traces/trip_and_user.pkl')

trips_df = trips_df_info.merge(trip_and_user,left_index=True,right_on='tripid')

In [None]:
trips_df.to_csv(fp/'all_attrs.csv',index=False)

In [None]:
#instead of visualizing here visualize elsewhere?

In [None]:

#import stochastic_optimization


In [None]:
# fp = Path.home() / 'Documents/BikewaySimData/Projects/gdot'
# #fp = Path.home() / 'Library/CloudStorage/OneDrive-GeorgiaInstituteofTechnology/BikewaySim/Data'

# with (fp / 'impedance_calibration.pkl').open('rb') as fh:
#     (df_edges,pseudo_df,pseudo_G) = pickle.load(fh)

In [None]:
# Retrieve link/turn costs
# default below is link distance
# link_costs = dict(zip(list(zip(df_edges['source'],df_edges['target'],df_edges['linkid'])),df_edges['length_ft']))
# tup = list(zip(pseudo_df['source_A'],pseudo_df['source_B'],pseudo_df['source_linkid']))
# pseudo_df['source_cost'] = list(map(link_costs.get,tup))
# tup = list(zip(pseudo_df['target_A'],pseudo_df['target_B'],pseudo_df['target_linkid']))

# pseudo_df['target_cost'] = list(map(link_costs.get,tup))
# pseudo_df['total_cost'] = pseudo_df['source_cost'] + pseudo_df['target_cost'] #+turn_cost

# costs = pseudo_df.groupby(['source','target'])['total_cost'].min()
# nx.set_edge_attributes(pseudo_G,values=costs,name='weight')
# source = list(pseudo_G.nodes())[0]
# target = list(pseudo_G.nodes())[420]
# print(source,target)
# import networkx as nx
# length, edge_list = nx.single_source_dijkstra(pseudo_G,source,target,weight="weight")
# turn_list = [[edge_list[i][0],edge_list[i][1],edge_list[i+1][0],edge_list[i+1][1]] for i in range(len(edge_list)-1)]

# turn_cols = ['turn_type','signalized_left_straight','unsignalized_left_straight_nonlocal']
# linkid_cols = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']
# chosen_turns = pseudo_df.set_index(['source_A','source_B','target_A','target_B'],drop=False).loc[turn_list,linkid_cols+turn_cols]

# tripid = 302

# #make a single row dataframe to attach to trips_df
# stats_dict = {}
# stats_dict[tripid] = {
#     'tripid':tripid,
#     'signalized_left_straight': chosen_turns['signalized_left_straight'].sum(),
#     'unsignalized_left_straight_nonlocal': chosen_turns['unsignalized_left_straight_nonlocal'].sum()
# }
# turn_dict = chosen_turns['turn_type'].value_counts().to_dict()
# stats_dict[tripid].update(turn_dict)

# Case 1: Need to run shortest paths to create link sequence

In [None]:
# source_links = chosen_turns[['source_linkid','source_reverse_link']]
# target_links = chosen_turns[['target_linkid','target_reverse_link']]
# source_links.columns = ['linkid','reverse_link']
# target_links.columns = ['linkid','reverse_link']
# linkids = pd.concat([source_links,target_links],ignore_index=True).drop_duplicates()
# chosen_links = df_edges.merge(linkids,on=['linkid','reverse_link'])

In [None]:
#TODO recalculate bearing
#create pseudo graph for modeling turns
df_edges, pseudo_links, pseudo_G = modeling_turns.create_pseudo_dual_graph(links,'A','B','linkid','oneway')


In [None]:
source = list(pseudo_G.nodes())[0]
target = list(pseudo_G.nodes())[420]

In [None]:
source = (68209677, 68209675)
target = (69200243, 69465418)

import networkx as nx
length, path = nx.single_source_dijkstra(pseudo_G,source,target)

In [None]:
edge_list = [(linkids[i],linkids[i+1]) for i in range(len(linkids)-1)]
edge_list

In [None]:
edge_list = [(*path[i],*path[i+1]) for i in range(len(path)-1)]
edge_list

In [None]:
pseudo_links.columns

In [None]:
pseudo_links.set_index(['source_A','source_B','target_A','target_B']).loc[edge_list,'turn_type'].value_counts()