# Retrieve Route Attributes from a List of Edge IDs and the Direction Travelled
We have a list of edges or turns (depending on the settings) from Dijkstra, and we need to get the route attributes (number of turns, feet of ascent, miles of bike facility, etc.) to report the route characteristics of a trip.


- For link attributes we have the linkid and direction of travel (because of elevation).
- For turn attributes we just need linkid to linkid.

In [None]:
from pathlib import Path
import time
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from shapely.ops import MultiLineString
import pandas as pd
import sys
import math

# #TODO will need to fix this
# sys.path.insert(0,str(Path.cwd().parent))
# from network.src import modeling_turns

In [None]:
import json

# Load the project configuration. The context manager closes the file handle
# as soon as the JSON has been parsed.
with (Path.cwd().parent / 'config.json').open('rb') as fh:
    config = json.load(fh)

# Project sub-directories used throughout the notebook
export_fp = Path(config['project_directory']) / 'Calibration'
cycleatl_fp = Path(config['project_directory']) / 'CycleAtlanta'
matching_fp = Path(config['project_directory']) / 'Map_Matching'
network_fp = Path(config['project_directory']) / 'Network'

# exist_ok avoids the check-then-create race and makes the cell safe to re-run
export_fp.mkdir(exist_ok=True)

# Import Network

In [None]:
# Read the edge layer of the final network and the table of turns
edges_w_attr = gpd.read_file(
    network_fp / 'final_network.gpkg',
    layer='edges',
)
turns_df = pd.read_parquet(network_fp / 'turns_df.parquet')

In [None]:
# Visual check: plot every named edge whose name mentions "beltline"
test = edges_w_attr[edges_w_attr['name'].notna()]
mentions_beltline = test['name'].map(lambda name: 'beltline' in name.lower())
test[mentions_beltline].plot()

In [None]:
#unit conversions (feet -> miles, metres -> feet)
FT_PER_MI = 5280
FT_PER_M = 3.28084

# NOTE(review): rounding each edge to 0.01 mi before summing turns links
# shorter than ~26 ft into 0.0 mi; consider rounding only the route totals.
edges_w_attr['length_mi'] = (edges_w_attr['length_ft'] / FT_PER_MI).round(2)

# A metre is ~3.28 ft, so metres are MULTIPLIED by the factor. Dividing
# under-reports climb and descent by a factor of roughly 10.8.
edges_w_attr['ascent_ft'] = (edges_w_attr['ascent_m'] * FT_PER_M).round(0)
edges_w_attr['descent_ft'] = (edges_w_attr['descent_m'] * FT_PER_M).round(0)

# the source-unit columns are no longer needed
edges_w_attr.drop(columns=['length_ft','ascent_m','descent_m'],inplace=True)

In [None]:
# Index edges by link id and turns by (from link, from direction, to link,
# to direction) so that rows can be selected with .loc
turn_key = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']
edges_w_attr = edges_w_attr.set_index('linkid')
turns_df = turns_df.set_index(turn_key)

In [None]:
#get the columns that need to be reversed (within function)
ascent_columns = [col for col in edges_w_attr.columns if 'ascent' in col]
descent_columns = [col for col in edges_w_attr.columns if 'descent' in col]
bike_facils = ['facility_fwd','facility_rev']

#added a major/minor classification, everything else keeps its original link_type
major_road = ['primary','secondary']
major_road = major_road + [item + '_link' for item in major_road]
minor_road = ['tertiary','unclassified','residential']
# the "_link" variants of the minor classes belong to minor_road; adding them
# to major_road would label e.g. 'tertiary_link' as a major road
minor_road = minor_road + [item + '_link' for item in minor_road]

# start from the existing link_type so the column always exists, then
# overwrite the rows that fall in one of the two road classes
edges_w_attr['link_type_new'] = edges_w_attr['link_type']
edges_w_attr.loc[edges_w_attr['highway'].isin(major_road),'link_type_new'] = 'major_road'
edges_w_attr.loc[edges_w_attr['highway'].isin(minor_road),'link_type_new'] = 'minor_road'

#do a motorized vs non-motorized split: True where the highway tag is a road class
roads = ['primary', 'residential', 'service', 'secondary', 'tertiary',
       'secondary_link', 'unclassified','primary_link','tertiary_link','trunk', 'trunk_link']
edges_w_attr['mixed_traffic'] = edges_w_attr['highway'].isin(roads)

# Map Matched

In [None]:
# NOTE: pickle executes arbitrary code on load; only open files produced by
# this project's own pipeline.
with (matching_fp / 'matched_0.pkl').open('rb') as fh:
    matched_traces_dict = pickle.load(fh)
with (export_fp/'ready_for_calibration.pkl').open('rb') as fh:
    ready_for_calibration = pickle.load(fh)

#get trip date for the bike facility check
for tripid, entry in ready_for_calibration.items():
    # Index directly: a trip missing from the matched traces now raises a
    # KeyError naming the trip instead of a TypeError from subscripting the
    # integer default of .get().
    trace = matched_traces_dict[tripid]['trace']
    # the timestamp is read positionally: first row, third column of the trace
    entry['start_time'] = trace.iloc[0,2].year

# the raw traces are no longer needed
del matched_traces_dict

In [None]:
# Keep only trips whose matched AND shortest routes both have more than 2 links
print('Initial:',len(ready_for_calibration))
ready_for_calibration = {
    tripid: entry
    for tripid, entry in ready_for_calibration.items()
    if entry['matched_edges'].shape[0] > 2 and entry['shortest_edges'].shape[0] > 2
}
print('Successful',len(ready_for_calibration))

In [None]:
def route_attributes(tripid,match_dict_entry,edge_col):
    '''
    Summarize the attributes of one route (an ordered list of directed links).

    Three different types of summarization:

    Instance based (count of each turn type)

    Length based on certain tag (share of route length by grade bin,
    mixed traffic and bike facility)

    Cumulative (length, ascent, descent)

    Parameters
    ----------
    tripid : identifier stored under 'tripid' in the result
    match_dict_entry : mapping holding 'start_time' (the trip year) and, under
        edge_col, a DataFrame with 'linkid' and 'reverse_link' columns in
        travel order
    edge_col : key of the route to summarize, e.g. 'matched_edges' or
        'shortest_edges'

    Returns
    -------
    dict with 'tripid', 'geometry' (MultiLineString), the totals, the "*_prop"
    shares of route length and one count per turn type.

    Reads the notebook globals edges_w_attr, turns_df, ascent_columns,
    descent_columns and bike_facils.
    '''

    summary_attributes = {'tripid': tripid}

    #get trip date for the bike facility check
    trip_date_year = match_dict_entry['start_time']

    #get route and turns; columns are addressed by name so the result does not
    #depend on the column order of the edge table
    linkids = match_dict_entry[edge_col]['linkid'].tolist()
    reverse_links = match_dict_entry[edge_col]['reverse_link'].tolist()
    route = list(zip(linkids, reverse_links))

    #consecutive link pairs, skipping any doubling back onto the same link
    #(might be some of this in the matched dataset)
    turns = [
        (src[0], src[1], dst[0], dst[1])
        for src, dst in zip(route[:-1], route[1:])
        if src[0] != dst[0]
    ]

    #get attributes (explicit copy: the rows are modified below)
    route_w_attr = edges_w_attr.loc[linkids].copy()
    #an empty turn list yields an empty frame rather than a lookup on nothing
    turns_w_attr = turns_df.loc[turns] if turns else turns_df.iloc[0:0]

    #add reverse direction
    route_w_attr['reverse_link'] = reverse_links

    summary_attributes["geometry"] = MultiLineString([list(line.coords) for line in route_w_attr['geometry'].values])

    #flip relevant attributes on links travelled against their digitized
    #direction: ascent <-> descent and facility_fwd <-> facility_rev
    travelled_reverse = route_w_attr['reverse_link'] == True
    route_w_attr.loc[travelled_reverse,ascent_columns+descent_columns+bike_facils] = \
        route_w_attr.loc[travelled_reverse,descent_columns+ascent_columns+bike_facils[::-1]].values

    #set the bike facility to na if the trip date was before the bike facility
    route_w_attr.loc[route_w_attr['year'] > trip_date_year,bike_facils] = np.nan
    #set 'no facility' to nan -- ONLY in the facility column; omitting the
    #column selector would blank the whole row, including length and elevation
    route_w_attr.loc[route_w_attr['facility_fwd']=='no facility','facility_fwd'] = np.nan

    #summary columns
    total_length = route_w_attr['length_mi'].sum()
    summary_attributes["length_mi"] = total_length
    summary_attributes["ascent_ft"] = route_w_attr['ascent_ft'].sum()
    summary_attributes["descent_ft"] = route_w_attr['descent_ft'].sum()

    def length_share(mask):
        #share of the route length on the links selected by mask; 0 when the
        #route has no length (avoids 0/0)
        if total_length == 0:
            return 0.0
        return np.round(route_w_attr.loc[mask,'length_mi'].sum() / total_length, 2)

    # average grade by category (cut offs from broach)
    # NOTE(review): the keys read "(a,b]" but the bins below are [a,b)
    grade = route_w_attr['ascent_grade_%']
    summary_attributes["(0,2]_prop"] = length_share((grade >= 0) & (grade < 2))
    summary_attributes["(2,4]_prop"] = length_share((grade >= 2) & (grade < 4))
    summary_attributes["(4,6]_prop"] = length_share((grade >= 4) & (grade < 6))
    summary_attributes["(6,inf)_prop"] = length_share(grade >= 6)

    #TODO add this back in the elevation step and use the same limits?
    #add meters on grade segments (i.e. add all in length along x to x)
    #could possibly be a more accurate represntation of steep roads

    #TODO instance columns to summarize (count of 'bridge' / 'tunnel' links)

    # length of route columns to summarize
    cols = ['mixed_traffic','facility_fwd']
    for col in cols:
        #make a summary column for every unique value in that column
        for unique_val in route_w_attr[col].unique():
            if isinstance(unique_val,str):
                label = unique_val
            elif isinstance(unique_val,(bool,np.bool_)):
                #a boolean column yields numpy bools, which are not instances
                #of the builtin bool
                label = str(unique_val).lower()
            else:
                #nan / None
                continue
            summary_attributes[col+'.'+label+'_prop'] = length_share(route_w_attr[col]==unique_val)

    # turns
    summary_attributes.update(turns_w_attr['turn_type'].value_counts().to_dict())

    return summary_attributes


In [None]:
# Summarize every map-matched route and collect the records in a GeoDataFrame
matched_records = [
    route_attributes(tripid, entry, 'matched_edges')
    for tripid, entry in ready_for_calibration.items()
]
matched_summary = gpd.GeoDataFrame(
    pd.DataFrame.from_records(matched_records),
    crs=config['projected_crs_epsg'],
)
# attributes that never occur on a route count as 0
matched_summary.fillna(0,inplace=True)

In [None]:
# Summarize every shortest-path route and collect the records in a GeoDataFrame
shortest_records = [
    route_attributes(tripid, entry, 'shortest_edges')
    for tripid, entry in ready_for_calibration.items()
]
shortest_summary = gpd.GeoDataFrame(
    pd.DataFrame.from_records(shortest_records),
    crs=config['projected_crs_epsg'],
)
# attributes that never occur on a route count as 0
shortest_summary.fillna(0,inplace=True)

In [None]:
# #calculate proportions for comparison purposes
# per_ft = ['ascent_m','descent_m','uturn','straight', 'right', 'left']
# proportion_columns = [
#     'zero_to_two_%_ft', 'two_to_four_%_ft', 'four_to_six_%_ft',
#     'six_and_beyond_%_ft', 'link_type_new.pedestrian_ft',
#     'link_type_new.bike_ft', 'link_type_new.minor_road_ft',
#     'link_type_new.sidewalk_or_crossing_ft', 'link_type_new.major_road_ft',
#     'link_type_new.service_ft', 'facility_fwd.sharrow_ft',
#     'facility_fwd.bike lane_ft', 'facility_fwd.multi use path_ft',
#     'link_type_new.parking_and_driveways_ft',
#     'facility_fwd.cycletrack_ft', 'link_type_new.road_ft',
#     'facility_fwd.no facility_ft', 'facility_fwd.buffered bike lane_ft'
#     ]
# shortest_summary.loc[:,proportion_columns] = shortest_summary.loc[:,proportion_columns].div(shortest_summary['length_ft'],axis=0)
# shortest_summary.loc[:,proportion_columns] = shortest_summary.loc[:,proportion_columns].div(shortest_summary['length_ft'],axis=0)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# List the summary columns produced for the map-matched routes
matched_summary.columns

In [None]:
# Pair each trip's non-mixed-traffic share on the matched and shortest routes
share_cols = ['tripid','mixed_traffic.false_prop']
test = matched_summary[share_cols].merge(
    shortest_summary[share_cols],
    on='tripid',
    suffixes=('_matched','_shortest'),
)

It looks like the shortest path generally has a higher proportion of the route on non-mixed-traffic streets
(however, this might just be due to which links are allowed for routing).

In [None]:
# Density of the non-mixed-traffic share, matched vs. shortest routes
sns.kdeplot(data=test[['mixed_traffic.false_prop_matched','mixed_traffic.false_prop_shortest']],cut=0)

In [None]:
# List the summary columns produced for the shortest-path routes
shortest_summary.columns

In [None]:
# Collect the bike-facility share columns of the shortest-path summary.
# NOTE(review): despite its name, `link_type_new` holds the 'facility_fwd'
# columns, not link-type columns.
link_type_new = [col for col in shortest_summary.columns if 'facility_fwd' in col]
link_type_new

In [None]:
# Row-wise sum of the facility shares for the shortest routes, largest first
shortest_summary.loc[:,link_type_new].sum(axis=1).sort_values(ascending=False)

In [None]:
# Smallest value of each facility-share column among the matched routes.
# NOTE(review): the column list was built from shortest_summary; a facility
# type that never occurs on a matched route would raise a KeyError here.
matched_summary[link_type_new].min()

In [None]:
# Density of each facility share across the matched routes
sns.kdeplot(data=matched_summary[link_type_new],cut=0,bw_adjust=5)

In [None]:
# Density of each facility share across the shortest-path routes
sns.kdeplot(data=shortest_summary[link_type_new],cut=0,bw_adjust=5)

In [None]:
# Difference in facility shares, matched minus shortest; the subtraction
# aligns the two frames on their row index
test2 = matched_summary[link_type_new] - shortest_summary[link_type_new]
#sns.kdeplot(data=test2,cut=0,bw_adjust=5)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# One KDE panel per column of the matched-minus-shortest differences
variables = test2.columns.tolist()

# Number of variables
num_vars = len(variables)

# Three panels per row; ceiling division gives the number of rows
# (at least one, so subplots() never receives nrows=0)
ncols = 3
nrows = max(1, -(-num_vars // ncols))

# Create subplots. The figure height grows with the number of rows so the
# panels are not squashed into a fixed 5 in; squeeze=False always returns a
# 2-D array of axes.
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 5 * nrows),
                         squeeze=False, constrained_layout=True)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot each KDE plot on a separate subplot
for ax, variable in zip(axes, variables):
    sns.kdeplot(data=test2, x=variable, fill=True, ax=ax, cut=0)
    ax.set_title(f'KDE Plot of {variable}')

# Remove any empty subplots
for ax in axes[len(variables):]:
    fig.delaxes(ax)

# Show the plot
plt.show()


In [None]:
# NOTE(review): `edges_w_variables` is not assigned anywhere in this notebook,
# so this cell raises a NameError; presumably `edges_w_attr` was meant -- confirm.
edges_w_variables

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# One KDE panel per column of the matched-minus-shortest differences, each
# with an x-axis that is symmetric around zero
variables = test2.columns.tolist()

# Number of variables
num_vars = len(variables)

# Three panels per row; ceiling division gives the number of rows
# (at least one, so subplots() never receives nrows=0)
ncols = 3
nrows = max(1, -(-num_vars // ncols))

# Variable-specific x-limits that replace the symmetric default
x_lim_overrides = {
    'facility_fwd.cycletrack_prop': (-0.1, 0.1),
}

# Create subplots. The figure height grows with the number of rows;
# squeeze=False always returns a 2-D array of axes.
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 5 * nrows),
                         squeeze=False, constrained_layout=True)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot each KDE plot on a separate subplot
for ax, variable in zip(axes, variables):
    sns.kdeplot(data=test2, x=variable, fill=True, ax=ax, cut=0)

    # Largest absolute difference -> symmetric limits around zero
    min_val = test2[variable].min()
    max_val = test2[variable].max()
    max_abs_val = max(abs(min_val), abs(max_val))

    if variable in x_lim_overrides:
        ax.set_xlim(x_lim_overrides[variable])
    elif max_abs_val > 0:
        # False for 0 and for NaN: keep the automatic limits instead of
        # requesting a zero-width or undefined axis range
        ax.set_xlim((-max_abs_val, max_abs_val))

    ax.axvline(0, color='k', linestyle='--')  # reference line at zero difference
    ax.set_title(f'KDE Plot of {variable}')

# Remove any empty subplots
for ax in axes[len(variables):]:
    fig.delaxes(ax)

# Show the plot
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Route totals of the matched routes, one KDE panel each
variables = ['length_mi','ascent_ft','descent_ft']
num_vars = len(variables)

# Grid layout: three columns, as many rows as needed (ceiling division)
ncols = 3
nrows = -(-num_vars // ncols)

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 5), constrained_layout=True)
axes = axes.flatten()

for panel_idx, variable in enumerate(variables):
    panel = axes[panel_idx]
    sns.kdeplot(data=matched_summary, x=variable, fill=True, ax=panel, cut=0)
    panel.set_title(f'KDE Plot of {variable}')

# Drop the panels that received no plot
for unused_panel in axes[num_vars:]:
    fig.delaxes(unused_panel)

plt.show()


In [None]:
#convert ft to miles and meters to ft
#group relevant variables and make an "other" column
#link_type, bike facility, non-motorized vs mixed traffic split

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Stand-alone seaborn example (not project data): load the "penguins" dataset
penguins = sns.load_dataset("penguins")

# Define the variables to plot
variables = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']

# Create a FacetGrid with one panel per species
g = sns.FacetGrid(penguins, col='species', col_wrap=3, sharex=False, sharey=False)

# Map the kdeplot onto the grid (every panel shows bill_length_mm)
g.map(sns.kdeplot, 'bill_length_mm', fill=True)

# Add titles to the individual plots
# NOTE(review): the panels are split by species but are titled with entries of
# `variables`, so the titles do not describe what each panel shows.
for ax, title in zip(g.axes.flatten(), variables):
    ax.set_title(title)

# Show the plot
plt.show()


In [None]:
# person to person density plot
# x axis is the proportion/# of distance
# y axis is the density

In [None]:
# x axis is path attribute differences

# y axis is the density

In [None]:
# Load the trip table and renumber its rows 0..n-1
trips_path = cycleatl_fp/'trips_3.pkl'
with trips_path.open('rb') as trips_file:
    trips_df = pickle.load(trips_file)
trips_df = trips_df.reset_index(drop=True)

In [None]:
# Attach the trip table to the route summaries.
# NOTE(review): `summary_df` is not assigned by any earlier cell in this
# notebook; presumably `matched_summary` was meant -- confirm.
summary_df = pd.merge(trips_df,summary_df,on='tripid')

In [None]:
#export
summary_df.to_file(matching_fp/'matched_routes_w_attributes.gpkg')

In [None]:
#TODO move on to shortest path?
# list the columns of the merged summary
summary_df.columns

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Work on a copy so the summary itself is left untouched
df = summary_df.copy()

# Replace missing values with 0 (no rows are dropped)
df.fillna(0,inplace=True)

# Define features and target variable
# NOTE(review): the route summaries built above report 'length_mi'; confirm
# that `summary_df` still carries a 'length_ft' column.
X = df.drop(columns=['tripid', 'geometry', 'length_ft'])
y = df['length_ft']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the model
regressor = DecisionTreeRegressor(random_state=42)

# Train the model
regressor.fit(X_train, y_train)

# Predict on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


In [None]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Draw the fitted decision tree, labelling the splits with the feature names
plt.figure(figsize=(20,10))
plot_tree(regressor, feature_names=X.columns, filled=True, rounded=True)
plt.show()


In [None]:
# Feature importances of the fitted tree, most important first
feature_importances = pd.Series(regressor.feature_importances_, index=X.columns).sort_values(ascending=False)
print(feature_importances)


In [None]:
import random
# Pick one matched trip at random for the step-by-step cells below.
# NOTE(review): `matched_traces_dict` is deleted right after the trip years
# are extracted, so it has to be reloaded before this cell can run.
testtrip = random.choice(list(matched_traces_dict.keys()))
testtrip = matched_traces_dict[testtrip]

In [None]:
# per-trip summaries keyed by trip id (filled by a later cell)
summary_dict = {}

#get edges
route = testtrip['edges']


In [None]:
#get route and turns
# each route row becomes a tuple; a turn is built from the first two fields
# of a row followed by the first two fields of the next row
route = [tuple(x) for x in testtrip['edges'].values]
turns = [(route[i][0],route[i][1],route[i+1][0],route[i+1][1]) for i in range(0,len(route)-1)]

In [None]:
'''
Two different types of summarization:

Instance based (turns, signals, bridges, etc)

Length based on certain tag (bike facilities)

Cumulative (length,elevation)

'''

# Step-by-step version of route_attributes for the single test trip
summary_attributes = {}

#get trip date for the bike facility check
# (timestamp read positionally: first row, third column of the trace)
trip_date_year = testtrip['trace'].iloc[0,2].year

#get route and turns
# the turns are put in a DataFrame whose columns match the turn key so they
# can be merged with turns_df
route = testtrip['edges']
turns = [(route.values[i][0],route.values[i][1],route.values[i+1][0],route.values[i+1][1]) for i in range(0,len(route.values)-1)]
turns = pd.DataFrame(turns,columns=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])


In [None]:

# NOTE(review): stale scratch cell. It reads 'length_ft', 'ascent_m' and
# 'descent_m', which the unit-conversion cell drops from edges_w_attr, so it
# fails with a KeyError when the notebook is run top to bottom.

#retrieve attributes
route_w_attr = pd.merge(route,edges_w_attr,on='linkid')
turns_w_attr = pd.merge(turns,turns_df,on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'])

#turn to gdf
route_w_attr = gpd.GeoDataFrame(route_w_attr,geometry='geometry',crs=config['projected_crs_epsg'])
summary_attributes["geometry"] = MultiLineString(route_w_attr['geometry'].tolist())

#flip relevant attributes
# (ascent <-> descent and facility_fwd <-> facility_rev on reversed links)
route_w_attr.loc[route_w_attr['reverse_link']==True,ascent_columns+descent_columns+bike_facils] = \
    route_w_attr.loc[route_w_attr['reverse_link']==True,descent_columns+ascent_columns+bike_facils[::-1]].values

#set the bike facility to na if the trip date was before the bike facility
route_w_attr.loc[route_w_attr['year'] > trip_date_year,bike_facils] = np.nan

#summary columns
summary_attributes["length_ft"] = route_w_attr['length_ft'].sum().round(0)
summary_attributes["ascent_m"] = route_w_attr['ascent_m'].sum().round(0)
summary_attributes["descent_m"] = route_w_attr['descent_m'].sum().round(0)

# average grade by category (from broach)
# bins here are (a, b]; route_attributes uses [a, b) instead
zero_to_two = (route_w_attr['ascent_grade_%'] > 0) & (route_w_attr['ascent_grade_%'] <= 2)
two_to_four = (route_w_attr['ascent_grade_%'] > 2) & (route_w_attr['ascent_grade_%'] <= 4)
four_to_six = (route_w_attr['ascent_grade_%'] > 4) & (route_w_attr['ascent_grade_%'] <= 6)
six_and_beyond = (route_w_attr['ascent_grade_%'] > 6)
summary_attributes["zero_to_two_%_ft"] = route_w_attr.loc[zero_to_two,'length_ft'].sum().round(0)
summary_attributes["two_to_four_%_ft"] = route_w_attr.loc[two_to_four,'length_ft'].sum().round(0)
summary_attributes["four_to_six_%_ft"] = route_w_attr.loc[four_to_six,'length_ft'].sum().round(0)
summary_attributes["six_and_beyond_%_ft"] = route_w_attr.loc[six_and_beyond,'length_ft'].sum().round(0)

#TODO add this back in the elevation step and use the same limits?
#add meters on grade segments (i.e. add all in length along x to x)
#could possibly be a more accurate represntation of steep roads

# #instance columns to summarize
# count_cols = ['bridge','tunnel']
# for count_col in count_cols:
#     summary_attributes[count_col] = (route_w_attr[count_col]==True).sum().round(0)

# length of route columns to summarize
cols = ['link_type_new','facility_fwd']#['link_type','highway']#,'speedlimit_range_mph','lanes_per_direction']
for col in cols:
    #make a summary column for every unique value in that column
    for unique_val in route_w_attr[col].unique():
        #skip if nan
        if isinstance(unique_val,str) == False:
            continue
        summary_attributes[col+'.'+unique_val+'_ft'] = route_w_attr.loc[route_w_attr[col]==unique_val,'length_ft'].sum().round(0)
        
# turns
# one count per turn type
summary_attributes.update(turns_w_attr['turn_type'].value_counts().to_dict())


In [None]:
summary_attributes

In [None]:
# Select the links and turns of one route by (linkid, reverse_link) tuples.
# NOTE(review): `df_edges`, `pseudo_df` and `list_of_edges` are only created
# in commented-out code in this notebook; this cell needs them defined first.
df_edges['tup'] = list(zip(df_edges['linkid'],df_edges['reverse_link']))
chosen_links = df_edges.set_index('tup').loc[list_of_edges]
list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]
chosen_turns = pseudo_df.set_index(['source_linkid','source_reverse_link','target_linkid','target_reverse_link']).loc[list_of_turns]
chosen_links.columns


In [None]:
#initialize summary dict (filled by the following cells)
summary_attributes = {}


In [None]:

#trip distance: total length of the chosen links, in feet
summary_attributes['trip_distance_ft'] = chosen_links['length_ft'].sum()


In [None]:

#instance columns to summarize: number of links flagged as bridge / tunnel
count_cols = ['bridge','tunnel']
for count_col in count_cols:
    summary_attributes[count_col] = (chosen_links[count_col]==True).sum()

route_length_ft = chosen_links['length_ft'].sum()

#general elevation
# metres -> feet is a multiplication (1 m = 3.28084 ft); dividing
# under-reports the climb by a factor of roughly 10.8
total_ascent = (chosen_links['ascent_m'].sum() * 3.28084).round(0)
summary_attributes['ascent_ft'] = total_ascent
summary_attributes['avg_ascent_grade'] = (total_ascent / route_length_ft * 100).round(1)

#elevation broken by segment
# NOTE(review): the units of these columns are not visible here. If they are
# metres, this division should be a multiplication as well -- confirm before
# relying on the "_ft" values.
elev_cols = ['(0,2]_ascent','(2,4]_ascent', '(4,6]_ascent', '(6,10]_ascent', '(10,15]_ascent','(15,inf]_ascent']
for elev_col in elev_cols:
    total_length = (chosen_links[elev_col].sum() / 3.28).round(0)
    summary_attributes[elev_col+'_ft'] = total_length

#pct of route columns to summarize
cols = ['link_type','highway','bike_facility_type','speedlimit_range_mph','lanes_per_direction']
for col in cols:
    #make a summary column for every unique value in that column
    for unique_val in chosen_links[col].unique():
        # pd.isna catches both None and NaN. `x == np.nan` is always False,
        # so NaN used to slip through and create a spurious "<col>.nan" entry.
        if pd.isna(unique_val):
            continue
        total_length = chosen_links[chosen_links[col] == unique_val]['length_ft'].sum()
        summary_attributes[col+'.'+str(unique_val)] = np.round(total_length/route_length_ft,2)

# signalized and turns
summary_attributes['signalized'] = (chosen_turns['signalized']==True).sum()
summary_attributes['unsignalized'] = (chosen_turns['unsignalized']==True).sum()
turn_dict = chosen_turns['turn_type'].value_counts().to_dict()
summary_attributes.update(turn_dict)

# store this trip's summary under its trip id
summary_dict[tripid] = summary_attributes



In [None]:
summary_attributes

In [None]:
#turn into geodataframe
# one row per trip id, one column per summary attribute; attributes missing
# for a trip are set to 0
trips_df_info = pd.DataFrame.from_dict(summary_dict,orient='index')
trips_df_info.fillna(0,inplace=True)

In [None]:
trips_df_info

In [None]:
# attach the summaries to the trips: 'tripid' column matched against the
# index of trips_df_info
trips_df = trips_df.merge(trips_df_info,left_on='tripid',right_index=True)

In [None]:
#TODO find the visualiztion code that we had already made

In [None]:
# list_of_edges = list(zip(edges['linkid'],edges['reverse_link']))
# list_of_turns = [(list_of_edges[i][0],list_of_edges[i][1],list_of_edges[i+1][0],list_of_edges[i+1][1]) for i in range(0,len(list_of_edges)-1)]

# chosen_links = df_edges.set_index(['linkid','reverse_link'],drop=False).loc[list_of_edges]

# chosen_links['bridge'].value_counts()

In [None]:
#add user info
# NOTE(review): `fp` is only assigned in a commented-out cell further down,
# so this read fails unless `fp` is defined first.
trip_and_user = pd.read_pickle(fp/'gps_traces/trip_and_user.pkl')

# join the summaries (indexed by trip id) with the trip/user table
trips_df = trips_df_info.merge(trip_and_user,left_index=True,right_on='tripid')

In [None]:
# write the combined table to CSV without the row index
trips_df.to_csv(fp/'all_attrs.csv',index=False)

In [None]:
#instead of visualizing here visualize elsewhere?

In [None]:

#import stochastic_optimization


In [None]:
# fp = Path.home() / 'Documents/BikewaySimData/Projects/gdot'
# #fp = Path.home() / 'Library/CloudStorage/OneDrive-GeorgiaInstituteofTechnology/BikewaySim/Data'

# with (fp / 'impedance_calibration.pkl').open('rb') as fh:
#     (df_edges,pseudo_df,pseudo_G) = pickle.load(fh)

In [None]:
# Retrieve link/turn costs
# default below is link distance
# link_costs = dict(zip(list(zip(df_edges['source'],df_edges['target'],df_edges['linkid'])),df_edges['length_ft']))
# tup = list(zip(pseudo_df['source_A'],pseudo_df['source_B'],pseudo_df['source_linkid']))
# pseudo_df['source_cost'] = list(map(link_costs.get,tup))
# tup = list(zip(pseudo_df['target_A'],pseudo_df['target_B'],pseudo_df['target_linkid']))

# pseudo_df['target_cost'] = list(map(link_costs.get,tup))
# pseudo_df['total_cost'] = pseudo_df['source_cost'] + pseudo_df['target_cost'] #+turn_cost

# costs = pseudo_df.groupby(['source','target'])['total_cost'].min()
# nx.set_edge_attributes(pseudo_G,values=costs,name='weight')
# source = list(pseudo_G.nodes())[0]
# target = list(pseudo_G.nodes())[420]
# print(source,target)
# import networkx as nx
# length, edge_list = nx.single_source_dijkstra(pseudo_G,source,target,weight="weight")
# turn_list = [[edge_list[i][0],edge_list[i][1],edge_list[i+1][0],edge_list[i+1][1]] for i in range(len(edge_list)-1)]

# turn_cols = ['turn_type','signalized_left_straight','unsignalized_left_straight_nonlocal']
# linkid_cols = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']
# chosen_turns = pseudo_df.set_index(['source_A','source_B','target_A','target_B'],drop=False).loc[turn_list,linkid_cols+turn_cols]

# tripid = 302

# #make a single row dataframe to attach to trips_df
# stats_dict = {}
# stats_dict[tripid] = {
#     'tripid':tripid,
#     'signalized_left_straight': chosen_turns['signalized_left_straight'].sum(),
#     'unsignalized_left_straight_nonlocal': chosen_turns['unsignalized_left_straight_nonlocal'].sum()
# }
# turn_dict = chosen_turns['turn_type'].value_counts().to_dict()
# stats_dict[tripid].update(turn_dict)

# Case 1: Need to run shortest paths to create link sequence

In [None]:
# source_links = chosen_turns[['source_linkid','source_reverse_link']]
# target_links = chosen_turns[['target_linkid','target_reverse_link']]
# source_links.columns = ['linkid','reverse_link']
# target_links.columns = ['linkid','reverse_link']
# linkids = pd.concat([source_links,target_links],ignore_index=True).drop_duplicates()
# chosen_links = df_edges.merge(linkids,on=['linkid','reverse_link'])

In [None]:
#TODO recalculate bearing
#create pseudo graph for modeling turns
# NOTE(review): the import of `modeling_turns` is commented out at the top of
# the notebook and `links` is not assigned anywhere in it; both are needed
# before this cell can run.
df_edges, pseudo_links, pseudo_G = modeling_turns.create_pseudo_dual_graph(links,'A','B','linkid','oneway')


In [None]:
# arbitrary origin / destination: the 1st and 421st nodes of the pseudo graph
source = list(pseudo_G.nodes())[0]
target = list(pseudo_G.nodes())[420]

In [None]:
# hard-coded origin / destination nodes of the pseudo graph (each a pair of ids)
source = (68209677, 68209675)
target = (69200243, 69465418)

import networkx as nx
# returns the path length and the node sequence from source to target
length, path = nx.single_source_dijkstra(pseudo_G,source,target)

In [None]:
# consecutive pairs of `linkids`
# NOTE(review): no module-level `linkids` is assigned in this notebook (only
# a local inside route_attributes) -- confirm where it should come from.
edge_list = [(linkids[i],linkids[i+1]) for i in range(len(linkids)-1)]
edge_list

In [None]:
# flatten each pair of consecutive path nodes into one tuple
edge_list = [(*path[i],*path[i+1]) for i in range(len(path)-1)]
edge_list

In [None]:
pseudo_links.columns

In [None]:
# look the path's turns up by their four node ids and count each turn type
pseudo_links.set_index(['source_A','source_B','target_A','target_B']).loc[edge_list,'turn_type'].value_counts()