# Network Prepare
This step adds in the supplemental attributes prepared in the previous notebooks (e.g., traffic signals, bicycle facilities (with approx. install dates), elevation, etc.). Links where cyclists are absolutely not allowed to travel (Interstates) are removed. A psuedo dual graph for modeling turn movements is created, and the links, nodes, and turns are exported for further processing in the `impedance_calibration` module.

For the GDOT/NCST projects, the following attributes were available: (TURN THIS INTO A TABLE LIKE IN THE REPORT LATER)
- Length
- Grade/Elevation
- Bike Facility w Dates
- Oneway
- Signals
- AADT
- Truck %
- Lanes
- Speed Category

In [1]:
import geopandas as gpd
from pathlib import Path
import numpy as np
import pandas as pd
import pickle
from shapely.ops import Point

from bikewaysim.paths import config
from bikewaysim.network import modeling_turns, add_attributes, prepare_network

## Import network links and add attributes back

In [2]:
links = gpd.read_file(config['network_fp'] /'networks.gpkg',layer='osm_links')
og_cols = links.columns
nodes = gpd.read_file(config['network_fp'] / 'networks.gpkg',layer='osm_nodes')

#TODO modify network filter to stop adding the network name like this
links.rename(columns={'osm_A':'A','osm_B':'B','osm_linkid':'linkid'},inplace=True)
nodes.rename(columns={'osm_N':'N'},inplace=True)

#calculate link lengths
links['length_ft'] = links.length

#basic stats
print(links.shape[0],'links',(links.length.sum() / 5280).round(0),'miles',nodes.shape[0],'nodes')

#types and lengths
summary_df = pd.DataFrame({'size':links['link_type'].value_counts(),
                           'length_mi':links.groupby('link_type')['geometry'].apply(lambda x: x.length.sum() / 5280),
                           'length_pct':links.groupby('link_type')['geometry'].apply(lambda x: x.length.sum()) / links.length.sum() * 100})
print(summary_df.sort_values('length_mi',ascending=False))

#add osm attributes back (especially the oneway column)
osm_attrs = gpd.read_file(config['osmdwnld_fp'] / f"osm.gpkg",layer='raw')

# # get basic stats
osm_attrs.to_crs(links.crs,inplace=True)
print('Raw network is',(osm_attrs.length / 5280).sum().round(0),'miles')
links = pd.merge(links,osm_attrs.drop(columns=['oneway','geometry']),on='osmid')
del osm_attrs

197721 links 6892.0 miles 150880 nodes
                         size    length_mi  length_pct
road                    70233  3286.461080   47.685569
service                 49685  1118.567079   16.230074
parking_and_driveways   37505  1072.045948   15.555067
sidewalk                22633   543.082566    7.879966
restricted_access_road   2014   256.544600    3.722386
pedestrian              11622   243.736461    3.536543
no_bike                  1367   237.511755    3.446225
bike                     2593   127.630901    1.851886
no_access_or_private       18     3.367666    0.048864
Raw network is 6938.0 miles


# Add bicycle infrastructure and approximate date of opening

In [3]:
cycling_infra_dates = gpd.read_file(config['bicycle_facilities_fp']/'osm_cycleways_w_dates.gpkg',layer='dates_network')
links = pd.merge(links,cycling_infra_dates[['osm_linkid','facility_fwd','facility_rev','facility','year']],left_on='linkid',right_on='osm_linkid',how='left')
links.drop(columns=['osm_linkid'],inplace=True)
(links.groupby(['facility','year'])['length_ft'].sum() / 5280).round(2)

facility            year  
bike lane           2004.0     2.44
                    2006.0     0.45
                    2007.0     4.95
                    2008.0    16.52
                    2010.0     4.65
                    2011.0     8.29
                    2012.0     4.09
                    2013.0     2.62
                    2014.0     5.37
                    2015.0     2.37
                    2016.0     9.71
                    2017.0     1.06
                    2018.0     6.49
                    2019.0     2.70
                    2020.0     2.88
                    2021.0     3.62
                    2022.0     0.07
                    2023.0     0.53
buffered bike lane  2004.0     0.01
                    2008.0     0.07
                    2012.0     0.16
                    2013.0     0.85
                    2014.0     0.31
                    2015.0     0.54
                    2016.0     0.24
                    2017.0     0.48
                    2018.0     1.03
 

## Remove off street infrastructure built after 2016
Some of these may have still existed as informal dirt paths (Beltline). In that case add them back in manually.

In [4]:
max_year = 2016
max_year_cond = links['year'] > max_year

links.loc[max_year_cond].to_file(config['bicycle_facilities_fp']/'removed_bicycle_infra.gpkg')

# # remove infra before 2016 so it doesn't match to these
links.loc[max_year_cond & (links['link_type']=='road'),'facility_fwd'] = None
links.loc[max_year_cond & (links['link_type']=='road'),'facility_rev'] = None
links.loc[max_year_cond & (links['link_type']=='road'),'facility'] = None

#TODO reimplement this but don't remove links from the link database, just the graph so that we can do it dynamically
after = links['facility'].isin(['cycletrack','multi use path']) & \
          (links['link_type']!='road') & \
          links['year'].notna() & \
          (links['year']>max_year)
links = links[after==False]

# set no facility values to null
links.loc[links['facility_fwd'] == 'no facility','facility_fwd'] = None
links.loc[links['facility_rev'] == 'no facility','facility_rev'] = None

# nans to None
links.loc[links['facility_fwd'].isna(),'facility_fwd'] = None
links.loc[links['facility_rev'].isna(),'facility_rev'] = None
links.loc[links['facility'].isna(),'facility'] = None

# save in new column for reference for comparison
# links['future_facility'] = links['facility_fwd']

## Sidepaths
Add adjacent multi-use paths and cycletracks to roads as an attribute and vice versa. Think Stone Mountain Trail or Beltline next to Wylie Street.

In [5]:
# subset to bike infra and roads
mups_and_cycletracks = cycling_infra_dates.loc[cycling_infra_dates['link_type']!='road',['osm_linkid','facility','year','geometry']]
mups_and_cycletracks.rename(columns={'osm_linkid':'sidepath_linkid','facility':'sidepath','year':'sidepath_year'},inplace=True)
roads = links.loc[links['link_type']=='road',['linkid','geometry']].copy()
roads['og_length'] = roads.length

# get azimuth for getting angle change
roads.to_crs('epsg:4326',inplace=True)
roads[['fwd_azimuth','bck_azimuth']] = roads.apply(lambda row: modeling_turns.find_azimuth(row),axis=1)
roads.to_crs(config['projected_crs_epsg'],inplace=True)

mups_and_cycletracks.to_crs('epsg:4326',inplace=True)
mups_and_cycletracks[['fwd_azimuth','bck_azimuth']] = mups_and_cycletracks.apply(lambda row: modeling_turns.find_azimuth(row),axis=1)
mups_and_cycletracks.to_crs(config['projected_crs_epsg'],inplace=True)

# buffer by small amount
buffer_ft = 50
mups_and_cycletracks.set_geometry(mups_and_cycletracks.buffer(buffer_ft),inplace=True)

# intersect
intersection = gpd.overlay(roads,mups_and_cycletracks)

# calculate coverage and angle change (hausdorff distance returns too many false positives)
intersection['new_length'] = intersection.length
intersection['ratio'] = intersection['new_length']/intersection['og_length']

# angle difference (take min to account for direction differences)
intersection['diff1'] = np.abs(intersection['fwd_azimuth_1'] - intersection['bck_azimuth_2'])
intersection['diff2'] = np.abs(intersection['fwd_azimuth_1'] - intersection['fwd_azimuth_2'])
intersection['mindiff'] = intersection[['diff1','diff2']].min(axis=1)

# set minimum conditions for accepting
cond0 = intersection['ratio'] > 0.95 # this much coverage of the original link
cond1 = intersection['mindiff'] < 30 # no more than this change in angle
intersection = intersection[cond0&cond1]

# just take the one with the most overlap after that
has_sidepath = intersection.loc[intersection.groupby('linkid')['ratio'].idxmax()]#,['linkid','sidepath','sideear','geometry']]

# buffer the sidepaths again and perform unary union to get the connected parts
connected_parts = gpd.GeoDataFrame({'geometry':has_sidepath.buffer(50).unary_union},crs=config['projected_crs_epsg']).reset_index()

# intersect with the has_sidepath layer again and group by
# need 400 feet next to a road to count as sidepath
intersect2 = gpd.overlay(has_sidepath,connected_parts)
intersect2['adjacent_length_ft'] = intersect2.groupby('index')['og_length'].transform(sum)
# intersect2[intersect2['adjacent_length_ft']>400].explore()
final_sidepaths = intersect2.loc[intersect2['adjacent_length_ft']>400,['linkid','sidepath_linkid','sidepath','sidepath_year','geometry']]
# final_sidepaths
# final_sidepaths.explore()
final_sidepaths.to_file(config['bicycle_facilities_fp']/'sidepaths.gpkg',layer='sidepaths')

In [6]:
# merge back into main network dataframe
links = pd.merge(links,final_sidepaths.drop(columns='geometry'),on='linkid',how='left')

In [7]:
# assign the facility to the road if it doesn't already have facility
# links0 = links.copy()
cond = links[['sidepath_linkid']].notna().all(axis=1) & links[['facility_fwd','facility_rev','facility']].isna().all(axis=1)
links.loc[cond,['facility_fwd','facility_rev','facility']] = links.loc[cond,['sidepath','sidepath','sidepath']].values
# assign year if there is one
# cond = links['sidepath_year'].notna() & links['year'].isna()
links.loc[cond,'year'] = links['sidepath_year']

# assign the road attributes to the sidepath (i.e., attach the adjacent road linkid) (future step)

# Add Network Improvements (in development)

Atlanta Example

In [8]:
# improvements = gpd.read_file(config['bicycle_facilities_fp']/'network_improvements.gpkg',layer='coa',ignore_geometry=True)
# links = pd.merge(links,improvements,left_on='osm_linkid',right_on='linkid',how='left')
# links.drop(columns=['linkid'],inplace=True)

Savannah

In [9]:
# improvements = gpd.read_file(config['bicycle_facilities_fp']/'network_improvements.gpkg',layer='savannah',ignore_geometry=True)
# links = pd.merge(links,improvements,on='osm_linkid',how='left')
# links.drop(columns=['linkid'],inplace=True)

# Add GDOT data
GDOT provides # of lanes data, AADT, and truck %.

In [10]:
gdot_lanes = gpd.read_file(config['network_fp']/"conflation.gpkg",layer="gdot_lanes",ignore_geometry=True)
gdot_traffic = gpd.read_file(config['network_fp']/"conflation.gpkg",layer="gdot_traffic",ignore_geometry=True)

links = pd.merge(links,gdot_lanes,on="osmid",how='left')
links = pd.merge(links,gdot_traffic,on='osmid',how='left')

Handle null aadt

In [11]:
# this to explore na links
# links[(links['link_type']=='road')&links['AADT'].isna()].explore()
# give residential roads the lowest aadt category or below
links.loc[links['AADT'].isna() &
          (links['highway'].isin(['residential','service','unclassified','living_street'])) &
          (links['link_type']=='road'),'AADT'] = '[0,4k)'
# all others the middle category
links.loc[links['AADT'].isna() & (links['link_type']=='road'),'AADT'] = '[4k,10k)'
# any remaining nulls (bike paths, service roads, parking lots, get the lowest category)
links.loc[links['AADT'].isna(),'AADT'] = '[0,4k)'

#turn it into categorical data
links['AADT'] = pd.Categorical(links['AADT'],ordered=True,categories=['[0,4k)','[4k,10k)','[10k,inf)'])

# Add HERE data (SKIP IF NO HERE DATA)
HERE provides speed and lanes data.

In [12]:
here = gpd.read_file(config['network_fp']/"conflation.gpkg",layer="here",ignore_geometry=True)
links = pd.merge(links,here,left_on='linkid',right_on='osm_linkid',how='left')

Handling null speeds

In [13]:
# this to explore na links
# links[(links['link_type']=='road')&links['here_speed'].isna()].explore()
# links[links['here_speed'].isna()&(links['link_type']=='road')]['highway'].unique()
# give residential roads a speed limit of 30 or below
links.loc[links['here_speed'].isna() &
          (links['highway'].isin(['residential','service','unclassified','living_street'])) &
          (links['link_type']=='road'),'here_speed'] = '[0,30]'
# all others get 30 +
links.loc[links['here_speed'].isna() & (links['link_type']=='road'),'here_speed'] = '(30,40]'
# any remaining nulls (bike paths, service roads, parking lots, get a speed limit of 30 or below)
links.loc[links['here_speed'].isna(),'here_speed'] = '[0,30]'
links.rename(columns={'here_speed':'speed'},inplace=True)

In [14]:
links['speed'] = pd.Categorical(links['speed'],ordered=True,categories=['[0,30]', '(30,40]', '(40,inf)'])

# Resolve GDOT/HERE lanes data
- All non-road links get a 1 (doing this so lanes attribute isn't being confounded with vehicle access)
- By direction is too detailed, use a per direction estimate (i.e. treat a 5 lane oneway road the same as a 10 lane twoway road or a 5 lane per direction)
- Simplify to:
    - 1 lane per direction
    - 2 lanes per direction
    - 3+ lanes per direction
- If unequal number of lanes use direction that would result in the higher category
    - Example: 10th Street NE would be 2 lanes per direction because it has 2/1 lanes by direction
- Turn lanes (middle, right, etc) are NOT counted in HERE or GDOT

In [15]:
#TODO figure out what to do when there is a disrepency between the lanes
## Examine where there's a big mismatch between HERE and GDOT
# - There are a couple of cases where GDOT will be way off, like North Highland Ave NE which shows up as having four lanes when it's mostly 2 lanes for most of its length.
# - Memorial Drive is also marked as having four lanes but it was road dieted post 2016, so just use the old value.
# - Unless it's a residential street or a few cases that were identified, use the GDOT values.

In [16]:
#give everything a default value of 1 (before we would give non-motorized links a value of 0)
links['lanes'] = 1

#if one is null take the non null value
links.loc[links['here_lanes'].isna() & links['gdot_lanes'].notna(),'lanes'] = links['gdot_lanes']
links.loc[links['here_lanes'].notna() & links['gdot_lanes'].isna(),'lanes'] = links['here_lanes']

#otherwise choose whichever is smaller
links.loc[links['here_lanes'].notna() & links['gdot_lanes'].notna(),'lanes'] = links[['here_lanes','gdot_lanes']].min(axis=1)

#drop to trim down the df
links.drop(columns=['gdot_lanes','here_lanes'],inplace=True)

# Add elevation data
Assign the correct direction for reverse links later

In [17]:
elevation = gpd.read_file(config['network_fp']/'elevation.gpkg',layer='elevation',ignore_geometry=True)
elevation = elevation[['linkid','ascent_ft','descent_ft','ascent_grade_cat','descent_grade_cat']]
links = pd.merge(links,elevation,on='linkid',how='left')
# del elevation

In [18]:
#set ascent grade and descent grade to zero
links.loc[links['ascent_grade_cat'].isna(),'ascent_grade_cat'] = '[0,4)'
links.loc[links['descent_grade_cat'].isna(),'descent_grade_cat'] = '[0,4)'
links.loc[:,['ascent_ft','descent_ft']] = links.loc[:,['ascent_ft','descent_ft']].fillna(0).values

In [19]:
# #for remaining bridge where lidar data was not available set the grade to 0 if grade exceeds 10 percent
# links.loc[(links['bridge'] == 'yes') & (links['ascent_grade_%'] > 10),'ascent_grade_%'] = 0
# #also for tunnels
# links.loc[(links['tunnel'] == 'yes'),'ascent_grade_%'] = 0

# Create reverse links and turn dataframe

In [20]:
#TODO change this to not create the turn graph (just make it an extra optional step)
## Create turn graph dataframe
from importlib import reload
reload(modeling_turns)
directed_links, turns_df = modeling_turns.create_pseudo_dual_graph(links,'A','B','linkid','oneway')

# find the degree of the intersection node and re-classify anything with degree 2 as straight turn movement?
# what about interstate exits that got removed?
from collections import Counter
node_degree = dict(Counter(links['A'].tolist()+links['B'].tolist()))
turns_df['node_degree'] = turns_df['source_B'].map(node_degree)
# turns_df[turns_df['node_degree']==2,'turn_type'] = 'straight'

# Add signals from OSM and GDOT to turns

In [21]:
osm_signals = pd.read_parquet(config['network_fp']/'osm_signals.parquet')[['source_linkid','source_reverse_link','target_linkid','target_reverse_link']]
gdot_signals = pd.read_parquet(config['network_fp']/'gdot_signals.parquet')[['source_linkid','source_reverse_link','target_linkid','target_reverse_link']]

osm_signals = set([tuple(x) for x in osm_signals.values])
gdot_signals = set([tuple(x) for x in gdot_signals.values])
added_signals = set.union(osm_signals,gdot_signals)

turns_df.set_index(['source_linkid','source_reverse_link','target_linkid','target_reverse_link'],inplace=True)

added_signals = set.intersection(set(turns_df.index.tolist()),added_signals)
turns_df.loc[added_signals,'signalized'] = True
turns_df.loc[turns_df['signalized'].isna(),'signalized'] = False

turns_df.reset_index(inplace=True)

turns_df['signalized'].value_counts()

False    747724
True      18744
Name: signalized, dtype: int64

# Add in cross street variables

In [22]:
# attach speed, lanes, AADT, and osm hihgway
link_attrs = links.copy()[['linkid','highway','link_type','lanes','AADT','speed']]
link_attrs.set_index('linkid',inplace=True)

source_cols = ['source_' + x for x in link_attrs.columns]
target_cols = ['target_' + x for x in link_attrs.columns]

link_attrs.columns = source_cols
turns_df = pd.merge(turns_df,link_attrs,left_on='source_linkid',right_index=True,how='left')
link_attrs.columns = target_cols
turns_df = pd.merge(turns_df,link_attrs,left_on='target_linkid',right_index=True,how='left')
turns_df

Unnamed: 0,source_linkid,source_reverse_link,target_linkid,target_reverse_link,source_A,source_B,target_A,target_B,source_azimuth,target_azimuth,...,source_highway,source_link_type,source_lanes,source_AADT,source_speed,target_highway,target_link_type,target_lanes,target_AADT,target_speed
0,32689,False,13803,False,315350788,69123159,69123159,69116629,250.7,233.9,...,motorway,no_bike,1.0,"[0,4k)","[0,30]",motorway,no_bike,1.0,"[0,4k)","[0,30]"
1,38135,False,13803,False,819937163,69123159,69123159,69116629,272.0,233.9,...,motorway_link,restricted_access_road,1.0,"[0,4k)","[0,30]",motorway,no_bike,1.0,"[0,4k)","[0,30]"
2,13803,True,32689,True,69116629,69123159,69123159,315350788,53.9,70.7,...,motorway,no_bike,1.0,"[0,4k)","[0,30]",motorway,no_bike,1.0,"[0,4k)","[0,30]"
3,38135,False,32689,True,819937163,69123159,69123159,315350788,272.0,70.7,...,motorway_link,restricted_access_road,1.0,"[0,4k)","[0,30]",motorway,no_bike,1.0,"[0,4k)","[0,30]"
4,13803,True,38135,True,69116629,69123159,69123159,819937163,53.9,92.0,...,motorway,no_bike,1.0,"[0,4k)","[0,30]",motorway_link,restricted_access_road,1.0,"[0,4k)","[0,30]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
766463,185139,True,185120,True,9517568994,9517568993,9517568993,9517568962,245.3,253.1,...,footway,sidewalk,1.0,"[0,4k)","[0,30]",footway,sidewalk,1.0,"[0,4k)","[0,30]"
766464,185120,False,185140,False,9517568962,9517568993,9517568993,9517577621,73.1,151.8,...,footway,sidewalk,1.0,"[0,4k)","[0,30]",steps,pedestrian,1.0,"[0,4k)","[0,30]"
766465,185139,True,185140,False,9517568994,9517568993,9517568993,9517577621,245.3,151.8,...,footway,sidewalk,1.0,"[0,4k)","[0,30]",steps,pedestrian,1.0,"[0,4k)","[0,30]"
766466,184645,False,184646,False,9501667031,9501667032,9501667032,9501667033,241.1,241.4,...,footway,sidewalk,1.0,"[0,4k)","[0,30]",footway,sidewalk,1.0,"[0,4k)","[0,30]"


In [23]:
# cross street would be to the left or right
cond1 = turns_df['turn_type'].isin(['left','right'])

#only road to road for now
cond2 = (turns_df['source_link_type'] == 'road') & (turns_df['target_link_type'] == 'road')
cross_streets = turns_df[cond1&cond2]

# get the worst possible cross street attribute
cross_streets = cross_streets.groupby(['source_linkid','source_reverse_link'])['target_AADT','target_lanes','target_speed'].max()
cross_streets.columns = ['cross_AADT','cross_lanes','cross_speed']
test = turns_df.merge(cross_streets,left_on=['source_linkid','source_reverse_link'],right_index=True)#,how='left')

In [24]:
# a stressful turn would be
aadt_cross_cond = test['cross_AADT'] == '[10k,inf)'
lanes_cross_cond = test['cross_lanes'] > 2
speed_cross_cond = test['cross_speed'] >= '(30,40]'
cross_high_stress = aadt_cross_cond | lanes_cross_cond | speed_cross_cond

# if the source street has these stats then assume that there is a signal
aadt_source_cond = test['source_AADT'] == '[10k,inf)'
lanes_source_cond = test['source_lanes'] > 2
speed_source_cond = test['source_speed'] >= '(30,40]'
source_high_stress = aadt_source_cond | lanes_source_cond | speed_source_cond

test['unsig_crossing'] = False
test.loc[(source_high_stress==False) & cross_high_stress & (test['signalized']==False) & (test['turn_type'].isin(['straight','left'])),'unsig_crossing'] = True

In [25]:
# add unsignalized crossing variable back in
turns_df = pd.merge(turns_df,test[['source_linkid','source_reverse_link','target_linkid','target_reverse_link','unsig_crossing']],on=['source_linkid','source_reverse_link','target_linkid','target_reverse_link'],how='left')

In [26]:
# #optional add geo data to turns and export for examination
# reload(modeling_turns)
# cross_streets_gdf = modeling_turns.turn_gdf(links,test)
# for idx,x in enumerate(cross_streets_gdf.dtypes):
#     if (str(x) == "category") | (str(x)=='object'):
#         cross_streets_gdf.iloc[:,idx] = cross_streets_gdf.iloc[:,idx].astype(str)
# cross_streets_gdf.to_file(config['network_fp']/'scratch.gpkg',layer='cross_streets')

In [27]:
# nodes[nodes['N'].isin(set(cross_streets_gdf.loc[cross_streets_gdf['unsig_crossing']==True,'source_B'].tolist()))].explore() # looks much more reasonable

In [28]:
# not sure hwat i was going for here
# # get the worst possible cross street attribute
# # cross_streets.groupby(['source_linkid','source_reverse_link'])['target_AADT','target_lanes','target_speed'].idxmax()
# cross_streets.loc[18242]
# cross_streets.groupby(['source_linkid','source_reverse_link'])['target_speed'].max()


# # find the worst cross street if there are multiple
# cross_streets.groupby(['source_linkid','source_reverse_link'])['aadt'].apply(lambda x: aadt_order)


# # TODO do this for the other variables too
# cols = ['AADT','lanes','speed']
# for x in cols:
#     print(links[x].unique())
# # rules for high stress turns

# #Major/minor road classification to create high traffic stress variable
# major_road_values = ['primary','secondary']
# major_road_values = major_road_values + [item + '_link' for item in major_road_values]
# minor_road_values = ['tertiary','unclassified','residential','service','trunk','living_street','service']
# minor_road_values = minor_road_values + [item + '_link' for item in minor_road_values]

# #traffic

# #override major road if only one lane per direction
# major_road = set(links.loc[links['highway'].isin(major_road_values) & (links['lanes'] >= 2),'linkid'].tolist())
# minor_road = set(links.loc[links['highway'].isin(minor_road_values) | \
#                            (links['highway'].isin(major_road_values) & (links['lanes'] < 2) ), 
#                             'linkid'].tolist())
# cross_streets.loc[cross_streets['source_linkid']==3]
# print(links[links['linkid']==3].squeeze())
# links[links['linkid']==3].explore()

# # grouby the source link
# cross_streets.groupby(['source_linkid'])['target_highway'].agg(list)#['target_highway_order'].min()

# cross_streets.name = 'cross_street'

# #add to main df
# pd.merge(turns_df,cross_streets,left_on=['source_linkid','source_A','source_B'],right_index=True,how='left')

# # wasn't able to get major/minor to be significant
# #Major/minor road classification to create high traffic stress variable
# major_road_values = ['primary','secondary']
# major_road_values = major_road_values + [item + '_link' for item in major_road_values]
# minor_road_values = ['tertiary','unclassified','residential','service','trunk','living_street','service']
# minor_road_values = minor_road_values + [item + '_link' for item in minor_road_values]

# #override major road if only one lane per direction
# major_road = set(links.loc[links['highway'].isin(major_road_values) & (links['lanes'] >= 2),'linkid'].tolist())
# minor_road = set(links.loc[links['highway'].isin(minor_road_values) | links['lanes'] < 2,'linkid'].tolist())

# #unsignalized straight/left turn where crossing street is a major road
# turns_df['unsig_major_road_crossing'] = (turns_df['signalized']==False) & \
#     turns_df['target_linkid'].isin(major_road) & \
#     turns_df['source_linkid'].isin(minor_road) & \
#     turns_df['turn_type'].isin(['left','straight'])

# # #sets turns that are not from road to road to None, effectively ignoring them
# # exclude = ['road','service']
# # turns_df.loc[(turns_df['source_link_type'].isin(exclude)==False) & 
# #              (turns_df['target_link_type'].isin(exclude)==False),'turn_type'] = None

In [29]:
#create layer of unsignalized crossings for examining
unsig_major_road_crossing = set(turns_df.loc[turns_df['unsig_crossing']==True,'source_B'].tolist())
nodes = gpd.read_file(config['network_fp']/'final_network.gpkg',layer='nodes')
nodes = nodes[nodes['N'].isin(unsig_major_road_crossing)]
nodes.to_file(config['calibration_fp']/'unsig_major_road_crossing.gpkg')

# Remove Interstates and Private Links
Remove these because we're absolutely sure we don't want bikes on these links.

In [30]:
print(links['link_type'].unique())
remove = ['no_access_or_private','restricted_access_road','no_bike']

['no_bike' 'restricted_access_road' 'service' 'parking_and_driveways'
 'road' 'bike' 'pedestrian' 'sidewalk' None 'no_access_or_private']


In [31]:
remove_cond = links['link_type'].isin(remove)
links = links[remove_cond==False]
print(remove_cond.sum(),'links removed')

3399 links removed


# Remove isolated links

In [32]:
links, nodes = prepare_network.largest_comp_and_simplify(links,nodes)

Before connected components: Links 193722 Nodes 4245
After connected components: Links 193305 Nodes 4245


# Export

In [33]:
final_cols = ['A', 'B', 'linkid', 'oneway', 'link_type', 'osmid', 'geometry',
       'length_ft', 'highway', 'name','all_tags',
       'facility_fwd', 'facility_rev', 'facility', 'year', 'sidepath',
       'sidepath_year', 'route_type', 'AADT',
       'speed', 'lanes', 'ascent_ft', 'descent_ft', 'ascent_grade_cat',
       'descent_grade_cat']

#TODO create a function for performing this
for idx,x in enumerate(links.dtypes):
    if (str(x) == "category"): #| (str(x)=='object'):
        links.iloc[:,idx] = links.iloc[:,idx].astype(str)

links[final_cols].to_file(config['network_fp']/'final_network.gpkg',layer='edges')
nodes.to_file(config['network_fp']/'final_network.gpkg',layer='nodes')

In [34]:
# add directional attributes and flip as needed
ascent_columns = ['ascent_ft', 'ascent_grade_cat']
descent_columns = ['descent_ft', 'descent_grade_cat']
directed_links = pd.merge(directed_links,links[['linkid','facility_fwd','facility_rev']+ascent_columns+descent_columns],on='linkid')
directed_links.loc[directed_links['reverse_link']==True,ascent_columns+descent_columns] = directed_links.loc[directed_links['reverse_link']==True,descent_columns+ascent_columns].values
directed_links.loc[directed_links['reverse_link']==True,['facility_fwd','facility_rev']] = directed_links.loc[directed_links['reverse_link']==True,['facility_rev','facility_fwd']].values

In [35]:
# tuple columns not compatible with parquet
# turns_df.drop(columns=['source','target'],inplace=True)

In [36]:
#TODO pickles later
turns_df.to_parquet(config['network_fp']/'turns_df.parquet')
directed_links.to_parquet(config['network_fp']/'directed_edges.parquet')

In [37]:
# #optional add geo data to turns and export for examination
# from shapely.ops import MultiLineString
# geo_dict = dict(zip(links['linkid'],links['geometry']))
# turns_df['source_geo'] = turns_df['source_linkid'].map(geo_dict)
# turns_df['target_geo'] = turns_df['target_linkid'].map(geo_dict)
# turns_df['geometry'] = turns_df.apply(lambda row: MultiLineString([row['source_geo'],row['target_geo']]),axis=1)
# turns_df.drop(columns=['source_geo','target_geo'],inplace=True)
# turns_gdf = gpd.GeoDataFrame(turns_df,crs=links.crs)
# # turns_gdf.drop(columns=['source','target'])
# turns_gdf.to_file(config['network_fp']/'final_network.gpkg',layer='turns')

In [38]:
# #TODO serialize the attributes to add as needed?
# with (config['network_fp'] / 'edges_with_attributes.pkl').open('wb') as fh:
#     pickle.dump(links,fh)

In [39]:
# with (config['network_fp'] / 'edges.pkl').open('wb') as fh:
#     pickle.dump(links,fh,protocol=pickle.HIGHEST_PROTOCOL)
# with (config['network_fp'] / 'nodes.pkl').open('wb') as fh:
#     pickle.dump(nodes,fh,protocol=pickle.HIGHEST_PROTOCOL)
# with (config['network_fp'] / 'directed_edges.pkl').open('wb') as fh:
#     pickle.dump(edges,fh,protocol=pickle.HIGHEST_PROTOCOL)
# with (config['network_fp'] / 'turn_df.pkl').open('wb') as fh:
#     pickle.dump(turn_df,fh,protocol=pickle.HIGHEST_PROTOCOL)

# Deprecated past here

In [40]:

# #add attributes back and then flip elevation/bicyccle attributes
# #do so i don't have to re-flip everytime i import? could potentially save memory though
# #TODO it would still be smarter to store as a dict or something
# edges
# links.columns
# edges = pd.merge(edges,links.drop(columns=['A','B']),on='linkid')
# #if reverse_geo == true then ascent should be descent and vice versa
# # loops have reverse_geometry is np.nana
# # assume that all elevation columns will be paired by what is after ascent/descent
# elevation_columns = ['ascent_m', 'descent_m', 'ascent_grade_%','descent_grade_%']
# # Remove elements containing "ascent" or "descent"
# cleaned_columns = [col for col in elevation_columns if "ascent" not in col and "descent" not in col]    
# # Remove duplicates by converting the list to a set and back to a list
# cleaned_columns = list(set(cleaned_columns))

# for cleaned_column in cleaned_columns:
#     #swap if reverse geometry == true
#     links.loc[links['reverse_geometry']==True,ascent_columns+descent_columns] = links.loc[links['reverse_geometry']==True,descent_columns+ascent_columns]
    
    
#     df_edges[] = np.where(df_edges['reverse_link'], df_edges[elev_columns[1]].abs(), df_edges[elev_columns[0]])
#     #drop the down version?
#     df_edges.drop(columns=elev_columns[1],inplace=True)
# #if reverse_geo == true then ascent should be descent and vice versa
# # loops have reverse_geometry is np.nan
# # assume that all elevation columns will be paired by what is after ascent/descent
# elevation_columns = ['ascent_m', 'descent_m', 'ascent_grade_%','descent_grade_%']
# # Remove elements containing "ascent" or "descent"
# cleaned_columns = [col for col in elevation_columns if "ascent" not in col and "descent" not in col]    
# # Remove duplicates by converting the list to a set and back to a list
# cleaned_columns = list(set(cleaned_columns))

# for cleaned_column in cleaned_columns:
#     #swap
    
    
    
#     df_edges[] = np.where(df_edges['reverse_link'], df_edges[elev_columns[1]].abs(), df_edges[elev_columns[0]])
#     #drop the down version?
#     df_edges.drop(columns=elev_columns[1],inplace=True)
# ## Rename columns

# links.rename(columns={'osm_A':'A','osm_B':'B','osm_linkid':'linkid'},inplace=True)
# nodes.rename(columns={'osm_N':'N'},inplace=True)
# ## 
# ## Create turn graph dataframe
# edges, turn_df = modeling_turns.create_pseudo_dual_graph(links,'A','B','linkid','oneway')
# ## Flip attributes if needed (elevation, bicycle facilities)
# Turns should be good as is
# #add geo (needed for map matching part)
# df_edges = df_edges.merge(links.drop(columns=['A','B']),on=['linkid'])
# df_edges = gpd.GeoDataFrame(df_edges,geometry='geometry',crs=links.crs)
# df_edges = df_edges.loc[:,~df_edges.columns.duplicated()].copy()
# df_edges.reset_index(drop=True,inplace=True)
# #just export the df_edges?
# df_edges.to_file(export_fp/'Map_Matching/matching.gpkg',layer='edges')
# nodes.to_file(export_fp/'Map_Matching/matching.gpkg',layer='nodes')
# pseudo_df.columns
# #add geo to the turns too
# from shapely.ops import MultiLineString
# pseudo_df = pseudo_df.merge(links[['linkid','geometry']],left_on='source_linkid',right_on='linkid')
# pseudo_df = pseudo_df.merge(links[['linkid','geometry']],left_on='target_linkid',right_on='linkid')

# geometry = pseudo_df.apply(lambda row: MultiLineString([row['geometry_x'],row['geometry_y']]),axis=1)
# pseudo_df.drop(columns=['geometry_x','geometry_y','linkid_x','linkid_y'],inplace=True)
# pseudo_df = gpd.GeoDataFrame(pseudo_df,geometry=geometry,crs=links.crs)

# # pseudo_edges = pseudo_edges.loc[:,~pseudo_edges.columns.duplicated()].copy()
# # pseudo_edges.reset_index(drop=True,inplace=True)
# pseudo_df['source'] = pseudo_df['source'].astype(str)
# pseudo_df['target'] = pseudo_df['target'].astype(str)
# pseudo_df.to_file(export_fp/'Map_Matching/matching.gpkg',layer='turns')
# #pickle the graph
# with (export_fp / 'Map_Matching/turn_G.pkl').open('wb') as fh:
#     pickle.dump(pseudo_G,fh)
# # Come back to below later
# # Network Prepare
# This notebook prepares the final routing network.

# 1. Import the desired routing network
# 1. Add attributes
# 1. Add reconciled attributes
# 1. Add signals
# 1. Add elevation

# Then the network will be turned into a directed network graph complete with an edge list representing the directed edges and another one representing turns. Some attribute values are reversed to account for direction (e.g., elevation, signals).
# Import the data from previous notebooks and merge them. Merge here so updates can be done at each step without having to repeat everything.
# network_filepath = Path.home() / "Documents/BikewaySimData/Projects/gdot/networks"
# #filtered data
# links = gpd.read_file(network_filepath/'filtered.gpkg',layer='osm_links')
# nodes = gpd.read_file(network_filepath/'filtered.gpkg',layer='osm_nodes')
# links.columns
# #add osm data
# links = add_attributes.add_osm_attr(links,network_filepath / 'osm_attr.pkl')
# #rename
# links.rename(columns={'osm_A':'A','osm_B':'B','osm_linkid':'linkid'},inplace=True)
# nodes.rename(columns={'osm_N':'N'},inplace=True)
# links.columns
# #reconciled data
# reconciled = gpd.read_file(network_filepath/'reconciled.gpkg',layer='links',ignore_geometry=True)
# #[col for col in reconciled.columns if col not in links.columns]
# cols_to_keep = ['osm_linkid','speedlimit_range_mph','lanes_per_direction']
# links = links.merge(reconciled[cols_to_keep],on='osm_linkid',how='left')
# del reconciled
# #rename
# links.rename(columns={'osm_A':'A','osm_B':'B','osm_linkid':'linkid'},inplace=True)
# nodes.rename(columns={'osm_N':'N'},inplace=True)
# #signals added
# links_w_signals = gpd.read_file(network_filepath/'signals_added.gpkg',layer='links',ignore_geometry=True)

# nodes_w_signals = gpd.read_file(network_filepath/'signals_added.gpkg',layer='nodes',ignore_geometry=True)
# nodes_w_signals
# #TODO change linkid to osm_linkid later
# cols_to_keep = ['linkid','signal_A','signal_B']
# links = links.merge(links_w_signals[cols_to_keep],on='linkid',how='left')
# ##del nodes_w_signals

# #elevation added
# links_w_elevation = gpd.read_file(network_filepath/'elevation_added.gpkg',ignore_geometry=True)
# links_w_elevation.columns
# links_w_elevation.rename(columns={
#     'a_s_c_e_n_t___m':'ascent_m',
#     'd_e_s_c_e_n_t___m':'descent_m',
#     'a_s_c_e_n_t___g_r_a_d_e':'ascent_grade',
#     'd_e_s_c_e_n_t___g_r_a_d_e':'descent_grade',
# }, inplace =True)
# cols_to_keep = ['linkid','ascent_m','descent_m','ascent_grade','descent_grade','(0,2]_descent',
#        '(2,4]_descent', '(4,6]_descent', '(6,10]_descent', '(10,15]_descent',
#        '(15,inf]_descent', '(0,2]_ascent', '(2,4]_ascent', '(4,6]_ascent',
#        '(6,10]_ascent', '(10,15]_ascent', '(15,inf]_ascent']
# links = links.merge(links_w_elevation[cols_to_keep],on='linkid')
# del links_w_elevation
# links.columns
# fp = Path.home() / "Documents/BikewaySimData/Projects/gdot"
# edges = gpd.read_file(fp/'networks/elevation_added.gpkg',layer="links")


# edges.columns
# #use geometry one last time
# edges['length_ft'] = edges.length

# #turn bridge and tunnel to boolean values
# edges['tunnel'] = edges['tunnel'].notna()
# edges['bridge'] = edges['bridge'].notna()
# #turn bike facil into one column
# edges['bike_facility_type'] = np.nan
# edges.loc[(edges['mu'] == 1) & (edges['bike_facility_type'].isna()),'bike_facility_type'] = 'shared-use path'
# edges.loc[(edges['pbl'] == 1) & (edges['bike_facility_type'].isna()),'bike_facility_type'] = 'protected bike lane'
# edges.loc[(edges['bl'] == 1) & (edges['bike_facility_type'].isna()),'bike_facility_type'] = 'bike lane'
# df_edges, pseudo_df, pseudo_G = modeling_turns.create_pseudo_dual_graph(edges,'A','B','linkid','oneway',True)
# ## Add desired attributes from links to df_edges
# #df_edges = df_edges.merge(edges[['linkid','geometry']])

# basic_cols = ['linkid', 'osmid', 'link_type', 'name', 'oneway','length_ft']

# #anything that's an instance or would be better as a count value (but not a turn)
# event_cols = ['bridge','tunnel']

# #anything that's for the duration of the entire link and has categories
# category_cols = ['link_type','highway','speedlimit_range_mph',
#                'lanes_per_direction','bike_facility_type']

# #reverse in tuple form (these need to be flipped if going the other direction)
# rev_columns = [('ascent_m','descent_m'),
#                ('ascent_grade','descent_grade'),
#                ('(0,2]_ascent','(0,2]_descent'),
#                ('(2,4]_ascent','(2,4]_descent'),
#                ('(4,6]_ascent','(4,6]_descent'),
#                ('(6,10]_ascent','(6,10]_descent'),
#                ('(10,15]_ascent','(10,15]_descent'),
#                ('(15,inf]_ascent','(15,inf]_descent')]

# from itertools import chain
# keep_cols = basic_cols + event_cols + category_cols + list(chain(*rev_columns))
# # attrs = ['linkid', 'osmid', 'link_type', 'name', 'highway',
# #        'bridge', 'tunnel', 'bl', 'pbl', 'mu','speedlimit_range_mph',
# #        'lanes_per_direction', 'up_grade', 'down_grade', 'length_ft',
# #        'vehicle_separation','geometry']
# df_edges = df_edges.merge(edges[keep_cols],on='linkid',how='left')
# df_edges
# ## Deal with grade
# Need to flip sign of grade for reverse links
# # def combine_up_down_tuples(lst):
# #     result = []
# #     current_tuple = []

# #     for item in lst:
# #         if 'ascent' in item or 'descent' in item:
# #             current_tuple.append(item)
# #             if len(current_tuple) == 2:
# #                 result.append(tuple(current_tuple))
# #                 current_tuple = []

# #     return result

# # rev_columns = ['ascent_m','descent_m','ascent_grade','descent_grade',
# #                '(0,2]_down', '(2,4]_down', '(4,6]_down',
# #                '(6,10]_down', '(10,15]_down','(15,inf]_down',
# #                '(0,2]_up', '(2,4]_up', '(4,6]_up', '(6,10]_up',
# #                '(10,15]_up', '(15,inf]_up'
# #                ]

# # combined_tuples = combine_up_down_tuples(rev_columns)

# for elev_columns in rev_columns:
#     df_edges[elev_columns[0]] = np.where(df_edges['reverse_link'], df_edges[elev_columns[1]].abs(), df_edges[elev_columns[0]])
#     #drop the down version?
#     df_edges.drop(columns=elev_columns[1],inplace=True)
# ## Turns and Signals
# #add additional attributes needed for processing
# source_links = edges[['linkid','osmid','link_type','name','highway']]
# target_links = edges[['linkid','osmid','link_type','name','highway']]
# source_links.columns = 'source_' + source_links.columns
# target_links.columns = 'target_' + target_links.columns
# pseudo_df = pseudo_df.merge(source_links,on='source_linkid',how='left')
# pseudo_df = pseudo_df.merge(target_links,on='target_linkid',how='left')
# ## Turn Restrictions
# Two types in OSM (represented as OSM relations):
# - No (blank) turns
# - Only this turn allowed

# For chosen we don't need to consider turn restrictions
# # turn_restrictions = pd.read_csv(fp.parent/'osm_turn_restrictions.csv')
# # pseudo_df = pseudo_df.merge(turn_restrictions,left_on=['source_osmid','target_osmid'],right_on=['from_way_id','to_way_id'],how='left')
# # road_cond = (pseudo_df['source_link_type'] == 'road') & (pseudo_df['target_link_type'] == 'road')
# # no_restr = pseudo_df['type'] == 'no'
# # only_restr = pseudo_df['type'] == 'only'

# # #add a remove column
# # pseudo_df['remove'] = False

# # #remove the no turns
# # pseudo_df.loc[road_cond & no_restr,'remove'] = True

# # #for only, find all instances road_cond + from source and set to True
# # sources = set(turn_restrictions.loc[turn_restrictions['type']=='only','from_way_id'].tolist())
# # pseudo_df.loc[road_cond & pseudo_df['source_osmid'].isin(sources) & pseudo_df['type'].isna(),'remove'] = True

# # #Remove these turns and drop the added columns
# # print((pseudo_df['remove']==True).sum(),'turns removed')
# # pseudo_df = pseudo_df[pseudo_df['remove']==False]
# # pseudo_df.drop(columns=['relation_id', 'restriction', 'from_way_id',
# #        'to_way_id', 'type', 'remove'],inplace=True)
# # Deal with signals
# Perform two merges and use the source/target reverse link columns to determine which signal ID to keep.
# - For the source link, use signal_B if reverse == False else signal_A
# - For the target link, use signal_A if reverse == False else signal_B
# source = pseudo_df[['source_linkid','source_reverse_link']].merge(edges,left_on='source_linkid',right_on='linkid',how='left')
# pseudo_df['source_signal'] = np.where(source['source_reverse_link'], source['signal_A'], source['signal_B'])

# target = pseudo_df[['target_linkid','target_reverse_link']].merge(edges,left_on='target_linkid',right_on='linkid',how='left')
# pseudo_df['target_signal'] = np.where(target['target_reverse_link']==False, target['signal_B'], target['signal_A'])
# ## Identifying signalized/unsignalized turns
# - Only look at roads for now
# - Filter to left/right turns per source linkid per direction
# - Take the highest road classification and assign it as the cross street road classification
# import pandas as pd
# highway_order = {
#     'trunk': 0,
#     'trunk_link': 1,
#     'primary': 2,
#     'primary_link': 3,
#     'secondary': 4,
#     'secondary_link': 5,
#     'tertiary': 6,
#     'tertiary_link': 7,
#     'unclassified': 8,
#     'residential': 9
# }
# highway_order = pd.Series(highway_order)
# highway_order = highway_order.reset_index()
# highway_order.columns = ['highway','order']
# #add highway ranking based on the above
# pseudo_df['target_highway_order'] = pseudo_df['target_highway'].map(highway_order.set_index('highway')['order'])
# pseudo_df['source_highway_order'] = pseudo_df['source_highway'].map(highway_order.set_index('highway')['order'])
# #remove straight and uturn
# cond1 = pseudo_df['turn_type'].isin(['left','right'])
# #only road to road for now
# cond2 = (pseudo_df['source_link_type'] == 'road') & (pseudo_df['target_link_type'] == 'road')
# cross_streets = pseudo_df[cond1 & cond2]

# #use groupby to find the max target_highway order
# cross_streets = cross_streets.groupby(['source_linkid','source_A','source_B'])['target_highway_order'].min()
# cross_streets.name = 'cross_street'

# #add to main df
# pseudo_df = pd.merge(pseudo_df,cross_streets,left_on=['source_linkid','source_A','source_B'],right_index=True,how='left')

# #change numbers back to normal
# pseudo_df['cross_street_order'] = pseudo_df['cross_street']
# pseudo_df['cross_street'] = pseudo_df['cross_street'].map(highway_order.set_index('order')['highway'])
# # TODO Add OSM crossing into this logic
#     - Signals
#         - Wait on this until we have the route attributes code done
#         - Add crossings in signalization
#         - Majority of crossings are nodes not ways
#         - Cycleway crossings typically dealt the same way
#         - If meeting nodes are both crossings and within the traffic signal buffer, they're signalized crossings
#             - Or if both connecting links are crossings/connect to the road etc
#         - Way attributes
#             - Footway = crossing
#             - Highway = footway
#         - Node attributes
#             - Crossing = * (traffic signals/marked/etc)
#             - Highway = crossing
#         - Link attributes
#             - Some links are labeled as crossings but this is not as consistent

# signalized = pseudo_df['source_signal'] == pseudo_df['target_signal']
# left_or_straight =  pseudo_df['turn_type'].isin(['left','straight'])
# both_road = (pseudo_df['source_link_type'] == 'road') & (pseudo_df['target_link_type'] == 'road')
# cross_street = pseudo_df['cross_street_order'] <= 5

# #signalized
# pseudo_df.loc[signalized & both_road,'signalized'] = True
# pseudo_df.loc[pseudo_df['signalized'].isna(),'signalized'] = False
# # pseudo_df.loc[signalized & left_or_straight & both_road,'signalized_left_straight'] = True
# # pseudo_df.loc[pseudo_df['signalized_left_straight'].isna(),'signalized_left_straight'] = False

# pseudo_df.loc[-signalized & both_road & cross_street,'unsignalized'] = True
# pseudo_df.loc[pseudo_df['unsignalized'].isna(),'unsignalized'] = False

# #clean up
# rem =  ['source_osmid', 'source_link_type', 'source_name',
#        'source_highway', 'target_osmid', 'target_link_type', 'target_name',
#        'target_highway', 'source_signal', 'target_signal',
#        'target_highway_order', 'source_highway_order', 'cross_street',
#        'cross_street_order']
# pseudo_df.drop(columns=rem,inplace=True)
# # Export for impedance calibration

# # df_edges = gpd.GeoDataFrame(df_edges,crs='epsg:2240')
# df_edges.columns
# with (fp.parent / 'chosen.pkl').open('wb') as fh:
#     export = (df_edges,pseudo_df,pseudo_G)
#     pickle.dump(export,fh)
# ## Add geometry to examine results in QGIS
# #add geo
# link_geo = dict(zip(links['linkid'],links['geometry']))
# pseudo_df['src_geo'] = pseudo_df['source_linkid'].map(link_geo)
# pseudo_df['trgt_geo'] = pseudo_df['target_linkid'].map(link_geo)
# pseudo_df['geometry'] = pseudo_df[['src_geo','trgt_geo']].apply(lambda row: MultiLineString([row['src_geo'],row['trgt_geo']]),axis=1)

# pseudo_df.drop(columns=['src_geo','trgt_geo'],inplace=True)
# pseudo_df = gpd.GeoDataFrame(pseudo_df,crs=links.crs)

# pseudo_df['source'] = pseudo_df['source'].astype(str)
# pseudo_df['target'] = pseudo_df['target'].astype(str)

# #check results (may need a smaller road network to test on)
# pseudo_df.to_file(Path.home()/'Downloads/testing.gpkg',layer='cross_streets')