# Network Prepare
This step adds in the supplemental attributes prepared in the previous notebooks (e.g., traffic signals, bicycle facilities (with approx. install dates), elevation, etc.). Links where cyclists are absolutely not allowed to travel (Interstates) are removed. A pseudo dual graph for modeling turn movements is created, and the links, nodes, and turns are exported for further processing in the `impedance_calibration` module.

For the GDOT/NCST projects, the following attributes were available: (TURN THIS INTO A TABLE LIKE IN THE REPORT LATER)
- Length
- Grade/Elevation
- Bike Facility w Dates
- Oneway
- Signals
- AADT
- Truck %
- Lanes
- Speed Category

In [1]:
import geopandas as gpd
from pathlib import Path
import numpy as np
import pandas as pd
import pickle
from shapely.ops import Point

from bikewaysim.paths import config
from bikewaysim.network import modeling_turns, add_attributes, prepare_network

## Import network links and add attributes back

In [2]:
# Links and nodes are two layers of the same GeoPackage
network_gpkg = config['network_fp'] / 'networks.gpkg'
links = gpd.read_file(network_gpkg, layer='osm_links')
nodes = gpd.read_file(network_gpkg, layer='osm_nodes')

# link length in CRS units (the column name and the /5280 below assume feet)
links['length_ft'] = links.length

# headline network size
total_length = links.length.sum()
print(links.shape[0],'links',(total_length / 5280).round(0),'miles',nodes.shape[0],'nodes')

# link count, mileage and share of total length for every link type
length_by_type = links.groupby('link_type')['geometry'].apply(lambda geoms: geoms.length.sum())
summary_df = pd.DataFrame({
    'size': links['link_type'].value_counts(),
    'length_mi': length_by_type / 5280,
    'length_pct': length_by_type / total_length * 100,
})
print(summary_df.sort_values('length_mi',ascending=False))

# bring the raw OSM tags back onto the links; 'oneway' and 'geometry' are dropped
# from the raw table so the versions already on `links` are the ones kept
osm_attrs = gpd.read_file(config['network_fp'] / "osm.gpkg", layer='raw')
osm_attrs = osm_attrs.to_crs(links.crs)
print('Raw network is',(osm_attrs.length / 5280).sum().round(0),'miles')

# NOTE: default (inner) join, so a link whose osmid is missing from the raw layer is dropped
links = pd.merge(
    links,
    osm_attrs.drop(columns=['oneway','geometry']),
    on='osmid',
)
del osm_attrs

32930 links 709.0 miles 23702 nodes
                         size   length_mi  length_pct
link_type                                            
road                     8253  227.104152   32.041003
sidewalk                10233  209.776630   29.596348
parking_and_driveways    4018   82.140625   11.588815
service                  5255   78.311095   11.048525
pedestrian               3688   50.874695    7.177659
bike                     1035   28.884809    4.075215
restricted_access_road    167   17.225836    2.430308
no_bike                   279   14.462356    2.040422
Raw network is 723.0 miles


# Add bicycle infrastructure and approximate date of opening

In [3]:
# Bicycle facilities with their approximate opening year (prepared in an earlier notebook)
cycling_infra_dates = gpd.read_file(
    config['bicycle_facilities_fp'] / 'osm_cycleways_w_dates.gpkg',
    layer='dates_network',
)
# quick look at which facility types are present
cycling_infra_dates['facility_fwd'].unique()

array(['multi use path', 'bike lane', 'cycletrack', 'buffered bike lane',
       'sharrow', None], dtype=object)

In [4]:
# attach the facility type (by direction and overall) and opening year to every link
facility_cols = ['linkid','facility_fwd','facility_rev','facility','year']
links = pd.merge(links, cycling_infra_dates[facility_cols], how='left', on='linkid')

# miles of each facility type by opening year
facility_miles = links.groupby(['facility','year'])['length_ft'].sum() / 5280
facility_miles.round(2)

facility            year  
bike lane           2007.0    0.58
                    2008.0    5.14
                    2010.0    0.75
                    2011.0    0.76
                    2013.0    0.65
                    2014.0    0.63
                    2015.0    0.39
                    2016.0    0.10
                    2017.0    0.29
                    2018.0    1.30
                    2020.0    1.37
                    2021.0    0.39
                    2023.0    0.07
buffered bike lane  2013.0    0.85
                    2014.0    0.01
                    2015.0    0.16
                    2016.0    0.03
cycletrack          2005.0    0.09
                    2013.0    0.23
                    2015.0    1.04
                    2016.0    0.57
                    2017.0    0.68
                    2019.0    0.25
                    2021.0    0.77
multi use path      1976.0    2.39
                    2007.0    0.32
                    2008.0    6.15
                    2010.0  

## Remove off street infrastructure built after 2016
Some of these may have still existed as informal dirt paths (Beltline). In that case add them back in manually.

In [5]:
# Facilities that opened after this year are treated as not yet existing
max_year = 2016
max_year_cond = links['year'] > max_year  # NaN years compare False, so undated links are kept

# keep a record of everything affected so it can be inspected or added back manually
links.loc[max_year_cond].to_file(config['bicycle_facilities_fp']/'removed_bicycle_infra.gpkg')

# on-street facilities opened AFTER the cutoff: keep the road link itself but
# blank out its facility attributes so nothing gets matched to them
late_on_street = max_year_cond & (links['link_type']=='road')
for col in ('facility_fwd','facility_rev','facility'):
    links.loc[late_on_street, col] = None

#TODO reimplement this but don't remove links from the link database, just the graph so that we can do it dynamically
# off-street facilities (cycletracks / multi use paths that are not road links)
# opened after the cutoff: drop the link entirely. These rows were not touched by
# the blanking above, which only applies to road links.
after = links['facility'].isin(['cycletrack','multi use path']) & \
          (links['link_type']!='road') & \
          max_year_cond
# .copy() so the assignments below write to an independent frame rather than a slice
links = links[~after].copy()

# set no facility values to null
for col in ('facility_fwd','facility_rev'):
    links.loc[links[col] == 'no facility', col] = None

# nans to None
for col in ('facility_fwd','facility_rev','facility'):
    links.loc[links[col].isna(), col] = None

# save in new column for reference for comparison
# links['future_facility'] = links['facility_fwd']

## Sidepaths
Add adjacent multi-use paths and cycletracks to roads as an attribute and vice versa. Think Stone Mountain Trail or Beltline next to Wylie Street.

In [6]:
# --- matching thresholds ---
buffer_ft = 50          # search distance around each path, in projected CRS units (assumed feet)
min_coverage = 0.95     # minimum share of a road link that must fall inside a path buffer
max_angle_diff = 30     # maximum bearing difference (degrees) between road and path
min_adjacent_ft = 400   # minimum connected road length next to paths to count as a sidepath


def _bearing_diff(a, b):
    """Return the smallest absolute difference between two bearings, in degrees.

    A plain ``abs(a - b)`` treats 359 and 1 as 358 degrees apart; folding the
    result into [0, 180] lets headings on either side of north compare correctly.
    """
    diff = np.abs(a - b) % 360
    return np.minimum(diff, 360 - diff)


def _add_azimuths(gdf):
    """Add fwd/bck azimuth columns (computed on lon/lat geometry) and return in the projected CRS."""
    gdf = gdf.to_crs('epsg:4326')
    gdf[['fwd_azimuth','bck_azimuth']] = gdf.apply(modeling_turns.find_azimuth, axis=1)
    return gdf.to_crs(config['projected_crs_epsg'])


# subset to bike infra and roads
mups_and_cycletracks = (
    cycling_infra_dates
    .loc[cycling_infra_dates['link_type']!='road', ['linkid','facility','year','geometry']]
    .rename(columns={'linkid':'sidepath_linkid','facility':'sidepath','year':'sidepath_year'})
)
roads = links.loc[links['link_type']=='road',['linkid','geometry']].copy()
roads['og_length'] = roads.length

# get azimuth for getting angle change
roads = _add_azimuths(roads)
mups_and_cycletracks = _add_azimuths(mups_and_cycletracks)

# buffer by small amount
mups_and_cycletracks = mups_and_cycletracks.set_geometry(mups_and_cycletracks.buffer(buffer_ft))

# intersect (overlapping column names get _1 for roads and _2 for paths)
intersection = gpd.overlay(roads,mups_and_cycletracks)

# calculate coverage and angle change (hausdorff distance returns too many false positives)
intersection['new_length'] = intersection.length
intersection['ratio'] = intersection['new_length']/intersection['og_length']

# angle difference (take min to account for direction differences)
intersection['diff1'] = _bearing_diff(intersection['fwd_azimuth_1'], intersection['bck_azimuth_2'])
intersection['diff2'] = _bearing_diff(intersection['fwd_azimuth_1'], intersection['fwd_azimuth_2'])
intersection['mindiff'] = intersection[['diff1','diff2']].min(axis=1)

# set minimum conditions for accepting
cond0 = intersection['ratio'] > min_coverage # this much coverage of the original link
cond1 = intersection['mindiff'] < max_angle_diff # no more than this change in angle
intersection = intersection[cond0&cond1]

# just take the one with the most overlap after that
has_sidepath = intersection.loc[intersection.groupby('linkid')['ratio'].idxmax()]

# buffer the matched roads again and dissolve them, then split the dissolved shape
# into its separate polygons so that each connected cluster gets its own id
# (without the explode there is a single row and every link lands in one group)
dissolved = has_sidepath.buffer(buffer_ft).unary_union
connected_parts = (
    gpd.GeoDataFrame(geometry=[dissolved], crs=config['projected_crs_epsg'])
    .explode(index_parts=False)
    .reset_index(drop=True)
)
connected_parts['part_id'] = range(len(connected_parts))

# intersect with the has_sidepath layer again and sum the road length per cluster
# need 400 feet next to a road to count as sidepath
intersect2 = gpd.overlay(has_sidepath,connected_parts)
intersect2['adjacent_length_ft'] = intersect2.groupby('part_id')['og_length'].transform('sum')
# intersect2[intersect2['adjacent_length_ft']>min_adjacent_ft].explore()
final_sidepaths = intersect2.loc[
    intersect2['adjacent_length_ft'] > min_adjacent_ft,
    ['linkid','sidepath_linkid','sidepath','sidepath_year','geometry'],
]
# final_sidepaths.explore()
final_sidepaths.to_file(config['bicycle_facilities_fp']/'sidepaths.gpkg',layer='sidepaths')

In [7]:
# attach the sidepath match (if any) to every link; geometry is dropped so the
# link's own geometry column is left untouched
sidepath_attrs = final_sidepaths.drop(columns='geometry')
links = pd.merge(links, sidepath_attrs, how='left', on='linkid')

# NOTE 10/14/24 instead of adding in the infra here, leave it as is because the buffer optimization should account for this
# assign the facility to the road if it doesn't already have facility
# links0 = links.copy()
# cond = links[['sidepath_linkid']].notna().all(axis=1) & links[['facility_fwd','facility_rev','facility']].isna().all(axis=1)
# links.loc[cond,['facility_fwd','facility_rev','facility']] = links.loc[cond,['sidepath','sidepath','sidepath']].values
# assign year if there is one
# cond = links['sidepath_year'].notna() & links['year'].isna()
# links.loc[cond,'year'] = links['sidepath_year']

# assign the road attributes to the sidepath (i.e., attach the adjacent road linkid) (future step)

# Add Network Improvements

Atlanta Example

In [8]:
# planned network improvements ('coa' layer), read as a plain attribute table
improvements = gpd.read_file(
    config['bicycle_facilities_fp'] / 'network_improvements.gpkg',
    layer='coa',
    ignore_geometry=True,
)
links = pd.merge(links, improvements, how='left', on='linkid')

Savannah

In [9]:
# improvements = gpd.read_file(config['bicycle_facilities_fp']/'network_improvements.gpkg',layer='savannah',ignore_geometry=True)
# links = pd.merge(links,improvements,on='osm_linkid',how='left')
# links.drop(columns=['linkid'],inplace=True)

# Add GDOT data
GDOT provides # of lanes data, AADT, and truck %.

In [10]:
# both GDOT tables come from the conflation GeoPackage and are keyed on osmid
conflation_gpkg = config['network_fp']/"conflation.gpkg"

gdot_lanes = gpd.read_file(conflation_gpkg, layer="gdot_lanes", ignore_geometry=True)
gdot_lanes['state_route'] = gdot_lanes['route_type'].eq('State Route') # TODO add to the GDOT notebook
gdot_traffic = gpd.read_file(conflation_gpkg, layer="gdot_traffic", ignore_geometry=True)

# left joins: links without a GDOT match keep nulls, which are handled in the next steps
for gdot_table in (gdot_lanes, gdot_traffic):
    links = pd.merge(links, gdot_table, how='left', on="osmid")

Handle null aadt

In [11]:
# this to explore na links
# links[(links['link_type']=='road')&links['AADT'].isna()].explore()

# Defaults for links that did not receive an AADT category:
#   local-type roads        -> lowest category
#   all other roads         -> middle category
#   anything not a road     -> lowest category (bike paths, service roads, parking lots)
aadt_missing = links['AADT'].isna()
is_road = links['link_type'] == 'road'
is_local_road = is_road & links['highway'].isin(['residential','service','unclassified','living_street'])

links.loc[aadt_missing & is_local_road, 'AADT'] = '[0,4k)'
links.loc[aadt_missing & is_road & ~is_local_road, 'AADT'] = '[4k,10k)'
links.loc[aadt_missing & ~is_road, 'AADT'] = '[0,4k)'

#turn it into categorical data (ordered so categories can be compared with < / >)
aadt_levels = ['[0,4k)','[4k,10k)','[10k,inf)']
links['AADT'] = pd.Categorical(links['AADT'], categories=aadt_levels, ordered=True)

# Add HERE data (SKIP IF NO HERE DATA)
HERE provides speed and lanes data.

In [12]:
# HERE attributes (speed, lanes) conflated to the links, keyed on linkid
here = gpd.read_file(
    config['network_fp'] / "conflation.gpkg",
    layer="here",
    ignore_geometry=True,
)
links = pd.merge(links, here, how='left', on='linkid')

Handling null speeds

In [13]:
# this to explore na links
# links[(links['link_type']=='road')&links['here_speed'].isna()].explore()
# links[links['here_speed'].isna()&(links['link_type']=='road')]['highway'].unique()

# Defaults for links that did not receive a HERE speed category:
#   local-type roads        -> 30 or below
#   all other roads         -> 30 +
#   anything not a road     -> 30 or below (bike paths, service roads, parking lots)
speed_missing = links['here_speed'].isna()
is_road = links['link_type'] == 'road'
is_local_road = is_road & links['highway'].isin(['residential','service','unclassified','living_street'])

links.loc[speed_missing & is_local_road, 'here_speed'] = '[0,30]'
links.loc[speed_missing & is_road & ~is_local_road, 'here_speed'] = '(30,40]'
links.loc[speed_missing & ~is_road, 'here_speed'] = '[0,30]'

# from here on the column is simply called 'speed'
links = links.rename(columns={'here_speed':'speed'})

In [14]:
# ordered categories so speed bands can be compared with < / >=
speed_levels = ['[0,30]', '(30,40]', '(40,inf)']
links['speed'] = pd.Categorical(links['speed'], categories=speed_levels, ordered=True)

# Resolve GDOT/HERE lanes data
- All non-road links get a 1 (doing this so lanes attribute isn't being confounded with vehicle access)
- By direction is too detailed, use a per direction estimate (i.e. treat a 5 lane oneway road the same as a 10 lane twoway road or a 5 lane per direction)
- Simplify to:
    - 1 lane per direction
    - 2 lanes per direction
    - 3+ lanes per direction
- If unequal number of lanes use direction that would result in the higher category
    - Example: 10th Street NE would be 2 lanes per direction because it has 2/1 lanes by direction
- Turn lanes (middle, right, etc) are NOT counted in HERE or GDOT

In [15]:
#TODO figure out what to do when there is a discrepancy between the HERE and GDOT lane counts
## Examine where there's a big mismatch between HERE and GDOT
# - There are a couple of cases where GDOT will be way off, like North Highland Ave NE which shows up as having four lanes when it's mostly 2 lanes for most of its length.
# - Memorial Drive is also marked as having four lanes but it was road dieted post 2016, so just use the old value.
# - Unless it's a residential street or a few cases that were identified, use the GDOT values.

In [16]:
# which sources have a lane count for each link
has_here = links['here_lanes'].notna()
has_gdot = links['gdot_lanes'].notna()

#give everything a default value of 1 (before we would give non-motorized links a value of 0)
links['lanes'] = 1

#if only one source has a value, take it
links.loc[has_gdot & ~has_here, 'lanes'] = links['gdot_lanes']
links.loc[has_here & ~has_gdot, 'lanes'] = links['here_lanes']

#if both have a value, choose whichever is smaller
links.loc[has_here & has_gdot, 'lanes'] = links[['here_lanes','gdot_lanes']].min(axis=1)

#the source columns are no longer needed
links = links.drop(columns=['gdot_lanes','here_lanes'])

# Add elevation data
Assign the correct direction for reverse links later

In [17]:
# per-link climb/descent and grade category, read as a plain attribute table
elevation_cols = ['linkid','ascent_ft','descent_ft','ascent_grade_cat','descent_grade_cat']
elevation = gpd.read_file(
    config['network_fp'] / 'elevation.gpkg',
    layer='elevation',
    ignore_geometry=True,
)[elevation_cols]
links = pd.merge(links, elevation, how='left', on='linkid')
# del elevation

In [18]:
# links with no elevation match: use the flattest grade category and zero climb/descent
for grade_col in ('ascent_grade_cat','descent_grade_cat'):
    links.loc[links[grade_col].isna(), grade_col] = '[0,4)'

climb_cols = ['ascent_ft','descent_ft']
links.loc[:, climb_cols] = links.loc[:, climb_cols].fillna(0).values

In [19]:
# #for remaining bridge where lidar data was not available set the grade to 0 if grade exceeds 10 percent
# links.loc[(links['bridge'] == 'yes') & (links['ascent_grade_%'] > 10),'ascent_grade_%'] = 0
# #also for tunnels
# links.loc[(links['tunnel'] == 'yes'),'ascent_grade_%'] = 0

# Create reverse links and turn dataframe

In [20]:
#TODO change this to not create the turn graph (just make it an extra optional step)
from collections import Counter
from importlib import reload

# re-import the module so edits made to it during development are picked up
reload(modeling_turns)

# directed (forward + reverse) links and the table of turns between them
directed_links, turns_df = modeling_turns.create_pseudo_dual_graph(links,'A','B','linkid','oneway')

# node degree = how many link ends touch each node (counted over both A and B)
# find the degree of the intersection node and re-classify anything with degree 2 as straight turn movement?
# what about interstate exits that got removed?
endpoint_counts = Counter(links['A'].tolist())
endpoint_counts.update(links['B'].tolist())
node_degree = dict(endpoint_counts)

# source_B is the node at which the turn takes place
turns_df['node_degree'] = turns_df['source_B'].map(node_degree)
# turns_df.loc[turns_df['node_degree']==2,'turn_type'] = 'straight'

# Add signals from OSM and GDOT to turns

In [21]:
# a turn is identified by its source link + direction and target link + direction
turn_key = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']

# signalized turns from each source, as sets of key tuples
osm_signals = {tuple(rec) for rec in pd.read_csv(config['network_fp']/'osm_signals.csv')[turn_key].values}
gdot_signals = {tuple(rec) for rec in pd.read_csv(config['network_fp']/'gdot_signals.csv')[turn_key].values}
added_signals = osm_signals | gdot_signals

# index by the turn key so the signalized turns can be selected directly
turns_df.set_index(turn_key, inplace=True)

# only keep signal records that correspond to a turn that actually exists
added_signals = added_signals & set(turns_df.index.tolist())
turns_df.loc[list(added_signals),'signalized'] = True
# everything not flagged above is unsignalized
turns_df.loc[turns_df['signalized'].isna(),'signalized'] = False

turns_df.reset_index(inplace=True)

turns_df['signalized'].value_counts()

signalized
False    135644
True       3688
Name: count, dtype: int64

# Add in cross street variables

In [22]:
# attach speed, lanes, AADT, and osm highway of both the source and the target link to every turn
link_attrs = links[['linkid','highway','link_type','lanes','AADT','speed']].set_index('linkid')

# same lookup table joined twice, once per side of the turn, with prefixed column names
for side in ('source', 'target'):
    turns_df = pd.merge(
        turns_df,
        link_attrs.add_prefix(f'{side}_'),
        left_on=f'{side}_linkid',
        right_index=True,
        how='left',
    )
turns_df

Unnamed: 0,source_linkid,source_reverse_link,target_linkid,target_reverse_link,source_A,source_B,target_A,target_B,source_azimuth,target_azimuth,...,source_highway,source_link_type,source_lanes,source_AADT,source_speed,target_highway,target_link_type,target_lanes,target_AADT,target_speed
0,15159,True,15133,True,5423926546,5423926551,5423926551,5423926004,320.3,140.3,...,service,service,1,"[0,4k)","[0,30]",service,service,1,"[0,4k)","[0,30]"
1,15159,True,27278,True,5423926546,5423926551,5423926551,9627846619,320.3,342.8,...,service,service,1,"[0,4k)","[0,30]",service,parking_and_driveways,1,"[0,4k)","[0,30]"
2,15133,False,15159,False,5423926004,5423926551,5423926551,5423926546,320.3,140.3,...,service,service,1,"[0,4k)","[0,30]",service,service,1,"[0,4k)","[0,30]"
3,15133,False,27278,True,5423926004,5423926551,5423926551,9627846619,320.3,342.8,...,service,service,1,"[0,4k)","[0,30]",service,parking_and_driveways,1,"[0,4k)","[0,30]"
4,27278,True,24379,True,5423926551,9627846619,9627846619,8243455446,342.8,316.3,...,service,parking_and_driveways,1,"[0,4k)","[0,30]",service,parking_and_driveways,1,"[0,4k)","[0,30]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139327,9637,False,17612,False,3691354840,5999097833,5999097833,69425053,179.4,179.2,...,trunk,road,2,"[10k,inf)","(30,40]",trunk,road,2,"[10k,inf)","(30,40]"
139328,9637,False,17613,False,3691354840,5999097833,5999097833,5999097832,179.4,89.6,...,trunk,road,2,"[10k,inf)","(30,40]",footway,sidewalk,1,"[0,4k)","[0,30]"
139329,9637,False,17614,True,3691354840,5999097833,5999097833,5999097834,179.4,271.6,...,trunk,road,2,"[10k,inf)","(30,40]",footway,sidewalk,1,"[0,4k)","[0,30]"
139330,5646,True,5645,False,3691354840,1278582054,1278582054,2993189362,359.4,147.1,...,trunk,road,2,"[10k,inf)","(30,40]",tertiary,road,2,"[10k,inf)","(30,40]"


In [23]:
# a cross street is whatever a left or right turn leads onto
is_side_turn = turns_df['turn_type'].isin(['left','right'])

#only road to road for now
road_to_road = turns_df['source_link_type'].eq('road') & turns_df['target_link_type'].eq('road')

# for every directed source link take the worst (max) AADT / lanes / speed over its cross streets
source_key = ['source_linkid','source_reverse_link']
cross_streets = (
    turns_df[is_side_turn & road_to_road]
    .groupby(source_key)[['target_AADT','target_lanes','target_speed']]
    .max()
    .rename(columns={
        'target_AADT': 'cross_AADT',
        'target_lanes': 'cross_lanes',
        'target_speed': 'cross_speed',
    })
)

# inner join: turns whose source link has no road cross street are left out of `test`
test = turns_df.merge(cross_streets, left_on=source_key, right_index=True)#,how='left')

In [24]:
def _is_high_stress(turns, side):
    """Flag rows whose `<side>_AADT`, `<side>_lanes` or `<side>_speed` is in a high category.

    A row is flagged when AADT is in the top category, there are more than 2 lanes,
    or the speed category is '(30,40]' or above.
    """
    return (
        (turns[f'{side}_AADT'] == '[10k,inf)')
        | (turns[f'{side}_lanes'] > 2)
        | (turns[f'{side}_speed'] >= '(30,40]')
    )


# a stressful turn would be one where the cross street is high stress
cross_high_stress = _is_high_stress(test, 'cross')

# if the source street has these stats then assume that there is a signal
source_high_stress = _is_high_stress(test, 'source')

# unsignalized straight/left movement from a low stress street across a high stress one
test['unsig_crossing'] = (
    ~source_high_stress
    & cross_high_stress
    & (test['signalized'] == False)
    & test['turn_type'].isin(['straight','left'])
)

In [25]:
# add unsignalized crossing variable back in
turn_key = ['source_linkid','source_reverse_link','target_linkid','target_reverse_link']

# drop a previous copy first so re-running the cell doesn't produce _x/_y columns
turns_df = turns_df.drop(columns='unsig_crossing', errors='ignore')
turns_df = pd.merge(turns_df, test[turn_key + ['unsig_crossing']], on=turn_key, how='left')

# `test` only holds turns whose source link has a road cross street, so all other
# turns come back as NaN from the left join; those are not unsignalized crossings,
# so record them as False and keep the column strictly boolean
turns_df['unsig_crossing'] = turns_df['unsig_crossing'].eq(True)

In [26]:
# #optional add geo data to turns and export for examination
# reload(modeling_turns)
# cross_streets_gdf = modeling_turns.turn_gdf(links,test)
# for idx,x in enumerate(cross_streets_gdf.dtypes):
#     if (str(x) == "category") | (str(x)=='object'):
#         cross_streets_gdf.iloc[:,idx] = cross_streets_gdf.iloc[:,idx].astype(str)
# cross_streets_gdf.to_file(config['network_fp']/'scratch.gpkg',layer='cross_streets')

In [27]:
# nodes[nodes['N'].isin(set(cross_streets_gdf.loc[cross_streets_gdf['unsig_crossing']==True,'source_B'].tolist()))].explore() # looks much more reasonable

In [28]:
# not sure what I was going for here
# # get the worst possible cross street attribute
# # cross_streets.groupby(['source_linkid','source_reverse_link'])['target_AADT','target_lanes','target_speed'].idxmax()
# cross_streets.loc[18242]
# cross_streets.groupby(['source_linkid','source_reverse_link'])['target_speed'].max()


# # find the worst cross street if there are multiple
# cross_streets.groupby(['source_linkid','source_reverse_link'])['aadt'].apply(lambda x: aadt_order)


# # TODO do this for the other variables too
# cols = ['AADT','lanes','speed']
# for x in cols:
#     print(links[x].unique())
# # rules for high stress turns

# #Major/minor road classification to create high traffic stress variable
# major_road_values = ['primary','secondary']
# major_road_values = major_road_values + [item + '_link' for item in major_road_values]
# minor_road_values = ['tertiary','unclassified','residential','service','trunk','living_street','service']
# minor_road_values = minor_road_values + [item + '_link' for item in minor_road_values]

# #traffic

# #override major road if only one lane per direction
# major_road = set(links.loc[links['highway'].isin(major_road_values) & (links['lanes'] >= 2),'linkid'].tolist())
# minor_road = set(links.loc[links['highway'].isin(minor_road_values) | \
#                            (links['highway'].isin(major_road_values) & (links['lanes'] < 2) ), 
#                             'linkid'].tolist())
# cross_streets.loc[cross_streets['source_linkid']==3]
# print(links[links['linkid']==3].squeeze())
# links[links['linkid']==3].explore()

# # grouby the source link
# cross_streets.groupby(['source_linkid'])['target_highway'].agg(list)#['target_highway_order'].min()

# cross_streets.name = 'cross_street'

# #add to main df
# pd.merge(turns_df,cross_streets,left_on=['source_linkid','source_A','source_B'],right_index=True,how='left')

# # wasn't able to get major/minor to be significant
# #Major/minor road classification to create high traffic stress variable
# major_road_values = ['primary','secondary']
# major_road_values = major_road_values + [item + '_link' for item in major_road_values]
# minor_road_values = ['tertiary','unclassified','residential','service','trunk','living_street','service']
# minor_road_values = minor_road_values + [item + '_link' for item in minor_road_values]

# #override major road if only one lane per direction
# major_road = set(links.loc[links['highway'].isin(major_road_values) & (links['lanes'] >= 2),'linkid'].tolist())
# minor_road = set(links.loc[links['highway'].isin(minor_road_values) | links['lanes'] < 2,'linkid'].tolist())

# #unsignalized straight/left turn where crossing street is a major road
# turns_df['unsig_major_road_crossing'] = (turns_df['signalized']==False) & \
#     turns_df['target_linkid'].isin(major_road) & \
#     turns_df['source_linkid'].isin(minor_road) & \
#     turns_df['turn_type'].isin(['left','straight'])

# # #sets turns that are not from road to road to None, effectively ignoring them
# # exclude = ['road','service']
# # turns_df.loc[(turns_df['source_link_type'].isin(exclude)==False) & 
# #              (turns_df['target_link_type'].isin(exclude)==False),'turn_type'] = None

# Remove Interstates and Private Links
Remove these because we're absolutely sure we don't want bikes on these links.

In [29]:
# link types that must never be part of the routable network
remove = [
    'no_access_or_private',
    'restricted_access_road',
    'no_bike',
]
# show which link types are actually present
print(links['link_type'].unique())

['service' 'road' 'sidewalk' 'pedestrian' 'parking_and_driveways' 'bike'
 'no_bike' 'restricted_access_road' None]


In [30]:
# flag and drop every link whose type is on the removal list
remove_cond = links['link_type'].isin(remove)
print(remove_cond.sum(),'links removed')
links = links[~remove_cond]

446 links removed


# Remove isolated links

In [31]:
# Replace links/nodes with the result of prepare_network.largest_comp_and_simplify.
# Presumably this keeps only the largest connected component (the before/after counts it
# prints show links and nodes being dropped) — see prepare_network for the exact rules.
links, nodes = prepare_network.largest_comp_and_simplify(links,nodes)

Before connected components: Links 32346 Nodes 23702
After connected components: Links 32240 Nodes 23287


In [32]:
# keep only turns whose source AND target link are still in the network
kept_linkids = set(links['linkid'].tolist())
source_kept = turns_df['source_linkid'].isin(kept_linkids)
target_kept = turns_df['target_linkid'].isin(kept_linkids)
turns_df = turns_df[source_kept & target_kept]

# Export

In [33]:
# list every column currently on the links before choosing which ones to export
links.columns

Index(['A', 'B', 'linkid', 'oneway', 'link_type', 'osmid', 'geometry',
       'length_ft', 'timestamp', 'version', 'type', 'highway', 'name',
       'bridge', 'tunnel', 'cycleway', 'service', 'footway', 'sidewalk',
       'bicycle', 'foot', 'access', 'area', 'surface', 'all_tags', 'geom_type',
       'facility_fwd', 'facility_rev', 'facility', 'year', 'sidepath_linkid',
       'sidepath', 'sidepath_year', 'coa_id', 'coa_name', 'improvement',
       'route_type', 'state_route', 'AADT', 'truck_pct', 'speed', 'lanes',
       'ascent_ft', 'descent_ft', 'ascent_grade_cat', 'descent_grade_cat'],
      dtype='object')

In [34]:
# columns that make up the exported network, grouped by where they come from
final_cols = [
    # OSM derived or ID variables
    'A', 'B', 'linkid', 'osmid', 'oneway', 'highway', 'name', 'all_tags',
    'link_type',
    # bicycle facilities
    'facility_fwd', 'facility_rev', 'facility', 'year',
    # 'sidepath_linkid', 'sidepath', 'sidepath_year', # export this separately
    # traffic
    'state_route', 'AADT', 'speed', 'lanes',
    # elevation
    'ascent_ft', 'descent_ft', 'ascent_grade_cat', 'descent_grade_cat',
    # geometry
    'length_ft', 'geometry',
]

# export version with the categorical values
# NOTE(review): this writes a pickle even though the file name ends in .parquet;
# readers must use read_pickle — confirm what the downstream module expects
links[final_cols].to_pickle(config['network_fp']/'final_network_edges.parquet')

#TODO create a function for performing this
# turn categorical columns into plain strings for the exports that follow
for col in links.select_dtypes(include='category').columns:
    links[col] = links[col].astype(str).values


In [35]:
#TODO not sure why I had to explicitly do this
# force AADT to plain strings regardless of what the loop over categorical columns did
links['AADT'] = links['AADT'].astype(str).values

In [36]:
# edges and nodes go into the same GeoPackage as separate layers
final_gpkg = config['network_fp'] / 'final_network.gpkg'
links[final_cols].to_file(final_gpkg, layer='edges')
nodes.to_file(final_gpkg, layer='nodes')

In [37]:
# add directional attributes and flip as needed
ascent_columns = ['ascent_ft', 'ascent_grade_cat']
descent_columns = ['descent_ft', 'descent_grade_cat']
directed_links = pd.merge(
    directed_links,
    links[['linkid','facility_fwd','facility_rev']+ascent_columns+descent_columns],
    on='linkid',
)

# On reverse links the forward and backward attributes trade places: an ascent
# becomes a descent and the facility in the reverse direction becomes the forward one.
# Each like-typed pair is swapped on its own so that numeric columns are not
# routed through a mixed float/string object array.
is_reverse = directed_links['reverse_link'] == True
swap_pairs = [
    ('ascent_ft', 'descent_ft'),
    ('ascent_grade_cat', 'descent_grade_cat'),
    ('facility_fwd', 'facility_rev'),
]
for fwd_col, rev_col in swap_pairs:
    swapped = directed_links.loc[is_reverse, [rev_col, fwd_col]].to_numpy()
    directed_links.loc[is_reverse, [fwd_col, rev_col]] = swapped

In [38]:
# tuple columns not compatible with parquet
# turns_df.drop(columns=['source','target'],inplace=True)

In [39]:
#TODO pickles later
# write the turn table and the directed links next to the rest of the network files
parquet_exports = (
    (turns_df, 'turns_df.parquet'),
    (directed_links, 'directed_edges.parquet'),
)
for frame, file_name in parquet_exports:
    frame.to_parquet(config['network_fp']/file_name)

In [40]:
# list the columns carried on the exported turn table
turns_df.columns

Index(['source_linkid', 'source_reverse_link', 'target_linkid',
       'target_reverse_link', 'source_A', 'source_B', 'target_A', 'target_B',
       'source_azimuth', 'target_azimuth', 'azimuth_change', 'turn_type',
       'node_degree', 'signalized', 'source_highway', 'source_link_type',
       'source_lanes', 'source_AADT', 'source_speed', 'target_highway',
       'target_link_type', 'target_lanes', 'target_AADT', 'target_speed',
       'unsig_crossing'],
      dtype='object')

In [41]:
from shapely.geometry import MultiLineString

# work on an independent frame: turns_df was produced by filtering, and the
# assignments below should not write into a slice of the earlier table
turns_df = turns_df.copy()

#TODO create a function for performing this
# Convert categorical columns to plain strings before writing the layer.
# The column is replaced by label: a positional `iloc[:, i] = ...` assignment can be
# applied in place, which leaves the categorical dtype unchanged.
for col in turns_df.select_dtypes(include='category').columns:
    turns_df[col] = turns_df[col].astype(str)

#optional add geo data to turns and export for examination
# each turn is drawn as the pair of link geometries it connects
geo_dict = dict(zip(links['linkid'],links['geometry']))
turns_df['geometry'] = [
    MultiLineString([geo_dict[source_id], geo_dict[target_id]])
    for source_id, target_id in zip(turns_df['source_linkid'], turns_df['target_linkid'])
]
turns_gdf = gpd.GeoDataFrame(turns_df, geometry='geometry', crs=links.crs)
# turns_gdf.drop(columns=['source','target'])
turns_gdf.to_file(config['network_fp']/'final_network.gpkg',layer='turns')

## Other layers

In [42]:
#create layer of unsignalized crossings for examining
# source_B is the node where the flagged turn takes place
is_unsig = turns_df['unsig_crossing'] == True
unsig_major_road_crossing = set(turns_df.loc[is_unsig, 'source_B'].tolist())

# re-read the exported nodes and keep only those at a flagged crossing
nodes = gpd.read_file(config['network_fp'] / 'final_network.gpkg', layer='nodes')
nodes = nodes.loc[nodes['N'].isin(unsig_major_road_crossing)]
nodes.to_file(config['calibration_fp'] / 'unsig_major_road_crossing.gpkg')