## Step 2 Network Reconciliation
---
This notebook is for adding network attributes from other networks to the OSM network.

In [1]:
from pathlib import Path
import geopandas as gpd
import pandas as pd
import numpy as np

from bikewaysim.paths import config
from bikewaysim.network import conflation_tools

## HERE Attributes (ignore if HERE data not available)
This section adds two HERE road attributes to the OSM links: the speed limit category and the number-of-lanes category.


## Import OSM

In [2]:
# Load the network links and keep only those classified as roads.
osm = gpd.read_file(config['network_fp'] / 'networks.gpkg', layer='osm_links')
osm = osm.loc[osm['link_type'] == 'road']

# Bring the raw OSM road class ('highway') and street name back onto the links,
# joined on the OSM way id. Geometry is skipped because only the tags are needed.
cols_to_keep = ['osmid', 'highway', 'name']
osm_attrs = gpd.read_file(config['network_fp'] / "osm.gpkg", layer='raw', ignore_geometry=True)
osm = osm.merge(osm_attrs[cols_to_keep], on='osmid')
del osm_attrs

# Street name passed through conflation_tools.contract_suffix; this is the
# version compared against the HERE street names during conflation.
osm['name2'] = osm['name'].apply(conflation_tools.contract_suffix)

## Import HERE


In [3]:
# Study area polygon, used as a spatial mask so only HERE links touching it are read.
studyarea = gpd.read_file(config['network_fp'] / "base_layers.gpkg", layer='studyarea_bounds')
here = gpd.read_file(Path(config['here_fp']), mask=studyarea)
here = here.to_crs(config['projected_crs_epsg'])

# Run the HERE street name through the same helper that is applied to the OSM names
here['ST_NAME2'] = here['ST_NAME'].apply(conflation_tools.contract_suffix)

# Store the lane category as an integer
here['LANE_CAT'] = here['LANE_CAT'].astype(int)

# Keep links that allow autos and pedestrians, have a speed category below 7,
# and are neither controlled access nor ramps.
controlled_access = here['CONTRACC'].str.contains('Y') | here['RAMP'].str.contains('Y')
ped_access = here['AR_PEDEST'].eq('Y')
road = here['AR_AUTO'].str.contains('Y') & here['SPEED_CAT'].astype(int).lt(7)
here = here[controlled_access.eq(False) & road & ped_access]

# Disabled: flag links whose two directional lane counts differ
#here['unequal_lanes'] = (here['FROM_LANES'] != here['TO_LANES']) & (here['FROM_LANES']!=0) & (here['TO_LANES']!=0)

## Modify the HERE lanes attributes
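HERE's lane category (`LANE_CAT`) is supposed to represent the number of lanes per direction, but for one-way streets it occasionally does not. Where directional lane counts (`FROM_LANES` / `TO_LANES`) are available, the cell below recomputes the category from the larger of the two.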


In [4]:
# A link has usable directional counts when at least one of the two is non-zero.
nonzero = here['TO_LANES'].ne(0) | here['FROM_LANES'].ne(0)

# Larger of the two directional lane counts, computed once and reused below.
max_lanes = here[['FROM_LANES', 'TO_LANES']].max(axis=1)

one_lane_per_direction = nonzero & max_lanes.eq(1)
two_lanes_per_direction = nonzero & max_lanes.eq(2)
three_or_more_lanes_per_direction = nonzero & max_lanes.ge(3)

# Start from HERE's own lane category, then overwrite it wherever the
# directional counts give a category (1, 2 or 3 meaning "3 or more").
here['here_lanes'] = here['LANE_CAT']
for lane_mask, lane_category in (
    (one_lane_per_direction, 1),
    (two_lanes_per_direction, 2),
    (three_or_more_lanes_per_direction, 3),
):
    here.loc[lane_mask, 'here_lanes'] = lane_category

# Optional visual check of the links whose category changed:
#here.loc[here['LANE_CAT'] != here['here_lanes'],['ST_NAME','FROM_LANES','TO_LANES','LANE_CAT','here_lanes','geometry']].explore()

## Modify the HERE speed limit attribute

In [5]:
# Collapse the eight HERE speed categories into three bins; three bins were
# chosen based on the distribution of speed categories. HERE notes that these
# categories do not necessarily represent the legal speed limit.
#
# Original HERE ranges by SPEED_CAT:
#   '1' > 80 MPH    '2' 65-80 MPH   '3' 55-64 MPH   '4' 41-54 MPH
#   '5' 31-40 MPH   '6' 21-30 MPH   '7' 6-20 MPH    '8' < 6 MPH
# Categories '7' and '8' are mapped for completeness even though the earlier
# import filter only keeps SPEED_CAT values below 7.
new_here_speed_bins = {
    **dict.fromkeys(['1', '2', '3', '4'], '(40,inf)'),  # 41+ MPH
    '5': '(30,40]',                                     # 31-40 MPH
    **dict.fromkeys(['6', '7', '8'], '[0,30]'),         # 30 MPH and below
}
here['here_speed'] = here['SPEED_CAT'].map(new_here_speed_bins)

In [6]:
# Optional visual check (uncomment to run): map the HERE links that fell in the fastest speed bin.
# here[here['here_speed']=='(40,inf)'].explore()

In [7]:
# Trim the HERE table to the link id, the two street-name columns,
# the two derived attributes and the geometry.
cols_to_keep = ['LINK_ID', 'ST_NAME', 'ST_NAME2', 'here_speed', 'here_lanes', 'geometry']
here = here.loc[:, cols_to_keep]

## Conflate HERE Speed and Lanes

In [8]:
# Worked example of a string-similarity ratio between two street names.
# NOTE(review): presumably conflation_tools.name_check uses a similar
# difflib-based comparison; confirm against that module.
import difflib

name1, name2 = 'cascade ave sw', 'cascade rd'
matcher = difflib.SequenceMatcher(a=name1, b=name2)
ratio = matcher.ratio()
ratio

0.6666666666666666

In [9]:
# Conflate HERE speed/lane attributes onto OSM links:
#   1. buffer every HERE link and intersect the OSM links with those buffers,
#   2. discard candidate pairs whose street names disagree,
#   3. for each OSM link keep the candidate with the smallest Hausdorff distance,
#   4. write the surviving matches to the "here" layer of conflation.gpkg.

# buffer here (distance is in the units of the projected CRS)
here_buffer = here.copy()
here_buffer.geometry = here_buffer.buffer(100)

# intersect with osm: one row per (OSM link, HERE buffer) pair, clipped to the buffer
intersect = gpd.overlay(osm,here_buffer)
print(intersect.shape[0],'initial matches')

# name check
intersect['name_check'] = intersect.apply(lambda row: conflation_tools.name_check(row['ST_NAME2'],row['name2']),axis=1)
# If both are none change from false to None (if one is none then leave it because that probably means it was a bad match)
# Cast to object first: writing None into a boolean column relies on an implicit
# upcast that pandas has deprecated (it raised the warning seen on this line).
intersect['name_check'] = intersect['name_check'].astype(object)
both_unnamed = intersect['name2'].isna() & intersect['ST_NAME2'].isna()
intersect.loc[both_unnamed,'name_check'] = None

# calculate hausdorff between intersected feature (intersected OSM geo) and original feature (full OSM geo)
# geometry_x is the clipped piece from the overlay, geometry_y the full OSM link
merged = pd.merge(intersect,osm[['linkid','geometry']],on='linkid')
merged['hausdorff_dist'] = merged.apply(lambda row: row['geometry_x'].hausdorff_distance(row['geometry_y']),axis=1)
# keep the full OSM geometry as the active geometry of the result
merged.drop(columns=['geometry_x'],inplace=True)
merged.rename(columns={'geometry_y':'geometry'},inplace=True)
merged.set_geometry('geometry',inplace=True)

# eliminate matches where the name doesn't match
# (rows whose name_check is None, i.e. both names missing, are kept)
export = merged[merged['name_check']!=False]

# take match with minimum hausdorff distance
min_hausdorff = export.groupby('linkid')['hausdorff_dist'].idxmin()
export = export.loc[min_hausdorff]
# despite its name, `exceed` is True for matches that are WITHIN the 1500 threshold;
# the printed number is the count of matches dropped for being beyond it
exceed = export['hausdorff_dist'] < 1500
print((exceed==False).sum())
export = export[exceed]

# export to add later
cols = ['linkid','here_speed','here_lanes','geometry']
export[cols].to_file(config['network_fp']/"conflation.gpkg",layer="here")
print(export.shape[0],'final matches')

113623 initial matches


  intersect.loc[intersect['name2'].isna() & intersect['ST_NAME2'].isna(),'name_check'] = None


0
7823 final matches


In [10]:
# Coverage summary: distinct OSM links that received HERE attributes, and
# distinct HERE links that were used by at least one OSM link.
for matched_ids, all_ids, label in (
    (export['linkid'], osm['linkid'], 'assigned HERE attributes'),
    (export['LINK_ID'], here['LINK_ID'], 'assigned to OSM links'),
):
    print(matched_ids.nunique(), '/', all_ids.nunique(), label)

7823 / 8310 assigned HERE attributes
2323 / 8643 assigned to OSM links


# GDOT
Either download and store locally or read directly from the website.
- Downloads Website: https://www.dot.ga.gov/GDOT/Pages/RoadTrafficData.aspx
- Road Inventory: https://www.dot.ga.gov/DriveSmart/Data/Documents/Road_Inventory_Geodatabase.zip
- Traffic Data: https://www.dot.ga.gov/DriveSmart/Data/Documents/Traffic_GeoDatabase.zip

## Reimport OSM

In [11]:
# Raw OSM tags (road class and street name) keyed by OSM way id; geometry not needed.
cols_to_keep = ['osmid', 'highway', 'name']
osm_attrs = gpd.read_file(config['network_fp'] / "osm.gpkg", layer='raw', ignore_geometry=True)[cols_to_keep]

# Reload the network links, keep only roads, and attach the raw tags.
osm = gpd.read_file(config['network_fp'] / 'networks.gpkg', layer='osm_links')
osm = osm.loc[osm['link_type'] == 'road'].merge(osm_attrs, on='osmid')
del osm_attrs

## Import GDOT data

In [12]:
# Study area polygon, used as a spatial mask when reading the GDOT layers
studyarea = gpd.read_file(config['network_fp'] / "base_layers.gpkg", layer='studyarea_bounds')

# GDOT through lanes: reproject, expose the row number as an 'index' column
# (used later to count how many GDOT features were matched) and rename the
# lane-count field to 'lanes'.
gdot_lanes = (
    gpd.read_file(config['gdot_road_inventory_fp'], layer='THROUGH_LANE', mask=studyarea)
    .to_crs(config['projected_crs_epsg'])
    .reset_index()
    .rename(columns={'ValueNumeric': 'lanes'})
)
gdot_lanes = gdot_lanes[['index', 'RouteID', 'lanes', 'geometry']]

# GDOT traffic: same treatment, plus the truck share of AADT as a rounded percentage.
# Only the 'AADTT_CU_T' truck count is used; single-unit trucks were left out
# because the resulting percentages were too high.
gdot_traffic = (
    gpd.read_file(config['gdot_traffic_fp'], mask=studyarea)
    .to_crs(config['projected_crs_epsg'])
    .reset_index()
)
gdot_traffic['truck_pct'] = gdot_traffic['AADTT_CU_T'].div(gdot_traffic['AADT']).mul(100).round()
gdot_traffic = gdot_traffic[['index', 'RouteID', 'STATISTICS', 'AADT', 'truck_pct', 'geometry']]

  return ogr_read_info(
  crs = pyogrio.read_info(path_or_bytes).get("crs")
  return ogr_read(
  return ogr_read_info(
  return ogr_read(


In [13]:
# in the future look at whether we need to do further processing of the aadt data
# Bin AADT (expressed in thousands) into three classes. right=False makes every
# bin left-closed / right-open so the bins agree with the label text: an AADT of
# exactly 4,000 is labelled '[4k,10k)' and exactly 10,000 is labelled '[10k,inf)'.
# (With the pd.cut default of right-closed bins those boundary values landed in
# the lower class, contradicting the labels.)
# NOTE(review): rows with a missing AADT presumably end up as the string 'nan'
# after astype(str) -- confirm that is what the export step expects.
# NOTE(review): this cell overwrites gdot_traffic['AADT'] and re-merges route_type,
# so it is meant to be run once per kernel session after the import cell.
bins = [0,4,10,np.inf]
labels = ['[0,4k)','[4k,10k)','[10k,inf)']
gdot_traffic['AADT'] = pd.cut(gdot_traffic['AADT'] / 1000,bins=bins,labels=labels,right=False).astype(str)

# add route type (the 'Comments' field of the routes layer, joined on RouteID)
gdot_ownership = gpd.read_file(config['gdot_road_inventory_fp'],layer='GA_2021_ROUTES',ignore_geometry=True)[['RouteID','Comments']]
gdot_ownership.rename(columns={'Comments':'route_type'},inplace=True)

# left joins: GDOT features without a matching route keep a missing route_type
gdot_lanes = pd.merge(gdot_lanes,gdot_ownership,on='RouteID',how='left')
gdot_traffic = pd.merge(gdot_traffic,gdot_ownership,on='RouteID',how='left')

# drop interstates/ramps (features with a missing route_type are kept)
gdot_lanes = gdot_lanes[gdot_lanes['route_type'].isin(['Interstate','Ramp'])==False]
gdot_traffic = gdot_traffic[gdot_traffic['route_type'].isin(['Interstate','Ramp'])==False]

  return ogr_read(


## Conflate GDOT Lanes

In [14]:
# GDOT features are matched against whole OSM ways rather than the split network
# links, so merge all links that share an osmid into a single feature.
osm = osm.dissolve(by='osmid').reset_index()

In [15]:
# Search area: every dissolved OSM way buffered by 100 (units of the projected CRS)
osm_buffer = osm.copy()
osm_buffer.geometry = osm_buffer.buffer(100)

# Clip the GDOT lane features to the OSM buffers: one row per (GDOT feature, OSM way) pair
intersect = gpd.overlay(gdot_lanes, osm_buffer)
print(len(intersect), 'initial matches')

# Compare each clipped GDOT piece (geometry_x) with the full OSM way (geometry_y)
merged = intersect.merge(osm[['osmid', 'geometry']], on='osmid')
merged['hausdorff_dist'] = merged.apply(
    lambda row: row['geometry_x'].hausdorff_distance(row['geometry_y']),
    axis=1,
)

# Carry the OSM geometry forward as the active geometry
merged = (
    merged
    .drop(columns=['geometry_x'])
    .rename(columns={'geometry_y': 'geometry'})
    .set_geometry('geometry')
)

# For every OSM way keep the candidate with the smallest Hausdorff distance
min_hausdorff = merged.groupby('osmid')['hausdorff_dist'].idxmin()
export = merged.loc[min_hausdorff]

# `exceed` is True where the best match is within the 1500 threshold;
# the printed value counts the matches dropped for being beyond it
exceed = export['hausdorff_dist'].lt(1500)
print((~exceed).sum())
export = export[exceed]

13278 initial matches
0


## Simplify GDOT lanes into HERE lane categories


In [16]:
# Collapse the GDOT lane count into the same lanes-per-direction categories used
# for HERE (1, 2, and 3 meaning "3 or more"). The two-way rules treat 'lanes' as
# the total across both directions -- NOTE(review): confirm against the GDOT
# THROUGH_LANE documentation.

# for one way links
oneway = export['oneway']
oneway_one_lane_per_direction = (export['lanes'] == 1) & oneway
oneway_two_lanes_per_direction = (export['lanes'] == 2) & oneway
oneway_three_or_more_lanes_per_direction = (export['lanes'] >= 3) & oneway

# for two way links
twoway_one_lane_per_direction = (export['lanes'] <= 2) & (oneway == False)
twoway_two_lanes_per_direction = (export['lanes'] >= 3) & (export['lanes'] <= 4) & (oneway == False)
# >= 5 (not > 5): a two-way road with exactly 5 lanes has 3 lanes in its busier
# direction, and with "> 5" it matched none of the rules and was left as NaN
twoway_three_or_more_lanes_per_direction = (export['lanes'] >= 5) & (oneway == False)

# assign to a new column (rows with a missing lane count stay NaN)
export['gdot_lanes'] = np.nan
export.loc[oneway_one_lane_per_direction | twoway_one_lane_per_direction, 'gdot_lanes'] = 1
export.loc[oneway_two_lanes_per_direction | twoway_two_lanes_per_direction, 'gdot_lanes'] = 2
export.loc[oneway_three_or_more_lanes_per_direction | twoway_three_or_more_lanes_per_direction, 'gdot_lanes'] = 3

In [17]:
# Save the matched lane attributes; they are joined back onto the network in the export step
cols = ['osmid', 'route_type', 'gdot_lanes', 'geometry']
export.loc[:, cols].to_file(config['network_fp'] / "conflation.gpkg", layer="gdot_lanes")
print(len(export), 'final matches')

2259 final matches


In [18]:
# Coverage summary: distinct OSM ways that received GDOT lane attributes, and
# distinct GDOT lane features that were used by at least one OSM way.
for matched_ids, all_ids, label in (
    (export['osmid'], osm['osmid'], 'assigned GDOT attributes'),
    (export['index'], gdot_lanes['index'], 'assigned to OSM links'),
):
    print(matched_ids.nunique(), '/', all_ids.nunique(), label)

2259 / 2263 assigned GDOT attributes
869 / 1517 assigned to OSM links


## Conflate GDOT Traffic

In [19]:
# Search area: every dissolved OSM way buffered by 100 (units of the projected CRS)
osm_buffer = osm.copy()
osm_buffer.geometry = osm_buffer.buffer(100)

# Clip the GDOT traffic features to the OSM buffers: one row per (GDOT feature, OSM way) pair
intersect = gpd.overlay(gdot_traffic, osm_buffer)
print(len(intersect), 'initial matches')

# Compare each clipped GDOT piece (geometry_x) with the full OSM way (geometry_y)
merged = intersect.merge(osm[['osmid', 'geometry']], on='osmid')
merged['hausdorff_dist'] = merged.apply(
    lambda row: row['geometry_x'].hausdorff_distance(row['geometry_y']),
    axis=1,
)

# Carry the OSM geometry forward as the active geometry
merged = (
    merged
    .drop(columns=['geometry_x'])
    .rename(columns={'geometry_y': 'geometry'})
    .set_geometry('geometry')
)

# For every OSM way keep the candidate with the smallest Hausdorff distance
min_hausdorff = merged.groupby('osmid')['hausdorff_dist'].idxmin()
export2 = merged.loc[min_hausdorff]

# `exceed` is True where the best match is within the 1500 threshold;
# the printed value counts the matches dropped for being beyond it
exceed = export2['hausdorff_dist'].lt(1500)
print((~exceed).sum())
export2 = export2[exceed]

# Save the matched traffic attributes; they are joined back onto the network in the export step
cols = ['osmid', 'AADT', 'truck_pct', 'geometry']
export2.loc[:, cols].to_file(config['network_fp'] / "conflation.gpkg", layer="gdot_traffic")
print(len(export2), 'final matches')

11951 initial matches
0
2259 final matches


In [20]:
# Coverage summary: distinct OSM ways that received GDOT traffic attributes, and
# distinct GDOT traffic features that were used by at least one OSM way.
for matched_ids, all_ids, label in (
    (export2['osmid'], osm['osmid'], 'assigned GDOT traffic attributes'),
    (export2['index'], gdot_traffic['index'], 'assigned to OSM links'),
):
    print(matched_ids.nunique(), '/', all_ids.nunique(), label)

2259 / 2263 assigned GDOT traffic attributes
739 / 1246 assigned to OSM links
