# Step 2 Network Reconciliation
---
This version of the notebook adds network attributes from other networks (HERE and GDOT) to the OSM network.

## Import Modules

In [1]:
# Third-party and standard-library dependencies used throughout the notebook
from pathlib import Path
import geopandas as gpd
import pandas as pd
import numpy as np

import networkx as nx
from tqdm import tqdm

# Put the parent of the current working directory at the front of the import
# path so that file_structure_setup and the src package below can be imported
# (presumably the notebook is launched from a subfolder of the project -- TODO confirm)
import sys
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
# config is indexed by string keys later in the notebook
# (e.g. 'network_fp', 'here_fp', 'projected_crs_epsg')
config = file_structure_setup.filepaths()

# Project helpers; conflation_tools supplies remove_suffix and name_check used below
import src.conflation_tools as conflation_tools
import src.add_attributes as add_attributes

# HERE Attributes (ignore if HERE data not available)
This section adds two HERE road attributes to the OSM links: the speed limit category and the number-of-lanes category.


## Import OSM

In [2]:
# Load the OSM links layer and restrict it to road links
osm = gpd.read_file(config['network_fp'] / 'networks.gpkg', layer='osm_links')
osm = osm.loc[osm['link_type'] == 'road']

# Read the raw OSM attribute table (no geometry) and join the highway tag and
# street name back onto the links by osmid
osm_attrs = gpd.read_file(
    config['osmdwnld_fp'] / f"osm_{config['geofabrik_year']}.gpkg",
    layer='raw',
    ignore_geometry=True,
)
cols_to_keep = ['osmid', 'highway', 'name']
osm = pd.merge(osm, osm_attrs[cols_to_keep], on='osmid')
del osm_attrs

# name2 holds the street name after conflation_tools.remove_suffix; it is the
# value compared against the HERE street name during conflation
osm['name2'] = osm['name'].apply(conflation_tools.remove_suffix)

## Import HERE


In [3]:
# Read the HERE links that intersect the study area, then reproject them
studyarea = gpd.read_file(config['network_fp'] / "base_layers.gpkg", layer='studyarea_bounds')
here = gpd.read_file(Path(config['here_fp']), mask=studyarea)
here = here.to_crs(config['projected_crs_epsg'])

# Normalise attribute formats: lowercase street names, integer lane category
here['ST_NM_BASE'] = here['ST_NM_BASE'].str.lower()
here['LANE_CAT'] = here['LANE_CAT'].astype(int)

# Keep auto-accessible links with a speed category below 7 that are neither
# controlled access nor ramps (i.e. no interstates)
is_controlled = here['CONTRACC'].str.contains('Y')
is_ramp = here['RAMP'].str.contains('Y')
controlled_access = is_controlled | is_ramp
auto_allowed = here['AR_AUTO'].str.contains('Y')
road = auto_allowed & (here['SPEED_CAT'].astype(int) < 7)
here = here[(controlled_access == False) & road]

#create unequal lanes column
#here['unequal_lanes'] = (here['FROM_LANES'] != here['TO_LANES']) & (here['FROM_LANES']!=0) & (here['TO_LANES']!=0)

### Modify the HERE lanes attributes
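The HERE lane category (`LANE_CAT`) is supposed to represent the number of lanes per direction, but for one-way streets it occasionally does not. Where the per-direction lane counts (`FROM_LANES`/`TO_LANES`) are available, they are used to override the category.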


In [4]:
# Largest of the two directional lane counts on each HERE link
max_directional_lanes = here[['FROM_LANES', 'TO_LANES']].max(axis=1)

# Only links reporting a lane count in at least one direction are overridden
has_lane_count = (here['TO_LANES'] != 0) | (here['FROM_LANES'] != 0)

# Start from HERE's own lane category, then overwrite it with 1, 2 or 3 (three
# or more) wherever the directional counts are available
here['here_lanes'] = here['LANE_CAT']
lane_overrides = (
    (1, max_directional_lanes == 1),
    (2, max_directional_lanes == 2),
    (3, max_directional_lanes >= 3),
)
for lane_category, lane_match in lane_overrides:
    here.loc[has_lane_count & lane_match, 'here_lanes'] = lane_category

# Uncomment to inspect the links whose category was changed
#here.loc[here['LANE_CAT'] != here['here_lanes'],['ST_NAME','FROM_LANES','TO_LANES','LANE_CAT','here_lanes','geometry']].explore()

### Modify the HERE speed limit attribute

In [5]:
# here['SPEED_CAT'].value_counts()
# Based on the distribution of speed categories, four bins make sense.
# HERE notes that these categories do not necessarily represent the legal speed limit.
# Keys are the HERE SPEED_CAT codes as strings; values are the new bin number.
new_here_speed_bins = {
    # bin 4: 41+ MPH (HERE: > 80, 65-80, 55-64 and 41-54 MPH)
    **dict.fromkeys(('1', '2', '3', '4'), 4),
    # bin 3: 31-40 MPH
    '5': 3,
    # bin 2: 21-30 MPH
    '6': 2,
    # bin 1: <= 20 MPH (HERE: 6-20 and < 6 MPH)
    **dict.fromkeys(('7', '8'), 1),
}
here['here_speed'] = here['SPEED_CAT'].map(new_here_speed_bins)

In [6]:
# Trim HERE down to the link id, street name, the two derived attributes and geometry
cols_to_keep = [
    'LINK_ID',
    'ST_NM_BASE',
    'here_speed',
    'here_lanes',
    'geometry',
]
here = here[cols_to_keep]

## Conflate HERE Speed and Lanes

In [7]:
# Buffer every HERE link by 100 (in the units of the projected CRS) to form match zones
here_buffer = here.copy()
here_buffer.geometry = here_buffer.buffer(100)

# Clip the OSM links to the buffers; each output row is one candidate OSM/HERE pairing
intersect = gpd.overlay(osm, here_buffer)
print(intersect.shape[0],'initial matches')

# Compare street names. When neither side has a name the result is set to None
# instead of False so that the pair is not rejected by the name filter below
intersect['name_check'] = intersect.apply(
    lambda row: conflation_tools.name_check(row['ST_NM_BASE'], row['name2']),
    axis=1,
)
both_unnamed = intersect['name2'].isna() & intersect['ST_NM_BASE'].isna()
intersect.loc[both_unnamed, 'name_check'] = None

# Hausdorff distance between the clipped piece (geometry_x) and the full
# OSM link it came from (geometry_y); the full link geometry is the one kept
merged = pd.merge(intersect, osm[['osm_linkid', 'geometry']], on='osm_linkid')
merged['hausdorff_dist'] = merged.apply(
    lambda row: row['geometry_x'].hausdorff_distance(row['geometry_y']),
    axis=1,
)
merged.drop(columns=['geometry_x'], inplace=True)
merged.rename(columns={'geometry_y': 'geometry'}, inplace=True)
merged.set_geometry('geometry', inplace=True)

# Discard candidates whose name check is explicitly False
export = merged[merged['name_check'] != False]

# For each OSM link keep the candidate with the smallest Hausdorff distance,
# then drop (and report the number of) matches at or beyond 1500
closest_idx = export.groupby('osm_linkid')['hausdorff_dist'].idxmin()
export = export.loc[closest_idx]
within_limit = export['hausdorff_dist'] < 1500
print((~within_limit).sum())
export = export[within_limit]

# Save the matched attributes so they can be added to the network later
cols = ['osm_linkid', 'here_speed', 'here_lanes', 'geometry']
export[cols].to_file(config['network_fp']/"conflation.gpkg",layer="here")
print(export.shape[0],'final matches')

314862 initial matches
11
58338 final matches


In [8]:
# Coverage in both directions: OSM links that received HERE attributes, and
# HERE links that ended up assigned to at least one OSM link
osm_matched, osm_total = export['osm_linkid'].nunique(), osm['osm_linkid'].nunique()
here_matched, here_total = export['LINK_ID'].nunique(), here['LINK_ID'].nunique()
print(osm_matched,'/',osm_total,'assigned HERE attributes')
print(here_matched,'/',here_total,'assigned to OSM links')

58338 / 65292 assigned HERE attributes
35856 / 73866 assigned to OSM links


# GDOT
Either download and store locally or read directly from the website.
- Downloads Website: https://www.dot.ga.gov/GDOT/Pages/RoadTrafficData.aspx
- Road Inventory: https://www.dot.ga.gov/DriveSmart/Data/Documents/Road_Inventory_Geodatabase.zip
- Traffic Data: https://www.dot.ga.gov/DriveSmart/Data/Documents/Traffic_GeoDatabase.zip

## Reimport OSM

In [9]:
# Reload the OSM road links from disk for the GDOT conflation
osm = gpd.read_file(config['network_fp'] / 'networks.gpkg', layer='osm_links')
osm = osm.loc[osm['link_type'] == 'road']

# Join the raw OSM tags needed here (including oneway) back on by osmid
cols_to_keep = ['osmid', 'highway', 'oneway', 'name']
osm_attrs = gpd.read_file(
    config['osmdwnld_fp'] / f"osm_{config['geofabrik_year']}.gpkg",
    layer='raw',
    ignore_geometry=True,
)[cols_to_keep]
osm = pd.merge(osm, osm_attrs, on='osmid')
del osm_attrs

## Import GDOT data

In [11]:
studyarea = gpd.read_file(config['network_fp'] / "base_layers.gpkg", layer='studyarea_bounds')

# GDOT through-lane layer, clipped to the study area and reprojected; the
# positional index is kept as an 'index' column to identify GDOT features later
gdot_lanes = (
    gpd.read_file(config['gdot_road_inventory_fp'], layer='THROUGH_LANE', mask=studyarea)
    .to_crs(config['projected_crs_epsg'])
    .reset_index()
    .rename(columns={'ValueNumeric': 'lanes'})
)
gdot_lanes = gdot_lanes[['index', 'RouteID', 'lanes', 'geometry']]

# GDOT traffic layer, processed the same way
gdot_traffic = (
    gpd.read_file(config['gdot_traffic_fp'], mask=studyarea)
    .to_crs(config['projected_crs_epsg'])
    .reset_index()
)
# Truck percentage uses AADTT_CU_T only; single unit trucks were left out
# because the resulting percentages were too high
truck_share = gdot_traffic['AADTT_CU_T'] / gdot_traffic['AADT']
gdot_traffic['truck_pct'] = (truck_share * 100).round()
gdot_traffic = gdot_traffic[['index', 'RouteID', 'STATISTICS', 'AADT', 'truck_pct', 'geometry']]
# in the future look at whether we need to do further processing of the aadt data

# Route type lookup: the routes layer's Comments field is used as route_type
gdot_ownership = gpd.read_file(
    config['gdot_road_inventory_fp'],
    layer='GA_2021_ROUTES',
    ignore_geometry=True,
)
gdot_ownership = gdot_ownership[['RouteID', 'Comments']].rename(columns={'Comments': 'route_type'})

gdot_lanes = pd.merge(gdot_lanes, gdot_ownership, on='RouteID', how='left')
gdot_traffic = pd.merge(gdot_traffic, gdot_ownership, on='RouteID', how='left')

# Drop interstates and ramps from both layers
excluded_route_types = ['Interstate', 'Ramp']
gdot_lanes = gdot_lanes[~gdot_lanes['route_type'].isin(excluded_route_types)]
gdot_traffic = gdot_traffic[~gdot_traffic['route_type'].isin(excluded_route_types)]

## Conflate GDOT Lanes

In [12]:
# GDOT features match better against whole OSM ways than against the split
# network links, so merge all links sharing an osmid into a single feature
osm = osm.dissolve(by='osmid').reset_index()

In [13]:
# Buffer every OSM way by 100 (in the units of the projected CRS) to form match zones
osm_buffer = osm.copy()
osm_buffer.geometry = osm_buffer.buffer(100)

# Clip the GDOT lane features to the OSM buffers; each row is a candidate pairing
intersect = gpd.overlay(gdot_lanes, osm_buffer)
print(intersect.shape[0],'initial matches')

# Hausdorff distance between the clipped GDOT piece (geometry_x) and the full
# OSM way (geometry_y); the OSM geometry is the one kept
merged = pd.merge(intersect, osm[['osmid', 'geometry']], on='osmid')
merged['hausdorff_dist'] = merged.apply(
    lambda row: row['geometry_x'].hausdorff_distance(row['geometry_y']),
    axis=1,
)
merged.drop(columns=['geometry_x'], inplace=True)
merged.rename(columns={'geometry_y': 'geometry'}, inplace=True)
merged.set_geometry('geometry', inplace=True)

# For each OSM way keep the candidate with the smallest Hausdorff distance,
# then drop (and report the number of) matches at or beyond 1500
closest_idx = merged.groupby('osmid')['hausdorff_dist'].idxmin()
export = merged.loc[closest_idx]
within_limit = export['hausdorff_dist'] < 1500
print((~within_limit).sum())
export = export[within_limit]

63074 initial matches
133


### simplify gdot lanes into lane categories


In [14]:
# Collapse the GDOT lane count into lanes-per-direction categories:
#   1 = one lane, 2 = two lanes, 3 = three or more lanes per direction.
# One-way links use the count directly. Two-way links are binned as if the
# count covers both directions (1-2 -> 1, 3-4 -> 2, 5+ -> 3).
# NOTE(review): assumes 'lanes' is a both-direction total on two-way links --
# confirm against the GDOT road inventory documentation.

# export was produced by boolean filtering; take an explicit copy so adding a
# column below does not trigger SettingWithCopyWarning
export = export.copy()

lane_count = export['lanes']

# for one way links
oneway = export['oneway'] == True
oneway_one_lane_per_direction = (lane_count == 1) & oneway
oneway_two_lanes_per_direction = (lane_count == 2) & oneway
oneway_three_or_more_lanes_per_direction = (lane_count >= 3) & oneway

# for two way links
twoway = oneway == False
twoway_one_lane_per_direction = (lane_count <= 2) & twoway
twoway_two_lanes_per_direction = (lane_count >= 3) & (lane_count <= 4) & twoway
# >= 5 (previously > 5, which left two-way links with exactly 5 lanes uncategorized)
twoway_three_or_more_lanes_per_direction = (lane_count >= 5) & twoway

# assign to a new column; links matching none of the masks stay NaN
export['gdot_lanes'] = np.nan
export.loc[oneway_one_lane_per_direction | twoway_one_lane_per_direction, 'gdot_lanes'] = 1
export.loc[oneway_two_lanes_per_direction | twoway_two_lanes_per_direction, 'gdot_lanes'] = 2
export.loc[oneway_three_or_more_lanes_per_direction | twoway_three_or_more_lanes_per_direction, 'gdot_lanes'] = 3

In [15]:
# Write the matched GDOT lane attributes to the conflation geopackage; they are
# added back to the network in the export network step
cols = ['osmid', 'route_type', 'gdot_lanes', 'geometry']
gdot_lanes_layer = export[cols]
gdot_lanes_layer.to_file(config['network_fp']/"conflation.gpkg",layer="gdot_lanes")
print(export.shape[0],'final matches')

18551 final matches


In [16]:
# Coverage in both directions: OSM ways that received GDOT lane attributes, and
# GDOT lane features that ended up assigned to at least one OSM way
osm_matched, osm_total = export['osmid'].nunique(), osm['osmid'].nunique()
gdot_matched, gdot_total = export['index'].nunique(), gdot_lanes['index'].nunique()
print(osm_matched,'/',osm_total,'assigned GDOT attributes')
print(gdot_matched,'/',gdot_total,'assigned to OSM links')

18551 / 20176 assigned GDOT attributes
8935 / 13410 assigned to OSM links


## Conflate GDOT Traffic

In [17]:
# Buffer every OSM way by 100 (in the units of the projected CRS) to form match zones
osm_buffer = osm.copy()
osm_buffer.geometry = osm_buffer.buffer(100)

# Clip the GDOT traffic features to the OSM buffers; each row is a candidate pairing
intersect = gpd.overlay(gdot_traffic, osm_buffer)
print(intersect.shape[0],'initial matches')

# Hausdorff distance between the clipped GDOT piece (geometry_x) and the full
# OSM way (geometry_y); the OSM geometry is the one kept
merged = pd.merge(intersect, osm[['osmid', 'geometry']], on='osmid')
merged['hausdorff_dist'] = merged.apply(
    lambda row: row['geometry_x'].hausdorff_distance(row['geometry_y']),
    axis=1,
)
merged.drop(columns=['geometry_x'], inplace=True)
merged.rename(columns={'geometry_y': 'geometry'}, inplace=True)
merged.set_geometry('geometry', inplace=True)

# For each OSM way keep the candidate with the smallest Hausdorff distance,
# then drop (and report the number of) matches at or beyond 1500
closest_idx = merged.groupby('osmid')['hausdorff_dist'].idxmin()
export2 = merged.loc[closest_idx]
within_limit = export2['hausdorff_dist'] < 1500
print((~within_limit).sum())
export2 = export2[within_limit]

# Write the matched traffic attributes to the conflation geopackage; they are
# added back to the network in the export network step
cols = ['osmid', 'AADT', 'truck_pct', 'geometry']
export2[cols].to_file(config['network_fp']/"conflation.gpkg",layer="gdot_traffic")
print(export2.shape[0],'final matches')

61006 initial matches
102
18578 final matches


In [18]:
# Coverage in both directions: OSM ways that received GDOT traffic attributes,
# and GDOT traffic features that ended up assigned to at least one OSM way
osm_matched, osm_total = export2['osmid'].nunique(), osm['osmid'].nunique()
traffic_matched, traffic_total = export2['index'].nunique(), gdot_traffic['index'].nunique()
print(osm_matched,'/',osm_total,'assigned GDOT traffic attributes')
print(traffic_matched,'/',traffic_total,'assigned to OSM links')

18578 / 20176 assigned GDOT traffic attributes
8358 / 12017 assigned to OSM links


In [19]:
#export.hist('AADT',bins=100)
#export['AADT'].describe()

In [20]:
#arc = gpd.read_file(config['abm_fp'],layer='DAILY')

In [None]:
# # OLD CODE
# ## Step 2 Network Reconciliation (In Development, skip for now)
# ---
# Use this notebook to setup a semi-automated reconciliation process between networks using functions available in 'conflation_tools.py' and 'network_reconcile.py.'

# In general, you want to select one network to act as the base network (ground-truth) and add network data/attributes from the other networks.

# These are the main functions in the conflation_tools module (type help(function_name) for a detailed description):
# - match_nodes: finds node pairs between base and join network
# - split_lines_create_points: Uses points from the join network to split links in the base network
# - add_split_links: add the split links

# From network_filter
# - add_ref_ids: adds new reference ids from the nodes layer

# Once finished reconciling, network can be exported for further manual reconciling or it can be prepped for network routing in BikewaySim.

# type "help(insert_name_of_function)" to get more information about what the function does.
# ## Adding and processing attribute data
# These functions add in relevant attributes from the '.pkl' created in Step 1, or process supplemental data such as bicycle inventories.

# The following three functions add attribute data back into the network and pre-processes it to match up with the desired impedance columns. These are custom network specific functions, so if adding a new network, will need to make another specific function.
# - add_osm_attr
# - add_here_attr
# - add_abm_attr
# # GDOT Project
# ---
# ### Add HERE road data to the OSM road data
# # project directory
# project_dir = Path.home() / 'Documents/BikewaySimData/Projects/gdot/networks'
# osm_links = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_links')
# osm_nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')

# osm_links = add_attributes.add_osm_attr(osm_links,project_dir / 'osm_attr.pkl')
# # filter to roads
# osm_road_links = osm_links[osm_links['link_type']=='road']
# #osm_road_links = add_attributes.add_osm_attr(osm_road_links,project_dir / 'osm_attr.pkl')
# osm_road_nodes = osm_nodes[osm_nodes['osm_N'].isin(osm_road_links['osm_A'].append(osm_road_links['osm_B']))]
# #import here road layer
# here_links = gpd.read_file(project_dir / 'filtered.gpkg',layer='here_links')
# here_road_links = here_links[here_links['link_type']=='road']

# #add attributes back
# here_road_links = add_attributes.add_here_attr(here_road_links,project_dir / 'here_attr.pkl')
# #function for adding attributes of one network network to another
# road_links, overlapping = conflation_tools.add_attributes(
#     osm_road_links, here_road_links, 'here', 100, 5, True)
# This block modifies the street name attribute to compare the OSM street name vs the HERE assigned street name
# street_names = dict(zip(osm_road_links['temp_ID'],osm_road_links['name']))
# overlapping['name'] = overlapping['temp_ID'].map(street_names)

# overlapping['match name'] = overlapping['ST_NAME'].apply(lambda row: conflation_tools.simplify_names(row))
# overlapping['name'] = overlapping['name'].str.lower()

# ## Use these columns to examine the match quality
# #check name
# overlapping['name_check'] = overlapping['match name'] == overlapping['name']

# #check overlap
# overlapping['overlap_check'] = overlapping['percent_overlap'] > 0.9

# #check bearing diff
# overlapping['bearing_check'] = overlapping['bearing_diff'] < 5

# #final check
# overlapping['final_check'] = overlapping.apply(lambda row: row['name_check']+row['overlap_check']+row['bearing_check'],axis=1)

# #drop 0s
# overlapping = overlapping[overlapping['final_check'] >= 1]

# #only keep max for each max
# keep = overlapping.groupby('temp_ID')['final_check'].idxmax().to_list()
# keep = overlapping.loc[keep]
# ## Add the here link id and export
# #only keep here cols
# remove_cols = set(osm_road_links.columns.tolist())
# remove_cols.remove('temp_ID')
# remove_cols = remove_cols & set(keep.columns.tolist())
# keep.drop(columns=remove_cols,inplace=True)
# # replace temp_id with the linkid
# replace_temp_id = dict(zip(osm_road_links['temp_ID'],osm_road_links['osm_linkid']))
# keep['osm_linkid'] = keep['temp_ID'].map(replace_temp_id)
# keep.drop(columns=['temp_ID'],inplace=True)
# osm_links = pd.merge(osm_links,keep,on='osm_linkid')

# # osm_links.rename(columns={'osm_A':'A','osm_B':'B','osm_linkid':'linkid'},inplace=True)
# # osm_nodes.rename(columns={'osm_N':'N'},inplace=True)
# osm_links.to_file(project_dir/'reconciled.gpkg',layer='links')
# #osm_nodes.to_file(project_dir/'reconciled.gpkg',layer='nodes')
# # #import bike layer
# # bike_links = merged[merged['link_type']=='bike']
# # bike_nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')
# # bike_nodes = bike_nodes[bike_nodes['osm_N'].isin(bike_links['osm_A'].append(bike_links['osm_B']))]
# # Hold off on this until the right before routing, this should only be for speeding up routing
# #simplify the graph by removing interstitial nodes
# #merged = conflation_tools.remove_interstitial_nodes(osm_links,'osm_A','osm_B','osmid','osm_linkid',ignore_id=False)
# # #Re-calculate the azimuth/bearing
# # import pyproj
# # prev_crs = merged.crs
# # merged.to_crs('epsg:4326',inplace=True)
# # merged[['fwd_azimuth','bck_azimuth']] = merged.apply(lambda row: modeling_turns.find_azimuth(row), axis=1)
# # merged.to_crs(prev_crs,inplace=True)

# # import network_filter

# # nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')

# # #reassign link node ids
# # ref_nodes_added = network_filter.add_ref_ids(merged,nodes,'osm')
# # #get node count to find dead ends
# # bike_nodes['num_links'] = bike_nodes['osm_N'].map(pd.concat([bike_links['osm_A'],bike_links['osm_B']],ignore_index=True).value_counts())
# # dead_ends = bike_nodes[bike_nodes['num_links']==1]

# # #remove dead ends already connected to road network
# # dead_ends = dead_ends[-dead_ends['osm_N'].isin(osm_road_nodes['osm_N'])]

# # #use full network to fix disconnected links from dead ends (road crossing breaks etc)
# # osm_links = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_links')
# # osm_nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')
# # connectors = conflation_tools.find_path(osm_links,osm_nodes,'osm',osm_road_nodes,dead_ends,100)
# # connectors.explore()
# # links = pd.concat([osm_road_links,connectors,bike_links],ignore_index=True).drop_duplicates()
# # nodes = pd.concat([osm_road_nodes,bike_nodes],ignore_index=True)
# # #create unique link id column (make sure to find the max linkid using the full dataset)
# # max_linkid = int(links['osm_linkid'].max())
# # links.loc[links['osm_linkid'].isna(),'osm_linkid'] = range(max_linkid+1,max_linkid+links['osm_linkid'].isna().sum()+1)
# ## Remove isolated nodes/links
# # before_links = links.shape[0]
# # before_nodes = nodes.shape[0]

# # #create undirected graph
# # G = nx.Graph()  # create directed graph
# # for row in links[['osm_A','osm_B']].itertuples(index=False):
# #     # forward graph, time stored as minutes
# #     G.add_edges_from([(row[0],row[1])])

# # #only keep largest component
# # largest_cc = max(nx.connected_components(G), key=len)

# # #get nodes
# # nodes = nodes[nodes['osm_N'].isin(largest_cc)]
# # #get links
# # links = links[links['osm_A'].isin(largest_cc) & links['osm_B'].isin(largest_cc)]

# # print('Links removed:',before_links-links.shape[0],'Nodes removed:',before_nodes-nodes.shape[0])
# # links.rename(columns={'osm_A':'A','osm_B':'B','osm_linkid':'linkid'},inplace=True)
# # nodes.rename(columns={'osm_N':'N'},inplace=True)
# # #export
# # links.to_file(project_dir / 'reconciled_network.gpkg',layer='links')
# # nodes.to_file(project_dir / 'reconciled_network.gpkg',layer='nodes')
# # Assessing Bike-Transit Accessibility
# ---
# The code blocks below this are for creating a network to use for transitsim. Only uses OSM.
# # # project directory
# # project_dir = Path.home() / 'Documents/TransitSimData/Data/networks'

# # #import osm road layer
# # osm_road_links = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_links')
# # osm_road_links = osm_road_links[osm_road_links['link_type']=='road']
# # osm_road_nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')
# # osm_road_nodes = osm_road_nodes[osm_road_nodes['osm_N'].isin(osm_road_links['osm_A'].append(osm_road_links['osm_B']))]

# # #import bike layer
# # bike_links = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_links')
# # bike_links = bike_links[bike_links['link_type']=='bike']
# # bike_nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')
# # bike_nodes = bike_nodes[bike_nodes['osm_N'].isin(bike_links['osm_A'].append(bike_links['osm_B']))]

# # #get node count to find dead ends
# # bike_nodes['num_links'] = bike_nodes['osm_N'].map(pd.concat([bike_links['osm_A'],bike_links['osm_B']],ignore_index=True).value_counts())
# # dead_ends = bike_nodes[bike_nodes['num_links']==1]

# # #remove dead ends already connected to road network
# # dead_ends = dead_ends[-dead_ends['osm_N'].isin(osm_road_nodes['osm_N'])]

# # #use full network to fix disconnected links from dead ends (road crossing breaks etc)
# # #connectors are assigned a new unique link id before export
# # osm_links = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_links')
# # osm_nodes = gpd.read_file(project_dir / 'filtered.gpkg',layer='osm_nodes')
# # connectors = conflation_tools.find_path(osm_links,osm_nodes,'osm',osm_road_nodes,dead_ends,50)

# # #add connectors, bike links, and the new nodes
# # links = pd.concat([osm_road_links,connectors,bike_links],ignore_index=True).drop_duplicates()
# # nodes = pd.concat([osm_road_nodes,bike_nodes],ignore_index=True).drop_duplicates()
# # #create unique link id column (make sure to find the max linkid using the full dataset)
# # max_linkid = int(osm_links['osm_linkid'].max())
# # links.loc[links['osm_linkid'].isna(),'osm_linkid'] = range(max_linkid+1,max_linkid+links['osm_linkid'].isna().sum()+1)
# # #add attributes
# # links = add_osm_attr(links, project_dir / 'osm_attr.pkl')

# # #export
# # links.to_file(project_dir / 'reconciled_network.gpkg',layer='links')
# # nodes.to_file(project_dir / 'reconciled_network.gpkg',layer='nodes')
