# Bicycle Facilities Reconciliation
This notebook determines the approximate build date (i.e., when a facility became available for use) of bike facilities. See `readme.md` for a full explanation.

- TODO: also conflate OSM off-street bike facilities with on-street ones

In [None]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import bikewaysim.network.matching_script as matching_script
import numpy as np


In [None]:
# Show the path returned by pyprojroot's `here()` (the detected project root)
# as a quick sanity check; the value is only displayed, not stored.
from pyprojroot import here
here()


In [None]:
import pyprojroot


# Locate the project root by searching for a directory that has a `.git` subdirectory.
# NOTE(review): `root` is not referenced by any other cell visible in this notebook.
root = pyprojroot.find_root(pyprojroot.has_dir(".git"))
# NOTE(review): the cells below index `config[...]`, but the only assignment to
# `config` visible here is the commented-out legacy setup that follows. On a fresh
# kernel (Restart & Run All) the first `config[...]` access raises NameError unless
# `config` is defined some other way -- confirm how it is meant to be loaded.
# import sys
# sys.path.insert(0,str(Path.cwd().parent))
# import file_structure_setup
# config = file_structure_setup.filepaths()

## Import the 2023 OSM, ARC, and CoA bicycle networks

In [None]:
# Read the CoA, ARC and OSM cycleway layers from the shared reference GeoPackage.
# The layers are read in the order listed and unpacked into the three frames.
coa, arc, cycleways_osm = (
    gpd.read_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer=layer_name)
    for layer_name in ('coa','arc','osm_cycleways_network')
)
# cycleways_osm = cycleways_osm.loc[cycleways_osm['year']=='2023',['osmid','highway','name','facility_fwd','facility_rev','geometry']]

## Experimenting with adding multi-use paths and cycletracks to adjacent roads 

In [None]:
# TODO come back to this part at the end, focus on infra now
# multi_use_paths = cycleways_osm[cycleways_osm['facility_fwd']=='multi use path'].copy()
# multi_use_paths.geometry = multi_use_paths.buffer(100)
# # multi_use_paths.explore(popup=True)
# osmid = 123367987
# smt = multi_use_paths[multi_use_paths['osmid']==osmid]
# links = gpd.read_file(config['network_fp'] /'networks.gpkg',layer='osm_links')
# nodes = gpd.read_file(config['network_fp'] /'networks.gpkg',layer='osm_nodes')
# #try out the raw version instead
# links = links[links['link_type']=='road']

- identify features within 100 feet
- project start and end point of network link onto bike path
- clip line accordingly
- calculate the Hausdorff distance to judge similarity between features

In [None]:
#todo turn this into a loop and see what the outputs look like
# osm_f = 1125692554#1125692531

# #1125692544#1125692426
# test = links[links['osm_linkid']==osm_f]
# start = list(test.geometry.item().coords)[0]
# end = list(test.geometry.item().coords)[-1]

# from shapely.ops import Point, LineString


# start = Point(start)
# end = Point(end)

# start = smt_geo.project(start)
# end = smt_geo.project(end)

# print(start)
# print(end)

# start_geo = smt_geo.interpolate(start)
# end_geo = smt_geo.interpolate(end)

# import numpy as np

# points = [smt_geo.project(Point(x)) for x in list(smt_geo.coords)]

# A = (np.array(points) > start).argmax()
# B = (np.array(points) < end).argmin()
# points = list(start_geo.coords) + list(smt_geo.coords)[A:B] + list(end_geo.coords)
# #start
# LineString(points).hausdorff_distance(test.geometry.item())


# candidate = links[links.geometry.intersects(smt.geometry.item())]#.explore()

# smt_geo = cycleways_osm.loc[cycleways_osm['osmid']==osmid,'geometry'].item()

# candidate['hausdorff_distance'] = candidate['geometry'].apply(lambda x: x.hausdorff_distance(smt_geo))

# m = smt.explore(style_kwds={'color':'red','fillOpacity':0})
# candidate.explore(m=m,popup=True)

# Suggested Matches Script
Buffers the OSM cycleways and intersects them with the other bicycle inventory files, then checks the names and the similarity of the intersected features against the original features. Results should be manually verified in QGIS.

### Settings

In [None]:
# Matching thresholds (the `_ft` suffix indicates feet).

# The OSM cycleways are buffered by this much before being intersected.
buffer_ft = 100

# Candidate matches with a Hausdorff distance above this value are rejected.
max_hausdorff_dist_ft = 1500

In [None]:
# Display the OSM cycleways layer for a visual check before running the matching step.
cycleways_osm

In [None]:
# Reload the matching module so that edits made to it are picked up
# without restarting the kernel.
from importlib import reload
reload(matching_script)

# Suggest matches between the OSM cycleways and each reference inventory,
# using the buffer and Hausdorff thresholds from the settings cell.
coa_overlap = matching_script.suggested_matches(
    cycleways_osm, coa, 'coa', buffer_ft, max_hausdorff_dist_ft, 'osm_linkid'
)
arc_overlap = matching_script.suggested_matches(
    cycleways_osm, arc, 'arc', buffer_ft, max_hausdorff_dist_ft, 'osm_linkid'
)

# Summarise the `auto_match` column: missing = undecided, 1 = accept, 0 = reject.
print('Total Matches:')
print('coa:', len(coa_overlap), 'arc:', len(arc_overlap))
print('Undecided:')
print('coa:', coa_overlap['auto_match'].isna().sum(), 'arc:', arc_overlap['auto_match'].isna().sum())
print('Accept:')
print('coa:', coa_overlap['auto_match'].eq(1).sum(), 'arc:', arc_overlap['auto_match'].eq(1).sum())
print('Reject:')
print('coa:', coa_overlap['auto_match'].eq(0).sum(), 'arc:', arc_overlap['auto_match'].eq(0).sum())

# Drop the automatically rejected matches; rows whose `auto_match` is missing
# (undecided) compare as "not equal to False" and are therefore kept.
coa_overlap = coa_overlap.loc[coa_overlap['auto_match'].ne(False)]
arc_overlap = arc_overlap.loc[arc_overlap['auto_match'].ne(False)]

# Features that should be forced to "no match".
# TODO add code to remove these -- the lists are not applied anywhere yet.
coa_exceptions = [426, 1]
arc_exceptions = []

In [None]:
# Write the suggested matches to disk, one GeoPackage layer per reference inventory.
coa_overlap.to_file(
    config['bicycle_facilities_fp']/'suggested_matches.gpkg',
    layer='coa',
)
arc_overlap.to_file(
    config['bicycle_facilities_fp']/'suggested_matches.gpkg',
    layer='arc',
)

# Missing Infrastructure (ID version)
Find the coa/arc features that were not considered in the overlap step

In [None]:
# Read the suggested matches back from disk (presumably after they have been
# reviewed by hand -- TODO confirm), one layer per reference inventory.
coa_overlap, arc_overlap = (
    gpd.read_file(config['bicycle_facilities_fp']/'suggested_matches.gpkg',layer=layer_name)
    for layer_name in ('coa','arc')
)

In [None]:
# Distinct reference-feature ids that appear anywhere in the suggested matches
# (order of first appearance is preserved).
suggested_coa_ids = coa_overlap['coa_id'].drop_duplicates().tolist()
suggested_arc_ids = arc_overlap['arc_id'].drop_duplicates().tolist()

In [None]:
# Re-read the unmodified CoA and ARC reference layers from the reference GeoPackage.
coa, arc = (
    gpd.read_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer=layer_name)
    for layer_name in ('coa','arc')
)
#garber = gpd.read_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer='garber')

In [None]:
# Reference features whose id never shows up among the suggested matches,
# i.e. features the overlap step did not cover. Copies are taken so that
# columns can be added later without touching `coa` / `arc`.
coa_inv = coa.loc[~coa['coa_id'].isin(suggested_coa_ids)].copy()
arc_inv = arc.loc[~arc['arc_id'].isin(suggested_arc_ids)].copy()
print(len(coa_inv),'CoA and',len(arc_inv),'ARC features not covered by osm cycleways')

In [None]:
# Add empty columns to be filled in during manual review, then export each
# inventory's uncovered features to its own layer of `missing.gpkg`.
#   include          -- left empty here
#   suggested_osmid  -- used to indicate which osmid this should be assigned to
#   notes            -- left empty here
coa_inv = coa_inv.assign(include=None, suggested_osmid=None, notes=None)
coa_inv.to_file(config['bicycle_facilities_fp']/'missing.gpkg',layer='coa')

arc_inv = arc_inv.assign(include=None, suggested_osmid=None, notes=None)
arc_inv.to_file(config['bicycle_facilities_fp']/'missing.gpkg',layer='arc')

# Reference OSM network if suggested matches are incorrect

In [None]:
# raw_osm = gpd.read_file(config['project_fp']/"OSM_Download/osm_2023.gpkg",layer='raw')

# raw_osm['suggested_arc_match'] = None
# raw_osm['suggested_coa_match'] = None
# raw_osm['notes'] = None

# raw_osm.to_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer="osm_2023")
# del raw_osm

In [None]:
# network_osm = gpd.read_file(config['project_fp']/"Network/networks.gpkg",layer='osm_links')

# network_osm['suggested_arc_match'] = None
# network_osm['suggested_coa_match'] = None
# network_osm['notes'] = None

# network_osm.to_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer="osm_network_2023")
# del network_osm

# Add dates to OSM cycleways network

In [None]:
# Purpose of this cell: combine the CoA and ARC suggested matches per OSM link,
# derive a single `year` for each link, and attach it to the OSM cycleways network.

# Read the suggested matches for both inventories back from disk.
coa_overlap = gpd.read_file(config['bicycle_facilities_fp']/'suggested_matches.gpkg',layer='coa')
arc_overlap = gpd.read_file(config['bicycle_facilities_fp']/'suggested_matches.gpkg',layer='arc')

# From the CoA matches keep only the columns whose name contains 'coa_' plus the
# match bookkeeping columns. Every other column of `merged` (e.g. `geometry`)
# therefore comes from `arc_overlap`.
coa_cols = [x for x in coa_overlap.columns if 'coa_' in x] + [
    'osm_linkid','auto_match', 'manual_match',
    'notes', 'name_check', 'one_to_many', 'hausdorff_dist']

# arc_cols = [x for x in coa_overlap.columns if 'arc_' in x]
# Outer merge on the OSM link id so that links matched by only one inventory are kept.
# Column names shared by both sides (other than the key) receive pandas' default
# `_x` (CoA) / `_y` (ARC) suffixes.
# NOTE(review): rows present only in the CoA matches get missing values in the
# ARC-side columns after this merge -- confirm that is acceptable downstream.
merged = pd.merge(coa_overlap[coa_cols],arc_overlap,on='osm_linkid',how='outer')
# merged.columns = ['coa_'+ x.split('_x')[0] if '_x' in x else x for x in merged.columns]
# merged.columns = ['arc_'+ x.split('_y')[0] if '_y' in x else x for x in merged.columns]

# Re-wrap the merge result as a GeoDataFrame in the project's projected CRS.
merged = gpd.GeoDataFrame(merged,crs=config['projected_crs_epsg'])
# coa_cols = [ x if ('coa_' in x) & (x != 'osmid') else 'coa_' + x for x in coa_cols]

# The year is resolved in priority order. After the first step, each step only
# fills rows whose `year` is still missing, so the statement order matters.

#initialize year column
merged['year'] = np.nan

# if both years are the same then assign that year
# (a missing year never compares equal, so rows lacking either year are skipped here)
same_years= merged['coa_year'] == merged['arc_year']
merged.loc[same_years,'year'] = merged['coa_year']

# if difference between facility types, choose the one that matches up to OSM
mismatch =  (merged['coa_osm_type'] != merged['arc_osm_type'])
coa_matches = merged['coa_osm_type'] == merged['facility']
arc_matches = merged['arc_osm_type'] == merged['facility']
merged.loc[mismatch & coa_matches & merged['year'].isna(),'year'] = merged['coa_year']
merged.loc[mismatch & arc_matches & merged['year'].isna(),'year'] = merged['arc_year']

# figuring out the right date
# if available, take the coa date (they tend to be more accurate)
# NOTE(review): `coa_notna` and `arc_notna` are not used below; the two
# assignments recompute the same masks inline.
coa_notna = merged['coa_year'].notna()
arc_notna = merged['arc_year'].notna()
merged.loc[merged['coa_year'].notna() & merged['year'].isna(),'year'] = merged['coa_year']
# The ARC year is the last fallback: `year` is re-checked after the CoA fill above.
merged.loc[merged['arc_year'].notna() & merged['year'].isna(),'year'] = merged['arc_year']

# merge with the osm data
cycleways_osm = gpd.read_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer='osm_cycleways_network')
# NOTE(review): no `on=` is given, so pandas joins on every column name the two
# frames share (`osm_linkid`, and `year` too if the layer already has one).
# NOTE(review): if `merged` holds several rows for one `osm_linkid`, this left
# merge repeats the corresponding cycleway row -- verify whether that is intended.
cycleways_osm = pd.merge(cycleways_osm,merged[['osm_linkid','year']],how='left')

In [None]:
# Display the column names of the dated cycleways frame as a quick check.
cycleways_osm.columns

In [None]:
# Hand-entered years keyed by `osm_linkid`. Every row whose link id appears in
# the mapping has its `year` overwritten, whether or not it already had one.
date_override = (
    cycleways_osm['osm_linkid']
    .map({
        22939: 2019,
        162250: 2019,
    })
    .dropna()
)
cycleways_osm.loc[date_override.index,'year'] = date_override

In [None]:
# Hand-entered years, keyed by `osmid`, applied only to rows that still have no year.
missing_dates_dict = {
    9183807: 2013,  # noted as '2/2013' -- maybe put month and date if available?
    1118729510: 2017,
    1118729511: 2017,
}
# Look up the undated rows in the mapping and keep only the ones that were found.
missing_dates = (
    cycleways_osm.loc[cycleways_osm['year'].isna(),'osmid']
    .map(missing_dates_dict)
    .dropna()
)
cycleways_osm.loc[missing_dates.index,'year'] = missing_dates

# osmids with at least one row lacking a year / with at least one dated row
nodate = set(cycleways_osm.loc[cycleways_osm['year'].isna(),'osmid'].tolist())
hasdate = set(cycleways_osm.loc[cycleways_osm['year'].notna(),'osmid'].tolist())
# remove features that have at least one date
# nodate = nodate - hasdate

# use this to find what osm features still need dates:
# keep only the features of the full cycleways layer whose osmid is still undated
cycleways_osm_full = gpd.read_file(
    config['bicycle_facilities_fp']/'reference_layers.gpkg',layer='osm_cycleways_full'
).loc[lambda layer: layer['osmid'].isin(nodate)]

In [None]:
# Connector links: these connect off-street infrastructure back to the street,
# so they are not really infrastructure themselves.
# NOTE(review): this list is not referenced by any other cell visible in the notebook.
connector_links = [
    903328316,
    903328313,
]

# export


In [None]:
# Ad-hoc check: reload the matching module (to pick up recent edits) and display
# what `remove_suffix` returns for a sample street name. Nothing is stored.
from importlib import reload
reload(matching_script)
matching_script.remove_suffix('M.L.K. Jr.')

In [None]:
# Export the results to `osm_cycleways_w_dates.gpkg`:
#   dates_network -- the cycleways network with the resolved `year` column
#   no_dates      -- the full-layer features whose osmid still lacks a year
cycleways_osm.to_file(
    config['bicycle_facilities_fp']/'osm_cycleways_w_dates.gpkg',
    layer='dates_network',
)
cycleways_osm_full.to_file(
    config['bicycle_facilities_fp']/'osm_cycleways_w_dates.gpkg',
    layer='no_dates',
)

In [None]:
# Interactive map of the cycleway links that still have no year assigned.
cycleways_osm.loc[cycleways_osm['year'].isna()].explore()

In [None]:
# TODO use this to add in new dates


In [None]:
# Count how often each (CoA facility type, OSM facility) combination occurs in the CoA matches.
coa_overlap.value_counts(['coa_osm_type','facility'])

In [None]:
#mismatching
# reorder = ['coa_osm_type','arc_osm_type','coa_year','arc_year','osmid','name','facility','hausdorff_dist_x','hausdorff_dist_y','geometry']
# merged.loc[(merged['coa_osm_type'] != merged['arc_osm_type']) & merged[['coa_osm_type','arc_osm_type']].notna().all(axis=1),reorder].to_file(config['bicycle_facilities_fp']/'scratch.gpkg',layer='mismatch')
# merged[reorder].to_file(config['bicycle_facilities_fp']/'scratch.gpkg',layer='test')

In [None]:
# cycleways_osm_coa_arc[cycleways_osm_coa_arc[['arc_year','coa_year']].notnull().any(axis=1)].explore()

# Add reference osm files

In [None]:
# years = [2014,2015,2016,2023]

# for year in years:
#     raw_osm = gpd.read_file(Path(config['project_directory'])/f"OSM_Download/osm_{year}.gpkg",layer='raw')
#     raw_osm.to_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer=f"osm_{year}")
#     del raw_osm

# Create a version of OSM for editing and adding data to (only run once)

In [None]:
# osm = gpd.read_file(config['bicycle_facilities_fp']/'reference_layers.gpkg',layer='osm_2023')
# overwrite = False
# if overwrite:
#     osm.to_file(config['bicycle_facilities_fp']/'osm_edit.gpkg')

In [None]:
# <!-- # Missing Infrastructure (Geometry Version)
# buffer_ft = 100
# overwrite_diff = False
# confirm_diff = False
# # import the 2023 OSM bicycle network
# cycleways_osm = gpd.read_file(config['bicycle_facilities_fp']/'osm_cycleways.gpkg')
# cycleways_osm = cycleways_osm.loc[cycleways_osm['year']=='2023']

# # get unary union of all features after buffering
# cycleways_osm_all = cycleways_osm.buffer(buffer_ft).unary_union
# arc_diff = arc[arc.geometry.intersects(cycleways_osm_all) == False]
# coa_diff = coa[coa.geometry.intersects(cycleways_osm_all) == False]
# garber_diff = garber[garber.geometry.intersects(cycleways_osm_all) == False]
# overwrite = False
# if overwrite:
#     coa_diff['valid_difference'] = None
#     coa_diff['notes'] = None
#     coa_diff.to_file(config['bicycle_facilities_fp']/'differences.gpkg',layer='coa')
    
#     arc_diff['valid_difference'] = None
#     arc_diff['notes'] = None
#     arc_diff.to_file(config['bicycle_facilities_fp']/'differences.gpkg',layer='arc')

#     garber_diff['valid_difference'] = None
#     garber_diff['notes'] = None
#     garber_diff.to_file(config['bicycle_facilities_fp']/'differences.gpkg',layer='garber')

# raw_osm = gpd.read_file(Path(config['project_directory'])/f"OSM_Download/osm_{config['geofabrik_year']}.gpkg",layer='raw')
# raw_osm.to_crs(config['projected_crs_epsg'],inplace=True)
# final_confirm = False
# if (overwrite_check(overwrite_diff,confirm_diff) == True) & (final_confirm == True):
#     raw_osm['arc_feature_id'] = None
#     raw_osm['coa_feature_id'] = None
#     raw_osm['garber_feature_id'] = None
#     raw_osm.to_file(config['bicycle_facilities_fp']/'differences.gpkg',layer='osm_edit')
# # raw_osm = gpd.read_file(Path(config['project_directory'])/f"OSM_Download/osm_{config['geofabrik_year']}.gpkg",layer='raw')
# # raw_osm.to_crs(config['projected_crs_epsg'],inplace=True)
# # final_confirm = True
# # if (overwrite_check(overwrite_diff,confirm_diff) == True) & (final_confirm == True):
# #     raw_osm['arc_feature_id'] = None
# #     raw_osm['coa_feature_id'] = None
# #     raw_osm['garber_feature_id'] = None
# #     raw_osm.to_file(config['bicycle_facilities_fp']/'differences.gpkg',layer='osm_edit')
# # #based on the 2023-01-01 Geofabrik Georgia Extract
# # #osm = gpd.read_file(Path(config['project_directory'])/'Network/networks.gpkg',layer='osm_links')
# # osm = gpd.read_file(Path(config['project_directory'])/f"OSM_Download/osm_2023.gpkg",layer='raw',ignore_geometry=True)
# # #osm = pd.merge(osm,raw_osm,on='osmid',how='left')

# # #create new fields for install dates
# # osm['install_year'] = None
# # osm['install_month'] = None
# # osm['install_day'] = None

# # #create new fields for updated fwd and rev infra types
# # osm['facility_fwd'] = None
# # osm['facility_rev'] = None

# # #create field for notes
# # osm['notes'] = None

# # #create field for edit date
# # osm['last_edited'] = None -->