# Extract Cycling Infrastructure from OSM

In [1]:
from pathlib import Path
import geopandas as gpd
import pandas as pd
import json

import sys
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
config = file_structure_setup.filepaths()

## Import each year and find potential cycling infrastructure

In [2]:
# Only the 2023 extract is processed here. Alternative glob kept for reference:
# osm_years = list(config['bicycle_facilities_fp'].glob("osm_*.gpkg"))
osm_years = list(config['osmdwnld_fp'].glob("osm_2023.gpkg"))
for osm_year in osm_years:
    print("Creating cycleway layer for",osm_year.stem)
    links = gpd.read_file(osm_year,layer='raw')

    # Collect every tag whose key mentions "cycleway" (e.g. cycleway:right),
    # keyed by row index so the result can be joined back onto `links`.
    # Only the all_tags column is needed, so iterate over that Series rather
    # than materialising a full row per feature with iterrows().
    all_cycleway_tags = {}
    for idx, raw_tags in links['all_tags'].items():
        # a row without a JSON string of tags (None/NaN) has nothing to extract
        if not isinstance(raw_tags, str):
            continue
        tags = json.loads(raw_tags)
        cycleway_tags = {key: value for key, value in tags.items() if "cycleway" in key}
        if cycleway_tags:
            all_cycleway_tags[idx] = cycleway_tags

    # add as columns to the main dataframe (one column per distinct cycleway key)
    all_cycleway_tags_df = pd.DataFrame.from_dict(all_cycleway_tags,orient='index')
    links = pd.merge(links, all_cycleway_tags_df, left_index=True, right_index=True, how='left')

    # reduce to features that are most likely to be cycleways:
    #   - drawn as a path/cycleway, or
    #   - carrying any cycleway tag, or
    #   - carrying an explicit bicycle tag other than 'no'
    cycleway_or_path = links['highway'].isin(['path','cycleway'])
    has_cycleway_tag = links['cycleway'].notna() | links.index.isin(list(all_cycleway_tags))
    bikes_allowed = (links['bicycle'] != 'no') & (links['bicycle'].notna())
    cycleways = links[cycleway_or_path | has_cycleway_tag | bikes_allowed]

    # written as an extra layer of the same GeoPackage that was read
    cycleways.to_file(osm_year,layer='cycleways')


Creating cycleway layer for osm_2023


In [3]:
# Inspect the cycleway-related tags collected for the most recently processed
# file: a dict keyed by row index, each value a dict of {tag key: tag value}.
all_cycleway_tags

{1140: {'cycleway:right': 'lane'},
 6794: {'cycleway:right': 'lane'},
 9101: {'cycleway:right': 'lane'},
 10252: {'cycleway:right': 'lane'},
 10253: {'cycleway:left': 'lane'},
 11811: {'cycleway:left': 'lane'},
 12529: {'cycleway:left': 'lane'},
 12560: {'cycleway:right': 'lane'},
 12576: {'cycleway:right': 'lane'},
 12578: {'cycleway:right': 'lane'},
 12602: {'cycleway:right': 'lane'},
 12629: {'cycleway:right': 'lane'},
 12630: {'cycleway:right': 'lane'},
 12759: {'cycleway:right': 'lane'},
 12837: {'cycleway:right': 'lane'},
 19351: {'cycleway:right': 'lane'},
 19748: {'cycleway:right': 'lane'}}

# Classify Cycling Infrastructure Types
Heavily inspired by the [People for Bikes](https://drive.google.com/file/d/1iJtlhDbTMEPdoUngrCKL-rfSK84ib081/view) methodology.

In [4]:
# Read the 'cycleways' layer of every matching GeoPackage, label each row with
# the year taken from the file name, then stack everything and reproject.
gpkgs = list(config['osmdwnld_fp'].glob("osm_2023.gpkg"))
list_of_df = []
for gpkg in gpkgs:
    year_label = gpkg.stem.split('_')[-1]
    cycleways = gpd.read_file(gpkg,layer='cycleways').assign(year=year_label)
    list_of_df.append(cycleways)

stacked = pd.concat(list_of_df,ignore_index=True)
all_cycleways = stacked.to_crs(config['projected_crs_epsg'])

# Optional column pruning / export, currently disabled:
# cols_to_remove = ['access', 'area', 'bridge', 'footway','junction', 'lanes', 'lit', 'maxspeed', 'motorcar',
#        'motor_vehicle', 'psv', 'ref', 'service',
#        'segregated', 'sidewalk', 'smoothness', 'surface', 'tunnel', 'width','tags', 'osm_type', 'length', 'turn',
#        'passing_places', 'tracktype']
# all_cycleways.drop(columns=cols_to_remove,inplace=True)
#all_cycleways.to_file(config['bicycle_facilities_fp']/'cycleways.gpkg')

Cycling infrastructure was categorized into the following types by direction:
- Sharrow (Class III)
- Bicycle Lanes (Class II):
    - None (Class II)
    - Buffered (Class II)
    - Flex posts (Class II)
- Cycletracks or Bike Lanes with Physical Separation (Class IV)
- Multi-Use Trails (Class I)
    - Includes side paths and wide sidewalks
    - Multi-use trails/shared-use paths that don't follow a road

Facilities are classified separately for each direction of travel, following the People for Bikes (PFB) methodology.

In [5]:
# One facility-type column per travel direction (forward / reverse), both
# starting out unset so later cells can fill only what is still missing.
for facility_col in ('facility_fwd', 'facility_rev'):
    all_cycleways[facility_col] = None

Collect the names of the left- and right-side cycleway tag columns.


In [6]:
# Names of every column whose name begins with the right-/left-side cycleway prefix
column_names = list(all_cycleways.columns)
right_cols = [name for name in column_names if name.startswith("cycleway:right")]
left_cols = [name for name in column_names if name.startswith("cycleway:left")]

## No Facility
Pre-assign ways that don't have a bicycle facility. If there is a separate way (cycleway=separate), mark the street as no facility because there should be a corresponding cycle path way.


In [7]:
# Tag values that are treated the same as "no bicycle facility on this way"
no_facility_keywords = [
    'no',
    'separate',
    'none',
]

In [8]:
# Ways with restricted access count as having no facility in either direction
restricted_access = all_cycleways['access'].isin(['no','private','customers'])
all_cycleways.loc[restricted_access, ['facility_fwd','facility_rev']] = 'no facility'

In [9]:
#BUG this filter won't run if any tags are present

In [10]:
# Show which columns are currently present on all_cycleways
all_cycleways.columns

Index(['osmid', 'timestamp', 'version', 'type', 'highway', 'oneway', 'name',
       'bridge', 'tunnel', 'cycleway', 'service', 'footway', 'sidewalk',
       'bicycle', 'foot', 'access', 'area', 'all_tags', 'geom_type',
       'cycleway:right', 'cycleway:left', 'geometry', 'year', 'facility_fwd',
       'facility_rev'],
      dtype='object')

In [11]:
# The classification cells index these tag columns directly, so each one must
# exist. Create a column only when the extract did not already provide it:
# assigning None unconditionally would erase real tag values.
add_blank_cols = ['cycleway:both','cycleway:both:buffer','cycleway:right:buffer','cycleway:left:buffer']
for blank_col in add_blank_cols:
    if blank_col not in all_cycleways.columns:
        all_cycleways[blank_col] = None

In [12]:

# Ways that are not themselves drawn as a cycleway/path
not_cycleway = ~all_cycleways['highway'].isin(['cycleway','path'])

# Neither 'cycleway' nor 'cycleway:both' describes a facility:
# both are empty, or either carries a no-facility keyword
both_side_tags = all_cycleways[['cycleway','cycleway:both']]
not_both = both_side_tags.isna().all(axis=1) | both_side_tags.isin(no_facility_keywords).any(axis=1)

# Same test for each individual side
right_tag = all_cycleways['cycleway:right']
left_tag = all_cycleways['cycleway:left']
no_right = right_tag.isna() | right_tag.isin(no_facility_keywords)
no_left = left_tag.isna() | left_tag.isin(no_facility_keywords)

# right side -> forward direction, left side -> reverse direction;
# only rows whose facility is still unset are touched
for facility_col, side_missing in (('facility_fwd', no_right), ('facility_rev', no_left)):
    still_unset = all_cycleways[facility_col].isna()
    all_cycleways.loc[not_cycleway & not_both & side_missing & still_unset, facility_col] = 'no facility'


## Multi-use paths and cycletracks (Class I and Class IV)
OSM doesn't have a good way to distinguish between multi-use paths and cycletracks. Sometimes a segregation tag is used, but many of the Class IV bike facilities in Atlanta are bi-directional and are therefore typically drawn as separate geometries. Because of this they look identical to side paths and multi-use trails (Class I). In the older data, there are a few occurrences of highway=cycleway accompanied by cycleway=lane. The highway=cycleway tag should take precedence in these cases.

**Cycletracks (aka Class IV Bike Lanes) Should Have "foot = no" OR be on a street but this isn't always consistent**

In [13]:
no_peds = all_cycleways['foot'] == 'no'
drawn_as_path = all_cycleways['highway'].isin(['cycleway','path'])
tagged_track = all_cycleways['cycleway'].isin(['track','both'])

# cycleway/path closed to pedestrians, or a way tagged as a track
looks_like_cycletrack = (drawn_as_path & no_peds) | tagged_track

# Per direction: apply the general rule first, then the side-specific tag
# (right -> forward, left -> reverse). Only unset rows are filled.
for facility_col, side_col in (('facility_fwd', 'cycleway:right'), ('facility_rev', 'cycleway:left')):
    all_cycleways.loc[looks_like_cycletrack & all_cycleways[facility_col].isna(), facility_col] = 'cycletrack'
    side_is_track = all_cycleways[side_col] == 'track'
    all_cycleways.loc[side_is_track & all_cycleways[facility_col].isna(), facility_col] = 'cycletrack'

In [14]:
# Any cycleway/path still unclassified in a direction becomes a multi use path
drawn_as_path = all_cycleways['highway'].isin(['cycleway','path'])
for facility_col in ('facility_fwd', 'facility_rev'):
    still_unset = all_cycleways[facility_col].isna()
    all_cycleways.loc[drawn_as_path & still_unset, facility_col] = 'multi use path'

## Sharrows

In [15]:
# A shared_lane value in 'cycleway' or 'cycleway:both' marks a sharrow in both directions
sharrow = all_cycleways[['cycleway','cycleway:both']].eq('shared_lane').any(axis=1)

# Side-specific tags: right is assumed to be the forward direction,
# left the opposing (reverse) direction
sharrow_right = all_cycleways["cycleway:right"].eq('shared_lane')
sharrow_left = all_cycleways["cycleway:left"].eq('shared_lane')

for facility_col, side_mask in (('facility_fwd', sharrow_right), ('facility_rev', sharrow_left)):
    # both-sides rule first, then the side rule; each fills only unset rows
    for mask in (sharrow, side_mask):
        still_unset = all_cycleways[facility_col].isna()
        all_cycleways.loc[mask & still_unset, facility_col] = 'sharrow'
    

## Buffered Bike Lanes

In [16]:
# Buffer declared for both sides, with no left/right-specific tags present
no_side_tags = all_cycleways[right_cols+left_cols].isna().all(axis=1)
buffered_both = no_side_tags & (all_cycleways['cycleway:both:buffer']=='yes')

# A lane on one side with a buffer on that same side
buffered_right = (all_cycleways['cycleway:right'] == 'lane') & (all_cycleways['cycleway:right:buffer'] == 'yes')
buffered_left = (all_cycleways['cycleway:left'] == 'lane') & (all_cycleways['cycleway:left:buffer'] == 'yes')

# right is assumed to be the forward direction, left the reverse direction
for facility_col, side_mask in (('facility_fwd', buffered_right), ('facility_rev', buffered_left)):
    # both-sides rule first, then the side rule; each fills only unset rows
    for mask in (buffered_both, side_mask):
        still_unset = all_cycleways[facility_col].isna()
        all_cycleways.loc[mask & still_unset, facility_col] = 'buffered bike lane'


## Traditional Bike Lanes

In [17]:
# Traditional painted bike lanes. Buffered lanes were assigned earlier, and
# every assignment here fills only rows that are still unset.
side_tags = all_cycleways[right_cols+left_cols]
no_left_right_attrs = side_tags.isna().all(axis=1) | side_tags.eq('no').any(axis=1)

# 'cycleway' / 'cycleway:both' says lane (or yes) and no side tag overrides it
lane_on_both = no_left_right_attrs & all_cycleways[['cycleway:both','cycleway']].isin(['lane','yes']).any(axis=1)

# right side -> forward direction, left side -> reverse direction
for facility_col, side_col in (('facility_fwd', 'cycleway:right'), ('facility_rev', 'cycleway:left')):
    side_tag = all_cycleways[side_col]
    rules = (
        lane_on_both,
        side_tag.isin(['lane','yes']),
        side_tag == 'opposite_lane',  # could be labelled 'contra flow bike lane' instead
    )
    for mask in rules:
        still_unset = all_cycleways[facility_col].isna()
        all_cycleways.loc[mask & still_unset, facility_col] = 'bike lane'

Check: what's still unclassified?

In [18]:
# Rows where at least one direction has not received a facility type yet
missing_a_direction = all_cycleways[['facility_fwd','facility_rev']].isna().any(axis=1)
unclassifed = all_cycleways[missing_a_direction]
unclassifed
#unclassifed.drop(columns='all_tags').explore()

Unnamed: 0,osmid,timestamp,version,type,highway,oneway,name,bridge,tunnel,cycleway,...,cycleway:right,cycleway:left,geometry,year,facility_fwd,facility_rev,cycleway:both,cycleway:both:buffer,cycleway:right:buffer,cycleway:left:buffer


# Drop no facility options

In [19]:
# Drop rows that are 'no facility' in BOTH directions; keep everything else
no_facility = all_cycleways[['facility_fwd','facility_rev']].eq('no facility').all(axis=1)
all_cycleways = all_cycleways[~no_facility]

# Remove Dirt Trails and Hiking Trails (included in routing but will have to mess around with speed)
- highway=path but bicycle=no or null
- OR surface=dirt/sand/unpaved (etc)

In [20]:
#filter out some of the dirt trails
def get_surface_tag(item):
    """Return the ``surface`` value from a JSON-encoded dict of tags.

    Parameters
    ----------
    item : str
        JSON object string, e.g. ``'{"surface": "asphalt"}'``.

    Returns
    -------
    The value stored under ``'surface'``, or ``None`` when the key is absent
    or when ``item`` is not a JSON string at all (``None`` / ``NaN`` cells).
    """
    # null cells have no tags to parse; json.loads would raise on them
    if not isinstance(item, (str, bytes, bytearray)):
        return None
    return json.loads(item).get('surface')
# pull the surface tag out of the JSON tag string into its own column
all_cycleways['surface'] = all_cycleways['all_tags'].apply(get_surface_tag)

#all_cycleways['surface'].unique()
remove = ['gravel','log','wood','ground', 'grass', 'unpaved', 'dirt',
       'mud', 'stepping_stones', 'fine_gravel', 'brick', 'dirt/sand']
# isin() takes the candidate values themselves. Wrapping the list in another
# list made the only candidate a list object, which no surface string equals,
# so nothing was filtered out.
all_cycleways = all_cycleways[~all_cycleways['surface'].isin(remove)]

#remove if bike is not allowed ('unknown' was previously misspelled and could never match)
bike_not_allowed = ['no','private','unknown']
all_cycleways = all_cycleways[~all_cycleways['bicycle'].isin(bike_not_allowed)]

#remove if highway=path and bike is na
path_without_bicycle_tag = (all_cycleways['highway']=='path') & (all_cycleways['bicycle'].isna())
all_cycleways = all_cycleways[~path_without_bicycle_tag]

# Export

In [21]:
# Export only the rows that have a facility type in both directions
classified_both_ways = all_cycleways[['facility_fwd','facility_rev']].notna().all(axis=1)
final_cycleways = all_cycleways[classified_both_ways]

reference_gpkg = config['bicycle_facilities_fp'] / 'reference_layers.gpkg'
final_cycleways.to_file(reference_gpkg,layer='osm_cycleways')

In [22]:
# Count how many exported ways fall into each forward/reverse facility combination
final_cycleways.value_counts(subset=['facility_fwd','facility_rev'])

facility_fwd    facility_rev  
multi use path  multi use path    40
bike lane       no facility       14
                bike lane          4
no facility     bike lane          3
dtype: int64