# OSM Downloading

## Import Packages

In [None]:
import os
import geopandas as gpd
import numpy as np
import pandas as pd
import pickle
from pathlib import Path
import networkx as nx
import matplotlib.pyplot as plt
import contextily as cx


## Import custom modules

In [None]:
from osm_dwnld import * #contains functions for downloaded data from OSMnx and Overpass API

## Choose Study Area

In [None]:
# Study area settings: name used in output file names, target projected CRS,
# folder that receives exports, and the polygon file (shapefile/geojson/geopackage).
studyarea_name = 'bikewaysim'
crs = "epsg:2240"
export_fp = Path.home() / 'Documents/NewBikewaySimData/Data'
studyarea_fp = Path.home() / 'Documents/NewBikewaySimData/Data/Study Areas/bikewaysim_studyarea.geojson'

# Alternative study areas (uncomment one pair to use it instead):
#studyarea_fp = r'base_shapefiles/bikewaysim_study_area/bikewaysim_study_area.shp'
#studyarea_name = 'bikewaysim'

#studyarea_fp = r'base_shapefiles/studyareas/ITS4US_StudyBoundary.shp'
#studyarea_name = 'its4us'

#studyarea_fp = r'base_shapefiles/coa/Atlanta_City_Limits.shp'
#studyarea_name = 'coa'

#studyarea_fp = r'base_shapefiles/arc/arc_bounds.shp'
#studyarea_name = 'arc'

#studyarea_fp = r'base_shapefiles/studyareas/marta.gpkg'
#studyarea_name = 'marta'

#studyarea_fp = r'base_shapefiles/studyareas/atl_district5.gpkg'
#studyarea_name = 'atl_district5'

# Load the study area polygon, then reproject it to WGS 84 (EPSG:4326)
gdf = gpd.read_file(studyarea_fp)
gdf = gdf.to_crs(epsg=4326)

# Plot the polygon semi-transparently on top of a web basemap
ax = gdf.plot(alpha=0.5, edgecolor='k', figsize=(10, 10))
cx.add_basemap(ax, crs=gdf.crs)

In [None]:
# Folder that receives every OSM export written by this notebook
export_dir = export_fp / 'osm'

# Create the osm folder if it doesn't exist already. parents=True also creates
# any missing parent folders of export_fp, and exist_ok=True makes the call a
# no-op when the folder is already there (no separate exists() check needed).
export_dir.mkdir(parents=True, exist_ok=True)

## Download OSM geometry using OSMnx and attribute data using Overpass API
More info about OSMnx: https://geoffboeing.com/2016/11/osmnx-python-street-networks/  
OSMnx documentation: https://osmnx.readthedocs.io/en/stable/index.html

In [None]:
# Download the OSM network for the study area polygon (gdf, in WGS 84).
# download_osm is provided by the star-imported osm_dwnld module.
# NOTE(review): presumably returns (nodes, links) GeoDataFrames -- confirm in osm_dwnld.
osm_nodes, osm_links = download_osm(gdf)

In [None]:
# Preview the first rows of the downloaded nodes
osm_nodes.head()

In [None]:
# Preview the first rows of the downloaded links
osm_links.head()

### Which columns to keep?
1. Check for completion
1. Check for desired attributes (speed, bike lanes, etc.)
1. Filter to just those

In [None]:
#project to desired crs once and reuse the per-link lengths
#(reprojecting the whole network is expensive, so don't do it twice)
network_length = osm_links.to_crs(crs).length
total_network_length = network_length.sum()

#get pct complete weighted by total distance:
#every non-null cell contributes its link's length, null cells contribute 0,
#so each column's sum divided by the total length is its length-weighted completeness
attr_completion = osm_links.notna().mul(network_length, axis=0).sum() / total_network_length
attr_completion.name = 'prop_complete'

#retrieve all complete fields (more than 95% of network length has a value)
complete = list(attr_completion[attr_completion>0.95].index)
less_complete = list(attr_completion[attr_completion<=0.95].index)

#export attr completion csv
attr_completion.to_csv(export_fp / Path(f'osm/{studyarea_name}_attr_completion.csv'))

### Most attributes in OSM are empty

In [None]:
# Histogram of attribute completeness (one observation per attribute)
marks = np.array(attr_completion)

# ten equal-width bins covering proportions 0.0 through 1.0
bin_edges = np.arange(0, 110, 10) / 100

fig, axis = plt.subplots(figsize=(10, 5))
axis.hist(marks, bins=bin_edges)
axis.set_xlabel('Proportion of Attribute Complete')
axis.set_ylabel('Frequency')

# Render the figure
plt.show()

### Examine which attributes are present
Try 'maxspeed' or 'lanes'

In [None]:
# Attributes to inspect on the map (others to check: 'cycleway','lanes')
attributes_to_check = ['maxspeed']

# Keep links where at least one of the checked attributes has a value,
# and show only those attributes plus the geometry on an interactive map
has_any_value = osm_links[attributes_to_check].notna().any(axis=1)
columns_to_show = attributes_to_check + ['geometry']
osm_links.loc[has_any_value, columns_to_show].explore()

## Look at value counts to see the possible attribute values

In [None]:
# Count how often each distinct 'lanes' value occurs
osm_links['lanes'].value_counts()

# Attributes to Export (reduces size of geopackage)
Add more as needed

In [None]:
# Column names available on the downloaded links
all_columns = osm_links.columns.to_list()

# road attributes (fixed list)
road_columns = ['name','highway_1','oneway','lanes','maxspeed','bridge','crossing','sidewalk']

# parking attributes: any column whose name mentions parking
parking_columns = [name for name in all_columns if 'parking' in name]

# pedestrian attributes: any column whose name mentions foot
foot_columns = [name for name in all_columns if 'foot' in name]

# bike attributes: names mentioning cycle/bike, excluding motorcycle tags,
# extended with the pedestrian columns and the 'lit' column
bike_columns = [
    name for name in all_columns
    if ('cycle' in name or 'bike' in name) and 'motorcycle' not in name
]
bike_columns = bike_columns + foot_columns + ['lit']

In [None]:
# Combine the selected column groups. The same name can occur in more than one
# group (e.g. a road column that is also in `complete`), and selecting a name
# twice would produce duplicate columns, so de-duplicate while keeping order.
export_columns = list(dict.fromkeys(complete + bike_columns + road_columns + parking_columns))

# Some names are hard-coded and may be absent for a given study area;
# report and skip them instead of failing with a KeyError.
missing_columns = [col for col in export_columns if col not in osm_links.columns]
if missing_columns:
    print(f"columns not found in osm_links (skipped): {missing_columns}")
export_columns = [col for col in export_columns if col in osm_links.columns]

#remove columns with lists in them (handle these later)
list_columns = [
    col for col in export_columns
    if any(isinstance(x, list) for x in osm_links.loc[:, col])
]
for col in list_columns:
    print(f"{col} column removed...")

# Take an explicit copy so osm_export is independent of osm_links
# (avoids modifying a slice in place and the SettingWithCopyWarning that comes with it)
osm_export = osm_links[[col for col in export_columns if col not in list_columns]].copy()

# Filter and Export

In [None]:
# Write nodes and the reduced links table as two layers of one geopackage
gpkg_fp = export_fp / Path(f'osm/{studyarea_name}.gpkg')
osm_nodes.to_file(gpkg_fp, layer='nodes')
osm_export.to_file(gpkg_fp, layer='links')

# Just Bike Infrastructure

In [None]:
#create copy so the filters below don't modify osm_links
bike_lanes = osm_links.copy()

#drop no bicycle
bike_lanes = bike_lanes[bike_lanes['bicycle']!='no']
#NOTE(review): this also drops links tagged lit='no' -- confirm that is intended
bike_lanes = bike_lanes[bike_lanes['lit']!='no']

#drop shared lanes (any bike/foot column equal to 'shared_lane')
not_sharedlane = (bike_lanes[bike_columns] != 'shared_lane').all(axis=1)
bike_lanes = bike_lanes.loc[not_sharedlane,:]

#remove sidewalks unless bikes explicitly allowed
#(boolean masks are inverted with ~, the logical-not operator, rather than unary minus)
remove_sidewalks = (bike_lanes['footway'].isin(['sidewalk','crossing'])) & (bike_lanes['bicycle'] != 'yes')
bike_lanes = bike_lanes[~remove_sidewalks]

#drop the bicycle column
#bike_lanes.drop(columns=['bicycle','maxspeed:bicycle'],inplace=True)

#filter on bike specific columns
#drop_na = bike_lanes[filt].notna().any(axis=1)
#not_no = (bike_lanes[filt] != 'no').all(axis=1)


#bike_lanes = bike_lanes.loc[drop_na & not_no & not_sharedlane,filt+['geometry','highway']].copy()
#bike_lanes = bike_lanes.loc[:,filt+['geometry','highway','highway_1']]

#specific bike columns

#drop the bicycle yes values
#bike_lanes = bike_lanes[bike_lanes['bicycle']!='yes']

#need beltline and stuff
bike_lanes.explore()

## Pickle all the attributes

In [None]:
# Serialize the full links table (every attribute) next to the other OSM exports
all_attr_fp = export_fp / Path(f'osm/{studyarea_name}_all_attr.pkl')
with all_attr_fp.open('wb') as fh:
    pickle.dump(osm_links, fh)

Remove columns with unsupported datatypes

In [None]:
# Find every column that holds at least one list value. any() stops at the
# first list it meets instead of building the type of every cell first.
list_columns = [
    col for col in osm_links.columns.tolist()
    if any(isinstance(x, list) for x in osm_links.loc[:, col])
]

# Drop them all in a single call (in place, so later cells see the reduced table)
osm_links.drop(columns=list_columns, inplace=True)
for col in list_columns:
    print(f"{col} column removed...")

#### Preprocess
- Remove controlled access roads
- Remove sidewalks and crosswalks unless they explicitly allow bikes

# Quick Export
- Remove restricted access roads
- Remove sidewalks

In [None]:
#start from the downloaded links; work on a copy so osm_links is left untouched
links = osm_links.copy()

#remove restricted access roads + sidewalks
restr_access = links['highway'].isin(['motorway','motorway_link'])
links = links[~restr_access]

#remove sidewalks unless bikes explicitly allowed
remove_sidewalks = (links['footway'].isin(['sidewalk','crossing'])) & (links['bicycle'] != 'yes')
links = links[~remove_sidewalks]

#find service links that still have a name
#(captured before the highway filter below, which would otherwise discard them)
service_links_with_name = links[(links['highway'] == 'service') & links['name'].notna()]

osm_filter_method = ['primary','primary_link','residential','secondary','secondary_link',
                    'tertiary','tertiary_link','trunk','trunk_link','cycleway','footway','path','pedestrian','steps']

links = links[links["highway"].isin(osm_filter_method)]

#add back in service links with a name
#(DataFrame.append was removed in pandas 2.0, so use pd.concat)
links = pd.concat([links, service_links_with_name])

#remove nodes that aren't in filtered links
#NOTE(review): assumes links carry their end-node ids in columns 'A' and 'B'
#and that these match osm_nodes['osmid'] -- confirm against download_osm
osm_links_filt = links
nodes = pd.concat([osm_links_filt['A'], osm_links_filt['B']]).unique()
osm_nodes_filt = osm_nodes[osm_nodes['osmid'].isin(nodes)]

#export these two

Sometimes there are duplicate attribute names in OSM. The code below drops duplicate column names (compared case-insensitively), keeping the first occurrence.

In [None]:
#get rid of duplicate columns names
#overpass_links = overpass_links.iloc[:,overpass_links.columns.str.lower().duplicated(keep='first') == False]
