# Step 0: OSM Network Download
Usually best to start here. OSM data is downloaded from Geofabrik in the '.pbf' format to ensure that all tags are retrieved. The OSM data is then turned into network routing format. Lastly, OSM cycling infrastructure is extracted.

In [1]:
from pathlib import Path
import geopandas as gpd
import osmnx as ox
import json
import fiona
from shapely.ops import LineString, Point
import pandas as pd

# Load the project configuration stored one directory above the notebook's
# working directory. The context manager closes the file handle once parsed.
with (Path.cwd().parent / 'config.json').open('rb') as config_file:
    config = json.load(config_file)

#notebook variables
# Folder that receives every GeoPackage written by this notebook.
export_fp = Path(config['project_directory']) / 'OSM_Download'
# Create the folder (and any missing parents); a no-op when it already exists.
export_fp.mkdir(parents=True, exist_ok=True)

# Download
Download all available Geofabrik '.pbf' extracts for the desired US states. This downloads all available '.pbf' files with the filename pattern statename-YY0101.osm.pbf because these are the versions that are archived by Geofabrik. Use `*` to specify all years, provide a list of years, or just enter one year.

https://download.geofabrik.de/north-america/us/georgia.html

In [2]:
#TODO script this section

Specify Download Location of Geofabrik Extracts

## Import the Study area

In [3]:
# Read the study area named in the config and reproject it to EPSG:4326.
studyarea_src = config['studyarea']
studyarea_geo = gpd.read_file(studyarea_src).to_crs('epsg:4326')

# Save a GeoJSON copy of the study area in the Geofabrik extracts folder.
studyarea_out = Path(config['geofabrik_fp']) / 'studyarea.geojson'
studyarea_geo.to_file(studyarea_out)

# Osmium Scripts
In these next steps, we'll have to exit the notebook to run some lines of code on the conda command line interface. Example ".bat" and ".sh" scripts have been prepared for running all the years/locations downloaded.

- Install osmium in its own conda environment:
    - `conda create --name osmium -c conda-forge osmium-tool`, then `conda activate osmium`

- Change directory to where you downloaded the Geofabrik extracts:
    - `cd /D D:\RAW\OSM\Geofabrik_GA_Extracts` (Windows `cmd`; on mac/linux use `cd` with the equivalent path and no `/D` flag)

- If windows run the osmium.bat script

- If mac/linux run the osmium.sh script

## For each downloaded extract, clip by study area, then create a raw gpkg version. This takes a while.

In [4]:
# Collect the 'georgia-*.geojson' files found in the Geofabrik folder.
# Sorting makes the processing order deterministic instead of depending on
# the order in which the file system happens to list the directory.
osm_extracts = sorted(Path(config['geofabrik_fp']).glob('georgia-*.geojson'))

In [5]:
osm_extracts

[WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-140101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-150101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-160101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-170101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-180101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-200101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-210101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-220101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-230101.geojson'),
 WindowsPath('D:/RAW/OSM/Geofabrik_GA_Extracts/georgia-240101.geojson')]

In [6]:
# Tags promoted to their own columns; every other tag stays in 'all_tags'.
# Defined once up front because the list is the same for every extract.
included_tags = ['@id','@timestamp','@version','@type',
                 'highway','oneway','name',
                 'bridge','tunnel',
                 'cycleway','service',
                 'footway','sidewalk',
                 'bicycle','foot','access','area']


def _feature_geometry(geometry):
    """Turn a GeoJSON geometry mapping into the shapely object stored for it.

    LineString and Point geometries are converted directly. Polygon and
    MultiPolygon geometries are reduced to the exterior ring of their first
    polygon, returned as a LineString.

    Raises:
        ValueError: if the geometry type is not one of the four above, so an
            unsupported feature fails here with a clear message instead of
            leaving a raw coordinate list in the geometry column.
    """
    coords = geometry['coordinates']
    geom_type = geometry['type']
    if geom_type == 'LineString':
        return LineString(coords)
    if geom_type == 'Point':
        return Point(coords)
    if geom_type == 'MultiPolygon':
        return LineString(coords[0][0])
    if geom_type == 'Polygon':
        return LineString(coords[0])
    raise ValueError(f"Unsupported geometry type in OSM extract: {geom_type}")


for osm_extract in osm_extracts:

    #get the date: two-digit year from '<name>-YYMMDD.geojson'
    # rsplit on the last hyphen so hyphenated names (e.g. 'north-carolina') still parse
    extract_date = osm_extract.stem.rsplit('-', 1)[-1][:2]

    #import the geojson file
    with osm_extract.open('rb') as fh:
        features = json.load(fh)['features']

    #rows for compiling the geodataframe
    rows = []
    for item in features:
        all_tags = item['properties'].copy()

        # Move each promoted tag out of all_tags (None when the tag is absent)
        tag_values = [all_tags.pop(tag, None) for tag in included_tags]

        geometry = item['geometry']
        rows.append([*tag_values, all_tags, geometry['type'], _feature_geometry(geometry)])

    # NOTE(review): 'all_tags' holds dicts; confirm the GeoPackage writer
    # serialises that column the way downstream steps expect.
    raw_df = pd.DataFrame(rows, columns=[*included_tags,'all_tags','geom_type','geometry'])
    raw_gdf = gpd.GeoDataFrame(raw_df, crs='epsg:4326').rename(columns={
        '@id':'osmid',
        '@timestamp':'timestamp',
        '@version':'version',
        '@type':'type',
    })

    #separate ways from nodes
    raw_links = raw_gdf[raw_gdf['type']=='way']
    raw_nodes = raw_gdf[raw_gdf['type']=='node']

    #drop duplicate ways (when a way forms a polygon it seems to keep both?)
    # only the polygon version of a duplicated osmid is removed
    duplicated_polygon = (
        raw_links['osmid'].duplicated(keep=False)
        & raw_links['geom_type'].isin(['Polygon','MultiPolygon'])
    )
    raw_links = raw_links[~duplicated_polygon]

    #remove the areas (pedestrian plazas, not needed for routing)
    raw_links = raw_links[raw_links['area']!='yes']

    #export both layers into one GeoPackage per extract year
    gpkg_fp = export_fp/f'osm_20{extract_date}.gpkg'
    raw_links.to_file(gpkg_fp,layer='raw')
    raw_nodes.to_file(gpkg_fp,layer='highway_nodes')

## Now for the network version

Now use osmnx to import and simplify

In [5]:
# DONT USE: doesn't compile the full way correctly
# G = ox.graph.graph_from_xml(Path(config['geofabrik_fp'])/'studyarea_roads.osm',simplify=False)
# G = ox.convert.to_undirected(G)
# nodes, links = ox.convert.graph_to_gdfs(G)

# links.to_file(export_fp/f"osm_{config['geofabrik_year']}.gpkg",layer='raw')
# del nodes
# del links

In [6]:
# Build the unsimplified graph from the OSM XML extract of the configured
# year (file name 'georgia-YY0101.osm'). Using config['geofabrik_year'] here
# keeps the network on the same year as the 'raw' layer that later cells
# read from osm_{geofabrik_year}.gpkg.
extract_year = str(config['geofabrik_year'])[-2:]
G = ox.graph.graph_from_xml(Path(config['geofabrik_fp'])/f'georgia-{extract_year}0101.osm',simplify=False)

#simplify graph unless different osm ids
#edges are only merged when their 'osmid' matches; change the attribute list to change this behavior
G = ox.simplification.simplify_graph(G, edge_attrs_differ=['osmid'])

# Bug:
When converting to undirected, it doesn't retain the original direction of the OSM way which poses an issue later on in the elevation assignment step.

Proposed fix: if we had a list of the osm node ids, we could figure out the sequence used and know whether to reverse the link or not. Can we read the XML to do this?

Easier fix: It only matters for elevation, so maybe just try reversing the link order first

In [7]:
# Replace G with its undirected version. This rebinds G in place, so
# re-running the cell passes the already-converted graph back in.
G = ox.convert.to_undirected(G)

In [8]:
# Convert the graph into node and edge GeoDataFrames, then move each index
# into ordinary columns.
nodes, links = ox.convert.graph_to_gdfs(G)
links = links.reset_index()
nodes = nodes.reset_index()

Fix link direction: we want to make sure that the network links follow the same geometry as the original links. Use the start and end points to do this.

In [9]:
# Read the attribute table (geometry skipped) of the 'raw' layer from the
# GeoPackage of the configured year.
raw_gpkg = export_fp / f"osm_{config['geofabrik_year']}.gpkg"
raw = gpd.read_file(raw_gpkg, layer="raw", ignore_geometry=True)

In [10]:
# Attach the raw OSM attributes to the network links by osmid. Only 'osmid'
# and 'geometry' are kept from the network side, and the default inner join
# drops links whose osmid is not present in the raw layer.
# (pandas is already imported as pd in the notebook's first cell.)
links = pd.merge(links[['osmid','geometry']],raw,on="osmid")

Remove duplicated links using the assigned start and end nodes and linkid

In [11]:
# import sys
# sys.path.append(config["code_directory"])
# from network.src.network_filter import remove_directed_links

# links = remove_directed_links(links,"u","v","osmid")

In [12]:
# Write the network layers into the GeoPackage of the configured year.
network_gpkg = export_fp / f"osm_{config['geofabrik_year']}.gpkg"
links.to_file(network_gpkg, layer='edges')
nodes.to_file(network_gpkg, layer='nodes')


In [13]:
# #get the last two digits for year
# year = config['geofabrik_year'][-2::]
# extract_fp = list(Path(config['geofabrik_fp']).glob(f'*-{year}0101.osm.pbf'))[0]
# extract_fp
# fileyear = extract_fp.stem.split('-')[1][0:2]
# print(f"Processing 20{fileyear} data")
# osm = OSM(str(extract_fp), bounding_box=studyarea_geo)

# #OSMNX Simplification Next
# nodes, edges = osm.get_network(network_type='all',nodes=True)
# G = osm.to_graph(nodes,edges,graph_type='networkx')

# #simplify graph unless different osm ids
# #can change columns to change this behavior (i.e., )
# G = ox.simplification.simplify_graph(G, edge_attrs_differ=['osmid'])

# #remove directed links (do this seperately)
# #this doesn't seem to be working at the moment
# #G = ox.convert.to_undirected(G)

# #this will create an unsimplified graph
# nodes, links = ox.convert.graph_to_gdfs(G)
# del G

# links.reset_index(inplace=True)
# nodes.reset_index(drop=True,inplace=True)

In [14]:
#deprecated version that used pyrosm but was ditched because it edited the osm geometry
# extract_fps = list(Path(config['geofabrik_fp']).glob('*.osm.pbf'))
# extract_fps
# osm = OSM(str(extract_fps[-2]), bounding_box=studyarea_geo)

# #6 mins for current network
# #older versions take less time
# raw_edges = osm.get_network(network_type='all')

# #export raw ways
# raw_edges.to_file(export_fp/f"osm_2023.gpkg",layer="raw")
# this function appears to modify the osm geometry somewhat
# for extract_fp in extract_fps:
#     fileyear = extract_fp.stem.split('-')[1][0:2]
#     print(f"Processing 20{fileyear} data")
#     osm = OSM(str(extract_fp), bounding_box=studyarea_geo)
    
#     #6 mins for current network
#     #older versions take less time
#     raw_edges = osm.get_network(network_type='all')

#     #export raw ways
#     raw_edges.to_file(export_fp/f"osm_20{fileyear}.gpkg",layer="raw")
    
#     del raw_edges
#     del osm