## Download and store OSM network data.
### Packages


In [27]:
# Import standard packages
import os
import sys
import geopandas as gpd
from shapely.geometry import box
import osmnx as ox
from datetime import date

### Directory & Parameters

In [7]:
# Directory
# Build the relative path from components so it resolves to the sibling
# "data" folder on every platform; a literal backslash separator is only
# understood as a path separator on Windows.
data_dir = os.path.normpath(os.path.join(os.pardir, 'data'))
output_dir = os.path.join(data_dir, 'osm_graph')
os.makedirs(output_dir, exist_ok=True)

# Parameters
CRS = 'EPSG:4326'
CRS_METER = 'EPSG:3857'

# GIS route line geometries
gis_lines_geojson = os.path.join(data_dir,
                                 'gis_bsrt_line',
                                 'gis_bsrt_line.geojson')

### Read data

This dataset should contain the geometries for the complete set of routes for which we want to extract the OSM network.

In [10]:
def read_all_gis_route_lines(gis_lines_geojson,
                             crs):
    """
    Get all GIS route line geometries in the data.

    :param gis_lines_geojson: Path of the GIS bus route geojson file including file name and extension.
    :param crs: Coordinate Reference System in "EPSG:XXXX" format.
    :return: A GeoDataFrame with line geometries for all available route patterns in specified CRS,
        ordered by "start_date" and, within the same start date, by "shape_id".
    """

    # Import WMATA GIS bus route line geometries
    gis_routes = gpd.read_file(gis_lines_geojson)
    gis_routes = gis_routes.to_crs(crs)
    gis_routes.columns = [c.lower() if c != 'geometry' else c for c in gis_routes.columns]

    # Create "shape_id" column: route name followed by the pattern suffix,
    # where the suffix is the last "_"-separated token of "gis_routec"
    gis_routes['pattern_id'] = gis_routes['gis_routec'].str.split('_').str[-1]
    gis_routes['shape_id'] = gis_routes['route'] + gis_routes['pattern_id']

    # Create start and end date as YYYYMMDD integers for schedule based selection
    # (keeps the first 10 characters and drops the dashes; assumes the source
    # values start with an ISO "YYYY-MM-DD" date -- TODO confirm)
    for target_col, source_col in (('start_date', 'str_date'), ('end_date', 'end_date')):
        gis_routes[target_col] = (gis_routes[source_col]
                                  .astype(str)
                                  .str[:10]
                                  .str.replace('-', '', regex=False)
                                  .astype(int))

    # A single multi-key sort keeps the shape_id ordering inside each start date;
    # two consecutive single-key sorts do not guarantee that, because the
    # default sort algorithm is not stable.
    return gis_routes.sort_values(by=['start_date', 'shape_id'])

In [11]:
# Load the line geometries of every route pattern in the working CRS
gis_routes = read_all_gis_route_lines(gis_lines_geojson=gis_lines_geojson,
                                      crs=CRS)

### Create unified polygon
- Use a GeoDataFrame to create the boundary of a unified polygon. 
- Define a buffer threshold to include additional area around the actual boundary to capture nearly connected nodes in the street network. 

In [12]:
def get_unified_polygon(gdf,
                        crs,
                        buffer_threshold):
    """
    Replace every geometry by its bounding box, union all boxes together
    and buffer the dissolved boundaries.
    Note: The buffer radius must be in the units of the coordinate reference system.

    The input GeoDataFrame is not modified.

    :param gdf: A GeoDataFrame.
    :param crs: Coordinate Reference System in "EPSG:XXXX" format.
    :param buffer_threshold: Buffer distance in the unit of CRS.
    :return: A unified polygon geometry created from the given GeoDataFrame.
    """

    # Use the active geometry column instead of a column literally named
    # "geometry", so the boxes that are built are the ones that get unioned.
    geometries = gdf.geometry.to_crs(crs)
    boxes = gpd.GeoSeries([box(*geom.bounds) for geom in geometries], crs=crs)

    # GeoPandas 1.0 deprecates ``unary_union`` in favour of ``union_all()``;
    # fall back to the old attribute on releases that predate the new method.
    if hasattr(boxes, 'union_all'):
        unified = boxes.union_all()
    else:
        unified = boxes.unary_union
    return unified.buffer(buffer_threshold)


In [15]:
# The buffer distance is expressed in the units of the CRS passed in
polygon_geometry = get_unified_polygon(gdf=gis_routes,
                                       crs=CRS,
                                       buffer_threshold=0.005)

### Download OSM data
- Run this function to download and create a graph object for the given boundary. 
- The function downloads the filtered network excluding some link types that are not valid for transit route shape matching.
- If some parts of the data or the entire data are available in the cache from a previous query, the function reads the data from that cache folder bypassing the download step.
- By default, this function prints logs in the console. However, this can be controlled using the `log_console` argument.
- Using the cache, it takes about 7 minutes to create and simplify the graph. Downloading takes additional time, which may vary because the API requests are sent at random intervals to avoid overloading the server.

In [21]:
def download_osm_for_transit(polygon_geometry, log_console=True):
    """
    Download the OSM street network for given polygon
    and creates a Networkx graph object.

    Caching of OSM responses is always switched on before the query is sent.

    :param polygon_geometry: A Shapely polygon object that defines the area for map extraction.
    :param log_console: Boolean indicator to specify if OSM logs should be printed on the console.
    :return: A Networkx graph object with street network.
    """

    # Overpass way filter: keep "highway" ways, dropping areas, link types
    # that are not valid for transit route shape matching, ways closed to
    # motor vehicles and emergency-access service roads.
    custom_filter = ('["highway"]["area"!~"yes"]["highway"!~"cycleway|footway|path|pedestrian|steps|track|'
                     'corridor|elevator|escalator|proposed|construction|bridleway|abandoned|platform|raceway"]'
                     '["motor_vehicle"!~"no"]["motorcar"!~"no"]'
                     '["service"!~"emergency_access"]')

    # Configure OSMnx through its settings module: ``ox.config`` is deprecated
    # in OSMnx 1.x and removed in 2.0.
    ox.settings.use_cache = True
    ox.settings.log_console = log_console

    # NOTE(review): OSMnx documents ``network_type`` as applying only when
    # ``custom_filter`` is None, so the filter above presumably decides what
    # is downloaded -- confirm against the installed OSMnx version.
    return ox.graph_from_polygon(polygon_geometry,
                                 network_type='drive_service',
                                 custom_filter=custom_filter)

In [24]:
# Build the street network graph for the unified polygon, with console logging on
G = download_osm_for_transit(polygon_geometry=polygon_geometry,
                             log_console=True)

### Export OSM Graph and Shapefiles
- Export the graph object of OSM street network in `.graphml` format.

In [28]:
def get_today():
    """
    Build a compact date stamp for today.
    :return: Today's date as a string in YYYYMMDD format.
    """
    return f'{date.today():%Y%m%d}'

def export_osm_graph_shapefiles(graph_obj,
                                output_path):
    """
    Save the OSM street network graph as a date-stamped GraphML file.

    The file is named "<YYYYMMDD>_osm_graph.graphml" and written into the output directory.
    :param graph_obj: A graph object of OSM street network.
    :param output_path: Output directory.
    :return: None
    """
    graphml_name = f'{get_today()}_osm_graph.graphml'
    graphml_path = os.path.join(output_path, graphml_name)
    ox.save_graphml(graph_obj, graphml_path)

In [29]:
# Write the downloaded graph into the OSM graph output folder
export_osm_graph_shapefiles(G, output_dir)