# Download cities' street networks

Uses the US Census Bureau's 2017 TIGER/Line shapefiles of US places (i.e., cities and towns).

In [1]:
import config
import geopandas as gpd
import json
import logging as lg
import networkx as nx
import os
import osmnx as ox
import pandas as pd
import time

# turn on osmnx caching (stored in the project's cache folder) and log to
# both a file named 'download-cities' and the console
ox.config(
    use_cache=True,
    cache_folder=config.cities_cache_folder,
    log_file=True,
    log_console=True,
    log_filename='download-cities'
)

# show which library versions this run used
print(ox.__version__)
print(nx.__version__)

0.8.1
2.1


In [2]:
# settings passed to ox.graph_from_polygon for every city
network_type = 'drive'
retain_all = True
simplify = True

# tiger place shapefiles
input_folder = 'input_data/places'

# where to save graph shapefiles
output_folder_shapefile = config.cities_shapefile_folder

# where to save graphml files
output_folder_graphml = config.cities_graphml_folder

# where to save node/edge lists
output_folder_lists = config.cities_lists_folder

## Helper functions

In [3]:
# lookup keyed by state FIPS code; each value is used below for its
# 'abbreviation' entry when naming a state's output folder
with open('input_data/states_by_fips.json') as fips_file:
    fips_to_state = json.loads(fips_file.read())

In [4]:
def load_state_shapefile(state_shapefile, input_folder=input_folder):
    """
    Load one state's place shapefile and return its cities plus the name of
    the state's output folder.

    Parameters
    ----------
    state_shapefile : name of the state's shapefile entry inside input_folder
    input_folder : folder that holds the tiger place shapefiles

    Returns
    -------
    (cities, state_folder) : cities is the GeoDataFrame of retained places,
        projected to epsg:4326 and sorted by GEOID; state_folder is a string
        of the form '<state fips>_<state abbreviation>'
    """
    places = gpd.read_file('{}/{}'.format(input_folder, state_shapefile))

    # the output folder name is built from the first state fips code in the file
    state_fips = places['STATEFP'].unique()[0]
    state_abbreviation = fips_to_state[state_fips]['abbreviation']
    state_folder = '{}_{}'.format(state_fips, state_abbreviation)

    # drop CDPs everywhere except hawaii (fips 15), where every place is a CDP
    if state_fips != '15':
        places = places[~places['NAMELSAD'].str.contains('CDP')]

    # tigerline data is epsg:4269, but osm uses epsg:4326, so project it
    cities = places.to_crs({'init':'epsg:4326'}).sort_values(by='GEOID', ascending=True)

    print('{} loaded {} cities'.format(state_folder, len(cities)))
    return cities, state_folder

In [5]:
def make_queries(cities):
    """
    Build one download query per city.

    Adds a 'city_folder' column to `cities` in place, holding
    '<GEOID>_<NAME>' with spaces replaced by underscores.

    Parameters
    ----------
    cities : DataFrame with 'GEOID', 'NAME' and 'geometry' columns

    Returns
    -------
    list of dicts, one per row in row order, each with keys 'city_folder'
    (the folder name with any '/' also replaced by '_') and 'polygon'
    (the row's geometry). An empty frame yields an empty list.
    """
    # iterate the columns directly instead of a row-wise DataFrame.apply: it
    # skips the per-row overhead and still produces a plain (empty) list when
    # there are no rows, where apply would not return a Series
    folders = ['{}_{}'.format(geoid, name).replace(' ', '_')
               for geoid, name in zip(cities['GEOID'], cities['NAME'])]
    cities['city_folder'] = folders

    # '/' is only replaced in the query (not in the column), so the name is
    # safe to use as a single path component when saving
    queries = [{'city_folder':folder.replace('/', '_'), 'polygon':polygon}
               for folder, polygon in zip(folders, cities['geometry'])]

    return queries

In [6]:
def download_save_graph(query, state_folder):
    """
    Download one city's street network and save it as node/edge lists, a
    shapefile and a graphml file, unless its graphml file already exists.

    Parameters
    ----------
    query : dict with 'city_folder' (output name for the city) and 'polygon'
        (the city's boundary geometry), as built by make_queries
    state_folder : name of the state's sub-folder inside each output folder

    Returns
    -------
    None
    """
    city_folder = query['city_folder']
    output_path_shapefile = '{}/{}'.format(output_folder_shapefile, state_folder)
    output_path_graphml = '{}/{}'.format(output_folder_graphml, state_folder)
    output_path_lists = '{}/{}/{}'.format(output_folder_lists, state_folder, city_folder)

    # the graphml file marks a city as done: nothing to do if it is already there
    if os.path.exists('{}/{}.graphml'.format(output_path_graphml, city_folder)):
        return

    graph_name = '{}_{}'.format(state_folder, city_folder)

    # fix trivially invalid geometries (nested shells, ring self-intersections, etc)
    polygon = query['polygon'].buffer(0)

    G = ox.graph_from_polygon(polygon=polygon,
                              network_type=network_type,
                              name=graph_name,
                              simplify=simplify,
                              retain_all=retain_all)

    # save the graphml last: because its existence is what skips a city on a
    # re-run, writing it before the other outputs would leave a city with a
    # missing shapefile permanently skipped if the shapefile save failed
    save_node_edge_lists(G, output_path_lists)
    ox.save_graph_shapefile(G, folder=output_path_shapefile, filename=city_folder)
    ox.save_graphml(G, folder=output_path_graphml, filename='{}.graphml'.format(city_folder))

In [7]:
def save_node_edge_lists(G, lists_folder):
    """
    Write the graph's nodes and edges as node_list.csv and edge_list.csv
    inside lists_folder, creating the folder if it does not exist.

    Parameters
    ----------
    G : graph to export
    lists_folder : folder to write the two csv files into

    Returns
    -------
    None
    """
    # the csv files always carry exactly these columns, in this order
    node_columns = ['osmid', 'x', 'y', 'ref', 'highway']
    edge_columns = ['u', 'v', 'key', 'oneway', 'highway', 'name', 'length',
                    'lanes', 'width', 'est_width', 'maxspeed', 'access', 'service',
                    'bridge', 'tunnel', 'area', 'junction', 'osmid', 'ref']

    # geometry options are switched off because only attribute columns are written
    gdf_nodes, gdf_edges = ox.graph_to_gdfs(G, node_geometry=False, fill_edge_geometry=False)

    gdf_edges['length'] = gdf_edges['length'].round(3)
    edge_table = gdf_edges.drop(columns=['geometry'])
    edge_table = edge_table.reindex(columns=edge_columns)
    node_table = gdf_nodes.reindex(columns=node_columns)

    if not os.path.exists(lists_folder):
        os.makedirs(lists_folder)

    node_path = '{}/node_list.csv'.format(lists_folder)
    edge_path = '{}/edge_list.csv'.format(lists_folder)
    node_table.to_csv(node_path, index=False, encoding='utf-8')
    edge_table.to_csv(edge_path, index=False, encoding='utf-8')

## Run it

In [8]:
# how many cities/towns are there?
# read every state's places, then concatenate once: appending inside the loop
# re-copies the accumulated frame on every iteration
gdf = pd.concat([gpd.read_file('{}/{}'.format(input_folder, place))
                 for place in os.listdir(input_folder)])

# same filter as load_state_shapefile: keep all of hawaii's (fips 15) places,
# and only the non-CDP places everywhere else
mask = (gdf['STATEFP'] == '15') | (~gdf['NAMELSAD'].str.contains('CDP'))
cities = gdf[mask]
len(cities)

19678

In [9]:
all_start_time = time.time()

# every entry in the input folder is one state's place shapefile
for state_shapefile in os.listdir(input_folder):

    start_time = time.time()

    # city boundaries for this state, turned into one download query per city
    cities, state_folder = load_state_shapefile(state_shapefile)
    queries = make_queries(cities)

    # count tallies the queries that finished without raising, which includes
    # cities skipped because their graphml file was already saved
    count = 0
    for query in queries:
        try:
            download_save_graph(query, state_folder)
        except Exception as e:
            # log the failure and carry on with the next city
            ox.log('"{}" failed: {}'.format(query['city_folder'], e), level=lg.ERROR)
        else:
            count += 1

    elapsed = time.time()-start_time
    print('Finished making {} {} graphs in {:,.1f} seconds\n'.format(count, state_folder, elapsed))

print('All finished in {:,.1f} seconds'.format(time.time()-all_start_time))

01_AL loaded 461 cities
Finished making 461 01_AL graphs in 1.4 seconds

02_AK loaded 148 cities
Finished making 143 02_AK graphs in 0.7 seconds

04_AZ loaded 91 cities
Finished making 91 04_AZ graphs in 0.5 seconds

05_AR loaded 501 cities
Finished making 501 05_AR graphs in 0.8 seconds

06_CA loaded 482 cities
Finished making 482 06_CA graphs in 1.3 seconds

08_CO loaded 271 cities
Finished making 271 08_CO graphs in 0.6 seconds

09_CT loaded 30 cities
Finished making 30 09_CT graphs in 0.1 seconds

10_DE loaded 57 cities
Finished making 57 10_DE graphs in 0.1 seconds

11_DC loaded 1 cities
Finished making 1 11_DC graphs in 0.0 seconds

12_FL loaded 412 cities
Finished making 410 12_FL graphs in 1.7 seconds

13_GA loaded 538 cities
Finished making 538 13_GA graphs in 1.0 seconds

15_HI loaded 151 cities
Finished making 146 15_HI graphs in 0.8 seconds

16_ID loaded 201 cities
Finished making 201 16_ID graphs in 0.4 seconds

17_IL loaded 1298 cities
Finished making 1297 17_IL graphs in

In [10]:
# finished in 72,000 seconds last time