# Download tracts' street networks

Uses the US Census Bureau's 2017 TIGER/Line shapefiles of census tracts.

In [1]:
import config
import geopandas as gpd
import json
import logging as lg
import networkx as nx
import os
import osmnx as ox
import pandas as pd
import time

# configure OSMnx for this notebook: turn on its cache (stored in the
# tracts cache folder from config) and log to both the console and a
# log file named 'download-tracts'
ox.config(cache_folder=config.tracts_cache_folder,
          use_cache=True,
          log_console=True,
          log_file=True,
          log_filename='download-tracts')

# record the OSMnx and NetworkX versions used for this run, one per line
print('\n'.join([ox.__version__, nx.__version__]))

0.8.1
2.1


In [2]:
# arguments passed to ox.graph_from_polygon for every tract
network_type = 'drive'
simplify = True
retain_all = True

# tiger tract shapefiles, one entry per state
input_folder = 'input_data/tracts'

# where to save graph shapefiles
output_folder_shapefile = config.tracts_shapefile_folder

# where to save graphml files
output_folder_graphml = config.tracts_graphml_folder

# where to save node/edge lists
output_folder_lists = config.tracts_lists_folder

## Helper functions

In [3]:
# lookup table keyed by state FIPS code; each entry's 'abbreviation' is used
# below to build the per-state output folder names
with open('input_data/states_by_fips.json') as fips_file:
    fips_to_state = json.load(fips_file)

In [4]:
def load_state_shapefile(state_shapefile, input_folder=input_folder):
    """Load one state's tract shapefile and name its output folder.

    Reads ``input_folder/state_shapefile``, removes the Aleutian Islands
    tract, reprojects the tracts to epsg:4326 and sorts them by GEOID.

    Returns a ``(tracts, state_folder)`` tuple, where ``state_folder`` is
    ``'<state fips>_<state abbreviation>'`` (the abbreviation comes from
    ``fips_to_state``).
    """
    shapefile_path = '{}/{}'.format(input_folder, state_shapefile)
    tracts = gpd.read_file(shapefile_path)

    # the output folder name is derived from the first STATEFP value in the file
    state_fips = tracts['STATEFP'].iloc[0]
    abbreviation = fips_to_state[state_fips]['abbreviation']
    state_folder = '{}_{}'.format(state_fips, abbreviation)

    # the aleutian islands tract crosses the 180th meridian, so leave it out
    not_aleutians = tracts['GEOID'] != '02016000100'

    # tigerline data is epsg:4269 but osm uses epsg:4326: reproject, then
    # order the tracts by ascending GEOID
    tracts = (tracts[not_aleutians]
              .to_crs({'init':'epsg:4326'})
              .sort_values(by='GEOID', ascending=True))

    print('{} loaded {} tracts'.format(state_folder, len(tracts)))
    return tracts, state_folder

In [5]:
def make_queries(tracts):
    """Build one download query per tract.

    Parameters
    ----------
    tracts : DataFrame or GeoDataFrame with 'GEOID' and 'geometry' columns.
        A 'tract_folder' column (a copy of 'GEOID') is added to it in place,
        as this function has always done.

    Returns
    -------
    list of dict, in row order, each with keys:
        'tract_folder' : the tract's GEOID with any '/' replaced by '_'
        'polygon'      : the tract's geometry
    An empty input yields an empty list.
    """
    tracts['tract_folder'] = tracts['GEOID']

    # pair the two columns directly rather than using a row-wise apply: it is
    # faster, and an empty frame produces [] instead of failing on .tolist()
    return [{'tract_folder': folder.replace('/', '_'), 'polygon': geometry}
            for folder, geometry in zip(tracts['tract_folder'], tracts['geometry'])]

In [6]:
def download_save_graph(query, state_folder):
    """Download one tract's street network and save it in three formats.

    ``query`` is a dict with 'tract_folder' and 'polygon' keys (as built by
    make_queries). Nothing is done if the tract's graphml file already
    exists under the state's graphml output folder. Otherwise the graph is
    fetched with ox.graph_from_polygon and written as node/edge csv lists,
    a graphml file and a shapefile. Returns None.
    """
    tract_folder = query['tract_folder']

    # per-state output locations (the lists get their own per-tract folder)
    shapefile_dir = '{}/{}'.format(output_folder_shapefile, state_folder)
    graphml_dir = '{}/{}'.format(output_folder_graphml, state_folder)
    lists_dir = '{}/{}/{}'.format(output_folder_lists, state_folder, tract_folder)

    # the graphml file marks a tract as already done: skip it
    if os.path.exists('{}/{}.graphml'.format(graphml_dir, tract_folder)):
        return

    # fix trivially invalid geometries (nested shells, ring self-intersections, etc)
    boundary = query['polygon'].buffer(0)

    G = ox.graph_from_polygon(polygon=boundary,
                              network_type=network_type,
                              name='{}_{}'.format(state_folder, tract_folder),
                              simplify=simplify,
                              retain_all=retain_all)

    # write the csv lists first, then the graphml, then the shapefile
    save_node_edge_lists(G, lists_dir)
    ox.save_graphml(G, folder=graphml_dir, filename='{}.graphml'.format(tract_folder))
    ox.save_graph_shapefile(G, folder=shapefile_dir, filename=tract_folder)

In [7]:
def save_node_edge_lists(G, lists_folder):
    """Write graph G's nodes and edges to csv files in ``lists_folder``.

    Creates ``lists_folder`` if it does not exist, then writes
    'node_list.csv' and 'edge_list.csv' (utf-8, no index column). Each file
    holds a fixed set of columns in a fixed order; edge lengths are rounded
    to 3 decimal places.
    """
    node_columns = ['osmid', 'x', 'y', 'ref', 'highway']
    edge_columns = ['u', 'v', 'key', 'oneway', 'highway', 'name', 'length',
                    'lanes', 'width', 'est_width', 'maxspeed', 'access', 'service',
                    'bridge', 'tunnel', 'area', 'junction', 'osmid', 'ref']

    gdf_nodes, gdf_edges = ox.graph_to_gdfs(G, node_geometry=False, fill_edge_geometry=False)

    # edges: round the lengths, discard the geometry, fix the column set/order
    gdf_edges['length'] = gdf_edges['length'].round(3)
    edge_table = gdf_edges.drop(columns=['geometry'])
    edge_table = edge_table.reindex(columns=edge_columns)

    # nodes: fix the column set/order
    node_table = gdf_nodes.reindex(columns=node_columns)

    if not os.path.exists(lists_folder):
        os.makedirs(lists_folder)

    node_path = '{}/node_list.csv'.format(lists_folder)
    edge_path = '{}/edge_list.csv'.format(lists_folder)
    node_table.to_csv(node_path, index=False, encoding='utf-8')
    edge_table.to_csv(edge_path, index=False, encoding='utf-8')

## Run it

In [8]:
# how many tracts are there?
# read every shapefile in the input folder and combine them with a single
# concat: appending inside the loop re-copies the accumulated frame on every
# pass, and DataFrame.append is no longer available in pandas >= 2.0
gdf = pd.concat([gpd.read_file('{}/{}'.format(input_folder, tract))
                 for tract in os.listdir(input_folder)])

# keep every row for state FIPS 15 (HI), and elsewhere only the rows whose
# NAMELSAD does not contain 'CDP'
mask = (gdf['STATEFP'] == '15') | (~gdf['NAMELSAD'].str.contains('CDP'))
tracts = gdf[mask]
len(tracts)

74133

In [9]:
all_start_time = time.time()

# for each state shapefile folder in the folder of state shapefile folders
for state_shapefile in os.listdir(input_folder):

    # characters 8-9 of the entry name are looked up as a state FIPS code;
    # entries that do not match a known state are reported and skipped
    if state_shapefile[8:10] not in fips_to_state:
        print('skipping {}'.format(state_shapefile))
        continue

    start_time = time.time()

    # load shapefile to get the tract boundaries in this state
    tracts, state_folder = load_state_shapefile(state_shapefile)
    queries = make_queries(tracts)

    # count the tracts handled without an exception; a failure is logged
    # and the loop moves on to the next tract
    count = 0
    for query in queries:
        try:
            download_save_graph(query, state_folder)
        except Exception as e:
            ox.log('"{}" failed: {}'.format(query['tract_folder'], e), level=lg.ERROR)
        else:
            count += 1

    print('Finished making {} {} graphs in {:,.1f} seconds\n'.format(count, state_folder, time.time()-start_time))

print('All finished in {:,.1f} seconds'.format(time.time()-all_start_time))

01_AL loaded 1181 tracts
Finished making 1179 01_AL graphs in 1.8 seconds

02_AK loaded 166 tracts
Finished making 166 02_AK graphs in 0.4 seconds

04_AZ loaded 1526 tracts
Finished making 1526 04_AZ graphs in 0.8 seconds

05_AR loaded 686 tracts
Finished making 686 05_AR graphs in 0.8 seconds

06_CA loaded 8057 tracts
Finished making 8026 06_CA graphs in 46.1 seconds

08_CO loaded 1249 tracts
Finished making 1246 08_CO graphs in 1.2 seconds

09_CT loaded 833 tracts
Finished making 827 09_CT graphs in 1.9 seconds

10_DE loaded 218 tracts
Finished making 215 10_DE graphs in 1.7 seconds

11_DC loaded 179 tracts
Finished making 179 11_DC graphs in 0.1 seconds

12_FL loaded 4245 tracts
Finished making 4187 12_FL graphs in 17.1 seconds

13_GA loaded 1969 tracts
Finished making 1963 13_GA graphs in 1.8 seconds

15_HI loaded 351 tracts
Finished making 322 15_HI graphs in 67.9 seconds

16_ID loaded 298 tracts
Finished making 298 16_ID graphs in 0.6 seconds

17_IL loaded 3123 tracts
Finished ma