## Download neighborhoods' street networks

This notebook uses the Zillow neighborhood boundary shapefiles (March 2018 version), one per state.

In [1]:
import config
import geopandas as gpd
import json
import logging as lg
import networkx as nx
import os
import osmnx as ox
import pandas as pd
import time

# OSMnx settings: enable the cache (stored in the project's cache folder) and
# send log messages to both a log file and the console
osmnx_settings = {'use_cache': True,
                  'log_file': True,
                  'log_console': True,
                  'log_filename': 'download-neighborhoods',
                  'cache_folder': config.neighborhoods_cache_folder}
ox.config(**osmnx_settings)

# record the library versions this notebook ran with
for library in (ox, nx):
    print(library.__version__)

0.7.4
2.1


In [2]:
# input: folder holding the neighborhood shapefiles (one entry per state)
input_folder = 'input_data/neighborhoods'

# outputs: where to save each graph as graphml and as a shapefile
output_folder_graphml = config.neighborhoods_graphml_folder
output_folder_shapefile = config.neighborhoods_shapefile_folder

# arguments forwarded to ox.graph_from_polygon for every neighborhood
network_type = 'drive'
simplify = True
retain_all = True

In [3]:
# read the lookup keyed by state FIPS code from its JSON file
with open('input_data/states_by_fips.json', 'r') as fips_file:
    fips_to_state = json.loads(fips_file.read())

## Load Zillow neighborhood shapefiles into a single GeoDataFrame

In [4]:
# Read every state's neighborhood shapefile, then combine them in a single step.
# Calling DataFrame.append inside the loop re-copies all accumulated rows on each
# iteration, and that method was removed in pandas 2.0; one pd.concat avoids both.
# sorted() makes the row order independent of the file system's listing order.
state_nhoods_list = []
for state_shapefile_folder in sorted(os.listdir(input_folder)):
    shapefile_path = '{}/{}'.format(input_folder, state_shapefile_folder)
    state_nhoods_list.append(gpd.read_file(shapefile_path))

# ignore_index=True gives the combined GeoDataFrame a fresh 0..n-1 index
nhoods = pd.concat(state_nhoods_list, ignore_index=True)

# lowercase the column names, rename 'name' to 'nhood', drop 'county' and 'regionid'
nhoods = nhoods.rename(columns=str.lower).rename(columns={'name':'nhood'}).drop(columns=['county', 'regionid'])

In [5]:
# reproject the neighborhoods from their original CRS to EPSG:4326 so they match OSM
print(nhoods.crs)
crs_osm = {'init':'epsg:4326'}
nhoods = nhoods.to_crs(crs_osm)
nhoods.crs

{'init': 'epsg:4269'}


{'init': 'epsg:4326'}

In [6]:
# get the (gross) area of each neighborhood, in square meters
def get_area(geometry, crs=None):
    """
    Return the area of a geometry after projecting it with ox.project_geometry.

    Parameters
    ----------
    geometry : shapely geometry
        the shape to measure, with coordinates expressed in `crs`
    crs : dict, optional
        the CRS of `geometry`; defaults to {'init':'epsg:4326'}, the CRS the
        neighborhoods GeoDataFrame is projected to in this notebook

    Returns
    -------
    float
        area of the projected geometry (square meters when the projection
        is in meters, as with the UTM projection OSMnx picks by default)
    """
    if crs is None:
        crs = {'init':'epsg:4326'}

    # buffer(0) repairs trivially invalid geometries before projecting;
    # the projected CRS returned alongside the geometry is not needed here
    geometry_utm, _ = ox.project_geometry(geometry=geometry.buffer(0), crs=crs)
    return geometry_utm.area

# astype(int) truncates each area to a whole number of square meters
nhoods['nhood_area_m'] = nhoods['geometry'].map(get_area).astype(int)

In [7]:
# how many neighborhoods were loaded (row count of the GeoDataFrame)
nhoods.shape[0]

17037

In [8]:
# preview the first five neighborhoods
nhoods.head(n=5)

Unnamed: 0,state,city,nhood,geometry,nhood_area_m
0,AK,Anchorage,Northeast,POLYGON ((-149.7038912589999 61.23497280100009...,12729723
1,AK,Anchorage,Rabbit Creek,POLYGON ((-149.7509806229999 61.10139700300005...,29644946
2,AK,Anchorage,Russian Jack Park,POLYGON ((-149.7783763009999 61.19888599700005...,4848223
3,AK,Anchorage,Bayshore-Klatt,POLYGON ((-149.9080830819999 61.13763518400004...,17899284
4,AK,Anchorage,Fire Island,POLYGON ((-150.2027799429999 61.13423928100008...,19028239


## Get networks

In [9]:
# where to save networks: build the folder and file name parts for each neighborhood

# invert the FIPS lookup so that a state abbreviation maps to its FIPS code
state_to_fips = dict((data['abbreviation'], fips) for fips, data in fips_to_state.items())

def make_state_folder(state):
    """Return the state's folder name in the form '<fips>_<abbreviation>'."""
    return '{}_{}'.format(state_to_fips[state], state)

nhoods['state_folder'] = nhoods['state'].map(make_state_folder)
nhoods['city_folder'] = nhoods['city'].str.replace(' ', '_')

# the neighborhood name is later used as a file name, so replace spaces,
# slashes, and colons with underscores
for char in (' ', '/', ':'):
    nhoods['nhood'] = nhoods['nhood'].str.replace(char, '_')

In [10]:
# build one query dict per neighborhood, carrying just the fields the download loop needs
query_fields = ['nhood', 'city_folder', 'state_folder', 'geometry', 'nhood_area_m']
queries = nhoods.apply(lambda row: {field: row[field] for field in query_fields}, axis=1).tolist()
queries[0]

{'nhood': 'Northeast',
 'city_folder': 'Anchorage',
 'state_folder': '02_AK',
 'geometry': <shapely.geometry.polygon.Polygon at 0x171543a3630>,
 'nhood_area_m': 12729723}

In [11]:
start_time = time.time()
for query in queries:

    try:
        state_folder, city_folder = query['state_folder'], query['city_folder']

        # output locations follow a root/state/city hierarchy, one file per neighborhood
        graphml_folder = '{}/{}/{}'.format(output_folder_graphml, state_folder, city_folder)
        shapefile_folder = '{}/{}/{}'.format(output_folder_shapefile, state_folder, city_folder)
        graphml_filename = '{}.graphml'.format(query['nhood'])

        # nothing to do if this neighborhood's graphml file was already saved
        if os.path.exists('{}/{}'.format(graphml_folder, graphml_filename)):
            continue

        graph_name = '{}-{}-{}'.format(state_folder, city_folder, query['nhood'])

        # buffer(0) fixes trivially invalid geometries (nested shells, ring self-intersections, etc)
        geometry = query['geometry'].buffer(0)

        # download the street network that falls within the neighborhood polygon
        G = ox.graph_from_polygon(polygon=geometry,
                                  network_type=network_type,
                                  name=graph_name,
                                  retain_all=retain_all,
                                  simplify=simplify)

        # carry the neighborhood's area along as a graph-level attribute
        G.graph['nhood_area_m'] = query['nhood_area_m']

        # save to disk, as both a shapefile and a graphml file
        ox.save_graph_shapefile(G, folder=shapefile_folder, filename=query['nhood'])
        ox.save_graphml(G, folder=graphml_folder, filename=graphml_filename)

    except Exception as e:
        # best effort: log the failure and move on to the next neighborhood
        ox.log('"{}-{}-{}" failed: {}'.format(query['state_folder'], query['city_folder'], query['nhood'], e), level=lg.ERROR)

print('Finished making graphs in {:,.2f} seconds'.format(time.time()-start_time))

TopologyException: Input geom 0 is invalid: Self-intersection at or near point -116.23352029549683 43.608904490018531 at -116.23352029549683 43.608904490018531


Finished making graphs in 66.01 seconds
