In [1]:
import osmnx as ox, pandas as pd, networkx as nx, time, geopandas as gpd, os, json
%matplotlib inline
# Configure OSMnx: turn on its cache, and log to both a file and the console.
# NOTE(review): data_folder and cache_folder are hard-coded absolute paths on a G: drive;
# they must be changed before this notebook can run on another machine.
ox.config(use_cache=True, log_file=True, log_console=True, log_filename='get_neighborhood_graphs',
          data_folder='G:/Geoff/osmnx/neighborhoods', cache_folder='G:/Geoff/osmnx/cache/neighborhoods')

In [2]:
# settings for the graph downloads performed in the final cell
network_type = 'drive_service' #passed to ox.graph_from_polygon as network_type
retain_all = True #passed to ox.graph_from_polygon as retain_all
buffer = False #NOTE(review): not referenced by any cell visible in this notebook -- confirm it is still needed

In [3]:
# input path is relative to the working directory; output path is an absolute path on the G: drive
input_folder = 'input_data/neighborhoods' #neighborhood shapefiles
output_folder = 'G:/Geoff/osmnx/neighborhoods' #where to save graph shapefiles and graphml

## Load census population data

In [4]:
# read the JSON lookup keyed by state FIPS code
# (later cells read the 'abbreviation' entry of each value)
with open('input_data/states_by_fips.json') as fips_file:
    fips_to_state = json.load(fips_file)

In [5]:
# load 2010 decennial census population data by place (CDPs, towns, cities)
# the first data row is dropped before the columns are renamed
# (presumably a second, descriptive header row in the census export -- TODO confirm)
pops = pd.read_csv('input_data/DEC_10_SF1_P1.csv', encoding='ISO-8859-1').drop(0).reset_index(drop=True)
pops.columns = ['geoid_full', 'geoid', 'place_name', 'pop10']

# strip the place-type suffix from each place name.
# regex=True is passed explicitly: newer pandas treats the pattern as a literal string by
# default, which would silently leave every name unchanged and break the later merge on city.
# NOTE(review): the unescaped "(balance)" is a regex group, so it matches " balance" and not
# " (balance)". It is deliberately left untouched here because the city rename mapping applied
# afterwards keys on names that still contain "(balance)".
place_type_pattern = r' CDP| city| town| village| borough| (balance)| municipality'
name = pops['place_name'].str.replace(place_type_pattern, '', regex=True)

# the city is the part of the name before the first comma
# (the state-name part after the comma was previously stored in a column that was discarded
# by the column selection below, so it is no longer computed)
pops['city'] = name.map(lambda x: x.split(',')[0])

# the first two characters of the geoid are the state FIPS code
pops['state'] = pops['geoid'].map(lambda x: fips_to_state[x[0:2]]['abbreviation'])
pops['pop10'] = pops['pop10'].astype(int)
pops = pops[['geoid', 'state', 'city', 'pop10']]

In [6]:
# rename cities to match zillow data
# keys are census city names as they stand in pops['city']; values are the names to use instead
pops_city_replace = {'Nashville-Davidson metropolitan government (balance)':'Nashville',
                     'St. Louis':'Saint Louis',
                     'Louisville/Jefferson County metro government (balance)':'Louisville',
                     'Augusta-Richmond County consolidated government (balance)':'Augusta',
                     'Lexington-Fayette urban county':'Lexington',
                     'Urban Honolulu':'Honolulu',
                     'Boise City':'Boise',
                     'St. Paul':'Saint Paul',
                     'St. Petersburg':'Saint Petersburg'}
# whole-value replacement: only cities exactly equal to a key are renamed
pops['city'] = pops['city'].replace(pops_city_replace)

In [7]:
# sanity check: number of census places and the first few cleaned rows
print(len(pops))
pops.head()

29261


Unnamed: 0,geoid,state,city,pop10
0,100100,AL,Abanda,192
1,100124,AL,Abbeville,2688
2,100460,AL,Adamsville,4522
3,100484,AL,Addison,758
4,100676,AL,Akron,356


## Load Zillow neighborhood shapefiles into a single GeoDataFrame

In [8]:
# Read every entry in input_folder and stack the results into one GeoDataFrame.
# The per-state frames are collected in a list and concatenated once: appending inside the
# loop re-copied the accumulated frame on every iteration, and DataFrame.append is no longer
# available in pandas 2.x.
state_frames = []
for state_shapefile_folder in sorted(os.listdir(input_folder)): #sorted so the row order is reproducible
    shapefile_path = '{}/{}'.format(input_folder, state_shapefile_folder)
    state_frames.append(gpd.read_file(shapefile_path))

if state_frames:
    # re-wrap the concatenated frame so the result is a GeoDataFrame carrying the CRS of the first file read
    nhoods = gpd.GeoDataFrame(pd.concat(state_frames, ignore_index=True), crs=state_frames[0].crs)
else:
    # nothing to read: fall back to an empty frame rather than failing inside concat
    nhoods = gpd.GeoDataFrame()

# lower-case the column names so later cells can refer to 'city', 'state', 'name', ...
nhoods = nhoods.rename(columns=str.lower)

In [9]:
# project it from original CRS to 4326 for OSM
# the CRS is shown before and after the reprojection as a visual check
# NOTE(review): the {'init': ...} dict form is reported as deprecated by recent pyproj/geopandas
# releases -- confirm the installed versions still accept it
print(nhoods.crs)
nhoods = nhoods.to_crs({'init':'epsg:4326'})
nhoods.crs

{'init': 'epsg:4269'}


{'init': 'epsg:4326'}

In [10]:
# rename cities to match census data
# every per-borough label used in the neighborhood data collapses to the single city name 'New York'
nyc_borough_labels = ['New York City-Queens',
                      'New York City-Manhattan',
                      'New York City-Staten Island',
                      'New York City-Bronx',
                      'New York City-Brooklyn']
nhoods_city_replace = dict.fromkeys(nyc_borough_labels, 'New York')
nhoods['city'] = nhoods['city'].replace(nhoods_city_replace)

In [11]:
# sanity check: number of neighborhoods loaded and the first few rows
print(len(nhoods))
nhoods.head()

6958


Unnamed: 0,city,county,name,regionid,state,geometry
0,Anchorage,Anchorage,Northeast,267847.0,AK,"POLYGON ((-149.718495442866 61.2350090941569, ..."
1,Anchorage,Anchorage,Old Seward-Oceanview,274891.0,AK,"POLYGON ((-149.88726129547 61.11603087070581, ..."
2,Anchorage,Anchorage,Portage Valley,275077.0,AK,"POLYGON ((-148.918401065296 60.90632882965091,..."
3,Anchorage,Anchorage,Glen Alps,267840.0,AK,"POLYGON ((-149.69367817045 61.1085876564845, -..."
4,Anchorage,Anchorage,Campbell Park,267835.0,AK,"POLYGON ((-149.785669185941 61.1808342938339, ..."


## Merge neighborhoods with census data to get GEOID and city population, then calculate area in square meters

In [12]:
# left-join the census rows onto the neighborhoods by (city, state), so every neighborhood row is
# kept, then give the population and neighborhood-name columns more specific names
merge_keys = ['city', 'state']
column_renames = {'pop10':'city_pop10', 'name':'nhood'}
gdf = pd.merge(nhoods, pops, how='left', on=merge_keys).rename(columns=column_renames)

In [13]:
# get area of each neighborhood, in square meters
def get_area(geometry):
    """
    Return the area of a geometry after projecting it with OSMnx.

    The geometry is treated as EPSG:4326. It is passed through buffer(0) and then
    handed to ox.project_geometry; the area of the projected geometry is returned.
    The result is in squared units of the projected CRS (square meters, assuming
    the projection is the metric UTM one that the original variable names indicate).
    """
    source_crs = {'init':'epsg:4326'}
    cleaned = geometry.buffer(0)
    # project_geometry also returns the CRS it projected to, which is not needed here
    projected, _ = ox.project_geometry(geometry=cleaned, crs=source_crs)
    return projected.area

gdf['nhood_area_m'] = gdf['geometry'].map(get_area)

In [14]:
# sanity check: the row count should still equal the number of neighborhoods after the left merge
print(len(gdf))
gdf.head()

6958


Unnamed: 0,city,county,nhood,regionid,state,geometry,geoid,city_pop10,nhood_area_m
0,Anchorage,Anchorage,Northeast,267847.0,AK,"POLYGON ((-149.718495442866 61.2350090941569, ...",203000,291826,12729760.0
1,Anchorage,Anchorage,Old Seward-Oceanview,274891.0,AK,"POLYGON ((-149.88726129547 61.11603087070581, ...",203000,291826,16147180.0
2,Anchorage,Anchorage,Portage Valley,275077.0,AK,"POLYGON ((-148.918401065296 60.90632882965091,...",203000,291826,300498100.0
3,Anchorage,Anchorage,Glen Alps,267840.0,AK,"POLYGON ((-149.69367817045 61.1085876564845, -...",203000,291826,3458132.0
4,Anchorage,Anchorage,Campbell Park,267835.0,AK,"POLYGON ((-149.785669185941 61.1808342938339, ...",203000,291826,8091302.0


In [15]:
#gdf.to_file('output_shp/nhoods_pops')

## Get networks

In [16]:
# where to save networks
# invert the FIPS lookup so that a state abbreviation maps back to its FIPS code
state_to_fips = dict((info['abbreviation'], code) for code, info in fips_to_state.items())

def _city_folder(row):
    """Build '<geoid>_<city>' for one row, with spaces replaced by hyphens."""
    return '{}_{}'.format(row['geoid'], row['city']).replace(' ', '-')

def _nhood_folder(row):
    """Build '<state folder>/<city folder>/<neighborhood>' for one row, hyphenating the neighborhood name."""
    parts = (row['state_folder'], row['city_folder'], row['nhood'].replace(' ', '-'))
    return '{}/{}/{}'.format(*parts)

# state folder is '<fips>_<abbreviation>'; the other two columns build on it in order
gdf['state_folder'] = gdf['state'].map(lambda abbr: '{}_{}'.format(state_to_fips[abbr], abbr))
gdf['city_folder'] = gdf.apply(_city_folder, axis=1)
gdf['nhood_folder'] = gdf.apply(_nhood_folder, axis=1)

In [17]:
# create list of queries
# one plain dict per neighborhood row, holding only the fields the download loop reads
query_fields = ['nhood_folder', 'geometry', 'nhood_area_m', 'city_pop10']
queries = [{field: row[field] for field in query_fields} for _, row in gdf.iterrows()]
queries[0]

{'city_pop10': 291826,
 'geometry': <shapely.geometry.polygon.Polygon at 0x1f51060fcc0>,
 'nhood_area_m': 12729758.67861682,
 'nhood_folder': '02_AK/0203000_Anchorage/Northeast'}

In [18]:
# Download, annotate and save the street network of every neighborhood.
# A neighborhood whose .graphml file already exists in output_folder is skipped, so this cell
# can be re-run to pick up where an earlier run stopped.
start_time = time.time()
failed_queries = [] #(nhood_folder, exception type name, message) for every neighborhood that failed
for query in queries:
    graphml_path = '{}/{}.graphml'.format(output_folder, query['nhood_folder'])
    if os.path.exists(graphml_path):
        continue #already saved by a previous run
    try:
        geometry = query['geometry'].buffer(0) #fix trivially invalid geometries (nested shells, ring self-intersections)
        G = ox.graph_from_polygon(polygon=geometry, network_type=network_type,
                                  name=query['nhood_folder'], retain_all=retain_all)
        # carry the neighborhood area and city population along as graph-level attributes
        G.graph['nhood_area_m'] = query['nhood_area_m']
        G.graph['city_pop10'] = query['city_pop10']
        ox.save_graph_shapefile(G, folder=output_folder, filename=query['nhood_folder'])
        ox.save_graphml(G, folder=output_folder, filename='{}.graphml'.format(query['nhood_folder']))
    except Exception as e:
        # best effort: one bad neighborhood must not stop the batch. The exception type is
        # reported with the message because some messages (e.g. a bare key name) are not
        # understandable on their own; only strings are stored so no graph or traceback is kept alive.
        failed_queries.append((query['nhood_folder'], type(e).__name__, str(e)))
        print('"{}" failed: {}: {}'.format(query['nhood_folder'], type(e).__name__, e))
print('Finished making graphs in {:,.2f} seconds'.format(time.time()-start_time))

"06_CA/0613392_Chula-Vista/Golf-Course" failed: No geometry data set yet (expected in column 'geometry'.
"06_CA/0618100_Davis/Dos-Pinos" failed: Wrong number of items passed 0, placement implies 1
"06_CA/0618100_Davis/South-Cape" failed: 'node'
"06_CA/0618100_Davis/Wildhorse-Gc" failed: No geometry data set yet (expected in column 'geometry'.
"12_FL/1224000_Fort-Lauderdale/Bay-Colony-Club" failed: No geometry data set yet (expected in column 'geometry'.
"12_FL/1224000_Fort-Lauderdale/Birch-Park" failed: No geometry data set yet (expected in column 'geometry'.
"12_FL/1212875_Clearwater/Chateaux-De-Ville" failed: No geometry data set yet (expected in column 'geometry'.
"12_FL/1212875_Clearwater/Clearwater-Golf-View" failed: 'node'
"12_FL/1212875_Clearwater/Countryside-Woods" failed: 'node'
"12_FL/1212875_Clearwater/Drew-Park-Plaza" failed: No geometry data set yet (expected in column 'geometry'.
"12_FL/1212875_Clearwater/Eagles-Glen" failed: Wrong number of items passed 0, placement impl

TopologyException: Input geom 0 is invalid: Self-intersection at or near point -82.531022810978214 27.349486967079315 at -82.531022810978214 27.349486967079315
Self-intersection at or near point -82.531022810978214 27.349486967079315


"12_FL/1264175_Sarasota/Original-Gillespie-Park" failed: The operation 'GEOSDifference_r' could not be performed. Likely cause is invalidity of the geometry <shapely.geometry.polygon.Polygon object at 0x000001F51C77A588>
"12_FL/1264175_Sarasota/Uplands" failed: Wrong number of items passed 0, placement implies 1
"18_IN/1825000_Fort-Wayne/Villas-At-Chandlers-Cove" failed: No geometry data set yet (expected in column 'geometry'.
"18_IN/1825000_Fort-Wayne/Hillside-Acres" failed: No geometry data set yet (expected in column 'geometry'.
"18_IN/1825000_Fort-Wayne/Hazelwood" failed: No geometry data set yet (expected in column 'geometry'.
"18_IN/1825000_Fort-Wayne/Illsley-Place" failed: No geometry data set yet (expected in column 'geometry'.
"18_IN/1825000_Fort-Wayne/Poplar-Ridge" failed: No geometry data set yet (expected in column 'geometry'.
"18_IN/1825000_Fort-Wayne/Wilmarbee" failed: No geometry data set yet (expected in column 'geometry'.
"18_IN/1825000_Fort-Wayne/Aldale-Acres" failed:

TopologyException: Input geom 0 is invalid: Self-intersection at or near point -122.63506693005999 45.465978576687938 at -122.63506693005999 45.465978576687938
Self-intersection at or near point -122.63506693005999 45.465978576687938


"41_OR/4148650_Milwaukie/Ardenwald" failed: The operation 'GEOSDifference_r' could not be performed. Likely cause is invalidity of the geometry <shapely.geometry.polygon.Polygon object at 0x000001F51B673BE0>
"41_OR/4159000_Portland/Government-Island" failed: 'node'
"47_TN/4752006_Nashville/Rayon-City" failed: No geometry data set yet (expected in column 'geometry'.
"48_TX/4865000_San-Antonio/Colonies-Village-Vance-Jackson" failed: Wrong number of items passed 0, placement implies 1
"48_TX/4865000_San-Antonio/Culebra-Park-Third-World" failed: No geometry data set yet (expected in column 'geometry'.
"48_TX/4865000_San-Antonio/Devonshire-S" failed: No geometry data set yet (expected in column 'geometry'.
"48_TX/4865000_San-Antonio/Dignowity-Hill-St-Paul-Square" failed: 'node'
"48_TX/4865000_San-Antonio/Encino-Creek" failed: No geometry data set yet (expected in column 'geometry'.
"48_TX/4865000_San-Antonio/Hampshire-House" failed: 'node'
"48_TX/4865000_San-Antonio/Hidden-Oaks-Estates" fai