This script merges census tracts with the isochrones generated from those tracts. It combines each tract and its isochrone into a single polygon using their shared GEOID, counts the grocery stores that fall within each combined polygon, and finally merges the store counts and the tract population with the original tract shapefile to compute the number of stores per 1,000 residents.

In [38]:
import pandas as pd
import geopandas as gpd
import cenpy as cp
import numpy as np
import os
from shapely.geometry.multipolygon import MultiPolygon
from shapely.geometry import Point

In [27]:
def concat_int_cols(df, col1, col2, col3, fill1=2, fill2=3, fill3=6, return_col='GEOID'):
    """Build a combined integer ID from three zero-padded columns.

    Each of ``col1``, ``col2`` and ``col3`` is converted to text and
    left-padded with zeros to ``fill1``, ``fill2`` and ``fill3`` characters
    respectively. The padded pieces are joined in that order and stored in
    ``return_col``. The three source columns and ``return_col`` are then cast
    to ``int``, so a leading zero of the combined value is not retained.

    ``df`` is modified in place and is also returned.
    """
    parts = [col1, col2, col3]

    # Pad each piece to its fixed width so the joined digits line up
    for name, width in zip(parts, (fill1, fill2, fill3)):
        df[name] = df[name].astype(str).str.zfill(width)

    # Element-wise string concatenation of the three padded columns
    df[return_col] = df[col1] + df[col2] + df[col3]

    targets = parts + [return_col]
    df[targets] = df[targets].astype(int)
    return df

In [3]:
# Read the tract and isochrone layers that will be merged, keeping only
# the GEOID key and the geometry of each
tracts = gpd.read_file(os.path.join('shapefiles', 'ti_2015_chi_tracts.shp'))[['GEOID', 'geometry']]
isos = gpd.read_file(os.path.join('shapefiles', 'isochrones.shp'))[['GEOID', 'geometry']]

In [5]:
# Stack tracts and isochrones into a single frame, indexed by GEOID
# (the GEOID column itself is kept alongside the index)
gdf = pd.concat([tracts, isos], ignore_index=True)
gdf = gpd.GeoDataFrame(gdf).set_index('GEOID', drop=False)

In [6]:
# Merge the isochrone and tract polygons into one geometry per GEOID
fix = []
# Single pass over the rows grouped by GEOID instead of re-filtering the whole
# frame for every ID. Grouping on the raw values sidesteps GEOID being both a
# column and the index name; sort=False keeps GEOIDs in order of first
# appearance.
for geoid, shapes in gdf.geometry.groupby(gdf['GEOID'].values, sort=False):
    parts = []
    for shape in shapes:
        # buffer(0) is used to clean each shape, but it can hand back a
        # MultiPolygon (or an empty geometry). The MultiPolygon constructor
        # only accepts Polygon members, so flatten multi-part results and
        # skip empty ones.
        cleaned = shape.buffer(0)
        if cleaned.is_empty:
            continue
        if isinstance(cleaned, MultiPolygon):
            parts.extend(cleaned.geoms)
        else:
            parts.append(cleaned)
    fix.append({'GEOID': geoid, 'geometry': MultiPolygon(parts)})
merged = gpd.GeoDataFrame(fix, columns=['GEOID', 'geometry'])
merged = merged.set_geometry('geometry')
# The members of each MultiPolygon may overlap; buffering by zero again is
# presumably meant to collapse them into a single valid shape
merged['geometry'] = merged.geometry.buffer(0)
merged.crs = tracts.crs

In [1]:
# Build a point layer from the grocery store query results and save it
df = pd.read_csv(os.path.join('data', 'all_markets.csv'))

# Points are created as (x, y) pairs, i.e. (longitude, latitude)
geometry = list(map(Point, zip(df['longitude'], df['latitude'])))

# The coordinate columns are dropped once they live in the geometry;
# the point layer is given the same CRS as the merged polygons
df = df.drop(['longitude', 'latitude'], axis=1)
points = gpd.GeoDataFrame(df, geometry=geometry, crs=merged.crs)
points.to_file(os.path.join('shapefiles', 'final_chi_points.shp'))

NameError: name 'pd' is not defined

In [35]:
# Fetch tract level population from the census API ('ACSSF5Y2015' dataset)
api_conn = cp.base.Connection('ACSSF5Y2015')
pop = api_conn.query(['B01001_001E'], geo_unit='tract:*', geo_filter={'state': '17'})

# Rename the API columns to the names used by the rest of the script
pop = pop.rename(columns={
    'B01001_001E': 'POP',
    'state': 'STATEFIP',
    'county': 'COUNTY',
    'tract': 'TRACT',
})

# Assemble GEOID from the state/county/tract pieces, then store it as text
# (presumably to match the GEOID type read from the tract shapefile)
pop = concat_int_cols(pop, 'STATEFIP', 'COUNTY', 'TRACT')
pop['GEOID'] = pop['GEOID'].astype(str)

For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  df[cols] = df[cols].convert_objects(convert_numeric=convert_numeric)


In [32]:
# Spatially join the grocery store points to the merged polygons and count
# the stores that fall inside each GEOID's polygon.
#
# An inner join yields exactly one row per (polygon, store) match, so the
# group size is the true store count. (A left join also emits one row for a
# polygon with no stores, which made size() report 1 instead of 0.)
# NOTE(review): newer geopandas releases name this argument `predicate`
# rather than `op` -- confirm against the installed version.
hits = gpd.sjoin(merged, points, how='inner', op='contains')
store_counts = pd.DataFrame(hits).groupby('GEOID').size()

# Polygons without any match are absent from the inner join; add them back
# with an explicit count of 0 so every GEOID appears in the result
geoids = merged['GEOID'].unique()
final = pd.DataFrame({
    'GEOID': geoids,
    'counts': store_counts.reindex(geoids, fill_value=0).values,
})

In [40]:
# Attach the store counts and population to the full tract layer, derive the
# number of stores per 1K population, and write the result to a shapefile
tracts = gpd.read_file(os.path.join('shapefiles', 'ti_2015_chi_tracts.shp'))
tracts = gpd.GeoDataFrame(tracts.merge(final, on='GEOID').merge(pop, on='GEOID'))

stores_per_person = tracts['counts'] / tracts['POP']
tracts['STORES_PER_1000'] = stores_per_person * 1000

# Infinite values (e.g. a ratio with a zero population) are stored as 0
tracts = tracts.replace([np.inf, -np.inf], 0)
tracts.to_file(os.path.join('shapefiles', 'final_chi_tracts.shp'))