In [46]:
%matplotlib inline
from geopandas import GeoDataFrame, GeoSeries, read_file, overlay
from geopandas.tools import sjoin
from shapely.geometry import Point
from pandas import to_numeric
from time import time

import jupynbimp
import restaurants_data_cleaning, census2010_data_cleaning, acs2013_data_cleaning

In [3]:
# Projected CRS: WGS 84/UTM Zone 12N
CRS = {'GCS':'+init=epsg:4326', 'projected':'+init=epsg:32612'}
USE_CACHE=True
SQ_METERS_PER_SQ_KM=1000000

In [4]:
def processingTime(start, finish):
    print('processing time: ' + str(round(finish - start, 2)) + 's')

def normalizeByArea(geoDataFrame, column):
    return geoDataFrame.apply(
        lambda row: row[column]/row.area_sqkm, axis=1
    ) 

## Transform data into consolidated shapefile

### Yelp Restaurants

In [5]:
restaurants = restaurants_data_cleaning.getData(fromCache=USE_CACHE)
restaurants = GeoDataFrame(
    restaurants,
    geometry=[Point(x,y) 
              for x, y in zip(restaurants.longitude,
                              restaurants.latitude)
             ],
    crs=CRS['GCS']
)
restaurants.to_crs(CRS['projected'], inplace=True)
restaurants.drop(['latitude', 'longitude'], axis=1, inplace=True)

### Add Proximity Variables 

In [67]:
# Phoenix Convention Center in projected coordinates
phoenixCBD = GeoSeries(Point(400557, 3701492), crs=CRS['projected'])

In [72]:
start = time()
restaurants['dist_CBD'] = restaurants.geometry.apply(
    lambda point: phoenixCBD.distance(point)
)
processingTime(start, time())

processing time: 0.98s


In [21]:
# Retrieved 17/07/2016 from http://download.geofabrik.de/north-america/us/arizona.html
roads = read_file('/arizona-latest/roads.shp', 
                  vfs='zip://../data/shapefiles/arizona-latest.zip'
                 ).to_crs(CRS['projected'])

In [19]:
motorwayExits = (roads[roads['type'] == 'motorway_link'])

In [20]:
start = time()
restaurants['dist_mwy_exit'] = restaurants.geometry.apply(
    lambda point: motorwayExits.distance(point).min()
)
processingTime(start, time())

processing time: 386.01s


### Census 2010

In [35]:
census2010 = census2010_data_cleaning.getData(fromCache=USE_CACHE)

# Retrieved 10-07-2016 from https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2010&layergroup=Block+Groups
blockGroups2010 = read_file('/tl_2010_04_bg10.shp', 
                            vfs='zip://../data/shapefiles/tl_2010_04_bg10.zip')
blockGroups2010.GEOID10 = to_numeric(blockGroups2010.GEOID10)
blockGroups2010 = (blockGroups2010[['GEOID10','geometry']]
                   .rename(columns={'GEOID10':'GEOID'})
                   .to_crs(CRS['projected'])
                  )
blockGroups2010['area_sqkm'] = blockGroups2010.geometry.area/SQ_METERS_PER_SQ_KM 
blockGroups2010 = blockGroups2010.merge(census2010, on='GEOID')
   
# Normalize count data by block group area
blockGroups2010['population_density'] = normalizeByArea(blockGroups2010,'population_total')
blockGroups2010['home_mortgage_density'] = normalizeByArea(blockGroups2010,'home_mortgages')
blockGroups2010['home_owner_density'] = normalizeByArea(blockGroups2010,'home_owners')
blockGroups2010['renter_density'] = normalizeByArea(blockGroups2010,'renters')
blockGroups2010['household_density'] = normalizeByArea(blockGroups2010,'total_households')
blockGroups2010['family_household_density'] = normalizeByArea(blockGroups2010,'family_households')
blockGroups2010['single_household_density'] = normalizeByArea(blockGroups2010,'single_households')
blockGroups2010['hispanic_latino_population_density'] = normalizeByArea(blockGroups2010,'population_hispanic_latino')
blockGroups2010['white_population_density'] = normalizeByArea(blockGroups2010,'population_white')
blockGroups2010['black_population_density'] = normalizeByArea(blockGroups2010,'population_black')
blockGroups2010['native_american_population_density'] = normalizeByArea(blockGroups2010,'population_native_american')
blockGroups2010['asian_population_density'] = normalizeByArea(blockGroups2010,'population_asian')

blockGroups2010.drop(['population_total',
                      'home_mortgages',
                      'home_owners',
                      'renters',
                      'total_households',
                      'family_households',
                      'single_households','population_hispanic_latino',
                      'population_white',
                      'population_black',
                      'population_native_american',
                      'population_asian'
                     ], 
                     axis=1, inplace=True
                    )

### ACS 2013

In [36]:
acs2013 = acs2013_data_cleaning.getData(fromCache=USE_CACHE) 

# Retrieved 10-07-2016 https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2013&layergroup=Block+Groups
blockGroups2013 = read_file('/tl_2013_04_bg.shp', 
                            vfs='zip://../data/shapefiles/tl_2013_04_bg.zip')
blockGroups2013.GEOID = to_numeric(blockGroups2010.GEOID)
blockGroups2013 = (blockGroups2013[['GEOID','geometry']]
                   .to_crs(CRS['projected'])
                  )

blockGroups2013['area_sqkm'] = blockGroups2013.geometry.area/SQ_METERS_PER_SQ_KM 
blockGroups2013 = blockGroups2013.merge(acs2013, on='GEOID')
  
# Normalize count data by block group area
blockGroups2013['density_education_highschool'] = normalizeByArea(blockGroups2013,'education_highschool')
blockGroups2013['density_education_undergraduate'] = normalizeByArea(blockGroups2013,'education_undergraduate')
blockGroups2013['density_education_postgraduate'] = normalizeByArea(blockGroups2013,'education_postgraduate')

blockGroups2013.drop(['education_highschool',
                      'education_undergraduate',
                      'education_postgraduate'
                     ], 
                     axis=1, inplace=True
                    )

## Spatial Join Restaurant Locations on Census Block Groups

In [74]:
start = time()
restaurants_census2010 = (sjoin(restaurants[['business_id', 'geometry']], blockGroups2010, how='inner')
                          .drop(['geometry', 'index_right', 'GEOID', 'area_sqkm'], axis=1)
                         )
restaurants_acs2013 = (sjoin(restaurants[['business_id', 'geometry']], blockGroups2013, how='inner')
                       .drop(['geometry', 'index_right', 'GEOID', 'area_sqkm'], axis=1)
                      )
restaurants_demographics = GeoDataFrame(restaurants_census2010.merge(restaurants_acs2013, on='business_id')
                                        .merge(restaurants, on='business_id'),
                                        crs=CRS['projected']
                                       )
processingTime(start, time())

processing time: 6.41s


## Save as shapefile

In [75]:
restaurants_demographics.to_file('../data/shapefiles/restaurants_cleaned/restaurants.shp')