In [30]:
%matplotlib inline
from geopandas import GeoDataFrame, read_file
from geopandas.tools import sjoin
from shapely.geometry import Point
from pandas import to_numeric
from time import time

import jupynbimp
import restaurants_data_cleaning, census2010_data_cleaning, acs2013_data_cleaning

In [31]:
# Coordinate reference systems used throughout the notebook, keyed by role:
#   'GCS'       - geographic coordinates (EPSG:4326)
#   'projected' - WGS 84/UTM Zone 12N (EPSG:32612)
CRS = {
    'GCS': '+init=epsg:4326',
    'projected': '+init=epsg:32612',
}

# Forwarded as `fromCache` to each data-cleaning module's getData().
USE_CACHE = True

# Divisor used to convert areas from square metres to square kilometres.
SQ_METERS_PER_SQ_KM = 1000 * 1000

In [32]:
def processingTime(start, finish):
    """Print the elapsed time between two timestamps, rounded to 2 decimals.

    Parameters
    ----------
    start, finish : numbers (e.g. values returned by ``time()``)
        Beginning and end of the measured interval, in seconds.
    """
    elapsed = round(finish - start, 2)
    message = 'processing time: ' + str(elapsed) + 's'
    print(message)

def normalizeByArea(geoDataFrame, column):
    """Divide a count column by the ``area_sqkm`` column, element-wise.

    Parameters
    ----------
    geoDataFrame : DataFrame-like
        Must contain both ``column`` and an ``area_sqkm`` column.
    column : str
        Name of the count column to normalize.

    Returns
    -------
    Series
        ``geoDataFrame[column] / geoDataFrame['area_sqkm']``, aligned on the
        frame's index (one density value per row).

    Notes
    -----
    The division is vectorized instead of being applied row by row: this
    avoids building a Python object per row and always yields a Series,
    including for an empty frame (where a row-wise ``apply`` hands back a
    DataFrame instead).
    """
    return geoDataFrame[column] / geoDataFrame['area_sqkm']

## Transform data into consolidated shapefile

### Yelp Restaurants

In [34]:
# Load the cleaned restaurant table and turn each (longitude, latitude) pair
# into a point geometry expressed in the geographic CRS.
restaurants = restaurants_data_cleaning.getData(fromCache=USE_CACHE)
restaurantPoints = list(map(Point, restaurants.longitude, restaurants.latitude))
restaurants = GeoDataFrame(restaurants, geometry=restaurantPoints, crs=CRS['GCS'])

# Reproject to the projected CRS so that later distance/area computations are
# done in projected units; the raw coordinate columns are then redundant.
restaurants.to_crs(CRS['projected'], inplace=True)
restaurants.drop(columns=['latitude', 'longitude'], inplace=True)

### Add Proximity Variables 

In [77]:
# Retrieved 17/07/2016 from http://download.geofabrik.de/north-america/us/arizona.html
majorHighways = read_file('/arizona-latest/roads.shp', 
                          vfs='zip://../data/shapefiles/arizona-latest.zip'
                         )
# Keep only roads whose `maxspeed` attribute is at least 100 and reproject
# them to the same projected CRS as the restaurants.
# NOTE(review): maxspeed is presumably in km/h (OSM convention) - confirm.
majorHighways = (majorHighways[majorHighways['maxspeed'] >= 100]
                 .to_crs(CRS['projected'])
                )

start = time()
# For every restaurant, take the smallest distance to any major highway.
# Distances are in the units of the projected CRS.
# Fix: this previously referenced `majorRoads`, a name that is not defined
# anywhere in the notebook and only worked through leftover kernel state.
restaurants['dist_maj_hwy'] = restaurants.geometry.apply(
    lambda point: majorHighways.distance(point).min()
)
processingTime(start, time())

processing time: 357.65s


### Census 2010

In [35]:
census2010 = census2010_data_cleaning.getData(fromCache=USE_CACHE)

# Retrieved 10-07-2016 from https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2010&layergroup=Block+Groups
blockGroups2010 = read_file('/tl_2010_04_bg10.shp',
                            vfs='zip://../data/shapefiles/tl_2010_04_bg10.zip')

# Make the block-group identifier numeric, keep only identifier + geometry,
# and reproject so that areas are computed in the projected CRS.
blockGroups2010.GEOID10 = to_numeric(blockGroups2010.GEOID10)
blockGroups2010 = blockGroups2010[['GEOID10', 'geometry']]
blockGroups2010 = blockGroups2010.rename(columns={'GEOID10': 'GEOID'})
blockGroups2010 = blockGroups2010.to_crs(CRS['projected'])

blockGroups2010['area_sqkm'] = blockGroups2010.geometry.area / SQ_METERS_PER_SQ_KM
blockGroups2010 = blockGroups2010.merge(census2010, on='GEOID')

# Normalize count data by block group area.
# Each pair is (raw count column, resulting density column); the list order
# determines the order in which the density columns are appended.
census2010DensityColumns = [
    ('population_total', 'population_density'),
    ('home_mortgages', 'home_mortgage_density'),
    ('home_owners', 'home_owner_density'),
    ('renters', 'renter_density'),
    ('total_households', 'household_density'),
    ('family_households', 'family_household_density'),
    ('single_households', 'single_household_density'),
    ('population_hispanic_latino', 'hispanic_latino_population_density'),
    ('population_white', 'white_population_density'),
    ('population_black', 'black_population_density'),
    ('population_native_american', 'native_american_population_density'),
    ('population_asian', 'asian_population_density'),
]
for countColumn, densityColumn in census2010DensityColumns:
    blockGroups2010[densityColumn] = normalizeByArea(blockGroups2010, countColumn)

# The raw counts are no longer needed once their densities exist.
blockGroups2010.drop([pair[0] for pair in census2010DensityColumns],
                     axis=1, inplace=True)

### ACS 2013

In [36]:
acs2013 = acs2013_data_cleaning.getData(fromCache=USE_CACHE) 

# Retrieved 10-07-2016 from https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2013&layergroup=Block+Groups
blockGroups2013 = read_file('/tl_2013_04_bg.shp', 
                            vfs='zip://../data/shapefiles/tl_2013_04_bg.zip')
# Fix: convert this frame's own GEOID column. The previous code assigned
# to_numeric(blockGroups2010.GEOID), which index-aligned identifiers from the
# 2010 frame onto the 2013 geometries.
blockGroups2013.GEOID = to_numeric(blockGroups2013.GEOID)
# Keep only identifier + geometry and reproject so that areas are computed
# in the projected CRS.
blockGroups2013 = (blockGroups2013[['GEOID','geometry']]
                   .to_crs(CRS['projected'])
                  )

blockGroups2013['area_sqkm'] = blockGroups2013.geometry.area/SQ_METERS_PER_SQ_KM 
blockGroups2013 = blockGroups2013.merge(acs2013, on='GEOID')
  
# Normalize count data by block group area
blockGroups2013['density_education_highschool'] = normalizeByArea(blockGroups2013,'education_highschool')
blockGroups2013['density_education_undergraduate'] = normalizeByArea(blockGroups2013,'education_undergraduate')
blockGroups2013['density_education_postgraduate'] = normalizeByArea(blockGroups2013,'education_postgraduate')

# The raw counts are no longer needed once their densities exist.
blockGroups2013.drop(['education_highschool',
                      'education_undergraduate',
                      'education_postgraduate'
                     ], 
                     axis=1, inplace=True
                    )

## Spatial Join Restaurant Locations on Census Block Groups

In [79]:
start = time()

# Columns that exist only to perform the spatial join (or that come back
# from `restaurants` in the final merge) and are dropped from each result.
joinOnlyColumns = ['geometry', 'index_right', 'GEOID', 'area_sqkm']

# Attach the block-group attributes of each vintage to the restaurants.
census2010Joined = sjoin(restaurants[['business_id', 'geometry']],
                         blockGroups2010, how='inner')
restaurants_census2010 = census2010Joined.drop(joinOnlyColumns, axis=1)

acs2013Joined = sjoin(restaurants[['business_id', 'geometry']],
                      blockGroups2013, how='inner')
restaurants_acs2013 = acs2013Joined.drop(joinOnlyColumns, axis=1)

# Combine both demographic tables with the full restaurant records (which
# bring the geometry back) and wrap the result as a GeoDataFrame.
demographicsMerged = restaurants_census2010.merge(restaurants_acs2013, on='business_id')
demographicsMerged = demographicsMerged.merge(restaurants, on='business_id')
restaurants_demographics = GeoDataFrame(demographicsMerged, crs=CRS['projected'])

processingTime(start, time())

processing time: 6.16s


In [80]:
# Preview only the first rows rather than rendering the whole frame.
restaurants_demographics.head(10)

Unnamed: 0,business_id,median_age,average_household_size,population_density,home_mortgage_density,home_owner_density,renter_density,household_density,family_household_density,single_household_density,...,name,rating,alcohol,price_range,attire,takeout,waiter_service,outdoor_seating,geometry,dist_maj_hwy
0,x5Mv61CnZLohZWxfCVCPTQ,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Domino's Pizza,-1.386294,0,2,casual,True,False,False,POINT (400244.9491822217 3704960.015533127),1906.964491
1,KPoTixdjoJxSqRSEApSAGg,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Wild Thaiger,3.034213,2,2,casual,True,True,True,POINT (400267.6733166573 3704788.713454979),1734.326517
2,Vvh2Hd4SsZjEEfKLNKAEWw,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Subway,2.484907,0,,casual,True,False,False,POINT (400280.6015136967 3704952.583430165),1896.420387
3,x8WoC_7WHzXNmt6J13Nbvw,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Jordan's Mexican Food,-1.386294,0,1,casual,,False,,POINT (400314.014571388 3704749.978199139),1691.659300
4,oSKT5oZX1NWYaHwOWL5lag,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Ichi Ban Japanese Restaurant & Sushi,1.965913,1,2,casual,True,True,False,POINT (400244.9150034722 3704959.406001454),1906.360341
5,QAe9UHKsxPdXQovBpaznMQ,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Schlotzskys Deli,-1.282475,0,1,casual,True,False,False,POINT (400240.1912067185 3704645.397384088),1593.988804
6,KIbFaBrZDX7rrYJInkv5Kg,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Del Taco,0.000000,0,1,casual,True,False,False,POINT (400672.1663762873 3704972.762577508),1886.779947
7,cN6aBxe2mQvrQlzk26LyRQ,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Durant's,6.317165,2,3,dressy,False,True,False,POINT (400268.7285709459 3704709.19574977),1655.025026
8,TCqkBVN84Ek0oLmZGAX5xA,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Lisa G Cafe and Wine Bar,4.060443,2,2,casual,True,True,True,POINT (401036.0086786781 3704411.102884501),1346.948867
9,MJ6_uOT55dq5ALphFqMVkw,45.8,1.65,1073.235548,381.808760,162.028087,526.190224,649.716587,200.529811,354.536706,...,Lenny's Burger Shop,5.153292,0,1,casual,True,False,True,POINT (400282.9138216604 3704992.900651154),1936.377137


## Save as shapefile

In [81]:
# Destination of the consolidated restaurants + demographics layer.
# NOTE(review): the shapefile format presumably truncates field names to
# 10 characters, so the long column names here may be shortened or collide
# in the written file - confirm by reading the output back.
restaurantsShapefilePath = '../data/shapefiles/restaurants_cleaned/restaurants.shp'
restaurants_demographics.to_file(restaurantsShapefilePath)