In [12]:
# Dependencies
import numpy as np
import pandas as pd
from census import Census
from us import states
import requests
from requests_toolbelt.threaded import pool
from multiprocessing.pool import ThreadPool
import time 

# Census API Key
c = Census("85ac64b6b5a9c0901b00329d1ef41f0c53ccfc98", year=2015)

In [13]:
# Retrieve Census Data (Reference: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b)
zip_census = c.acs5.get(( "B19013_001E",
                          "B19301_001E",
                          "B15003_002E",
                          "B15003_017E",
                          "B15003_018E",
                          "B15003_022E",
                          "B15003_021E",
                          "B15003_023E",
                          "B15003_024E",
                          "B15003_025E",
                          "B17001_002E",
                          "B23025_002E",
                          "B23025_005E",
                          "B01002_001E",
                          "B01002_002E",
                          "B01002_003E",
                          "B01003_001E",
                          "B25064_001E",
                          "B25077_001E",
                          "B25077_001E",
                          "B02001_002E",
                          "B02001_003E",
                          "B02001_004E",
                          "B02001_005E",
                          "B03001_003E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
zip_census = pd.DataFrame(zip_census)

# Column Reordering
zip_census = zip_census.rename(columns={"B19013_001E": "Household Income", 
                                      "B19301_001E": "Income Per Capita",
                                      "B15003_002E": "Education None",
                                      "B15003_017E": "Education High School",
                                      "B15003_018E": "Education GED",
                                      "B15003_022E": "Education Bachelors",
                                      "B15003_021E": "Education Associates",
                                      "B15003_023E": "Education Masters",
                                      "B15003_024E": "Education Professional",
                                      "B15003_025E": "Education Doctorate",
                                      "B17001_002E": "Poverty",
                                      "B23025_002E": "Employment Labor Force",
                                      "B23025_005E": "Employment Unemployed",
                                      "B01002_001E": "Median Age",
                                      "B01002_002E": "Median Male Age",
                                      "B01002_003E": "Median Female Age",
                                      "B01003_001E": "Population",
                                      "B25064_001E": "Median Gross Rent",
                                      "B25077_001E": "Median Home Value",
                                      "B02001_002E": "White Population",
                                      "B02001_003E": "Black Population",
                                      "B02001_004E": "Native American Population",
                                      "B02001_005E": "Asian Population",
                                      "B03001_003E": "Hispanic Population",
                                      "zip code tabulation area": "zipcode"})

# Visualize Data
zip_census.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Education Professional,Education Doctorate,Poverty,Household Income,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,59,65,10861,10816,7453,6162,2213,359,104500,601
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,206,258,21720,16079,8474,15289,3807,395,90200,602
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,283,280,25459,16804,10179,17058,3991,384,128400,603
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,17,32,4070,12512,5863,1679,179,280,108500,606
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,38,144,14005,17475,8452,9542,1495,424,113200,610


In [14]:
# Geocode
zip_lat_lng = pd.read_excel("../Raw/Latitude_Longitude_Zip.xlsx", converters={"zipcode": str})
zip_lat_lng.head()

Unnamed: 0,zipcode,lat,lng,city,state
0,601,18.180555,-66.749961,ADJUNTAS,PR
1,602,18.361945,-67.175597,AGUADA,PR
2,603,18.455183,-67.119887,AGUADILLA,PR
3,606,18.158345,-66.932911,MARICAO,PR
4,610,18.295366,-67.125135,ANASCO,PR


In [15]:
# Merge Data
zip_census_geocoded = zip_census.merge(zip_lat_lng, on="zipcode", how="left")
zip_census_geocoded.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,7453,6162,2213,359,104500,601,18.180555,-66.749961,ADJUNTAS,PR
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,8474,15289,3807,395,90200,602,18.361945,-67.175597,AGUADA,PR
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,10179,17058,3991,384,128400,603,18.455183,-67.119887,AGUADILLA,PR
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,5863,1679,179,280,108500,606,18.158345,-66.932911,MARICAO,PR
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,8452,9542,1495,424,113200,610,18.295366,-67.125135,ANASCO,PR


In [16]:
# Filter to a State
zip_data = zip_census_geocoded[zip_census_geocoded["state"] == "WV"]

In [17]:
# Build URLs
max_distance_meters = 8000
city_amenity = "Hospial"
gkey = "{API_KEY}"
zip_data["Google_URL"] = "https://maps.googleapis.com/maps/api/place/radarsearch/json?location=" + zip_census_geocoded["lat"].map(str) + "," + zip_census_geocoded["lng"].map(str) + "&radius=" + str(max_distance_meters) + "&keyword=" + city_amenity + "&key=" + gkey

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [18]:
zip_data.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state,Google_URL
7633,43.3,42.3,44.3,20891,17585,2852,31,98,116,142,...,8176,437,565.0,66300.0,24701,37.305951,-81.204941,BLUEFIELD,WV,https://maps.googleapis.com/maps/api/place/rad...
7634,23.6,22.7,33.2,2007,1842,113,0,13,32,0,...,781,51,635.0,124500.0,24712,37.463059,-81.01501,ATHENS,WV,https://maps.googleapis.com/maps/api/place/rad...
7635,35.2,36.0,23.4,127,127,0,0,0,0,8,...,37,11,,82600.0,24714,37.484469,-81.190073,BEESON,WV,https://maps.googleapis.com/maps/api/place/rad...
7636,52.4,60.0,47.8,366,359,6,0,1,0,2,...,103,2,570.0,63800.0,24715,37.340392,-81.324618,BRAMWELL,WV,https://maps.googleapis.com/maps/api/place/rad...
7637,54.3,55.2,52.0,460,364,96,0,0,0,0,...,148,10,,,24716,37.480042,-81.371826,BUD,WV,https://maps.googleapis.com/maps/api/place/rad...


In [19]:
# Sample Test
test_run = zip_data.sample(n=15)
test_run["Count"] = ""

In [28]:
# Single Query Approach

# Loop through and run Google search to get counts for each record 
for index, row in test_run.iterrows():

        # Create endpoint url using Google Places Radar and the lat/lng we identified earlier
        target_url ="https://maps.googleapis.com/maps/api/place/radarsearch/json?location=%s,%s&radius=8000&type=bank&key=%s" % (test_run.loc[index]["lat"], test_run.loc[index]["lng"], gkey)

        # This link helps to handily see the JSON generated for each query
        print("Now retrieving city #%s" % (test_run.loc[index]["zipcode"]))
        print(target_url)

        # Run a request to grab the JSON at the target URL
        radar_data = requests.get(target_url).json()

        # Measure radar_data count on the number of results in the retrieved area
        count = len(radar_data["results"])

        print("Final Count: " + str(count))
        print("")

        # Store the bank count into the Data Frame
        test_run.set_value(index, "Count", count)

        # Reset bank_count (so there is no chance that a previous record is influencing a latter one)
        count = 0

# Visualize
test_run.head()


Now retrieving city #26585
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=39.618451,-80.430782&radius=8000&type=bank&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
Final Count: 2

Now retrieving city #25108
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.973583,-81.881188&radius=8000&type=bank&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
Final Count: 0

Now retrieving city #26055
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=39.7543,-80.782572&radius=8000&type=bank&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
Final Count: 0

Now retrieving city #25564
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=38.272188,-81.901012&radius=8000&type=bank&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
Final Count: 1

Now retrieving city #25831
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.959313,-80.930792&radius=8000&type=bank&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
Final Count: 0

Now ret

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state,Google_URL,Count
8262,56.2,57.5,55.9,554,554,0,0,0,0,21,...,10,,125000,26585,39.618451,-80.430782,METZ,WV,https://maps.googleapis.com/maps/api/place/rad...,2
7788,48.1,37.9,55.3,616,583,0,0,0,0,0,...,0,,87800,25108,37.973583,-81.881188,HEWETT,WV,https://maps.googleapis.com/maps/api/place/rad...,0
8086,43.1,39.6,44.8,2102,2072,0,0,0,10,70,...,48,548.0,87700,26055,39.7543,-80.782572,PROCTOR,WV,https://maps.googleapis.com/maps/api/place/rad...,0
7944,28.4,27.1,30.2,2083,2083,0,0,0,0,48,...,37,698.0,77900,25564,38.272188,-81.901012,SOD,WV,https://maps.googleapis.com/maps/api/place/rad...,1
8007,52.0,51.1,52.8,1351,1351,0,0,0,14,17,...,40,656.0,88300,25831,37.959313,-80.930792,DANESE,WV,https://maps.googleapis.com/maps/api/place/rad...,0


In [29]:
# Multithreading Approach

# Make API Calls
p = pool.Pool.from_urls(test_run["Google_URL"].values, num_processes=35)
p.join_all()

# Array to hold all retrieved data
amenity_counts = []

# Loop through pooled results
for response in p.responses():
    print(response.request_kwargs['url'])
    print(len(response.json()["results"]))
    count = {
        "Google_URL": response.request_kwargs['url'],
        "Count": len(response.json()["results"])
    }
    amenity_counts.append(count)
    
full_data = pd.DataFrame(amenity_counts)

https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.973583,-81.881188&radius=8000&keyword=Hospial&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
1
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.721101,-81.77832&radius=8000&keyword=Hospial&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
2
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=39.7543,-80.782572&radius=8000&keyword=Hospial&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
1
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.920527,-81.767625&radius=8000&keyword=Hospial&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
1
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=38.165242,-81.521007&radius=8000&keyword=Hospial&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
7
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.959313,-80.930792&radius=8000&keyword=Hospial&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
1
https://maps.google