# Access Inequality Maps 
---
The below script allows us to quickly uncover zip codes that lack access to common amenities (supermarkets, banks, schools, hospitals, etc). The script retrieves data from the 2015 US Census, Google Distance Matrix API, and Google Radar API to identify the number of such amenities and distance to the neareset one. The final dataset can be easily mapped or correlated against demographic information.

### Dependencies

In [27]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time 
from census import Census
from us import states
from requests_toolbelt.threaded import pool
from multiprocessing.pool import ThreadPool

In [28]:
# Census API Key
c = Census("85ac64b6b5a9c0901b00329d1ef41f0c53ccfc98", year=2015)

# Google API Key
gkey = "{API KEY}"

### Parameters

In [48]:
# Required Parameter
max_distance_meters = 40233
city_amenity = "Grocery"

# Optional Parameters
restrict_to_state = True
state = "WV"

### Retrieve Census Data

In [30]:
# Retrieve Census Data (Reference: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b)
zip_census = c.acs5.get(( "B19013_001E",
                          "B19301_001E",
                          "B15003_002E",
                          "B15003_017E",
                          "B15003_018E",
                          "B15003_022E",
                          "B15003_021E",
                          "B15003_023E",
                          "B15003_024E",
                          "B15003_025E",
                          "B17001_002E",
                          "B23025_002E",
                          "B23025_005E",
                          "B01002_001E",
                          "B01002_002E",
                          "B01002_003E",
                          "B01003_001E",
                          "B25064_001E",
                          "B25077_001E",
                          "B25077_001E",
                          "B02001_002E",
                          "B02001_003E",
                          "B02001_004E",
                          "B02001_005E",
                          "B03001_003E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
zip_census = pd.DataFrame(zip_census)

# Column Reordering
zip_census = zip_census.rename(columns={"B19013_001E": "Household Income", 
                                      "B19301_001E": "Income Per Capita",
                                      "B15003_002E": "Education None",
                                      "B15003_017E": "Education High School",
                                      "B15003_018E": "Education GED",
                                      "B15003_022E": "Education Bachelors",
                                      "B15003_021E": "Education Associates",
                                      "B15003_023E": "Education Masters",
                                      "B15003_024E": "Education Professional",
                                      "B15003_025E": "Education Doctorate",
                                      "B17001_002E": "Poverty",
                                      "B23025_002E": "Employment Labor Force",
                                      "B23025_005E": "Employment Unemployed",
                                      "B01002_001E": "Median Age",
                                      "B01002_002E": "Median Male Age",
                                      "B01002_003E": "Median Female Age",
                                      "B01003_001E": "Population",
                                      "B25064_001E": "Median Gross Rent",
                                      "B25077_001E": "Median Home Value",
                                      "B02001_002E": "White Population",
                                      "B02001_003E": "Black Population",
                                      "B02001_004E": "Native American Population",
                                      "B02001_005E": "Asian Population",
                                      "B03001_003E": "Hispanic Population",
                                      "zip code tabulation area": "zipcode"})

# Visualize Data
zip_census.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Education Professional,Education Doctorate,Poverty,Household Income,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,59,65,10861,10816,7453,6162,2213,359,104500,601
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,206,258,21720,16079,8474,15289,3807,395,90200,602
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,283,280,25459,16804,10179,17058,3991,384,128400,603
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,17,32,4070,12512,5863,1679,179,280,108500,606
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,38,144,14005,17475,8452,9542,1495,424,113200,610


### Retrieve Geocode Data

In [31]:
zip_lat_lng = pd.read_excel("../../Raw/Latitude_Longitude_Zip.xlsx", converters={"zipcode": str})
zip_lat_lng.head()

Unnamed: 0,zipcode,lat,lng,city,state
0,601,18.180555,-66.749961,ADJUNTAS,PR
1,602,18.361945,-67.175597,AGUADA,PR
2,603,18.455183,-67.119887,AGUADILLA,PR
3,606,18.158345,-66.932911,MARICAO,PR
4,610,18.295366,-67.125135,ANASCO,PR


In [32]:
zip_census_geocoded = zip_census.merge(zip_lat_lng, on="zipcode", how="left")
zip_census_geocoded.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,7453,6162,2213,359,104500,601,18.180555,-66.749961,ADJUNTAS,PR
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,8474,15289,3807,395,90200,602,18.361945,-67.175597,AGUADA,PR
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,10179,17058,3991,384,128400,603,18.455183,-67.119887,AGUADILLA,PR
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,5863,1679,179,280,108500,606,18.158345,-66.932911,MARICAO,PR
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,8452,9542,1495,424,113200,610,18.295366,-67.125135,ANASCO,PR


### Filter Data by State

In [50]:
# Apply filter if user wishes
if restrict_to_state:
    zip_data = zip_census_geocoded[zip_census_geocoded["state"] == state]
else:
    state = "USA"

In [51]:
zip_data.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state
7633,43.3,42.3,44.3,20891,17585,2852,31,98,116,142,...,19285,8176,437,565.0,66300.0,24701,37.305951,-81.204941,BLUEFIELD,WV
7634,23.6,22.7,33.2,2007,1842,113,0,13,32,0,...,18581,781,51,635.0,124500.0,24712,37.463059,-81.01501,ATHENS,WV
7635,35.2,36.0,23.4,127,127,0,0,0,0,8,...,13413,37,11,,82600.0,24714,37.484469,-81.190073,BEESON,WV
7636,52.4,60.0,47.8,366,359,6,0,1,0,2,...,18980,103,2,570.0,63800.0,24715,37.340392,-81.324618,BRAMWELL,WV
7637,54.3,55.2,52.0,460,364,96,0,0,0,0,...,24869,148,10,,,24716,37.480042,-81.371826,BUD,WV


In [52]:
zip_data.count()

Median Age                    696
Median Male Age               688
Median Female Age             687
Population                    706
White Population              706
Black Population              706
Native American Population    706
Asian Population              706
Hispanic Population           706
Education None                706
Education High School         706
Education GED                 706
Education Associates          706
Education Bachelors           706
Education Masters             706
Education Professional        706
Education Doctorate           706
Poverty                       706
Household Income              621
Income Per Capita             695
Employment Labor Force        706
Employment Unemployed         706
Median Gross Rent             380
Median Home Value             604
zipcode                       706
lat                           706
lng                           706
city                          706
state                         706
dtype: int64

### Create Query URLs

In [59]:
zip_data["Google_URL"] = "https://maps.googleapis.com/maps/api/place/radarsearch/json?location=" + zip_data["lat"].map(str) + "," + state_data["lng"].map(str) + "&radius=" + str(max_distance_meters) + "&keyword=" + city_amenity + "&key=" + gkey

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [60]:
zip_data.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state,Google_URL
7633,43.3,42.3,44.3,20891,17585,2852,31,98,116,142,...,8176,437,565.0,66300.0,24701,37.305951,-81.204941,BLUEFIELD,WV,https://maps.googleapis.com/maps/api/place/rad...
7634,23.6,22.7,33.2,2007,1842,113,0,13,32,0,...,781,51,635.0,124500.0,24712,37.463059,-81.01501,ATHENS,WV,https://maps.googleapis.com/maps/api/place/rad...
7635,35.2,36.0,23.4,127,127,0,0,0,0,8,...,37,11,,82600.0,24714,37.484469,-81.190073,BEESON,WV,https://maps.googleapis.com/maps/api/place/rad...
7636,52.4,60.0,47.8,366,359,6,0,1,0,2,...,103,2,570.0,63800.0,24715,37.340392,-81.324618,BRAMWELL,WV,https://maps.googleapis.com/maps/api/place/rad...
7637,54.3,55.2,52.0,460,364,96,0,0,0,0,...,148,10,,,24716,37.480042,-81.371826,BUD,WV,https://maps.googleapis.com/maps/api/place/rad...


### Perform API Calls (Multi-Threaded Approach)

In [75]:
# Make API Calls
p = pool.Pool.from_urls(zip_data["Google_URL"].values, num_processes=35)
p.join_all()

### Combine With Original Data

In [76]:
# Array to hold all retrieved data
amenity_counts = []

# Loop through pooled results
for response in p.responses():
    print(response.request_kwargs['url'])
    print(len(response.json()["results"]))
    count = {
        "Google_URL": response.request_kwargs['url'],
        "Count": len(response.json()["results"])
    }
    amenity_counts.append(count)
    
full_data = pd.DataFrame(amenity_counts)

https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.352268,-81.80478&radius=40233&keyword=Grocery&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
81
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.399166,-81.771251&radius=40233&keyword=Grocery&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
86
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.293267,-81.623592&radius=40233&keyword=Grocery&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
97
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.357412,-81.720113&radius=40233&keyword=Grocery&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
88
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.333874,-81.55862&radius=40233&keyword=Grocery&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
116
https://maps.googleapis.com/maps/api/place/radarsearch/json?location=37.305951,-81.204941&radius=40233&keyword=Grocery&key=AIzaSyA_Clyz3478YAUnsESNHE5dyktvvMoa-vw
132
http

In [79]:
# Merge the Amenity count data to the original data
final_dataset = zip_data.merge(full_data, on="Google_URL", how="inner") 

In [80]:
final_dataset.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state,Google_URL,Count
0,43.3,42.3,44.3,20891,17585,2852,31,98,116,142,...,437,565.0,66300.0,24701,37.305951,-81.204941,BLUEFIELD,WV,https://maps.googleapis.com/maps/api/place/rad...,132
1,23.6,22.7,33.2,2007,1842,113,0,13,32,0,...,51,635.0,124500.0,24712,37.463059,-81.01501,ATHENS,WV,https://maps.googleapis.com/maps/api/place/rad...,155
2,35.2,36.0,23.4,127,127,0,0,0,0,8,...,11,,82600.0,24714,37.484469,-81.190073,BEESON,WV,https://maps.googleapis.com/maps/api/place/rad...,170
3,52.4,60.0,47.8,366,359,6,0,1,0,2,...,2,570.0,63800.0,24715,37.340392,-81.324618,BRAMWELL,WV,https://maps.googleapis.com/maps/api/place/rad...,131
4,54.3,55.2,52.0,460,364,96,0,0,0,0,...,10,,,24716,37.480042,-81.371826,BUD,WV,https://maps.googleapis.com/maps/api/place/rad...,140


In [81]:
final_dataset.count()

Median Age                    696
Median Male Age               688
Median Female Age             687
Population                    706
White Population              706
Black Population              706
Native American Population    706
Asian Population              706
Hispanic Population           706
Education None                706
Education High School         706
Education GED                 706
Education Associates          706
Education Bachelors           706
Education Masters             706
Education Professional        706
Education Doctorate           706
Poverty                       706
Household Income              621
Income Per Capita             695
Employment Labor Force        706
Employment Unemployed         706
Median Gross Rent             380
Median Home Value             604
zipcode                       706
lat                           706
lng                           706
city                          706
state                         706
Google_URL    

### Export the Data

In [83]:
final_dataset.to_csv("%s-%s.csv" %(state, city_amenity))