# Access Inequality Maps 
---
The script below lets us quickly identify zip codes that lack access to common amenities (supermarkets, banks, schools, hospitals, etc.). It retrieves data from the 2015 US Census, the Google Distance Matrix API, and the Google Radar API to determine the number of such amenities and the distance to the nearest one. The final dataset can be easily mapped or correlated against demographic information.

### Dependencies

In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time 
from census import Census
from us import states
from requests_toolbelt.threaded import pool
from multiprocessing.pool import ThreadPool

In [2]:
# API credentials are read from the environment so that secrets are not
# stored in the notebook itself. Export CENSUS_API_KEY and GOOGLE_API_KEY
# before running this cell.
# NOTE(review): keys that were previously committed in this cell should be
# treated as exposed and revoked.
import os

_census_key = os.environ.get("CENSUS_API_KEY")

# Google API Key
gkey = os.environ.get("GOOGLE_API_KEY")

if not _census_key or not gkey:
    raise RuntimeError(
        "Set the CENSUS_API_KEY and GOOGLE_API_KEY environment variables "
        "before running this notebook."
    )

# Census API client, pinned to the 2015 data year
c = Census(_census_key, year=2015)

### Parameters

In [8]:
# Required parameters
city_amenity = "Hospital"      # keyword placed in the Google place-search query
max_distance_meters = 32186    # search radius in meters (about 20 miles)
restrict_to_state = False      # True -> keep only zip codes located in `state`

# Optional parameter: state abbreviation used by the state filter
state = "TX"

### Retrieve Census Data

In [4]:
# Retrieve Census Data (Reference: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b)
# One mapping drives both the request and the renaming, so the list of
# requested variables and the readable column names cannot drift apart
# (the previous hand-written tuple requested "B25077_001E" twice).
census_fields = {
    "B19013_001E": "Household Income",
    "B19301_001E": "Income Per Capita",
    "B15003_002E": "Education None",
    "B15003_017E": "Education High School",
    "B15003_018E": "Education GED",
    "B15003_022E": "Education Bachelors",
    "B15003_021E": "Education Associates",
    "B15003_023E": "Education Masters",
    "B15003_024E": "Education Professional",
    "B15003_025E": "Education Doctorate",
    "B17001_002E": "Poverty",
    "B23025_002E": "Employment Labor Force",
    "B23025_005E": "Employment Unemployed",
    "B01002_001E": "Median Age",
    "B01002_002E": "Median Male Age",
    "B01002_003E": "Median Female Age",
    "B01003_001E": "Population",
    "B25064_001E": "Median Gross Rent",
    "B25077_001E": "Median Home Value",
    "B02001_002E": "White Population",
    "B02001_003E": "Black Population",
    "B02001_004E": "Native American Population",
    "B02001_005E": "Asian Population",
    "B03001_003E": "Hispanic Population",
}

# Request every variable above for all zip code tabulation areas
zip_census = c.acs5.get(tuple(census_fields), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
zip_census = pd.DataFrame(zip_census)

# Column Renaming: variable codes -> readable names, plus the geography
# column, which becomes "zipcode" (the key used for later merges)
column_names = dict(census_fields)
column_names["zip code tabulation area"] = "zipcode"
zip_census = zip_census.rename(columns=column_names)

# Visualize Data
zip_census.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Education Professional,Education Doctorate,Poverty,Household Income,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,59,65,10861,10816,7453,6162,2213,359,104500,601
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,206,258,21720,16079,8474,15289,3807,395,90200,602
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,283,280,25459,16804,10179,17058,3991,384,128400,603
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,17,32,4070,12512,5863,1679,179,280,108500,606
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,38,144,14005,17475,8452,9542,1495,424,113200,610


### Retrieve Geocode Data

In [5]:
# Load the zip code latitude/longitude spreadsheet; the "zipcode" column is
# converted with str so it is held as text rather than as a number.
zip_lat_lng = pd.read_excel(
    io="../../Raw/Latitude_Longitude_Zip.xlsx",
    converters=dict(zipcode=str),
)
zip_lat_lng.head()

Unnamed: 0,zipcode,lat,lng,city,state
0,601,18.180555,-66.749961,ADJUNTAS,PR
1,602,18.361945,-67.175597,AGUADA,PR
2,603,18.455183,-67.119887,AGUADILLA,PR
3,606,18.158345,-66.932911,MARICAO,PR
4,610,18.295366,-67.125135,ANASCO,PR


In [12]:
# Left-join the latitude/longitude table onto the census rows using the
# shared "zipcode" column (every census row is kept).
zip_census_geocoded = pd.merge(zip_census, zip_lat_lng, how="left", on="zipcode")
zip_census_geocoded.head()

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,7453,6162,2213,359,104500,601,18.180555,-66.749961,ADJUNTAS,PR
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,8474,15289,3807,395,90200,602,18.361945,-67.175597,AGUADA,PR
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,10179,17058,3991,384,128400,603,18.455183,-67.119887,AGUADILLA,PR
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,5863,1679,179,280,108500,606,18.158345,-66.932911,MARICAO,PR
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,8452,9542,1495,424,113200,610,18.295366,-67.125135,ANASCO,PR


### Filter Data by State

In [13]:
# Apply filter if user wishes.
# Both branches take an explicit copy: columns are added to zip_data later,
# and without the copy that assignment would either hit a slice of
# zip_census_geocoded (SettingWithCopyWarning) or silently modify
# zip_census_geocoded itself.
if restrict_to_state:
    in_state = zip_census_geocoded["state"] == state
    zip_data = zip_census_geocoded[in_state].copy()
else:
    zip_data = zip_census_geocoded.copy()
    # No state restriction: relabel so the export file name reads "USA-..."
    state = "USA"

In [14]:
# Preview the first five rows of the working dataset
zip_data.head(n=5)

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Income Per Capita,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,7453,6162,2213,359,104500,601,18.180555,-66.749961,ADJUNTAS,PR
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,8474,15289,3807,395,90200,602,18.361945,-67.175597,AGUADA,PR
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,10179,17058,3991,384,128400,603,18.455183,-67.119887,AGUADILLA,PR
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,5863,1679,179,280,108500,606,18.158345,-66.932911,MARICAO,PR
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,8452,9542,1495,424,113200,610,18.295366,-67.125135,ANASCO,PR


In [17]:
# Number of non-null values in each column
zip_data.count(axis=0)

Median Age                    32615
Median Male Age               32343
Median Female Age             32300
Population                    33120
White Population              33120
Black Population              33120
Native American Population    33120
Asian Population              33120
Hispanic Population           33120
Education None                33120
Education High School         33120
Education GED                 33120
Education Associates          33120
Education Bachelors           33120
Education Masters             33120
Education Professional        33120
Education Doctorate           33120
Poverty                       33120
Household Income              31067
Income Per Capita             32542
Employment Labor Force        33120
Employment Unemployed         33120
Median Gross Rent             27431
Median Home Value             30793
zipcode                       33120
lat                           33120
lng                           33120
city                        

### Create Query URLs

In [18]:
# Build one place-search request URL per row from its lat/lng, the search
# radius, the amenity keyword and the API key.
# The keyword is URL-encoded so that values containing spaces or reserved
# characters (e.g. "Grocery Store", "A&P") still form a valid query string;
# for a plain keyword such as "Hospital" the URL is unchanged.
# NOTE(review): Google has deprecated the Places "radarsearch" endpoint -
# confirm it is still available before running a large batch.
from urllib.parse import quote_plus

zip_data["Google_URL"] = (
    "https://maps.googleapis.com/maps/api/place/radarsearch/json?location="
    + zip_data["lat"].map(str) + "," + zip_data["lng"].map(str)
    + "&radius=" + str(max_distance_meters)
    + "&keyword=" + quote_plus(city_amenity)
    + "&key=" + gkey
)

In [None]:
# Preview the first five rows, now including the Google_URL column
zip_data.head(n=5)

Unnamed: 0,Median Age,Median Male Age,Median Female Age,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Employment Labor Force,Employment Unemployed,Median Gross Rent,Median Home Value,zipcode,lat,lng,city,state,Google_URL
0,37.6,36.5,39.0,17982,16956,102,14,0,17942,456,...,6162,2213,359,104500,601,18.180555,-66.749961,ADJUNTAS,PR,https://maps.googleapis.com/maps/api/place/rad...
1,39.5,38.6,40.3,40260,23144,693,8,72,37681,955,...,15289,3807,395,90200,602,18.361945,-67.175597,AGUADA,PR,https://maps.googleapis.com/maps/api/place/rad...
2,39.9,37.7,41.4,52408,36177,2057,76,655,50437,1014,...,17058,3991,384,128400,603,18.455183,-67.119887,AGUADILLA,PR,https://maps.googleapis.com/maps/api/place/rad...
3,40.8,41.0,40.3,6331,4399,151,0,0,6323,188,...,1679,179,280,108500,606,18.158345,-66.932911,MARICAO,PR,https://maps.googleapis.com/maps/api/place/rad...
4,40.2,36.9,42.2,28328,17791,950,0,26,27999,377,...,9542,1495,424,113200,610,18.295366,-67.125135,ANASCO,PR,https://maps.googleapis.com/maps/api/place/rad...


### Perform API Calls (Multi-Threaded Approach)

In [None]:
# Make API Calls
# Issue a request for every query URL through a pool of 35 workers and
# block until all of them have finished.
p = pool.Pool.from_urls(zip_data["Google_URL"].values, num_processes=35)
p.join_all()

# Requests that raised (timeouts, connection errors, ...) never show up in
# p.responses(), so those zip codes would silently vanish at the later inner
# merge. Collect them here so the loss is at least visible. Only the count
# is printed because every URL embeds the API key.
failed_requests = list(p.exceptions())
if failed_requests:
    print("%d of %d requests failed and will be missing from the results"
          % (len(failed_requests), len(zip_data)))

### Combine With Original Data

In [None]:
# Array to hold all retrieved data
amenity_counts = []

# Loop through pooled results
for response in p.responses():
    query_url = response.request_kwargs['url']
    payload = response.json()  # parse the body once and reuse it

    # Only a successful query yields a meaningful count. Error statuses
    # (quota exceeded, request denied, ...) are recorded as missing (None)
    # rather than being mistaken for "zero amenities nearby".
    if payload.get("status") in ("OK", "ZERO_RESULTS"):
        n_found = len(payload.get("results", []))
    else:
        n_found = None

    # Progress output; the API key (last query parameter) is stripped so it
    # does not end up in the notebook output.
    print(query_url.rsplit("&key=", 1)[0])
    print(n_found)

    amenity_counts.append({"Google_URL": query_url, "Count": n_found})

# Explicit columns keep the frame mergeable on "Google_URL" even when no
# responses came back at all.
full_data = pd.DataFrame(amenity_counts, columns=["Google_URL", "Count"])

In [None]:
# Attach the amenity count to each zip code row by joining on the query URL.
# The inner join keeps only rows whose URL appears in both frames.
final_dataset = pd.merge(zip_data, full_data, how="inner", on="Google_URL")

In [None]:
# Preview the first five rows of the merged result
final_dataset.head(n=5)

In [None]:
# Number of non-null values in each column of the merged result
final_dataset.count(axis=0)

### Export the Data

In [None]:
# Write the result to "<state>-<amenity>.csv" in the working directory.
# The Google_URL column is dropped first: every URL embeds the API key, so
# exporting it would leak the key to anyone who receives the CSV.
final_dataset.drop("Google_URL", axis=1).to_csv("%s-%s.csv" % (state, city_amenity))