# Unemployment & Education heatmaps

In [58]:
# Dependencies
from census import Census
from config import (census_key, gkey)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from us import states
from uszipcode import SearchEngine

# Census API Key
c = Census(census_key, year=2015)

## Data Retrieval

In [59]:
# Run Census Search to retrieve data on all zip codes (2015 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("B01003_001E", "B23025_005E", "B15003_002E", "B15003_017E", "B15003_022E", "B15003_023E",
                         "B15003_022E", "B15003_023E", "B15003_024E", "B15003_025E", "B25113_003E", "B25113_004E",
                         "B25113_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B23025_005E": "Unemployment Count",
                                      "zip code tabulation area": "Zipcode",
                                      "B15003_002E" : "Education None Count", "B15003_017E": "High School Count",
                                      "B15003_022E": "Bachelors Count", "B15003_023E": "Masters Count",
                                      "B15003_024E" : "Professional Count", "B15003_025E": "Doctorate Count",
                                      "B25113_003E" : "Median Gross Rent (2010-2014)",
                                      "B25113_004E": "Median Gross Rent (2000-2009)",
                                      "B25113_002E": "Median Gross Rent (moved in 2015 and later)"})

# Add in Employment Rate (Employment Count / Population)
census_pd["Unemployment Rate"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add % of people with specific level of education (education none count/population)
census_pd["% of Pop with no education"] = 100 * \
    census_pd["Education None Count"].astype(
        int) / census_pd["Population"].astype(int)

census_pd["% of Pop with high school"] = 100 * \
    census_pd["High School Count"].astype(
        int) / census_pd["Population"].astype(int)

census_pd["% of Pop with college and higher"] = 100 * \
    (census_pd["Bachelors Count"].astype(int)+census_pd["Masters Count"].astype(int)+census_pd["Doctorate Count"].astype(int)+
     census_pd["Professional Count"].astype(int)) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd = census_pd[["Zipcode", "Population", "Unemployment Rate","% of Pop with no education", "% of Pop with high school",
                       "% of Pop with college and higher", "Median Gross Rent (2010-2014)", "Median Gross Rent (2000-2009)", 
                       "Median Gross Rent (moved in 2015 and later)"]]

# Visualize
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Unemployment Rate,% of Pop with no education,% of Pop with high school,% of Pop with college and higher,Median Gross Rent (2010-2014),Median Gross Rent (2000-2009),Median Gross Rent (moved in 2015 and later)
0,601,17982.0,12.306751,2.535869,16.777889,11.633856,,,
1,602,40260.0,9.456036,2.372081,11.847988,14.677099,,,
2,603,52408.0,7.61525,1.934819,15.85445,15.749504,461.0,298.0,528.0
3,606,6331.0,2.827357,2.969515,22.761017,7.186858,250.0,241.0,
4,610,28328.0,5.277464,1.330839,20.146145,10.240751,,,


In [60]:
search = SearchEngine(simple_zipcode=True)

county_zip = []
state_zip = []
lat_zip = []
long_zip = []

for zip in census_pd['Zipcode']:
    zipcode = search.by_zipcode(zip)
    
    zipcode = zipcode.to_dict()
    
    county_zip.append(zipcode['county'])
    state_zip.append(zipcode['state'])
    lat_zip.append(zipcode['lat'])
    long_zip.append(zipcode['lng'])
    
census_pd['County'] = county_zip
census_pd['State'] = state_zip
census_pd['Latitude'] = lat_zip
census_pd['Longitude'] = long_zip

county_list = []

census_pd = census_pd[census_pd.State != 'PR']
census_pd = census_pd[census_pd.State != 'HI']

for county in census_pd['County']:
    full_name = county.split(' ')
    county_list.append(full_name[0])
    
census_pd['County'] = county_list

census_pd.head()

Unnamed: 0,Zipcode,Population,Unemployment Rate,% of Pop with no education,% of Pop with high school,% of Pop with college and higher,Median Gross Rent (2010-2014),Median Gross Rent (2000-2009),Median Gross Rent (moved in 2015 and later),County,State,Latitude,Longitude
131,1001,17438.0,3.67588,0.269526,20.030967,20.673242,945.0,863.0,1213.0,Hampden,MA,42.07,-72.63
132,1002,29780.0,4.361988,0.456682,5.050369,30.34587,1203.0,915.0,1403.0,Hampshire,MA,42.38,-72.52
133,1003,11241.0,8.451205,0.0,0.0,0.409216,,,,Hampshire,MA,42.39,-72.53
134,1005,5201.0,4.345318,0.0,24.975966,15.900788,920.0,915.0,,Worcester,MA,42.42,-72.12
135,1007,14838.0,3.309071,0.107831,12.946489,27.982208,856.0,892.0,,Hampshire,MA,42.3,-72.4


## Combine Data

In [61]:
# Import the original data we analyzed earlier. Use dtype="object" to match other
foodpop_data_original = pd.read_csv(
    "Resources/Clean_data/foodpop_data.csv", dtype="object", encoding="utf-8")

# Visualize
print(len(foodpop_data_original))
foodpop_data_original.head()

3143


Unnamed: 0.1,Unnamed: 0,County,People 1 Mile and More,People 1/2 Mile and More,People 10 Miles and More,People 20 Miles and More,Population,State
0,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC
1,1,Acadia,11284.81213,17453.87802,233.7670294,0.0,61773,LA
2,2,Accomack,10809.97223,11984.53711,252.378241,0.0,33164,VA
3,3,Ada,28009.707110000003,76201.38101,48.99173978,0.532825746,392365,ID
4,4,Adair,1358.349543,1541.07575,157.9810003,0.0,7682,IA


In [62]:
# Import the original data we analyzed earlier. Use dtype="object" to match other
murder_data_original = pd.read_csv(
    "Resources/Clean_data/murder_data.csv", dtype="object", encoding="utf-8")

lat_murder = []
lng_murder = []

for i in range (len(murder_data_original['City'])):
    
    location = search.by_city_and_state(murder_data_original['City'][i], murder_data_original['State'][i])
    try:
        zipcode = location[0]
        lat_murder.append(zipcode.lat)
        lng_murder.append(zipcode.lng)
    except IndexError:
        lat_murder.append('Nan')
        lng_murder.append('Nan')
        
    
murder_data_original['Latitude'] = lat_murder
murder_data_original['Longitude'] = lng_murder

# Visualize
print(len(murder_data_original))
murder_data_original.head()

79


Unnamed: 0.1,Unnamed: 0,City,"Murder Rate (per 100,000)",Murders,Population,Source,State,Latitude,Longitude
0,0,Chicago,19.7,378,2720546.0,https://portal.chicagopolice.org/portal/page/p...,Illinois,41.88,-87.62
1,1,Orlando,26.9,19,270934.0,OPD,Florida,28.54,-81.37
2,2,Memphis,24.1,114,655770.0,MPD,Tennessee,35.17,-90.07
3,3,Phoenix,7.1,72,1563025.0,PPD,Arizona,33.45,-112.08
4,4,Las Vegas,20.0,90,623747.0,http://www.lvmpd.com/Sections/Homicide/Homicid...,Nevada,36.17,-115.13


In [63]:
# Import the original data we analyzed earlier. Use dtype="object" to match other
unemployment_data_original = pd.read_csv(
    "Resources/Clean_data/unemployment_data.csv", dtype="object", encoding="utf-8")

# Visualize
print(len(unemployment_data_original))
unemployment_data_original.head()

3149


Unnamed: 0.1,Unnamed: 0,Civilian labour force,County,Employed poulation,Rate of Unemployment,State,Unemployed population
0,2,25602.0,Autauga,24272.0,5.0,AL,1330.0
1,3,87705.0,Baldwin,82843.0,6.0,AL,4862.0
2,4,8609.0,Barbour,7844.0,9.0,AL,765.0
3,5,8572.0,Bibb,8005.0,7.0,AL,567.0
4,6,24473.0,Blount,23152.0,5.0,AL,1321.0


In [64]:
# Import the original data we analyzed earlier. Use dtype="object" to match other
rent_data_original = pd.read_csv(
    "Resources/Clean_data/rent_data.csv", dtype="object", encoding="utf-8")

lat_rent = []
lng_rent = []

for i in range (len(rent_data_original['City'])):
    result = search.by_city_and_state(rent_data_original['City'][i], rent_data_original['State'][i])
    
    try:
        zipcode = result[0]
        lat_rent.append(zipcode.lat)
        lng_rent.append(zipcode.lng)
    except IndexError:
        lat_rent.append('NaN')
        lng_rent.append('NaN')
    
rent_data_original['Latitude'] = lat_rent
rent_data_original['Longitude'] = lng_rent

# Visualize
print(len(rent_data_original))
rent_data_original.head()


1630


Unnamed: 0.1,Unnamed: 0,City,State,County,Average Rent,Latitude,Longitude
0,0,New York,NY,Queens,2978,40.75,-73.99
1,1,Los Angeles,CA,Los Angeles,3195,33.97,-118.25
2,2,Chicago,IL,Cook,1758,41.88,-87.62
3,3,Houston,TX,Harris,1490,29.75,-95.37
4,4,San Antonio,TX,Bexar,1115,29.48,-98.53


In [65]:
# Merge the two data sets along zip code
foodpop_unemployment_complete = pd.merge(
    foodpop_data_original, unemployment_data_original, how="left", left_on=["County", "State"], right_on=["County", "State"])

# Save the revised Data Frame as a csv
foodpop_unemployment_complete.to_csv(
    "Resources/Clean_data/foodpop_unemployment_complete.csv", encoding="utf-8", index=False)

# Visualize
foodpop_unemployment_complete.head()

Unnamed: 0,Unnamed: 0_x,County,People 1 Mile and More,People 1/2 Mile and More,People 10 Miles and More,People 20 Miles and More,Population,State,Unnamed: 0_y,Civilian labour force,Employed poulation,Rate of Unemployment,Unemployed population
0,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC,2360,10349.0,9637.0,7.0,712.0
1,1,Acadia,11284.81213,17453.87802,233.7670294,0.0,61773,LA,1135,25858.0,24088.0,7.0,1770.0
2,2,Accomack,10809.97223,11984.53711,252.378241,0.0,33164,VA,2870,15877.0,15012.0,5.0,865.0
3,3,Ada,28009.707110000003,76201.38101,48.99173978,0.532825746,392365,ID,567,217052.0,209058.0,4.0,7994.0
4,4,Adair,1358.349543,1541.07575,157.9810003,0.0,7682,IA,808,4226.0,4089.0,3.0,137.0


In [66]:
# Merge the two data sets along zip code
census_data_complete = pd.merge(
    foodpop_unemployment_complete, census_pd, how="left", left_on=["County", "State"], right_on = ["County", "State"])

# Save the revised Data Frame as a csv
census_data_complete.to_csv(
    "Resources/bank_data_with_employment.csv", encoding="utf-8", index=False)

# Visualize
census_data_complete.head()

Unnamed: 0,Unnamed: 0_x,County,People 1 Mile and More,People 1/2 Mile and More,People 10 Miles and More,People 20 Miles and More,Population_x,State,Unnamed: 0_y,Civilian labour force,...,Population_y,Unemployment Rate,% of Pop with no education,% of Pop with high school,% of Pop with college and higher,Median Gross Rent (2010-2014),Median Gross Rent (2000-2009),Median Gross Rent (moved in 2015 and later),Latitude,Longitude
0,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC,2360,10349.0,...,13005.0,4.452134,0.745867,22.045367,8.273741,493.0,446.0,,34.2,-82.4
1,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC,2360,10349.0,...,2487.0,7.760354,0.603136,21.833534,5.548854,563.0,562.0,,34.1,-82.6
2,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC,2360,10349.0,...,3078.0,6.562703,0.064977,20.012995,5.45809,770.0,541.0,,34.4,-82.3
3,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC,2360,10349.0,...,1697.0,1.767826,0.589275,11.962286,13.376547,718.0,386.0,,34.31,-82.43
4,0,Abbeville,9451.794654,10638.20832,2422.056046,0.0,25417,SC,2360,10349.0,...,121.0,7.438017,2.479339,14.049587,0.0,,563.0,,34.21,-82.648


## Heatmap test runs (unemployment and no education)

In [67]:
# Configure gmaps with API key
gmaps.configure(api_key=gkey)

In [68]:
# Store 'Lat' and 'Lng' into  locations 
census_data_complete = census_data_complete.dropna()

locations = census_data_complete[["Latitude", "Longitude"]].astype(float)

In [69]:
unemployment_rate = census_data_complete["Unemployment Rate"].astype(float)


# Create an unemployment Heatmap layer
fig = gmaps.figure()

unemployment_layer = gmaps.heatmap_layer(locations, weights=unemployment_rate, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
heat_layer.dissipating = False
heat_layer.max_intensity = 100
heat_layer.point_radius = 1

fig.add_layer(unemployment_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [70]:
noeducation_percent = census_data_complete["% of Pop with no education"].astype(float)


# Create an unemployment Heatmap layer
fig = gmaps.figure()

noeducation_layer = gmaps.heatmap_layer(locations, weights=noeducation_percent, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
heat_layer.dissipating = False
heat_layer.max_intensity = 100
heat_layer.point_radius = 1

fig.add_layer(noeducation_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [71]:
food_access = census_data_complete["People 1 Mile and More"].astype(float)


# Create an unemployment Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=food_access, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
heat_layer.dissipating = False
heat_layer.max_intensity = 100
heat_layer.point_radius = 1

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [72]:
# Store 'Lat' and 'Lng' into  locations
test2_pd = murder_data_original.copy()

test2_pd = test2_pd.dropna()

locations_murder = test2_pd[["Latitude", "Longitude"]].astype(float)

# Convert bank rate to list
murder_rate = test2_pd["Murder Rate (per 100,000)"].tolist()

In [73]:
# Create bank symbol layer
murder_layer = gmaps.symbol_layer(
    locations_murder, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=4,
    info_box_content=[f"Murder rate: {rate}" for rate in murder_rate]
)


fig = gmaps.figure()
fig.add_layer(murder_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [74]:
rent_data_original.dropna()

# Store 'Lat' and 'Lng' into  locations
test3_pd = rent_data_original.copy()

test3_pd = test3_pd.dropna()

locations_rent = test3_pd[["Latitude", "Longitude"]].astype(float)

# Convert bank rate to list
rent_rate = test3_pd["Average Rent"].tolist()

In [75]:
# Create bank symbol layer
rent_layer = gmaps.symbol_layer(
    locations_rent, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=2,
    info_box_content=[f"Avg Rent: {rate}" for rate in rent_rate]
)


fig = gmaps.figure()
fig.add_layer(rent_layer)

fig

InvalidPointException: nan is not a valid latitude. Latitudes must lie between -90 and 90.

In [76]:
# Create a combined map
fig = gmaps.figure()

fig.add_layer(unemployment_layer)
fig.add_layer(murder_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [77]:
# Create a combined map
fig = gmaps.figure()

fig.add_layer(noeducation_layer)
fig.add_layer(murder_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
# Create a combined map
fig = gmaps.figure()

fig.add_layer(noeducation_layer)
fig.add_layer(rent_layer)

fig

In [None]:
# Create a combined map
fig = gmaps.figure()

fig.add_layer(unemployment_layer)
fig.add_layer(rent_layer)

fig