# Poverty Rates in Newark NJ
---
The below script explores the poverty rates in Newark NJ.

In this script, we retrieved and plotted data from the 2019 US Census and Google Places API to show the relationship between various socioeconomic parameters across 10 zip codes in Newark. We used Pandas, Numpy, Matplotlib, Requests, Census API, and Google API to accomplish our task.

In [11]:
# Dependencies
from census import Census
from config import (census_key, gkey)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt
from pprint import pprint

# Census API Key
c = Census(census_key, year=2019)

## Data Retrieval

In [12]:
#census_data = c.acs5.get(("B01003_001E", "B17001_002E"), {newark_zip_codes})
census_data = c.acs5.get(("B01003_001E", "B17001_002E"), {'for': 'zip code tabulation area:''07017, 07102, 07103, 07104, 07105, 07106, 07107, 07108, 07112, 07114'})
#Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B17001_002E": "Poverty Count",
                                      'zip code tabulation area': "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)
# group poverty rate to zipcode, to generate heat layer 
# Final DataFrame
census_pd = census_pd[["Zipcode", "Population", "Poverty Rate"]]

# Visualize
print(len(census_pd))
census_pd.head(11)

CensusException: error: ambiguous geography "zip code tabulation area:07017, 07102, 07103, 07104, 07105, 07106, 07107, 07108, 07112, 07114". You must either specify a wildcard or full qualify it with state

In [13]:
# Run Census Search to retrieve data on Newark zip codes (2019 ACS5 Census)

newark_zip_codes = ["07017", "07102", "07103", "07104", "07105", "07106", "07107", "07108", "07112", "07114"]

census_data = c.acs5.get(("B01003_001E", "B17001_002E"), {'for': 'zip code tabulation area:*'})
census_data
census_pd = pd.DataFrame(census_data)
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B17001_002E": "Poverty Count",
                                      'zip code tabulation area': "Zip Code"})
# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

census_pd = census_pd[census_pd['Zip Code'].isin(newark_zip_codes)]
#df[df['A'].isin([3, 6])]
census_pd.head(11)
census_pd.to_csv('output/output.csv')


## Combine Data

In [14]:
# Import the data of abandoned properties in Newark
census_data_original = pd.read_csv(
    "data/newark_abandoned_data.csv", dtype="object", encoding="utf-8")

# Visualize
census_data_original.head()

Unnamed: 0,ID,Lot,Block,Status,Owner Name,Address,City,State,Zip Code,Latitude,Longitude,Date,Year,Month
0,1,19.0,18.0,Vacant Only,"HG JETSON URBAN RENE,",589 BROAD ST,Newark,NJ,7102,40.74162384615384,-74.16890080769231,2020-01-23,2020,2020-01
1,2,24.0,43.0,Vacant Only,"FOREST HILLS HOLDINGS, LLC",29 BURNET ST,Newark,NJ,7102,40.746275857142855,-74.17367742857142,2020-01-24,2020,2020-01
2,3,55.0,45.0,Vacant Only,"CAMPBELL, FREDERICK",24 EAGLES ST,Newark,NJ,7102,40.74601485416667,-74.17252522916667,2020-02-10,2020,2020-02
3,4,29.0,64.0,Vacant Only,"WEST, MICHAEL",7 WARREN ST,Newark,NJ,7102,40.73913753846154,-74.17237438461538,2019-03-29,2019,2019-03
4,5,19.0,98.0,Vacant Only,MADISON DEVELOPMENT LLC,789 SOUTH ORANGE AVE,Newark,NJ,7106,40.74496745,-74.2198612,2019-10-18,2019,2019-10


In [15]:
# Merge the two data sets along zip code
census_data_complete = pd.merge(
    census_data_original, census_pd, how="left", on=["Zip Code", "Zip Code"])

# Remove rows missing data
census_data_complete = census_data_complete.dropna()

# Visualize
census_data_complete.head()

Unnamed: 0,ID,Lot,Block,Status,Owner Name,Address,City,State,Zip Code,Latitude,Longitude,Date,Year,Month,Population,Poverty Count,state,Poverty Rate
0,1,19.0,18.0,Vacant Only,"HG JETSON URBAN RENE,",589 BROAD ST,Newark,NJ,7102,40.74162384615384,-74.16890080769231,2020-01-23,2020,2020-01,13505.0,3935.0,34,29.137357
1,2,24.0,43.0,Vacant Only,"FOREST HILLS HOLDINGS, LLC",29 BURNET ST,Newark,NJ,7102,40.746275857142855,-74.17367742857142,2020-01-24,2020,2020-01,13505.0,3935.0,34,29.137357
2,3,55.0,45.0,Vacant Only,"CAMPBELL, FREDERICK",24 EAGLES ST,Newark,NJ,7102,40.74601485416667,-74.17252522916667,2020-02-10,2020,2020-02,13505.0,3935.0,34,29.137357
3,4,29.0,64.0,Vacant Only,"WEST, MICHAEL",7 WARREN ST,Newark,NJ,7102,40.73913753846154,-74.17237438461538,2019-03-29,2019,2019-03,13505.0,3935.0,34,29.137357
4,5,19.0,98.0,Vacant Only,MADISON DEVELOPMENT LLC,789 SOUTH ORANGE AVE,Newark,NJ,7106,40.74496745,-74.2198612,2019-10-18,2019,2019-10,33851.0,7827.0,34,23.121917


## Poverty Rate "Heat Map"

In [16]:
# Configure gmaps with API key
gmaps.configure(api_key=gkey)

In [27]:
# Converting Lat/Long coordinates from strings to decimal numbers, then grouped by zip code and averaged to get
# a "central" geographical coordinate for the zip code:
census_data_complete["Latitude"] = pd.to_numeric(census_data_complete["Latitude"], downcast = "float")
census_data_complete["Longitude"] = pd.to_numeric(census_data_complete["Longitude"], downcast = "float")
census_data_complete = census_data_complete.groupby("Zip Code").mean()
census_data_complete.to_csv("output/newark_poverty_population.csv")
census_data_complete.head(11)

Unnamed: 0_level_0,Latitude,Longitude,Population,Poverty Count,Poverty Rate
Zip Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7017,40.765034,-74.191154,36471.0,5858.0,16.062077
7102,40.734715,-74.173546,13505.0,3935.0,29.137357
7103,40.738434,-74.203232,32881.0,10983.0,33.402269
7104,40.763977,-74.170395,51075.0,12676.0,24.818404
7105,40.728134,-74.153351,52008.0,11304.0,21.735118
7106,40.744968,-74.219864,33851.0,7827.0,23.121917
7107,40.754181,-74.192093,37684.0,10950.0,29.057425
7108,40.724895,-74.205986,23118.0,8457.0,36.581884
7112,40.712593,-74.199753,25016.0,5280.0,21.106492
7114,40.721378,-74.183418,12024.0,2456.0,20.425815


In [18]:
# Store 'Lat' and 'Lng' into  locations 
locations = census_data_complete[["Latitude", "Longitude"]]

# Convert Poverty Rate to float and store

census_data_complete = census_data_complete.dropna()
poverty_rate = census_data_complete["Poverty Rate"].astype(float)

#poverty_rate_zip = census_data_complete[["Zip Code","Poverty Rate"]].astype(float)

#poverty_rate_zip

In [19]:
zip_locations = []
for zipcode in poverty_rate_zip["Zip Code"]:
    zip_geocode = gmaps.geocoding.geocode(address = str(zipcode))
    zip_locations.append(zip_geocode)
zip_locations

NameError: name 'poverty_rate_zip' is not defined

In [21]:
# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=poverty_rate, 
                                 dissipating=False, max_intensity=60,
                                 point_radius = .01)

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

### Analysis (data from 2019):
* Ranking of poverty rates by zip code, from highest to lowest: 
    * 108, 103, 102, 107, 104, 106, 105, 112, 114, 017
* A wide range of population amongst zip codes, population density not uniform 
    * For example: although 108 has the highest poverty rate, more people living in poverty in 107
* Newark's Average Poverty Rate: 25.5%
    * Which is 16% higher than the state average and 15% higher than the national average
* 79,726 Newarkers are within the poverty threshhold