In [15]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states
import seaborn as sns
import gmaps
# Census & gmaps API Keys
from config import (gkey, ckey)

#Configure gmaps
gmaps.configure(api_key=gkey)


In [16]:
c = Census(ckey, year=2017)
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                         "B19301_001E",
                         "B17001_002E",
                         "B23025_005E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", 
                                      "zip code tabulation area": "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add in Employment Rate (Employment Count / Population)
census_pd["Unemployment Rate"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd = census_pd[["Zipcode","Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "Unemployment Rate"]]



census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,601,17599.0,38.9,11757.0,7041.0,11282.0,64.105915,13.943974
1,602,39209.0,40.9,16190.0,8978.0,20428.0,52.100283,6.473004
2,603,50135.0,40.4,16645.0,10897.0,25176.0,50.216416,7.156677
3,606,6304.0,42.8,13387.0,5960.0,4092.0,64.911168,3.236041
4,610,27590.0,41.4,18741.0,9266.0,12553.0,45.498369,5.342515


In [17]:
zipcode_data = pd.read_csv("zipcodes.csv")

zipcode_pd = pd.DataFrame(zipcode_data, dtype = 'object')
zipcode_pd

Unnamed: 0,city,zip_code
0,Apex,27523
1,Apex,27539
2,Apex,27502
3,Bahama,27503
4,Butner,27509
...,...,...
89,Stem,27581
90,Wake Forest,27587
91,Wake Forest,27588
92,Willow Spring,27592


In [18]:
zipcode_pd = zipcode_pd.rename(columns={"zip_code": "Zipcode"})
zipcode_pd.head()



Unnamed: 0,city,Zipcode
0,Apex,27523
1,Apex,27539
2,Apex,27502
3,Bahama,27503
4,Butner,27509


In [19]:
dtype = dict(Zipcode=int)

merged_census=zipcode_pd.astype(dtype).merge(census_pd.astype(dtype), 'inner')

merged_census.head()

Unnamed: 0,city,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,Apex,27523,11310.0,39.9,95110.0,43776.0,519.0,4.588859,4.084881
1,Apex,27539,22303.0,37.5,100099.0,41574.0,1272.0,5.703269,2.51984
2,Apex,27502,38104.0,38.0,103673.0,41962.0,2152.0,5.647701,1.852824
3,Bahama,27503,6056.0,45.3,79071.0,29300.0,175.0,2.889696,1.188904
4,Butner,27509,6047.0,34.0,41069.0,15477.0,1466.0,24.243426,2.381346


In [20]:
ral_data = merged_census.loc[merged_census['city'] == 'Raleigh']
del ral_data["Median Age"]
del ral_data["Poverty Count"]
del ral_data["city"]



In [35]:

dist= ["North", "North", "Northwest", "North", "Northeast", "North", "North", "Northeast", "Southwest", "Northwest", "Southwest", "Downtown", "Downtown", "Downtown", "Southeast"]
lat = [35.923982, 35.842711, 35.904084, 35.902182,35.870606, 35.851876, 35.94882,35.816936, 35.683682, 35.818811, 35.737593, 35.809159, 35.790371, 35.775211, 35.743602 ]
lng = [-78.716682, -78.63185,-78.768873,-78.627742, -78.533618, -78.702013, -78.613315, -78.563843, -78.664144, -78.71404, -78.721192, -78.645188, -78.652752, -78.634324, -78.536408]
ral_data.loc[:, "district"] = dist
ral_data.loc[:, "lat"] = lat
ral_data.loc[:, "lng"] = lng
ral_data.reset_index(drop=True)


Unnamed: 0,Zipcode,Population,Household Income,Per Capita Income,Poverty Rate,Unemployment Rate,district,lat,lng
0,27613,43484.0,87774.0,47204.0,3.971576,2.410082,North,35.923982,-78.716682
1,27609,33103.0,57924.0,43475.0,11.301091,2.051174,North,35.842711,-78.63185
2,27617,18380.0,80471.0,55611.0,6.430903,1.218716,Northwest,35.904084,-78.768873
3,27615,42863.0,78709.0,48411.0,5.88153,2.286354,North,35.902182,-78.627742
4,27616,52679.0,64212.0,27117.0,11.88709,3.883901,Northeast,35.870606,-78.533618
5,27612,37384.0,73402.0,47259.0,7.158143,2.126578,North,35.851876,-78.702013
6,27614,33242.0,102950.0,54074.0,3.011251,1.618435,North,35.94882,-78.613315
7,27604,46908.0,54081.0,26951.0,14.161763,3.180694,Northeast,35.816936,-78.563843
8,27603,53693.0,60924.0,29626.0,19.514648,3.177323,Southwest,35.683682,-78.664144
9,27607,29067.0,74786.0,32774.0,11.996422,1.448378,Northwest,35.818811,-78.71404


In [36]:

#Exporting Data to CSV
ral_data.to_csv("Raleigh_zip_pop.csv")


In [37]:
#getting Crime data from district_crime.csv
crime_dist = pd.read_csv("Crime Data/district_crime.csv")
crime_ral_dist = pd.DataFrame(crime_dist)
crime_ral_dist.head()

Unnamed: 0,district,description,count
0,Downtown,Other crimes,145
1,Downtown,Deceased Person,34
2,Downtown,Missing Person,31
3,Downtown,Assault,20
4,Downtown,Sex crime,15


In [46]:
complete_ral_data = pd.merge(ral_data, crime_ral_dist, on='district', how='outer')
complete_ral_data

Unnamed: 0,Zipcode,Population,Household Income,Per Capita Income,Poverty Rate,Unemployment Rate,district,lat,lng,description,count
0,27613,43484.0,87774.0,47204.0,3.971576,2.410082,North,35.923982,-78.716682,Other crimes,157
1,27613,43484.0,87774.0,47204.0,3.971576,2.410082,North,35.923982,-78.716682,Deceased Person,88
2,27613,43484.0,87774.0,47204.0,3.971576,2.410082,North,35.923982,-78.716682,Missing Person,80
3,27613,43484.0,87774.0,47204.0,3.971576,2.410082,North,35.923982,-78.716682,Assault,37
4,27613,43484.0,87774.0,47204.0,3.971576,2.410082,North,35.923982,-78.716682,Sex crime,17
...,...,...,...,...,...,...,...,...,...,...,...
70,27610,75991.0,48897.0,20280.0,19.443092,4.409733,Southeast,35.743602,-78.536408,Other crimes,174
71,27610,75991.0,48897.0,20280.0,19.443092,4.409733,Southeast,35.743602,-78.536408,Missing Person,131
72,27610,75991.0,48897.0,20280.0,19.443092,4.409733,Southeast,35.743602,-78.536408,Assault,57
73,27610,75991.0,48897.0,20280.0,19.443092,4.409733,Southeast,35.743602,-78.536408,Deceased Person,55


In [69]:
counter = census_data["Poverty Rate"].tolist().astype(int)
#creating a map
marker_locations = complete_ral_data[['lat', 'lng']]

#Create a marker_layer using the poverty list to fill the info box
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations,
    info_box_content=[f"Poverty Rate: {rate}" for rate in counter])
fig.add_layer(markers)
fig

TypeError: list indices must be integers or slices, not str

In [66]:

# #plot population vs crime counts by zipcode
# plt.figure(figsize=(12,8))
# #plt.scatter plot
# ax = sns.regplot(x=crime_count, y=pop_count)
# #plt.xlim(0,2.6e7)
# plt.xlabel('Population', fontsize=14)
# plt.xticks(fontsize=14)
# plt.ylabel('Crimes by Zipcodes', fontsize=14)
# plt.yticks(fontsize=18)
# plt.title('Population vs Crimes Count by Zipcode', fontsize=18)
# plt.legend()
# plt.show()