# HeatMaps
#### Generate heatmaps using the population census and school enrollment numbers
#### author : suchita mujumdar

In [1]:
# Dependencies
#https://github.com/datamade/census
from census import Census
from keys import censusKey
from keys import googleKey

import gmaps
import numpy as np
import pandas as pd
import requests
import time
from us import states


In [2]:
df_full = pd.read_csv('data/AustinSchools.csv')

In [3]:
df_full.columns

Index(['Unnamed: 0', 'gsId', 'name', 'type', 'gradeRange', 'enrollment',
       'gsRating', 'parentRating', 'city', 'state', 'districtId', 'district',
       'districtNCESId', 'address', 'phone', 'fax', 'website', 'ncesId', 'lat',
       'lon', 'schoolName', 'latitude', 'longitude', 'headOfficialName',
       'freeAndReducedPriceLunch', 'idea', 'plan504',
       'percentTeachersInFirstSecondYear', 'White, non-Hispanic',
       'Black, non-Hispanic', 'Hispanic', 'Multiracial', 'Asian',
       'Native American or Native Alaskan',
       'Native Hawaiian or Other Pacific Islander'],
      dtype='object')

#### Create a dataset with only the columns that we need

In [4]:
df = df_full[['schoolName', 'latitude', 'longitude','enrollment','type']]
df.head()

Unnamed: 0,schoolName,latitude,longitude,enrollment,type
0,Blackshear Elementary School,30.266968,-97.72217,302.0,public
1,Bryker Woods Elementary School,30.304869,-97.750626,400.0,public
2,Casis Elementary School,30.304344,-97.76505,793.0,public
3,Maplewood Elementary School,30.293337,-97.714905,480.0,public
4,Mathews Elementary School,30.278215,-97.760796,425.0,public


In [5]:
df.dtypes

schoolName     object
latitude      float64
longitude     float64
enrollment    float64
type           object
dtype: object

In [76]:
df["enrollment"].describe()

count     310.000000
mean      568.248387
std       490.360530
min         0.000000
25%       253.250000
50%       508.500000
75%       749.250000
max      2912.000000
Name: enrollment, dtype: float64

In [77]:
# Add in Percent enrollment (School enrollment / Total enrollment)
df["Percent enrollment"] = 100 * df["enrollment"] / df["enrollment"].sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [80]:
df["Percent enrollment"].describe()

count    310.000000
mean       0.322581
std        0.278366
min        0.000000
25%        0.143764
50%        0.288663
75%        0.425331
max        1.653071
Name: Percent enrollment, dtype: float64

In [78]:
df.head()

Unnamed: 0,schoolName,latitude,longitude,enrollment,type,Percent enrollment
0,Blackshear Elementary School,30.266968,-97.72217,302.0,public,0.171438
1,Bryker Woods Elementary School,30.304869,-97.750626,400.0,public,0.22707
2,Casis Elementary School,30.304344,-97.76505,793.0,public,0.450167
3,Maplewood Elementary School,30.293337,-97.714905,480.0,public,0.272484
4,Mathews Elementary School,30.278215,-97.760796,425.0,public,0.241262


#### Create a dataset for the total population in Austin in the different zip codes using the census API

In [6]:
# Census API Key
c = Census(censusKey.census_key, 2017)
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
# B01003_001E  Total Population

census_data = c.acs5.get(('NAME',"B01003_001E"), {'for': 'zip code tabulation area:*'})


In [7]:
# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

In [8]:
# Column Reordering
del(census_pd['NAME'])
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "zip code tabulation area": "Zipcode"})

In [9]:
census_pd["Zipcode"] = pd.to_numeric(census_pd["Zipcode"],errors='coerce')

In [10]:
census_pd.head()

Unnamed: 0,Population,Zipcode
0,17599.0,601
1,39209.0,602
2,50135.0,603
3,6304.0,606
4,27590.0,610


In [11]:
# All austin area zipcodes

zipcodes = [78610, 78613, 78617, 78641, 78652, 78653, 78660, 78664, 78681, 
            78701, 78702, 78703, 78704, 78705, 78712, 78717, 78719, 78721, 
            78722, 78723, 78724, 78725, 78726, 78727, 78728, 78729, 78730,
            78731, 78732, 78733, 78734, 78735, 78736, 78737, 78738, 78739,
            78741, 78742, 78744, 78745, 78746, 78747, 78748, 78749, 78750, 
            78751, 78752, 78753, 78754, 78756, 78757, 78758, 78759]

#Read more: http://www.city-data.com/zipmaps/Austin-Texas.html


In [12]:
austin_pd = census_pd[census_pd['Zipcode'].isin(zipcodes)]

In [13]:
austin_pd = austin_pd.reset_index()
austin_pd.head()

Unnamed: 0,index,Population,Zipcode
0,27315,32692.0,78610
1,27318,77779.0,78613
2,27322,27900.0,78617
3,27342,60135.0,78641
4,27349,4965.0,78652


In [14]:
austin_pd = austin_pd[['Zipcode','Population']]
austin_pd.head()

Unnamed: 0,Zipcode,Population
0,78610,32692.0
1,78613,77779.0
2,78617,27900.0
3,78641,60135.0
4,78652,4965.0


#### Get all the zipcode coordinates from a csv


In [38]:
allcoordinates_df = pd.read_csv('data/coordinates.csv')

In [42]:
allcoordinates_df = allcoordinates_df.rename(columns={"ZIP":"Zipcode"})

In [44]:
allcoordinates_df.dtypes

Zipcode      int64
LAT        float64
LNG        float64
dtype: object

In [45]:
austin_pd.dtypes

Zipcode         int64
Population    float64
dtype: object

In [46]:
AustinZipCo_df = pd.merge(austin_pd, allcoordinates_df, on='Zipcode', how='inner')

In [47]:
AustinZipCo_df.iloc[0]

Zipcode       78610.000000
Population    32692.000000
LAT              30.078514
LNG             -97.838012
Name: 0, dtype: float64

In [48]:
AustinZipCo_df.head()

Unnamed: 0,Zipcode,Population,LAT,LNG
0,78610,32692.0,30.078514,-97.838012
1,78613,77779.0,30.503949,-97.8242
2,78617,27900.0,30.147375,-97.600145
3,78641,60135.0,30.562168,-97.907817
4,78652,4965.0,30.132805,-97.874687


In [54]:
# Add in Percent Population (Zipcode Population / Total Population)
AustinZipCo_df["Percent Population"] = 100 * \
    AustinZipCo_df["Population"].astype(
        int) / AustinZipCo_df["Population"].sum()


In [60]:
AustinZipCo_df['Percent Population'].describe()

count    53.000000
mean      1.886792
std       1.323600
min       0.034500
25%       0.997570
50%       1.592082
75%       2.499298
max       5.702213
Name: Percent Population, dtype: float64

#### A Heat Map for the population in Austin

In [49]:
# Configure gmaps
gmaps.configure(api_key=googleKey.gkey)

In [57]:
# Store 'Lat' and 'Lng' into  locations 
locations = AustinZipCo_df[["LAT", "LNG"]].astype(float)

# Convert Population to float and store
# HINT: be sure to handle NaN values
population = AustinZipCo_df["Percent Population"].astype(float)

In [74]:
# Create a population Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=population, 
                                 dissipating=False, max_intensity=6,
                                 point_radius = 0.05)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
heat_layer.dissipating = False
heat_layer.max_intensity = 6
heat_layer.point_radius = 0.05

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [82]:
# Store 'Lat' and 'Lng' into  locations 
locations = df[["latitude", "longitude"]].astype(float)

# Convert enrollment to float and store
# HINT: be sure to handle NaN values
enrollment = df["Percent enrollment"].astype(float)

In [85]:
# Create an enrollment Heatmap layer
fig2 = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=enrollment, 
                                 dissipating=False, max_intensity=1.6,
                                 point_radius = 0.01)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
heat_layer.dissipating = False
heat_layer.max_intensity = 1.6
heat_layer.point_radius = 0.01

fig2.add_layer(heat_layer)

fig2

Figure(layout=FigureLayout(height='420px'))