# HeatMaps
#### Generate heatmaps using the population census and school enrollment numbers
#### author : suchita mujumdar

In [1]:
# Dependencies
#https://github.com/datamade/census
from census import Census
from keys import censusKey
from keys import googleKey

import gmaps
import numpy as np
import pandas as pd
import requests
import time
from us import states


In [2]:
df_full = pd.read_csv('data/AustinSchools.csv')

In [3]:
df_full.columns

Index(['Unnamed: 0', 'gsId', 'name', 'type', 'gradeRange', 'enrollment',
       'gsRating', 'parentRating', 'city', 'state', 'districtId', 'district',
       'districtNCESId', 'address', 'phone', 'fax', 'website', 'ncesId', 'lat',
       'lon', 'schoolName', 'latitude', 'longitude', 'headOfficialName',
       'freeAndReducedPriceLunch', 'idea', 'plan504',
       'percentTeachersInFirstSecondYear', 'White, non-Hispanic',
       'Black, non-Hispanic', 'Hispanic', 'Multiracial', 'Asian',
       'Native American or Native Alaskan',
       'Native Hawaiian or Other Pacific Islander'],
      dtype='object')

#### Create a dataset with only the columns that we need

In [4]:
df = df_full[['schoolName', 'latitude', 'longitude','enrollment','type']]
df.head()

Unnamed: 0,schoolName,latitude,longitude,enrollment,type
0,Blackshear Elementary School,30.266968,-97.72217,302.0,public
1,Bryker Woods Elementary School,30.304869,-97.750626,400.0,public
2,Casis Elementary School,30.304344,-97.76505,793.0,public
3,Maplewood Elementary School,30.293337,-97.714905,480.0,public
4,Mathews Elementary School,30.278215,-97.760796,425.0,public


In [5]:
df['type'].value_counts()

public     197
private    100
charter     52
Name: type, dtype: int64

In [6]:
df.dtypes

schoolName     object
latitude      float64
longitude     float64
enrollment    float64
type           object
dtype: object

In [7]:
df["enrollment"].describe()

count     310.000000
mean      568.248387
std       490.360530
min         0.000000
25%       253.250000
50%       508.500000
75%       749.250000
max      2912.000000
Name: enrollment, dtype: float64

In [8]:
# Add in Percent enrollment (School enrollment / Total enrollment)
df["Percent enrollment"] = 100 * df["enrollment"] / df["enrollment"].sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [9]:
df["Percent enrollment"].describe()

count    310.000000
mean       0.322581
std        0.278366
min        0.000000
25%        0.143764
50%        0.288663
75%        0.425331
max        1.653071
Name: Percent enrollment, dtype: float64

In [10]:
df.head()

Unnamed: 0,schoolName,latitude,longitude,enrollment,type,Percent enrollment
0,Blackshear Elementary School,30.266968,-97.72217,302.0,public,0.171438
1,Bryker Woods Elementary School,30.304869,-97.750626,400.0,public,0.22707
2,Casis Elementary School,30.304344,-97.76505,793.0,public,0.450167
3,Maplewood Elementary School,30.293337,-97.714905,480.0,public,0.272484
4,Mathews Elementary School,30.278215,-97.760796,425.0,public,0.241262


In [11]:
# Configure gmaps
gmaps.configure(api_key=googleKey.gkey)

In [13]:
figure_layout = {
'width': '1000px',
'height': '1000px',
'border': '1px solid black',
'padding': '1px'
}
fig = gmaps.figure(layout=figure_layout)
fig

Figure(layout=FigureLayout(border='1px solid black', height='1000px', padding='1px', width='1000px'))

In [14]:
fig = gmaps.figure(layout=figure_layout)
public_schools = df[df['type'] == 'public']
public_schools_locations = public_schools[['latitude', 'longitude']]
public_schools_layer = gmaps.symbol_layer( public_schools_locations, 
    info_box_content=public_schools['schoolName'],
    fill_color='red', stroke_color='red', scale=2)
fig.add_layer(public_schools_layer)
public_school_student_enrollment = gmaps.heatmap_layer(public_schools_locations,
                        weights=public_schools['Percent enrollment'],
                        max_intensity=1.7, point_radius=0.01,dissipating=False)

fig.add_layer(public_school_student_enrollment)
fig

Figure(layout=FigureLayout(border='1px solid black', height='1000px', padding='1px', width='1000px'))

In [68]:
fig2 = gmaps.figure(layout=figure_layout)
private_schools = df[df['type'] == 'private']
private_schools_locations = private_schools[['latitude', 'longitude']]
private_schools_layer = gmaps.symbol_layer(private_school_locations, 
    #info_box_content=private_schools['schoolName'], 
    fill_color='lime', stroke_color='red', scale=2)

fig2.add_layer(private_schools_layer)
private_school_student_enrollment = gmaps.heatmap_layer(private_school_locations,
                        weights=private_schools['Percent enrollment'],
                        max_intensity=1.7, point_radius=0.01,dissipating=False)
fig2.add_layer(private_school_student_enrollment)
fig2

Figure(layout=FigureLayout(border='1px solid black', height='1000px', padding='1px', width='1000px'))

In [69]:
fig3 = gmaps.figure(layout=figure_layout)
charter_schools = df[df['type'] == 'charter']
charter_schools_locations = charter_schools[['latitude', 'longitude']]
charter_schools_layer = gmaps.symbol_layer(
charter_schools_locations, fill_color='purple', stroke_color='purple', scale=2
)
fig3.add_layer(charter_schools_layer)
charter_school_student_enrollment = gmaps.heatmap_layer(charter_schools_locations,
                        weights=charter_schools['Percent enrollment'],
                        max_intensity=1.7, point_radius=0.01,dissipating=False)
fig3.add_layer(charter_school_student_enrollment)
fig3

Figure(layout=FigureLayout(border='1px solid black', height='1000px', padding='1px', width='1000px'))

#### Create a dataset for the total population in Austin in the different zip codes using the census API

In [24]:
# Census API Key
c = Census(censusKey.census_key, 2017)
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
# B01003_001E  Total Population

census_data = c.acs5.get(('NAME',"B01003_001E"), {'for': 'zip code tabulation area:*'})


In [25]:
# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

In [26]:
# Column Reordering
del(census_pd['NAME'])
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "zip code tabulation area": "Zipcode"})

In [27]:
census_pd["Zipcode"] = pd.to_numeric(census_pd["Zipcode"],errors='coerce')

In [28]:
census_pd.head()

Unnamed: 0,Population,Zipcode
0,17599.0,601
1,39209.0,602
2,50135.0,603
3,6304.0,606
4,27590.0,610


In [29]:
# All austin area zipcodes

zipcodes = [78610, 78613, 78617, 78641, 78652, 78653, 78660, 78664, 78681, 
            78701, 78702, 78703, 78704, 78705, 78712, 78717, 78719, 78721, 
            78722, 78723, 78724, 78725, 78726, 78727, 78728, 78729, 78730,
            78731, 78732, 78733, 78734, 78735, 78736, 78737, 78738, 78739,
            78741, 78742, 78744, 78745, 78746, 78747, 78748, 78749, 78750, 
            78751, 78752, 78753, 78754, 78756, 78757, 78758, 78759]

#Read more: http://www.city-data.com/zipmaps/Austin-Texas.html


In [30]:
austin_pd = census_pd[census_pd['Zipcode'].isin(zipcodes)]

In [31]:
austin_pd = austin_pd.reset_index()
austin_pd.head()

Unnamed: 0,index,Population,Zipcode
0,27315,32692.0,78610
1,27318,77779.0,78613
2,27322,27900.0,78617
3,27342,60135.0,78641
4,27349,4965.0,78652


In [32]:
austin_pd = austin_pd[['Zipcode','Population']]
austin_pd.head()

Unnamed: 0,Zipcode,Population
0,78610,32692.0
1,78613,77779.0
2,78617,27900.0
3,78641,60135.0
4,78652,4965.0


#### Get all the zipcode coordinates from a csv


In [33]:
allcoordinates_df = pd.read_csv('data/coordinates.csv')

In [34]:
allcoordinates_df = allcoordinates_df.rename(columns={"ZIP":"Zipcode"})

In [35]:
allcoordinates_df.dtypes

Zipcode      int64
LAT        float64
LNG        float64
dtype: object

In [36]:
austin_pd.dtypes

Zipcode         int64
Population    float64
dtype: object

In [37]:
AustinZipCo_df = pd.merge(austin_pd, allcoordinates_df, on='Zipcode', how='inner')

In [38]:
AustinZipCo_df.iloc[0]

Zipcode       78610.000000
Population    32692.000000
LAT              30.078514
LNG             -97.838012
Name: 0, dtype: float64

In [40]:
AustinZipCo_df.head()

Unnamed: 0,Zipcode,Population,LAT,LNG
0,78610,32692.0,30.078514,-97.838012
1,78613,77779.0,30.503949,-97.8242
2,78617,27900.0,30.147375,-97.600145
3,78641,60135.0,30.562168,-97.907817
4,78652,4965.0,30.132805,-97.874687


In [41]:
# Add in Percent Population (Zipcode Population / Total Population)
AustinZipCo_df["Percent Population"] = 100 * \
    AustinZipCo_df["Population"].astype(
        int) / AustinZipCo_df["Population"].sum()


In [42]:
AustinZipCo_df['Percent Population'].describe()

count    53.000000
mean      1.886792
std       1.323600
min       0.034500
25%       0.997570
50%       1.592082
75%       2.499298
max       5.702213
Name: Percent Population, dtype: float64

#### A Heat Map for the population in Austin

In [50]:
# Store 'Lat' and 'Lng' into  locations 
locations = AustinZipCo_df[["LAT", "LNG"]].astype(float)

# Convert Population to float and store
# HINT: be sure to handle NaN values
population = AustinZipCo_df["Percent Population"].astype(float)

In [70]:
fig4 = gmaps.figure(layout=figure_layout)
population_layer = gmaps.heatmap_layer(locations, weights=population, 
                                 dissipating=False, max_intensity=6,
                                 point_radius = 0.05)

fig4.add_layer(population_layer)

fig4

Figure(layout=FigureLayout(border='1px solid black', height='1000px', padding='1px', width='1000px'))

In [53]:
help(gmaps.figure)

Help on function figure in module gmaps.figure:

figure(display_toolbar=True, display_errors=True, zoom_level=None, tilt=45, center=None, layout=None, map_type='ROADMAP', mouse_handling='COOPERATIVE')
    Create a gmaps figure
    
    This returns a `Figure` object to which you can add data layers.
    
    :param display_toolbar:
        Boolean denoting whether to show the toolbar. Defaults to True.
    :type display_toolbar: boolean, optional
    
    :param display_errors:
        Boolean denoting whether to show errors that arise in the client.
        Defaults to True.
    :type display_errors: boolean, optional
    
    :param zoom_level:
        Integer between 0 and 21 indicating the initial zoom level.
        High values are more zoomed in.
        By default, the zoom level is chosen to fit the data passed to the
        map. If specified, you must also specify the map center.
    :type zoom_level: int, optional
    
    :param tilt:
        Tilt can be either 0 or 45 indi