In [1]:
## Heatmap
### Number of sales by Zip, layering in demographic information

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import gmaps
import json
import csv
import os

# Import API key
from api_keys import g_key

In [3]:
# Source data location
housingFile = 'source_data/housing_data_cleaned.csv'

# Read in source
housingDF = pd.read_csv(housingFile, low_memory=False)
housingDF.head()

Unnamed: 0,basements,building_code_description,category_code_description,census_tract,central_air,depth,exempt_building,exempt_land,exterior_condition,fireplaces,...,topography,total_area,total_livable_area,type_heater,unit,view_type,year_built,year_built_estimate,zip_code,zoning
0,D,ROW 3 STY MASONRY,Single Family,241.0,N,67.0,49200,0,4.0,0.0,...,F,938.0,1344.0,A,,I,1895,Y,19144.0,RSA5
1,,RES CONDO 3 STY MAS+OTH,Single Family,337.0,Y,0.0,45000,0,4.0,0.0,...,,0.0,947.0,,B307,I,1970,Y,19152.0,RM2
2,,ROW 2 STY MASONRY,Single Family,201.0,,70.0,0,0,4.0,0.0,...,F,1044.0,1190.0,,,I,1940,Y,19140.0,RM1
3,H,ROW B/GAR 2 STY MASONRY,Single Family,281.0,N,95.5,0,0,4.0,0.0,...,F,1686.53,1633.0,B,,I,1940,Y,19141.0,RSA3
4,,ROW 2 STY MASONRY,Single Family,293.0,,112.5,0,0,4.0,0.0,...,F,2165.62,1320.0,B,,I,1940,Y,19124.0,RSA5


In [4]:
# Bring in demographics dataframe
demographicsFile = 'source_data/philly_demographics.csv'

# Read in demographics df
demographicsDF = pd.read_csv(demographicsFile)
demographicsDF.head()

Unnamed: 0,City,Zip,NABE,Lat,Long,MEDINC,P_WHITE,P_BLACK,P_HISP,P_ASIAN,P_OTHERS,P_MinCOMBINED
0,Philadelphia,19102,Center City,39.948498,-75.16683,90750,0.73,0.04,0.05,0.16,0.01,0.27
1,Philadelphia,19103,Center City West,39.953663,-75.17399,73611,0.72,0.06,0.06,0.12,0.04,0.28
2,Philadelphia,19103,Center City West,39.95386,-75.16713,73611,0.72,0.06,0.06,0.12,0.04,0.28
3,Philadelphia,19104,"University City, Mantua, Powelton",39.956417,-75.20855,25865,0.35,0.42,0.05,0.14,0.04,0.65
4,Philadelphia,19106,"Old City, Society Hill",39.94912,-75.14397,109393,0.77,0.08,0.06,0.06,0.02,0.23


In [5]:
# Make the name of the zip column the same
demographicsDF_update = demographicsDF.rename(columns={'Zip': 'zip_code'})
demographicsDF_update.head()

Unnamed: 0,City,zip_code,NABE,Lat,Long,MEDINC,P_WHITE,P_BLACK,P_HISP,P_ASIAN,P_OTHERS,P_MinCOMBINED
0,Philadelphia,19102,Center City,39.948498,-75.16683,90750,0.73,0.04,0.05,0.16,0.01,0.27
1,Philadelphia,19103,Center City West,39.953663,-75.17399,73611,0.72,0.06,0.06,0.12,0.04,0.28
2,Philadelphia,19103,Center City West,39.95386,-75.16713,73611,0.72,0.06,0.06,0.12,0.04,0.28
3,Philadelphia,19104,"University City, Mantua, Powelton",39.956417,-75.20855,25865,0.35,0.42,0.05,0.14,0.04,0.65
4,Philadelphia,19106,"Old City, Society Hill",39.94912,-75.14397,109393,0.77,0.08,0.06,0.06,0.02,0.23


In [6]:
# Merge dataframes on column (zip)
mergedDF = pd.merge(housingDF, demographicsDF_update, on='zip_code', how='inner')
mergedDF

Unnamed: 0,basements,building_code_description,category_code_description,census_tract,central_air,depth,exempt_building,exempt_land,exterior_condition,fireplaces,...,NABE,Lat,Long,MEDINC,P_WHITE,P_BLACK,P_HISP,P_ASIAN,P_OTHERS,P_MinCOMBINED
0,D,ROW 3 STY MASONRY,Single Family,241.0,N,67.00,49200,0,4.0,0.0,...,Germantown,40.033259,-75.177865,33786,0.16,0.75,0.04,0.02,0.03,0.84
1,D,ROW 3 STY MASONRY,Single Family,241.0,N,67.00,49200,0,4.0,0.0,...,Germantown,40.030370,-75.165700,33786,0.16,0.75,0.04,0.02,0.03,0.84
2,C,SEMI/DET 2.5 STY MASONRY,Single Family,252.0,N,96.74,0,0,7.0,0.0,...,Germantown,40.033259,-75.177865,33786,0.16,0.75,0.04,0.02,0.03,0.84
3,C,SEMI/DET 2.5 STY MASONRY,Single Family,252.0,N,96.74,0,0,7.0,0.0,...,Germantown,40.030370,-75.165700,33786,0.16,0.75,0.04,0.02,0.03,0.84
4,,SEMI/DET 2 STY MASONRY,Single Family,242.0,,85.70,0,0,4.0,0.0,...,Germantown,40.033259,-75.177865,33786,0.16,0.75,0.04,0.02,0.03,0.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130845,,APTS 5-50 UNITS MASONRY,Multi Family,8.0,,161.50,0,0,4.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27
130846,0,RES CONDO 5+ STY MASONRY,Single Family,4.0,Y,0.00,568911,0,3.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27
130847,,RES CONDO 5+ STY MASONRY,Single Family,4.0,Y,0.00,388995,0,3.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27
130848,0,RES CONDO 5+ STY MAS+OTH,Single Family,4.0,Y,0.00,0,0,1.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27


In [7]:
# Dropping empty rows
#mergedDF = mergedDF.dropna()

# NOT WORKING RN

In [8]:
# Sort df by number of sales
#mergedDF.sort_values(by=['sale_price'])
mergedDF.sort_values(by=['sale_price', 'zip_code'])

# Reindex dataframe
mergedDF.reset_index(drop=True)

Unnamed: 0,basements,building_code_description,category_code_description,census_tract,central_air,depth,exempt_building,exempt_land,exterior_condition,fireplaces,...,NABE,Lat,Long,MEDINC,P_WHITE,P_BLACK,P_HISP,P_ASIAN,P_OTHERS,P_MinCOMBINED
0,D,ROW 3 STY MASONRY,Single Family,241.0,N,67.00,49200,0,4.0,0.0,...,Germantown,40.033259,-75.177865,33786,0.16,0.75,0.04,0.02,0.03,0.84
1,D,ROW 3 STY MASONRY,Single Family,241.0,N,67.00,49200,0,4.0,0.0,...,Germantown,40.030370,-75.165700,33786,0.16,0.75,0.04,0.02,0.03,0.84
2,C,SEMI/DET 2.5 STY MASONRY,Single Family,252.0,N,96.74,0,0,7.0,0.0,...,Germantown,40.033259,-75.177865,33786,0.16,0.75,0.04,0.02,0.03,0.84
3,C,SEMI/DET 2.5 STY MASONRY,Single Family,252.0,N,96.74,0,0,7.0,0.0,...,Germantown,40.030370,-75.165700,33786,0.16,0.75,0.04,0.02,0.03,0.84
4,,SEMI/DET 2 STY MASONRY,Single Family,242.0,,85.70,0,0,4.0,0.0,...,Germantown,40.033259,-75.177865,33786,0.16,0.75,0.04,0.02,0.03,0.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130845,,APTS 5-50 UNITS MASONRY,Multi Family,8.0,,161.50,0,0,4.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27
130846,0,RES CONDO 5+ STY MASONRY,Single Family,4.0,Y,0.00,568911,0,3.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27
130847,,RES CONDO 5+ STY MASONRY,Single Family,4.0,Y,0.00,388995,0,3.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27
130848,0,RES CONDO 5+ STY MAS+OTH,Single Family,4.0,Y,0.00,0,0,1.0,0.0,...,Center City,39.948498,-75.166830,90750,0.73,0.04,0.05,0.16,0.01,0.27


In [9]:
## Creating Heat Map

# Need to convert percentages
print(mergedDF.dtypes)
#print(mergedDF.sale_price)
#print(mergedDF.MEDINC)

basements                     object
building_code_description     object
category_code_description     object
census_tract                 float64
central_air                   object
depth                        float64
exempt_building                int64
exempt_land                    int64
exterior_condition           float64
fireplaces                   float64
frontage                     float64
fuel                          object
garage_spaces                float64
garage_type                   object
geographic_ward              float64
interior_condition           float64
location                      object
market_value                   int64
market_value_date            float64
number_of_bathrooms          float64
number_of_bedrooms           float64
number_of_rooms              float64
number_stories               float64
quality_grade                float64
sale_date                      int64
sale_price                     int64
street_designation            object
s

In [10]:
# Configure gmaps with API key
gmaps.configure(api_key = g_key)

# Store lat and long into locations
locations = mergedDF[["Lat", "Long"]]
#locations = mergedDF[["zip_code"]]
#pctWhite = mergedDF["P_WHITE"].astype(float)
byZIP = mergedDF["zip_code"].astype(float)
numberSales = mergedDF["sale_price"]
area = mergedDF["total_livable_area"].astype(float)

In [11]:
# Plot heatmap
fig = gmaps.figure()

# Create and add heat layer
heat_layer = gmaps.heatmap_layer(locations, weights = area, dissipating = False, max_intensity = 50, point_radius = 1.5)

fig.add_layer(heat_layer)

# Display
fig

Figure(layout=FigureLayout(height='420px'))

In [26]:
# With more parameters

heatmapDF = mergedDF[(mergedDF['sale_price'] >= 120000) & (mergedDF['sale_price'] <= 900000)]
heatmapDF = heatmapDF[heatmapDF['MEDINC'] >= 35000]
heatmapDF = heatmapDF[heatmapDF['P_BLACK'] >= .40]
heatmapDF.head()

Unnamed: 0,basements,building_code_description,category_code_description,census_tract,central_air,depth,exempt_building,exempt_land,exterior_condition,fireplaces,...,NABE,Lat,Long,MEDINC,P_WHITE,P_BLACK,P_HISP,P_ASIAN,P_OTHERS,P_MinCOMBINED
30480,,ROW 3 STY MASONRY,Single Family,30.0,,44.0,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.93956,-75.18051,63524,0.45,0.43,0.05,0.04,0.04,0.55
30481,,ROW 3 STY MASONRY,Single Family,30.0,,44.0,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.94559,-75.18106,63524,0.45,0.43,0.05,0.04,0.04,0.55
30482,,ROW 3 STY MASONRY,Single Family,30.0,,44.0,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.937726,-75.167021,63524,0.45,0.43,0.05,0.04,0.04,0.55
30483,,ROW 3 STY MASONRY,Single Family,30.0,,44.0,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.933903,-75.180115,63524,0.45,0.43,0.05,0.04,0.04,0.55
30484,A,ROW 2 STY MASONRY,Single Family,13.0,Y,41.0,0,0,3.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.93956,-75.18051,63524,0.45,0.43,0.05,0.04,0.04,0.55


In [27]:
heatmapDF.dropna()
heatmapDF

Unnamed: 0,basements,building_code_description,category_code_description,census_tract,central_air,depth,exempt_building,exempt_land,exterior_condition,fireplaces,...,NABE,Lat,Long,MEDINC,P_WHITE,P_BLACK,P_HISP,P_ASIAN,P_OTHERS,P_MinCOMBINED
30480,,ROW 3 STY MASONRY,Single Family,30.0,,44.00,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.939560,-75.180510,63524,0.45,0.43,0.05,0.04,0.04,0.55
30481,,ROW 3 STY MASONRY,Single Family,30.0,,44.00,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.945590,-75.181060,63524,0.45,0.43,0.05,0.04,0.04,0.55
30482,,ROW 3 STY MASONRY,Single Family,30.0,,44.00,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.937726,-75.167021,63524,0.45,0.43,0.05,0.04,0.04,0.55
30483,,ROW 3 STY MASONRY,Single Family,30.0,,44.00,305046,0,0.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.933903,-75.180115,63524,0.45,0.43,0.05,0.04,0.04,0.55
30484,A,ROW 2 STY MASONRY,Single Family,13.0,Y,41.00,0,0,3.0,0.0,...,"Southwest Center City, Point Breeze, Grays Ferry",39.939560,-75.180510,63524,0.45,0.43,0.05,0.04,0.04,0.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117693,,ROW B/GAR 2 STY MASONRY,Single Family,98.0,Y,108.79,0,0,4.0,0.0,...,Overbrook,39.977297,-75.273268,48369,0.06,0.87,0.04,0.01,0.02,0.94
117697,F,ROW B/GAR 2 STY MASONRY,Single Family,98.0,,92.50,45000,0,4.0,0.0,...,Overbrook,39.977297,-75.273268,48369,0.06,0.87,0.04,0.01,0.02,0.94
117699,,S/D CONV APT 3 STY MAS+OT,Multi Family,114.0,,140.36,0,0,4.0,0.0,...,Overbrook,39.977297,-75.273268,48369,0.06,0.87,0.04,0.01,0.02,0.94
117700,,DET W/D GAR 3 STY MASONRY,Single Family,375.0,,150.00,45000,0,4.0,0.0,...,Overbrook,39.977297,-75.273268,48369,0.06,0.87,0.04,0.01,0.02,0.94


In [28]:
# Adding markers to the map
info_box = """
<d1>
<dt>Neighborhood:</dt><dd>{NABE}</dd>
<dt>Zip code:</dt><dd>{zip_code}</dd>
<dt>Sale price:</dt><dd>{sale_price}</dd>
<dt>Median income:</dt><dd>{MEDINC}</dd>
"""

#Storing the df row
house_info = [info_box.format(**row) for index, row in heatmapDF.iterrows()]

locations = heatmapDF[["Lat", "Long"]]

In [29]:
# Add marker layer ontop of heat map
markers = gmaps.marker_layer(locations)
fig.add_layer(markers)

# Display figure
fig

Figure(layout=FigureLayout(height='420px'))