## Heatmap
* Count of sales by zip
* Average sales by zip
* Most/Least Expensive sales by zip last 5 years

* In a future version, it would be interesting to layer in demographic information
* Would like to put zip codes in a tool tip

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import folium
import json

### Bringing in and cleaning data

In [42]:
# Bring in housing dataframe
housingFile = 'source_data/housing_data_cleaned.csv'

# Read in file
housingDF = pd.read_csv(housingFile, low_memory=False)
housingDF.head()

Unnamed: 0,basements,building_code_description,category_code_description,census_tract,central_air,depth,exempt_building,exempt_land,exterior_condition,fireplaces,...,topography,total_area,total_livable_area,type_heater,unit,view_type,year_built,year_built_estimate,zip_code,zoning
0,D,ROW 3 STY MASONRY,Single Family,241.0,N,67.0,49200,0,4.0,0.0,...,F,938.0,1344.0,A,,I,1895,Y,19144,RSA5
1,,ROW 2 STY MASONRY,Single Family,201.0,,70.0,0,0,4.0,0.0,...,F,1044.0,1190.0,,,I,1940,Y,19140,RM1
2,H,ROW B/GAR 2 STY MASONRY,Single Family,281.0,N,95.5,0,0,4.0,0.0,...,F,1686.53,1633.0,B,,I,1940,Y,19141,RSA3
3,,ROW 2 STY MASONRY,Single Family,293.0,,112.5,0,0,4.0,0.0,...,F,2165.62,1320.0,B,,I,1940,Y,19124,RSA5
4,,ROW 2 STY MASONRY,Single Family,62.0,,79.0,0,0,4.0,0.0,...,F,1264.0,960.0,,,I,1920,,19142,RM1


In [43]:
# Formatting Date
housingDF['sale_date'].head()

0    2020-09-28
1    2020-09-25
2    2020-09-25
3    2020-09-24
4    2020-09-24
Name: sale_date, dtype: object

### Count of sales by zip
* This explores the total number of home sales by zip code
* Across all dates in the file
* This will show overtime, where most homes are sold

In [44]:
# Aggregate total sales by zip code

# Create df to use for grouping
salesCount = housingDF[['zip_code','sale_price']]

salesCount = salesCount.dropna(how='any')

# Convert to string in main ETL
salesCount['zip_code'] = salesCount['zip_code'].astype('Int64').astype('str')


# Create groupby object
salesCount_groupby = salesCount.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
salesCount_groupbyDF = salesCount_groupby.count()
salesCount_groupbyDF = salesCount_groupbyDF.reset_index()
salesCount_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
salesCount_groupbyDF = salesCount_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
salesCount_groupbyDF = salesCount_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

#salesCount_groupbyDF['zip_code'].value_counts()

salesCount_groupbyDF.head()

Unnamed: 0,zip_code,sale_price
0,19102,16
1,19103,223
2,19104,640
3,19106,47
4,19107,67


In [1]:
# locate file
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)

# create base map
salesCount_map = folium.Map(location=[39.9526, -75.1652], zoom_start = 11)

# generate map
choropleth = folium.Choropleth(
    geo_data = data,
    data = salesCount_groupbyDF,
    columns = ['zip_code','sale_price'],
    legend_name='Number of Home Sales by Zip Code',
    key_on = 'feature.properties.CODE',
    fill_color = 'YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    )

folium.LayerControl().add_to(salesCount_map)

# Display map
salesCount_map

NameError: name 'json' is not defined

### Average of sales by zip
* This explores the average sale price by zip code
* Across all dates in the file
* This will show overtime the average price of a residence sold

In [14]:
# Aggregate total sales by zip code

# Create df to use for grouping
salesAvg = housingDF[['zip_code','sale_price']]

salesAvg = salesAvg.dropna(how='any')

# Convert to string in main ETL
salesAvg['zip_code'] = salesAvg['zip_code'].astype('Int64').astype('str')


# Create groupby object
salesAvg_groupby = salesAvg.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
salesAvg_groupbyDF = salesAvg_groupby.mean()
salesAvg_groupbyDF = salesAvg_groupbyDF.reset_index()
salesAvg_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
salesAvg_groupbyDF = salesAvg_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
salesAvg_groupbyDF = salesAvg_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

salesAvg_groupbyDF.head()

Unnamed: 0,zip_code,sale_price
0,19102,910281.2
1,19103,1340693.0
2,19104,270943.1
3,19106,1040698.0
4,19107,979734.3


In [12]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=salesAvg_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Average Home Sales by Zip Code'
)

folium.LayerControl().add_to(m)

m

JSONDecodeError: Expecting value: line 7 column 1 (char 6)

### Most/Least Expensive sales by zip last 5 years
* Exploring the most and least expensive sales by zip code over 5 years
* 2016, 2017, 2018, 2019 and 2020
* This can show a trend in housing sales over time

------
* 2016

In [30]:
## MAX VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2016 = saleDate[(saleDate['sale_date'] >= '2015-12-31') & (saleDate['sale_date'] <= '2016-12-31')]

# Create groupby object
saleDate2016_groupby = saleDate2016.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2016MAX_groupbyDF = saleDate2016_groupby.max()
saleDate2016MAX_groupbyDF = saleDate2016MAX_groupbyDF.reset_index()
saleDate2016MAX_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2016MAX_groupbyDF = saleDate2016MAX_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2016MAX_groupbyDF = saleDate2016MAX_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2016MAX_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2016-06-24,1300000
1,19103,2016-12-09,4500000
2,19104,2016-12-22,1030000
3,19106,2016-08-16,725000
4,19107,2016-12-20,707500


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2016MAX_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Most Expensive Home Sold by Zip, 2016'
)
folium.LayerControl().add_to(m)

m

In [31]:
## MIN VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2016 = saleDate[(saleDate['sale_date'] >= '2015-12-31') & (saleDate['sale_date'] <= '2016-12-31')]

# Create groupby object
saleDate2016_groupby = saleDate2016.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2016MIN_groupbyDF = saleDate2016_groupby.min()
saleDate2016MIN_groupbyDF = saleDate2016MIN_groupbyDF.reset_index()
saleDate2016MIN_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2016MIN_groupbyDF = saleDate2016MIN_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2016MIN_groupbyDF = saleDate2016MIN_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2016MIN_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2016-03-31,302500
1,19103,2016-01-18,164900
2,19104,2016-01-05,1107
3,19106,2016-02-26,365000
4,19107,2016-01-18,200000


In [28]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2016MIN_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Least Expensive Home Sold by Zip, 2016'
)
folium.LayerControl().add_to(m)

m

JSONDecodeError: Expecting value: line 7 column 1 (char 6)

### Most/Least Expensive sales by zip last 5 years
* 2017

In [32]:
## MAX VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2017 = saleDate[(saleDate['sale_date'] > '2016-12-31') & (saleDate['sale_date'] <= '2017-12-31')]

# Create groupby object
saleDate2017_groupby = saleDate2017.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2017MAX_groupbyDF = saleDate2017_groupby.max()
saleDate2017MAX_groupbyDF = saleDate2017MAX_groupbyDF.reset_index()
saleDate2017MAX_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2017MAX_groupbyDF = saleDate2017MAX_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2017MAX_groupbyDF = saleDate2017MAX_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2017MAX_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2017-09-20,1305000
1,19103,2017-12-14,5290300
2,19104,2017-12-28,1625000
3,19106,2017-12-27,2168000
4,19107,2017-11-15,825000


In [29]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2017MAX_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Most Expensive Homes Sold by Zip, 2017'
)
folium.LayerControl().add_to(m)

m

JSONDecodeError: Expecting value: line 7 column 1 (char 6)

In [33]:
## MIN VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2017 = saleDate[(saleDate['sale_date'] > '2016-12-31') & (saleDate['sale_date'] <= '2017-12-31')]

# Create groupby object
saleDate2017_groupby = saleDate2017.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2017MIN_groupbyDF = saleDate2017_groupby.min()
saleDate2017MIN_groupbyDF = saleDate2017MIN_groupbyDF.reset_index()
saleDate2017MIN_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2017MIN_groupbyDF = saleDate2017MIN_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2017MIN_groupbyDF = saleDate2017MIN_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2017MIN_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2017-01-09,420000
1,19103,2017-01-31,265000
2,19104,2017-01-01,5000
3,19106,2017-04-21,195000
4,19107,2017-02-15,260000


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2017MIN_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Least Expensive Homes Sold by Zip, 2017'
)
folium.LayerControl().add_to(m)

m

### Most/Least Expensive sales by zip last 5 years
* 2018

In [34]:
## MAX VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2018 = saleDate[(saleDate['sale_date'] > '2017-12-31') & (saleDate['sale_date'] <= '2018-12-31')]

# Create groupby object
saleDate2018_groupby = saleDate2018.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2018MAX_groupbyDF = saleDate2018_groupby.max()
saleDate2018MAX_groupbyDF = saleDate2018MAX_groupbyDF.reset_index()
saleDate2018MAX_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2018MAX_groupbyDF = saleDate2018MAX_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2018MAX_groupbyDF = saleDate2018MAX_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2018MAX_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2018-12-04,2035000
1,19103,2018-12-17,7000000
2,19104,2018-12-28,3300000
3,19106,2018-12-27,2000000
4,19107,2018-12-19,6500000


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2018MAX_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Most Expensive Homes Sold by Zip, 2018'
)
folium.LayerControl().add_to(m)

m

In [35]:
## MIN VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2018 = saleDate[(saleDate['sale_date'] > '2017-12-31') & (saleDate['sale_date'] <= '2018-12-31')]

# Create groupby object
saleDate2018_groupby = saleDate2018.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2018MIN_groupbyDF = saleDate2018_groupby.min()
saleDate2018MIN_groupbyDF = saleDate2018MIN_groupbyDF.reset_index()
saleDate2018MIN_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2018MIN_groupbyDF = saleDate2018MIN_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2018MIN_groupbyDF = saleDate2018MIN_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2018MIN_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2018-04-26,295000
1,19103,2018-01-03,105800
2,19104,2018-01-05,1000
3,19106,2018-01-05,404500
4,19107,2018-01-19,238500


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2018MIN_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Least Expensive Homes Sold by Zip, 2018'
)
folium.LayerControl().add_to(m)

m

### Most/Least Expensive sales by zip last 5 years
* 2019

In [36]:
## MAX VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2019 = saleDate[(saleDate['sale_date'] > '2018-12-31') & (saleDate['sale_date'] <= '2019-12-31')]

# Create groupby object
saleDate2019_groupby = saleDate2019.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2019MAX_groupbyDF = saleDate2019_groupby.max()
saleDate2019MAX_groupbyDF = saleDate2019MAX_groupbyDF.reset_index()
saleDate2019MAX_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2019MAX_groupbyDF = saleDate2019MAX_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2019MAX_groupbyDF = saleDate2019MAX_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2019MAX_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2019-05-22,825000
1,19103,2019-12-30,2700000
2,19104,2019-12-20,5520000
3,19106,2019-12-12,2200000
4,19107,2019-10-25,405000


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2019MAX_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Most Expensive Homes Sold by Zip, 2019'
)
folium.LayerControl().add_to(m)

m

In [37]:
## MIN VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2019 = saleDate[(saleDate['sale_date'] > '2018-12-31') & (saleDate['sale_date'] <= '2019-12-31')]

# Create groupby object
saleDate2019_groupby = saleDate2019.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2019MIN_groupbyDF = saleDate2019_groupby.min()
saleDate2019MIN_groupbyDF = saleDate2019MIN_groupbyDF.reset_index()
saleDate2019MIN_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2019MIN_groupbyDF = saleDate2019MIN_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2019MIN_groupbyDF = saleDate2019MIN_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2019MIN_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2019-05-22,825000
1,19103,2019-01-03,310000
2,19104,2019-01-08,2500
3,19106,2019-01-24,410000
4,19107,2019-03-26,134000


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2019MIN_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Least Expensive Homes Sold by Zip, 2019'
)
folium.LayerControl().add_to(m)

m

### Most/Least Expensive sales by zip last 5 years
* 2020

In [38]:
## MAX VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2020 = saleDate[(saleDate['sale_date'] > '2019-12-31') & (saleDate['sale_date'] <= '2020-12-31')]

# Create groupby object
saleDate2020_groupby = saleDate2020.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2020MAX_groupbyDF = saleDate2020_groupby.max()
saleDate2020MAX_groupbyDF = saleDate2020MAX_groupbyDF.reset_index()
saleDate2020MAX_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2020MAX_groupbyDF = saleDate2020MAX_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2020MAX_groupbyDF = saleDate2020MAX_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2020MAX_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2020-01-07,1595000
1,19103,2020-08-24,2425000
2,19104,2020-09-22,1350000
3,19106,2020-02-21,2100000
4,19107,2020-06-25,1330000


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2020MAX_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Most Expensive Homes Sold by Zip, 2020'
)
folium.LayerControl().add_to(m)

m

In [39]:
## MIN VALUES
# Aggregate total sales by date

# Create df to use for grouping
saleDate = housingDF[['zip_code', 'sale_date','sale_price']]

saleDate = saleDate.dropna(how='any')

# Convert to string in main ETL
saleDate['zip_code'] = saleDate['zip_code'].astype('Int64').astype('str')

# Filter dataframe (2016 - 2020)
saleDate2020 = saleDate[(saleDate['sale_date'] > '2019-12-31') & (saleDate['sale_date'] <= '2020-12-31')]

# Create groupby object
saleDate2020_groupby = saleDate2020.groupby('zip_code')

# Create grouped data frame with count of sales, mean, median sale price
saleDate2020MIN_groupbyDF = saleDate2020_groupby.min()
saleDate2020MIN_groupbyDF = saleDate2020MIN_groupbyDF.reset_index()
saleDate2020MIN_groupbyDF

# Add zip codes 19109 and 19102, so that GeoJSON will work - not in original df but in GeoJSON file
saleDate2020MIN_groupbyDF = saleDate2020MIN_groupbyDF.append({'zip_code':'19109','sale_price':0},ignore_index=True)
saleDate2020MIN_groupbyDF = saleDate2020MIN_groupbyDF.append({'zip_code':'19112','sale_price':0},ignore_index=True)

saleDate2020MIN_groupbyDF.head()

Unnamed: 0,zip_code,sale_date,sale_price
0,19102,2020-01-07,1595000
1,19103,2020-01-06,505000
2,19104,2020-01-04,1500
3,19106,2020-01-17,600000
4,19107,2020-03-07,250000


In [None]:
# Load in GeoJSON file for map
with open('source_data/Zipcodes_Poly.geojson','r') as jsonFile:
    data = json.load(jsonFile)
    
# Create folium map
m = folium.Map(location=[39.9526, -75.1652],zoom_start=11)
m.choropleth(
    geo_data=data,
    fill_opacity=0.7,
    line_opacity=0.2,
    data=saleDate2020MIN_groupbyDF,
    key_on='feature.properties.CODE',
    columns=['zip_code','sale_price'],
    fill_color='YlGnBu',
    legend_name='Least Expensive Homes Sold by Zip, 2020'
)
folium.LayerControl().add_to(m)

m

## FInal observations
* xx