# Average Days on Market using Analytics
Apply analytics to real-estate economic indicators (Zillow's days-on-market data) to assess the housing market.

<img src="https://bobashworth.com/wp-content/uploads/2016/01/Home-Sales-Tips-SWFL.jpg">

## Resources
- https://www.zillow.com/research/data/
- https://developers.google.com/maps/documentation/geocoding/start
- https://www.geeksforgeeks.org/get-post-requests-using-python/

## Widgets

In [5]:
# Remove every existing notebook widget so the dropdowns created further down
# are rebuilt from scratch on each full run.
dbutils.widgets.removeAll()

## Imports

In [7]:
# general libraries
from datetime import datetime

# manipulating data
import pandas as pd
import numpy as np

# visualization
import plotly.express as px

# http contents
import requests 

## Functions

In [9]:
def get_location_info(location, google_api_key):
  """Geocode ``location`` with the Google Geocoding API.

  Args:
    location: Address or place name, sent as the ``address`` query parameter.
    google_api_key: API key, sent as the ``key`` query parameter.

  Returns:
    A ``(county, lattitude, longitude)`` tuple built from the first result.
    ``county`` is the ``long_name`` of the address component typed
    ``administrative_area_level_2``. If the first result has no such
    component, the second address component is used instead (the previous
    positional behavior).

  Raises:
    requests.RequestException: On network failure, timeout, or an HTTP error
      status.
    IndexError: If the API returns no results for ``location``.
  """
  # api-endpoint
  URL = "https://maps.googleapis.com/maps/api/geocode/json"

  # query parameters sent to the API
  PARAMS = {'address': location, 'key': google_api_key}

  # A timeout keeps a stalled connection from hanging the notebook forever.
  r = requests.get(url=URL, params=PARAMS, timeout=10)
  r.raise_for_status()

  # get data from response
  data = r.json()

  results = data.get('results') or []
  if not results:
    # Same exception type that indexing an empty list raised before, but with
    # the API status in the message so the cause is visible.
    raise IndexError(
      "No geocoding results for {0!r} (API status: {1})".format(location, data.get('status'))
    )
  top_result = results[0]

  # county: select the component by its type instead of by position, because
  # the position of the county varies with the kind of place being geocoded.
  components = top_result['address_components']
  county = next(
    (c['long_name'] for c in components
     if 'administrative_area_level_2' in c.get('types', [])),
    None,
  )
  if county is None:
    county = components[1]['long_name']

  # location
  location_dict = top_result['geometry']['location']
  lattitude = location_dict['lat']
  longitude = location_dict['lng']

  return county, lattitude, longitude

In [10]:
def save_pandas_df(df, file_name, file_path='dbfs:/FileStore/tables/'):
  """Write a pandas DataFrame as CSV through Spark.

  The frame is converted to a Spark DataFrame and saved with a header row to
  ``file_path + file_name``, overwriting whatever is already stored there.

  Args:
    df: pandas DataFrame to persist.
    file_name: Name appended directly to ``file_path``.
    file_path: Destination prefix; defaults to the DBFS tables folder.
  """
  # pandas -> Spark, then configure the CSV writer step by step
  csv_writer = spark.createDataFrame(df).write.format("com.databricks.spark.csv")
  csv_writer = csv_writer.mode('overwrite')
  csv_writer = csv_writer.option("header", "true")
  csv_writer.save(file_path + file_name)
  print('Saved file!')

## Data

### Load

In [13]:
# Read the county-level Days-on-Zillow CSV through Spark (header row, inferred
# column types) and hand it to pandas for the rest of the analysis.
county_reader = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
)
df_county = county_reader.load('/FileStore/tables/DaysOnZillow_County.csv').toPandas()
# render the table in the notebook
display(df_county)

SizeRank,RegionID,RegionName,RegionType,StateName,2010-01,2010-02,2010-03,2010-04,2010-05,2010-06,2010-07,2010-08,2010-09,2010-10,2010-11,2010-12,2011-01,2011-02,2011-03,2011-04,2011-05,2011-06,2011-07,2011-08,2011-09,2011-10,2011-11,2011-12,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,2014-10,2014-11,2014-12,2015-01,2015-02,2015-03,2015-04,2015-05,2015-06,2015-07,2015-08,2015-09,2015-10,2015-11,2015-12,2016-01,2016-02,2016-03,2016-04,2016-05,2016-06,2016-07,2016-08,2016-09,2016-10,2016-11,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
1,3101,Los Angeles County,County,CA,121.0,130.0,120.0,107.0,87.0,94.0,88.0,89.0,92.0,97.0,100.0,107.0,112.0,118.0,114.0,95.0,94.0,100.0,101.0,99.0,106.0,108.0,108.0,113.0,115.0,116.0,113.0,99.0,93.0,90.0,90.0,86.0,85.0,85.0,83.0,88.0,92.0,89.0,75.0,68.0,64.0,66.0,65.0,63.0,66.0,68.0,70.0,73.0,83.0,82.0,65.0,65.0,64.0,68.0,69.0,69.0,72.0,75.0,76.0,81.0,90.0,90.0,70.0,63.0,65.0,67.0,68.0,68.0,69.0,72.0,77.0,81.0,86.0,90.0,68.0,65.0,66.0,66.0,67.0,67.0,70.0,72.0,75.0,78.0,85.0,86.0,66.0,61.0,60.0,59.0,60.0,60.0,64.0,64.0,64.0,69.0,75.0,66.0,55.0,55.0,55.0,56.0,58.0,58.0,62.0,67.0,69.0,74.0,85.0,88.0,69.0,64.0,62.0,62.0,64.0,65.0,68.0,69.0,67.0,73.0
2,139,Cook County,County,IL,185.0,188.0,193.0,199.0,176.0,131.0,135.0,119.0,126.5,133.0,140.0,155.0,152.0,151.0,148.0,148.0,131.0,127.5,129.0,135.0,141.0,148.5,141.0,142.0,145.0,144.0,150.0,139.0,123.0,113.0,118.0,122.0,118.5,124.0,125.0,133.5,139.0,140.0,138.0,128.5,106.0,104.0,110.0,105.0,110.0,108.0,103.0,106.0,120.0,126.0,129.0,106.0,91.0,87.0,94.0,99.0,106.0,107.0,108.0,115.0,126.0,126.0,125.0,99.0,92.0,92.0,99.0,104.0,109.0,111.0,111.0,116.0,127.0,133.0,129.0,97.0,92.0,93.0,93.0,96.0,100.0,105.0,110.0,116.0,125.0,132.0,117.0,88.0,81.0,85.0,87.0,89.0,92.0,96.0,98.0,104.0,120.0,131.0,99.0,71.0,77.0,83.0,84.0,86.0,91.0,94.0,99.0,111.0,119.0,132.0,127.0,80.0,73.0,77.0,84.0,88.0,94.0,99.0,98.0,106.0
3,1090,Harris County,County,TX,126.0,130.0,126.0,110.0,92.0,98.0,98.0,98.0,106.0,115.0,123.0,128.0,134.0,127.0,125.0,117.0,110.0,104.0,107.0,111.0,116.0,119.0,124.0,123.0,121.0,118.0,111.0,96.0,90.0,90.0,93.0,101.0,101.0,101.0,98.0,101.0,102.0,102.0,86.0,75.0,71.0,70.0,69.0,67.0,70.0,75.0,76.0,78.0,84.0,81.0,64.0,64.0,62.0,62.0,59.0,58.0,63.0,67.0,69.0,70.0,76.0,69.0,59.0,61.0,60.0,61.0,62.0,62.0,64.0,69.5,76.0,80.0,88.0,89.0,72.0,67.0,65.0,69.0,69.0,69.0,72.0,81.0,82.0,83.0,88.0,90.0,72.0,69.0,65.0,66.0,67.0,69.0,79.0,89.0,91.0,81.0,82.0,89.0,68.0,59.0,60.0,63.0,67.0,67.0,72.0,80.0,86.0,88.0,91.0,90.0,81.0,72.0,69.0,68.0,67.0,70.0,80.0,88.0,90.0,92.0
4,2402,Maricopa County,County,AZ,110.0,114.0,116.0,115.0,108.0,108.0,94.0,96.0,100.0,98.0,95.0,105.0,113.0,112.0,117.0,103.0,103.0,100.0,92.0,92.0,89.0,88.0,89.0,89.0,96.0,102.0,93.0,84.0,87.0,85.0,83.0,79.0,81.0,81.0,82.0,88.0,94.0,92.0,85.0,78.0,71.0,69.0,69.0,67.0,72.0,73.0,75.0,77.0,90.0,94.0,81.0,80.0,83.0,87.0,88.0,89.0,89.0,88.0,81.0,82.0,92.0,86.0,71.0,71.0,68.0,71.0,65.0,65.0,68.0,67.0,69.0,73.0,81.0,82.0,69.0,68.0,70.0,70.0,71.0,72.0,72.0,69.0,68.0,71.0,79.0,78.0,65.0,64.0,64.0,64.0,63.0,62.0,63.0,62.0,61.0,65.0,74.0,69.0,58.0,55.0,56.0,56.0,57.0,59.0,60.0,58.0,58.0,63.0,72.0,71.0,61.0,60.0,57.0,58.0,58.0,56.0,53.0,52.0,52.0,55.0
5,2841,San Diego County,County,CA,105.0,112.0,97.0,108.0,91.0,93.0,76.0,82.0,95.0,97.0,103.0,108.0,111.0,113.0,115.0,99.0,97.0,102.0,105.0,101.0,99.0,105.0,101.0,117.0,125.0,125.0,111.0,98.0,89.0,90.0,90.0,90.0,92.0,88.0,89.0,91.0,88.0,85.0,67.0,62.0,62.0,59.0,59.0,58.0,60.0,63.0,67.0,74.0,82.0,77.0,60.0,62.0,62.0,64.0,63.0,66.0,71.0,76.0,75.0,80.0,87.0,86.0,67.0,69.0,66.0,59.0,59.0,64.0,68.0,72.0,74.0,77.0,83.0,81.0,62.0,60.0,60.0,59.0,62.0,62.0,65.0,69.0,68.0,71.0,80.0,74.0,55.0,52.0,51.0,51.0,53.0,53.0,55.0,57.0,58.0,60.0,64.0,54.0,48.0,48.0,49.0,51.0,54.0,52.0,57.0,69.0,68.0,75.0,83.0,83.0,63.0,58.0,56.0,56.0,57.0,60.0,61.0,63.0,63.0,66.0
6,1286,Orange County,County,CA,129.0,153.0,132.0,79.0,80.0,91.0,87.0,90.0,94.0,101.0,105.0,107.0,117.0,124.0,118.0,96.0,96.0,104.0,103.0,108.0,111.0,111.0,116.0,120.0,120.0,124.0,124.0,97.0,92.0,87.0,85.0,87.0,84.0,86.0,86.0,87.0,87.0,75.0,60.0,58.0,57.0,56.0,57.0,59.0,64.0,68.0,70.0,77.0,85.0,82.0,67.0,62.0,65.0,66.0,69.0,74.0,77.0,83.0,87.0,89.0,97.0,96.0,68.0,63.0,63.0,63.0,63.0,66.0,67.0,76.0,82.0,87.0,90.0,83.0,62.0,62.0,62.0,64.0,65.0,69.0,72.0,80.0,81.0,82.0,90.0,79.0,59.0,56.0,55.0,58.0,58.0,62.0,64.0,66.5,65.0,68.0,76.0,62.0,50.0,51.0,51.0,54.0,58.0,61.0,67.0,70.0,75.0,78.0,89.0,92.0,69.0,63.0,61.0,66.0,68.0,70.0,70.0,75.0,76.0,76.0
7,581,Kings County,County,NY,154.0,160.0,188.0,173.0,183.0,174.0,164.0,170.0,201.0,187.0,184.0,167.0,181.0,163.0,189.0,202.5,208.5,192.0,173.0,163.0,192.0,207.0,208.0,197.0,227.0,219.0,225.0,265.5,237.0,217.0,178.0,172.0,187.0,212.0,230.5,250.0,238.0,220.0,265.0,204.0,180.0,144.0,142.0,159.0,152.0,174.0,150.0,136.0,136.0,148.0,155.0,158.5,143.0,130.0,129.0,134.0,153.0,135.0,149.0,133.5,139.0,152.5,155.0,156.0,154.0,141.0,136.0,142.0,137.0,156.0,161.0,152.0,147.0,155.0,162.0,176.0,157.0,140.0,135.0,146.0,155.0,163.0,189.0,174.0,156.0,163.5,170.0,185.0,164.0,150.5,137.0,133.0,142.0,150.0,161.0,165.0,159.0,169.0,156.5,149.0,121.0,126.0,127.0,136.5,148.0,161.0,155.0,165.0,168.0,180.0,177.0,181.0,165.0,161.0,141.0,147.0,156.0,167.5,186.0,176.0
8,2964,Miami-Dade County,County,FL,202.0,178.0,195.0,188.0,167.0,119.0,104.0,93.0,103.0,103.0,97.0,110.0,115.0,119.0,120.0,128.0,136.0,137.0,148.0,133.0,124.5,118.0,116.0,110.0,120.0,124.0,122.0,121.0,111.0,117.0,119.0,114.0,107.0,83.0,88.0,90.0,109.0,120.0,125.0,112.0,115.0,116.0,122.0,111.0,111.0,110.5,109.0,115.0,124.0,118.0,117.0,114.0,112.0,117.0,115.0,113.0,114.0,111.0,112.0,114.0,117.0,117.0,121.0,111.0,79.0,81.0,93.0,106.0,115.0,120.0,110.0,112.0,129.0,125.0,115.0,103.0,104.0,113.0,112.0,116.0,113.0,119.0,113.0,113.0,121.0,124.0,119.0,116.0,110.0,115.0,117.0,111.0,111.0,124.0,123.5,121.0,121.0,125.0,114.0,107.0,110.0,106.0,110.0,107.0,108.0,106.0,106.0,111.0,117.0,116.0,118.0,109.0,114.0,110.0,108.0,106.0,112.5,110.0,106.5,107.0
9,978,Dallas County,County,TX,122.0,117.5,119.0,101.0,100.0,97.5,97.0,96.0,102.0,109.0,106.0,114.0,134.0,130.0,114.0,110.5,103.0,103.0,105.0,108.0,109.0,108.0,108.0,117.0,115.0,114.0,104.5,82.0,82.0,82.0,82.0,85.0,89.0,91.0,89.0,88.0,93.0,91.0,76.0,69.0,65.0,65.0,66.0,64.0,67.0,67.0,75.0,74.0,79.0,82.0,61.0,59.0,56.0,55.0,57.0,58.0,60.0,66.0,66.0,70.0,73.0,69.0,56.0,53.0,50.0,52.0,54.0,53.0,57.0,62.0,65.0,69.0,75.0,70.0,56.0,55.0,53.0,55.0,56.0,54.0,59.0,61.0,61.0,64.0,68.0,64.0,54.0,49.0,51.0,50.0,51.0,53.0,55.0,58.0,57.0,62.0,65.0,63.0,53.0,48.0,49.0,54.0,53.0,56.0,57.0,60.0,62.0,67.0,71.0,70.0,61.0,61.0,58.0,57.0,58.0,60.0,69.0,73.0,68.0,81.0
10,1347,Queens County,County,NY,,,,,,,90.5,101.0,127.0,139.0,153.0,129.0,163.0,174.0,170.0,172.0,181.0,153.0,155.0,155.5,170.0,159.0,175.5,189.0,183.0,197.0,198.0,190.0,193.0,169.5,164.0,171.0,189.0,198.0,204.0,191.0,215.0,218.0,213.0,210.0,215.0,207.0,148.0,165.0,168.0,172.5,160.0,165.0,164.0,178.0,181.0,175.0,169.5,153.0,146.0,150.0,153.0,168.0,165.0,170.0,174.0,169.5,173.5,183.0,181.0,150.0,148.0,155.0,153.0,150.5,162.5,158.0,170.0,169.0,176.0,170.0,159.0,149.0,147.0,142.0,150.0,155.0,163.0,163.0,169.0,153.0,172.0,171.0,170.0,147.0,151.0,148.0,142.0,149.0,153.0,156.5,169.0,163.0,157.0,162.0,133.0,127.0,137.0,137.0,145.0,157.0,162.0,158.0,166.0,163.0,170.0,173.0,170.0,152.0,159.0,157.0,163.0,169.0,171.0,170.0


In [14]:
# Offer every distinct state code found in the county data (sorted by
# StateName) in the "State" dropdown; "NJ" is the pre-selected default.
state_choices = df_county.sort_values(by=['StateName'])['StateName'].unique().tolist()
dbutils.widgets.dropdown("01) State", "NJ", state_choices)

In [15]:
# state currently selected in the "State" widget
state = dbutils.widgets.get('01) State')

# keep only the county rows that belong to the selected state
is_selected_state = df_county['StateName'] == state
df = df_county.loc[is_selected_state]

# preview the first rows
display(df.head())

SizeRank,RegionID,RegionName,RegionType,StateName,2010-01,2010-02,2010-03,2010-04,2010-05,2010-06,2010-07,2010-08,2010-09,2010-10,2010-11,2010-12,2011-01,2011-02,2011-03,2011-04,2011-05,2011-06,2011-07,2011-08,2011-09,2011-10,2011-11,2011-12,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,2014-10,2014-11,2014-12,2015-01,2015-02,2015-03,2015-04,2015-05,2015-06,2015-07,2015-08,2015-09,2015-10,2015-11,2015-12,2016-01,2016-02,2016-03,2016-04,2016-05,2016-06,2016-07,2016-08,2016-09,2016-10,2016-11,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
54,874,Bergen County,County,NJ,179.0,199.5,185.0,199.0,185.0,130.0,135.0,135.0,159.0,151.0,173.0,151.0,156.0,191.0,170.0,198.5,173.0,172.0,130.0,142.0,148.0,169.0,189.0,182.0,163.0,180.0,168.0,198.0,194.0,145.0,140.0,141.0,154.0,166.0,185.0,202.0,203.0,202.0,202.0,194.5,155.0,132.0,129.0,121.0,131.0,134.0,126.0,135.0,152.0,166.0,163.0,156.0,122.0,119.0,112.0,120.0,121.0,131.0,138.0,147.0,140.0,140.0,154.0,153.0,126.0,117.0,119.0,115.0,130.0,141.0,147.0,145.0,159.0,156.0,158.0,159.0,120.0,120.0,124.0,129.0,132.0,141.0,149.0,145.5,143.0,153.0,147.0,125.0,112.5,112.0,113.0,119.0,126.0,123.5,133.0,130.0,127.0,142.0,157.0,145.0,111.0,107.0,106.0,109.0,121.0,117.0,120.0,125.0,122.0,134.0,140.0,133.0,111.0,106.0,112.0,117.0,126.0,128.0,127.0,129.0
64,2802,Middlesex County,County,NJ,165.0,163.0,165.0,141.0,148.0,124.5,129.0,122.0,134.0,150.5,145.0,150.0,187.0,199.0,184.0,184.0,176.0,131.0,130.0,138.0,146.0,159.0,163.0,174.0,181.0,193.5,199.0,180.0,161.0,130.0,134.0,140.5,145.0,143.0,159.0,159.0,183.0,175.0,180.0,169.0,133.0,110.0,117.0,126.0,130.0,123.0,132.0,135.0,138.0,151.0,155.0,152.0,121.0,109.0,106.0,109.0,112.0,124.0,127.5,135.0,150.0,147.0,152.0,138.0,118.0,101.0,112.0,119.0,126.0,131.5,139.0,132.0,150.0,147.0,161.0,155.0,114.0,113.0,121.0,112.0,128.0,137.0,149.0,149.0,152.0,153.0,144.0,125.5,108.0,110.0,107.0,111.0,112.0,120.0,124.0,123.0,133.0,142.0,142.0,112.0,81.0,91.0,98.0,104.0,100.0,103.0,113.0,107.0,109.0,122.0,116.0,102.0,96.0,93.0,100.0,104.0,102.0,109.0,109.0,107.0
73,504,Essex County,County,NJ,173.0,221.0,202.0,180.0,160.0,122.0,131.0,125.0,139.0,144.0,146.0,156.0,169.0,153.0,165.0,155.0,149.0,126.0,136.0,137.0,155.0,173.0,179.0,176.0,152.0,163.0,174.0,167.0,152.0,141.0,134.0,132.0,146.0,164.0,164.0,167.0,182.0,184.0,174.0,166.0,111.0,105.0,98.0,107.0,109.0,121.5,111.0,91.0,127.0,146.5,170.0,160.0,117.0,93.0,94.0,98.0,106.0,122.0,116.5,102.0,126.0,135.5,135.0,128.0,112.0,93.0,97.0,103.0,110.0,133.0,139.0,125.0,160.0,164.0,162.0,149.0,132.0,111.0,103.0,109.0,115.0,126.0,115.0,125.0,134.0,141.5,143.0,124.5,109.5,106.0,105.0,110.0,118.0,114.5,119.0,114.0,128.5,133.0,140.0,116.0,99.0,94.0,93.0,98.0,107.0,119.0,115.0,104.0,119.0,129.5,136.0,113.5,97.5,100.0,99.0,103.0,114.5,120.0,107.0,109.0
94,1106,Hudson County,County,NJ,192.0,154.0,225.0,156.0,210.0,147.0,153.5,142.0,130.0,137.0,131.0,147.0,168.5,157.0,164.0,171.5,176.0,178.0,146.0,157.5,192.0,208.0,171.0,205.0,236.0,211.0,213.0,206.0,176.0,179.0,143.0,152.0,151.0,171.0,178.0,179.0,194.5,174.0,176.0,184.0,124.0,128.0,132.0,132.0,141.0,146.0,152.0,146.0,149.0,144.0,162.5,134.0,107.0,107.0,123.5,135.0,127.0,136.0,127.0,121.0,127.0,136.0,128.0,134.0,103.0,115.0,116.0,121.0,131.0,130.0,123.0,128.0,132.0,145.0,142.0,107.0,97.0,107.0,115.0,114.0,122.5,125.0,127.5,120.0,130.0,139.0,124.0,117.0,97.0,88.0,112.0,108.0,113.0,118.5,111.0,104.0,126.0,130.0,135.0,112.0,91.0,99.0,97.0,108.5,119.0,120.0,127.0,125.0,118.0,142.0,130.0,114.0,105.5,85.0,101.0,100.0,121.0,120.5,122.5,126.0
95,2441,Monmouth County,County,NJ,148.0,179.5,179.0,203.0,195.0,156.0,149.0,128.0,143.0,159.0,170.5,154.0,181.0,191.0,193.0,198.0,205.0,150.5,136.5,153.0,162.0,182.0,182.0,192.0,202.0,209.0,194.0,180.0,167.0,147.0,149.5,138.0,155.5,138.0,143.0,145.0,174.0,179.0,215.0,179.5,152.0,145.5,130.0,121.0,126.5,139.0,128.5,146.0,143.0,155.0,169.0,162.0,142.0,117.0,109.0,119.0,132.0,132.0,139.0,139.0,165.0,168.0,150.0,164.0,138.0,122.0,118.0,119.0,133.0,139.0,148.0,132.0,152.0,140.0,103.0,115.0,113.0,114.0,111.0,124.0,125.0,136.0,135.0,138.5,153.0,145.0,138.0,122.5,107.5,110.0,124.0,113.0,116.0,130.5,125.0,133.0,135.0,140.0,143.0,115.0,107.0,104.0,104.0,102.0,104.0,110.0,116.0,117.0,127.0,128.0,125.0,106.0,102.0,101.0,104.0,106.0,113.0,115.0,129.0,116.0


In [16]:
# Load the API-key lookup table; the 'Id' column names the service and the
# 'Key' column holds its key.
api_keys_sdf = spark.read.csv('/FileStore/tables/api_keys.csv', header="true", inferSchema="true")
df_api_keys = api_keys_sdf.toPandas()

# take the first key registered for each service
google_api_key = df_api_keys.loc[df_api_keys['Id'] == "Google", 'Key'].iloc[0]
plotly_api_key = df_api_keys.loc[df_api_keys['Id'] == "Plotly", 'Key'].iloc[0]

# print the service ids only, never the keys themselves
print('API keys loaded:', df_api_keys['Id'].tolist())

In [17]:
# City-level data. The enriched file carries County, Lattitude and Longitude
# columns in addition to the monthly values; the plain export lives at
# '/FileStore/tables/DaysOnZillow_City.csv'.
city_csv_path = '/FileStore/tables/DaysOnZillow_City_Enriched.csv'
df_city = spark.read.csv(city_csv_path, header="true", inferSchema="true").toPandas()
display(df_city.head())

SizeRank,RegionID,RegionName,RegionType,StateName,2010-01,2010-02,2010-03,2010-04,2010-05,2010-06,2010-07,2010-08,2010-09,2010-10,2010-11,2010-12,2011-01,2011-02,2011-03,2011-04,2011-05,2011-06,2011-07,2011-08,2011-09,2011-10,2011-11,2011-12,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,2014-10,2014-11,2014-12,2015-01,2015-02,2015-03,2015-04,2015-05,2015-06,2015-07,2015-08,2015-09,2015-10,2015-11,2015-12,2016-01,2016-02,2016-03,2016-04,2016-05,2016-06,2016-07,2016-08,2016-09,2016-10,2016-11,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12,County,Lattitude,Longitude
1,6181,New York,City,NY,203.0,188.0,214.0,212.0,210.0,181.5,163.0,148.0,154.0,166.0,173.0,159.0,172.0,182.0,186.0,189.0,202.0,184.5,162.0,169.0,177.0,187.0,201.0,206.0,208.0,217.0,216.0,218.0,211.0,196.5,168.0,174.0,189.0,198.5,207.0,206.0,212.0,217.0,215.0,216.5,203.0,167.0,151.0,160.0,158.0,167.0,161.0,154.0,149.0,161.0,170.0,172.0,155.0,136.0,133.0,139.0,147.0,154.0,156.0,159.0,157.0,157.0,171.0,174.0,165.0,144.0,144.0,147.0,152.0,155.0,164.0,160.0,166.0,167.0,174.0,174.0,165.0,147.0,142.0,147.5,154.0,160.0,167.0,172.0,167.0,159.0,175.0,175.0,168.0,147.0,145.0,142.0,147.0,151.0,158.0,160.0,161.0,163.0,160.0,155.0,121.0,127.0,131.0,137.0,151.0,161.0,164.0,165.0,166.0,171.0,178.0,182.0,174.5,159.0,154.0,156.0,165.0,171.0,180.0,180.0,New York,40.7127753,-74.0059728
2,12447,Los Angeles,City,CA,119.0,136.0,128.0,111.0,85.0,94.0,86.0,87.0,90.0,96.0,95.0,110.0,112.0,119.0,118.0,97.0,95.0,102.0,102.0,99.0,104.0,109.0,110.0,111.0,113.0,113.5,119.0,97.0,95.0,87.0,90.0,85.0,84.0,85.0,82.0,88.0,90.0,91.0,76.0,70.0,65.0,67.0,64.0,64.0,65.0,69.0,71.0,74.0,86.0,88.0,66.0,67.0,64.0,66.0,70.0,69.0,71.0,76.0,78.0,81.0,90.0,91.0,69.0,59.0,62.0,65.0,68.0,67.0,69.0,72.0,76.0,79.0,91.0,94.0,67.0,64.0,66.0,65.0,67.0,66.0,70.0,71.0,77.0,78.0,87.0,92.0,67.0,62.0,61.0,60.0,62.0,61.0,66.0,64.0,64.0,67.0,79.0,70.0,55.0,57.0,55.0,56.0,60.0,59.0,62.0,67.0,68.0,75.0,85.0,91.0,71.0,64.0,61.0,62.0,64.0,67.0,69.0,71.0,69.0,75.0,Los Angeles County,34.0522342,-118.2436849
3,39051,Houston,City,TX,134.0,137.0,132.0,112.0,92.0,98.0,97.0,99.0,107.0,117.0,124.0,129.0,133.0,129.0,125.0,113.0,108.0,103.0,104.0,108.0,114.0,119.0,120.0,121.0,117.0,117.0,114.0,95.0,89.0,90.0,91.0,98.0,99.0,97.0,95.0,98.0,99.0,100.0,83.0,73.0,71.0,68.0,69.0,66.0,70.0,75.0,76.0,77.0,83.0,80.0,64.0,65.0,62.0,63.0,59.0,59.0,62.0,66.0,68.0,69.0,75.0,69.0,59.0,63.0,61.0,63.0,65.0,63.0,66.0,70.0,77.0,81.0,89.0,89.0,76.5,69.0,67.0,72.0,71.0,73.0,74.0,84.0,83.0,83.0,90.0,93.0,76.0,70.0,67.0,68.0,68.0,71.0,82.0,89.0,93.0,80.0,87.0,92.0,73.0,61.0,61.0,66.0,70.0,69.0,74.0,80.0,87.0,88.5,92.0,92.0,83.0,74.0,71.0,70.0,67.0,71.0,82.0,86.0,92.0,94.0,Harris County,29.7604267,-95.3698028
4,17426,Chicago,City,IL,186.0,184.0,191.0,190.0,176.0,127.0,128.0,118.5,124.0,129.0,137.0,150.5,145.0,143.0,151.0,146.0,128.0,125.0,127.0,136.0,146.0,145.0,144.0,143.0,137.0,135.0,149.0,141.0,118.0,114.0,121.0,124.0,119.0,119.0,123.0,137.0,137.0,140.0,134.0,117.0,101.0,100.0,108.0,103.0,107.0,105.0,104.0,100.0,118.0,126.0,121.0,99.0,90.0,85.0,93.0,101.0,106.0,107.0,105.0,113.0,123.0,126.0,123.5,93.0,88.0,89.0,98.0,99.0,106.0,107.0,101.0,113.0,125.0,125.0,127.0,86.0,88.0,89.0,92.0,91.0,98.0,100.0,106.0,110.0,122.0,133.0,115.0,83.0,79.0,82.0,84.0,86.0,91.0,94.5,97.0,107.0,118.0,135.0,94.0,66.0,74.0,85.0,86.0,90.0,95.0,98.0,101.0,118.0,125.0,135.0,133.0,75.0,70.0,77.0,85.0,90.0,98.0,105.0,100.0,111.0,Cook County,41.8781136,-87.6297982
5,6915,San Antonio,City,TX,101.0,110.0,120.0,110.0,104.0,105.0,99.0,99.5,104.0,110.0,118.0,125.0,122.5,138.0,136.0,122.5,112.0,108.0,107.0,106.0,104.5,117.0,117.0,119.0,125.0,126.0,122.0,109.0,95.0,92.0,94.0,92.0,93.0,99.0,99.0,112.5,107.0,115.0,105.0,89.0,79.0,78.0,75.0,72.0,75.0,81.0,92.0,83.0,90.0,85.0,74.0,65.0,66.0,64.0,69.0,65.0,68.0,74.0,77.5,76.0,85.0,87.0,66.0,61.0,68.0,68.0,65.0,68.0,74.0,83.0,87.0,82.0,88.0,91.0,80.0,66.0,65.0,67.0,63.0,63.0,66.0,74.0,75.0,78.0,78.0,76.0,67.0,62.0,61.0,57.0,60.0,62.0,64.0,65.0,68.0,78.0,80.0,78.0,63.0,60.0,58.0,57.0,59.0,59.0,62.0,67.0,75.0,72.0,80.0,82.0,68.0,62.0,61.0,61.0,60.0,62.0,70.0,76.0,79.0,84.0,Bexar County,29.4241219,-98.4936282


### Exploration

In [19]:
# The first five columns identify a region; every column after them holds
# one month of data, named 'YYYY-MM'.
id_columns = list(df.columns)[:5]
print('ID columns:', id_columns)

# get date range columns
month_columns = list(df.columns)[5:]

# Distinct years in ascending order. Only the last four are kept, because
# pairing neighbouring years below then yields the three most recent
# year-over-year spans.
year_list = sorted({m.split('-')[0] for m in month_columns})[-4:]

# consecutive pairs such as '2018-2019'
trend_year_list = [first + "-" + second for first, second in zip(year_list, year_list[1:])]

# date range: report the first month column (index 0), not the second one
print('Date range from: {0} to {1}'.format(month_columns[0], month_columns[-1]))

In [20]:
# Create the date-range and trend-year dropdowns.
# Start Date defaults to the first month column, End Date to the last one, and
# Trend Years to the last entry of trend_year_list.
dbutils.widgets.dropdown("02) Start Date", month_columns[0], month_columns)
dbutils.widgets.dropdown("03) End Date", month_columns[-1],  month_columns)
dbutils.widgets.dropdown("04) Trend Years", trend_year_list[-1],  trend_year_list)

In [21]:
# get widget variables
state = dbutils.widgets.get('01) State')
start_date = dbutils.widgets.get('02) Start Date')
end_date = dbutils.widgets.get('03) End Date')

# Reshape from one column per month to one row per (region, month) so the
# data can be grouped and plotted.
df_melt = pd.melt(df,
                  id_vars=id_columns,
                  value_vars=month_columns,
                  var_name='DateRange',
                  value_name='DaysOnMarket')

# Filter by date range. 'YYYY-MM' strings order chronologically, so plain
# string comparison is enough. The .copy() makes the result an independent
# frame, so adding columns below does not write into a slice of the melted
# frame (which triggers pandas' SettingWithCopyWarning).
in_date_range = (df_melt['DateRange'] >= start_date) & (df_melt['DateRange'] <= end_date)
df_melt = df_melt.loc[in_date_range].copy()

# Split 'YYYY-MM' once, column-wise; unlike a row-wise apply this also works
# when the selection is empty.
date_parts = df_melt['DateRange'].str.split('-')
# get year
df_melt['Year'] = date_parts.str[0]
# get month
df_melt['Month'] = date_parts.str[1]
# show table
display(df_melt.head())

SizeRank,RegionID,RegionName,RegionType,StateName,DateRange,DaysOnMarket,Year,Month
54,874,Bergen County,County,NJ,2010-01,179.0,2010,1
64,2802,Middlesex County,County,NJ,2010-01,165.0,2010,1
73,504,Essex County,County,NJ,2010-01,173.0,2010,1
94,1106,Hudson County,County,NJ,2010-01,192.0,2010,1
95,2441,Monmouth County,County,NJ,2010-01,148.0,2010,1


In [22]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
start_date = dbutils.widgets.get('02) Start Date')
end_date = dbutils.widgets.get('03) End Date')

# distinct month values, in order of first appearance
month_num = df_melt['Month'].unique().tolist()
print('Month by digit:', month_num[:5])
# matching abbreviated month names (e.g. 'Jan'), used as axis tick labels
month_name = [datetime.strptime(digits, '%m').strftime('%b') for digits in month_num]
print('Month by name:', month_name[:5])

### Days on Market by Month

In [24]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
start_date = dbutils.widgets.get('02) Start Date')
end_date = dbutils.widgets.get('03) End Date')

# median days on market for each calendar month, across all regions and years
# in the selection
median_by_month = df_melt.groupby(['Month'])['DaysOnMarket'].median()
df_median_days_on_market = median_by_month.reset_index()
df_median_days_on_market = df_median_days_on_market.rename(
    columns={'DaysOnMarket': 'DaysOnMarket (Median)'}
)
display(df_median_days_on_market)

Month,DaysOnMarket (Median)
1,155.0
2,157.0
3,163.0
4,156.0
5,131.0
6,119.0
7,120.0
8,122.0
9,128.75
10,136.0


In [25]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
start_date = dbutils.widgets.get('02) Start Date')
end_date = dbutils.widgets.get('03) End Date')

# 'YYYY-MM' -> 'Mon YYYY' for the chart title
start_date_str = datetime.strptime(start_date, '%Y-%m').strftime('%b %Y')
end_date_str = datetime.strptime(end_date, '%Y-%m').strftime('%b %Y')
box_title = '{0} Days on Market By Month: {1} - {2}'.format(state, start_date_str, end_date_str)

# box & whiskers: distribution of days on market per calendar month
fig = px.box(df_melt, x="Month", y="DaysOnMarket", title=box_title)

# label the x axis with month names instead of month numbers
month_axis = {
    'tickmode': 'array',
    'tickvals': month_num,
    'ticktext': month_name,
}
fig.update_layout(xaxis=month_axis)

# show plot
fig.show()

### Days on Market by County

In [27]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
start_date = dbutils.widgets.get('02) Start Date')
end_date = dbutils.widgets.get('03) End Date')

# one row per county, ordered by ascending SizeRank
region_sizes = df_melt[['SizeRank', 'RegionName']].drop_duplicates()
df_by_size = region_sizes.sort_values(by=['SizeRank'])
display(df_by_size)

SizeRank,RegionName
54,Bergen County
64,Middlesex County
73,Essex County
94,Hudson County
95,Monmouth County
108,Ocean County
117,Union County
126,Camden County
128,Passaic County
134,Morris County


In [28]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
start_date = dbutils.widgets.get('02) Start Date')
end_date = dbutils.widgets.get('03) End Date')

# first five counties in SizeRank order
df_size_regions = df_by_size.head(5)
size_regions = df_size_regions['RegionName'].tolist()
is_top_region = df_melt['RegionName'].isin(size_regions)
df_top_5_regions = df_melt.loc[is_top_region]

# one line per county over the selected date range
line_options = {
    'x': "DateRange",
    'y': "DaysOnMarket",
    'color': "RegionName",
    'hover_name': "RegionName",
    'title': 'Top 5 Size: Days on Market by County',
    'template': "plotly_white",
}
fig = px.line(df_top_5_regions, **line_options)
fig.show()

# End of Part 1 Technical Deep Dive
----------------
# Part 2 Technical Deep Dive Below

### Yearly Change

In [31]:
# get widget variables
state = dbutils.widgets.get('01) State')
trend_years = dbutils.widgets.get('04) Trend Years')

# Trend window: 'YYYY-YYYY' -> January of the first year through December of
# the second year.
start_year, end_year = trend_years.split('-')[:2]
start_trend = start_year + "-" + "01"
end_trend = end_year + "-" + "12"

# Reshape the city data to one row per (city, month). The last three columns
# of df_city are carried along as extra id columns.
df_city_melt = pd.melt(df_city,
                       id_vars=id_columns + list(df_city.columns)[-3:],
                       value_vars=month_columns,
                       var_name='DateRange',
                       value_name='DaysOnMarket')

# Keep the selected state and trend window. The .copy() makes the result an
# independent frame, so the columns added below are not written into a slice
# of the melted frame (SettingWithCopyWarning).
in_scope = ((df_city_melt['StateName'] == state) &
            (df_city_melt['DateRange'] >= start_trend) &
            (df_city_melt['DateRange'] <= end_trend))
df_city_melt = df_city_melt.loc[in_scope].copy()

# split 'YYYY-MM' once, column-wise
date_parts = df_city_melt['DateRange'].str.split('-')
# get year
df_city_melt['Year'] = date_parts.str[0]
# get month
df_city_melt['Month'] = date_parts.str[1]

# Prior-year value: sort so the same calendar month of consecutive years sits
# on adjacent rows, then shift within each (city, month) group. Shifting per
# group means a value can never leak from a neighbouring city or month, and
# the first year of every group has no prior value (NaN).
df_city_melt = df_city_melt.sort_values(by=['RegionID', 'Month', 'Year'])
df_city_melt['PriorYearDaysOnMarket'] = (
    df_city_melt.groupby(['RegionID', 'Month'])['DaysOnMarket'].shift(1)
)

# Relative year-over-year change; NaN wherever either value is missing.
df_city_melt['PrctChangeDaysOnMarket'] = (
    (df_city_melt['DaysOnMarket'] - df_city_melt['PriorYearDaysOnMarket'])
    / df_city_melt['PriorYearDaysOnMarket']
)

display(df_city_melt)

SizeRank,RegionID,RegionName,RegionType,StateName,County,Lattitude,Longitude,DateRange,DaysOnMarket,Year,Month,PriorYearDaysOnMarket,PrctChangeDaysOnMarket
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2018-01,,2018,1,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2019-01,,2019,1,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2018-02,,2018,2,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2019-02,,2019,2,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2018-03,,2018,3,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2019-03,,2019,3,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2018-04,,2018,4,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2019-04,91.0,2019,4,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2018-05,,2018,5,,
2571,3455,Asbury Park,City,NJ,Ocean Township,40.2203907,-74.0120817,2019-05,113.0,2019,5,,


##### Top 10

In [33]:
# get widget variables
state = dbutils.widgets.get('01) State')
trend_years = dbutils.widgets.get('04) Trend Years')

# Cities with at least one missing monthly value in the trend window are
# excluded entirely.
region_null_list = df_city_melt.loc[df_city_melt['DaysOnMarket'].isnull()]['RegionName'].unique().tolist()

# Mean year-over-year change per city (county and coordinates are part of the
# group key only so they stay available as columns).
df_complete = df_city_melt.loc[~df_city_melt['RegionName'].isin(region_null_list)]
df_prct_chg = df_complete.groupby(['RegionName', 'County', 'Lattitude', 'Longitude'])['PrctChangeDaysOnMarket'].mean().reset_index()

# Keep only cities whose days on market went down. The .copy() makes this an
# independent frame, so the columns added below are not written into a slice
# of df_prct_chg (SettingWithCopyWarning).
df_prct_chg_neg = df_prct_chg.loc[df_prct_chg['PrctChangeDaysOnMarket'] < 0].copy()

# get percentile for prct change
lower_bound = df_prct_chg_neg['PrctChangeDaysOnMarket'].quantile(0.33)
higher_bound = df_prct_chg_neg['PrctChangeDaysOnMarket'].quantile(0.66)

# Bucket by percentile on the unrounded change: the most negative third is
# "High", the middle third "Medium", the rest "Low". Column-wise masks replace
# the three row-wise applies and also work when the frame is empty.
prct_change = df_prct_chg_neg['PrctChangeDaysOnMarket']
value_bucket = pd.Series(np.nan, index=prct_change.index, dtype=object)
value_bucket[prct_change <= lower_bound] = "High"
value_bucket[(prct_change > lower_bound) & (prct_change <= higher_bound)] = "Medium"
value_bucket[prct_change > higher_bound] = "Low"
df_prct_chg_neg['Value'] = value_bucket

# format: round to four decimals (built-in round, element by element)
df_prct_chg_neg['PrctChangeDaysOnMarket'] = df_prct_chg_neg['PrctChangeDaysOnMarket'].map(lambda pct: round(pct, 4))

# rank on the rounded change; rank 1 is the largest decrease
df_prct_chg_neg['Rank'] = df_prct_chg_neg['PrctChangeDaysOnMarket'].rank()
df_prct_chg_neg = df_prct_chg_neg.sort_values('PrctChangeDaysOnMarket')

display(df_prct_chg_neg[['Rank', 'RegionName', 'County', 'PrctChangeDaysOnMarket', 'Value']].head(10))

Rank,RegionName,County,PrctChangeDaysOnMarket,Value
1.0,North Brunswick Township,Middlesex County,-0.1266,High
2.0,Willingboro Township,Burlington County,-0.1227,High
3.0,Newark,Essex County,-0.1072,High
4.0,Woodbury,Gloucester County,-0.0961,High
5.0,Merchantville,Camden County,-0.0942,High
6.0,Clementon,Camden County,-0.0899,High
7.0,South Plainfield,Middlesex County,-0.0829,High
8.0,Wayne,Passaic County,-0.074,High
9.0,Howell Township,Monmouth County,-0.0715,High
10.0,Burlington Township,Burlington County,-0.0681,High


In [34]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
trend_years = dbutils.widgets.get('04) Trend Years')

# hand the stored "Plotly" key to plotly express as the Mapbox access token
px.set_mapbox_access_token(plotly_api_key)

# one point per city, coloured by its High / Medium / Low bucket
map_options = {
    'lat': "Lattitude",
    'lon': "Longitude",
    'color': "Value",
    'hover_name': "RegionName",
    'size_max': 15,
    'zoom': 6,
}
fig = px.scatter_mapbox(df_prct_chg_neg, **map_options)
fig.show()

##### Deep Dive

In [36]:
# get widget variables
state = dbutils.widgets.get('01) State')
trend_years = dbutils.widgets.get('04) Trend Years')

# alphabetical list of the cities that have a negative average change
region_list = df_prct_chg_neg.sort_values(by=['RegionName'])['RegionName'].unique().tolist()
print('Count of cities:', len(region_list))

# The list of cities is rebuilt on every run, so any existing "City" widget is
# removed before it is re-created with the new choices.
try:
  dbutils.widgets.remove("05) City")
except Exception:
  # Best effort: presumably this fails only when the widget does not exist
  # yet (first run). Catching Exception rather than using a bare except lets
  # KeyboardInterrupt / SystemExit propagate.
  pass

# create widget; the default is the first row of df_prct_chg_neg
dbutils.widgets.dropdown("05) City", df_prct_chg_neg.head(1)['RegionName'].iloc[0], region_list)

In [37]:
# re-read the widgets this cell is tied to
state = dbutils.widgets.get('01) State')
trend_years = dbutils.widgets.get('04) Trend Years')
selected_city = dbutils.widgets.get('05) City')

# Largest days-on-market value among all listed cities; used as a common
# y-axis ceiling so the scale does not change with the selected city.
listed_city_rows = df_city_melt.loc[df_city_melt['RegionName'].isin(region_list)]
max_days_on_mrkt = listed_city_rows['DaysOnMarket'].max()

# rows and columns needed to chart the selected city
graph_columns = ['RegionName', 'StateName', 'County', 'Month', 'Year', 'DaysOnMarket']
df_graph = df_city_melt.loc[(df_city_melt['RegionName'] == selected_city)][graph_columns]

# grouped bars: one group per month, one bar per year
bar_title = 'Days on Market Trend from {0} to {1}: {2}'.format(start_year, end_year, selected_city)
fig = px.bar(df_graph, x='Month', y='DaysOnMarket', color='Year', barmode='group', title=bar_title)

# month names on the x axis, fixed range on the y axis
month_axis = {
    'tickmode': 'array',
    'tickvals': month_num,
    'ticktext': month_name,
}
days_axis = {'range': [0, max_days_on_mrkt]}
fig.update_layout(xaxis=month_axis, yaxis=days_axis)
fig.show()

# End Notebook