In [1]:
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pd.set_option('display.max_columns', 500)

In [2]:
realtor_data_raw = pd.read_csv("https://econdata.s3-us-west-2.amazonaws.com/Reports/Core/RDC_Inventory_Core_Metrics_State_History.csv")

# for some reason the final row was corrupt with a note
realtor_data = realtor_data_raw.drop(realtor_data_raw.tail(1).index)

df = (realtor_data
    .drop(labels = ['state'], axis = 1)
    .assign(month_date_yyyymm = (realtor_data['month_date_yyyymm'] + "01").astype('datetime64'))
    .rename(columns = {'month_date_yyyymm':'month','state_id':'state'})
    .astype({'state':'category'})
    .sort_values(by = ['state','month'], ascending = True)
    )

In [3]:
df.columns

Index(['month', 'state', 'median_listing_price', 'median_listing_price_mm',
       'median_listing_price_yy', 'active_listing_count',
       'active_listing_count_mm', 'active_listing_count_yy',
       'median_days_on_market', 'median_days_on_market_mm',
       'median_days_on_market_yy', 'new_listing_count', 'new_listing_count_mm',
       'new_listing_count_yy', 'price_increased_count',
       'price_increased_count_mm', 'price_increased_count_yy',
       'price_reduced_count', 'price_reduced_count_mm',
       'price_reduced_count_yy', 'pending_listing_count',
       'pending_listing_count_mm', 'pending_listing_count_yy',
       'median_listing_price_per_square_foot',
       'median_listing_price_per_square_foot_mm',
       'median_listing_price_per_square_foot_yy', 'median_square_feet',
       'median_square_feet_mm', 'median_square_feet_yy',
       'average_listing_price', 'average_listing_price_mm',
       'average_listing_price_yy', 'total_listing_count',
       'total_listing_cou

In [4]:
cols = ['month', 'state',
       'median_listing_price',
       #'median_listing_price_mm', 'median_listing_price_yy',
       'active_listing_count', #'active_listing_count_mm',
       #'active_listing_count_yy', 
       'median_days_on_market',
       #'median_days_on_market_mm', 'median_days_on_market_yy',
       'new_listing_count', 
       #'new_listing_count_mm', 'new_listing_count_yy',
       'price_increased_count', 
       #'price_increased_count_mm','price_increased_count_yy', 
       'price_reduced_count',
       #'price_reduced_count_mm', 'price_reduced_count_yy',
       'pending_listing_count', 
       #'pending_listing_count_mm', 'pending_listing_count_yy',
       #'median_listing_price_per_square_foot',
       #'median_listing_price_per_square_foot_mm','median_listing_price_per_square_foot_yy', 
       'median_square_feet',
       #'median_square_feet_mm', 'median_square_feet_yy',
       'average_listing_price', 
       #'average_listing_price_mm', 'average_listing_price_yy', 
       # 'total_listing_count',
       #'total_listing_count_mm', 'total_listing_count_yy', 'pending_ratio',
       #'pending_ratio_mm', 'pending_ratio_yy', 'quality_flag'
       ]

df = df[cols]

In [5]:
df = df.rename(columns = {'median_listing_price':'med_lp',
                          'active_listing_count':'active_listings',
                          'median_days_on_market':'med_dom',
                          'new_listing_count':'new_listings',
                          'price_increased_count':'price_increases',
                          'price_reduced_count':'price_reductions',
                          'pending_listing_count':'pending_listings',
                          'median_square_feet':'med_sf',
                          'average_listing_price':'avg_lp'})



In [6]:
# Month number and year columns, useful for time series plots
df = df.assign(
        month_num = lambda x: x.month.dt.month,
        year = lambda x: x.month.dt.year)

# percent change month over month
df['pct_ch_med_list_1m'] = df.groupby('state')['med_lp'].transform(lambda x: x.pct_change(periods = 1)*100)
df['pct_ch_active_list_1m'] = df.groupby('state')['active_listings'].transform(lambda x: x.pct_change(periods = 1)*100)
df['pct_ch_med_dom_1m'] = df.groupby('state')['med_dom'].transform(lambda x: x.pct_change(periods = 1)*100)
df['pct_ch_new_list_1m'] = df.groupby('state')['new_listings'].transform(lambda x: x.pct_change(periods = 1)*100)
df['pct_ch_price_inc_1m'] = df.groupby('state')['price_increases'].transform(lambda x: x.pct_change(periods = 1)*100)
df['pct_ch_price_red_1m'] = df.groupby('state')['price_reductions'].transform(lambda x: x.pct_change(periods = 1)*100)
df['pct_ch_pending_1m'] = df.groupby('state')['pending_listings'].transform(lambda x: x.pct_change(periods = 1)*100)

df.head()


Unnamed: 0,month,state,med_lp,active_listings,med_dom,new_listings,price_increases,price_reductions,pending_listings,med_sf,avg_lp,month_num,year,pct_ch_med_list_1m,pct_ch_active_list_1m,pct_ch_med_dom_1m,pct_ch_new_list_1m,pct_ch_price_inc_1m,pct_ch_price_red_1m,pct_ch_pending_1m
3841,2016-07-01,AK,289900.0,3745.0,69.0,1268.0,44.0,1188.0,3.0,1800.0,333200.0,7,2016,,,,,,,
3800,2016-08-01,AK,287975.0,3679.0,75.0,1116.0,46.0,1258.0,3.0,1794.0,329918.0,8,2016,-0.664022,-1.76235,8.695652,-11.987382,4.545455,5.892256,0.0
3735,2016-09-01,AK,285000.0,3578.0,80.0,920.0,18.0,1132.0,4.0,1788.0,327484.0,9,2016,-1.033076,-2.745311,6.666667,-17.562724,-60.869565,-10.015898,33.333333
3682,2016-10-01,AK,280000.0,3251.0,86.0,780.0,24.0,896.0,4.0,1772.0,327819.0,10,2016,-1.754386,-9.139184,7.5,-15.217391,33.333333,-20.848057,0.0
3653,2016-11-01,AK,279938.0,2935.0,94.0,630.0,12.0,644.0,4.0,1775.0,329431.0,11,2016,-0.022143,-9.720086,9.302326,-19.230769,-50.0,-28.125,0.0


In [7]:
select_states = ['NE','KA','WY','CA','OR','PA','CO','IL','WI','UT','ID','MI','NY']
select_states

['NE', 'KA', 'WY', 'CA', 'OR', 'PA', 'CO', 'IL', 'WI', 'UT', 'ID', 'MI', 'NY']

In [8]:
fig = px.line(df.query("state in @select_states"),
    x = 'month_num',
    y = 'med_dom',
    color = 'year',
    facet_col = 'state',
    facet_col_wrap = 4,
    title = 'Median Days on Market over Time by State',
    color_discrete_sequence=px.colors.sequential.Inferno
    )
fig.update_yaxes(matches = None)
fig.update_layout(height = 800, width = 1200)
fig.show()

In [9]:
fig = px.line(df.query("state in @select_states"),
    x = 'month_num',
    y = 'new_listings',
    color = 'year',
    facet_col = 'state',
    facet_col_wrap = 4,
    title = 'New Listings over Time by State',
    color_discrete_sequence=px.colors.sequential.Inferno
    )
fig.update_yaxes(matches = None)
fig.update_layout(height = 800, width = 1200)
fig.show()

In [10]:
fig = px.line(df.query("state in @select_states"),
    x = 'month_num',
    y = 'pending_listings',
    color = 'year',
    facet_col = 'state',
    facet_col_wrap = 4,
    title = 'Pending Listings over Time by State',
    color_discrete_sequence=px.colors.sequential.Inferno
    )
fig.update_yaxes(matches = None)
fig.update_layout(height = 800, width = 1200)
fig.show()

In [11]:
fig = px.line(df.query("state in @select_states"),
    x = 'month_num',
    y = 'price_reductions',
    color = 'year',
    facet_col = 'state',
    facet_col_wrap = 4,
    title = 'Price Decreases over Time by State',
    color_discrete_sequence=px.colors.sequential.Inferno
    )
fig.update_yaxes(matches = None)
fig.update_layout(height = 800, width = 1200)
fig.show()

In [12]:
fig = px.line(df.query("state in @select_states"),
    x = 'month_num',
    y = 'med_lp',
    color = 'year',
    facet_col = 'state',
    facet_col_wrap = 4,
    title = 'Median List Prices of Homes Over Time by State',
    color_discrete_sequence=px.colors.sequential.Inferno
    )
fig.update_yaxes(matches = None)
fig.update_layout(height = 800, width = 1200)
fig.show()

In [13]:
c = realtor_data.query("month in ['2016-09-01','2022-09-01']")[['month','state','median_listing_price']].pivot(index = 'state',columns = 'month',values = 'median_listing_price').reset_index()
c['pct_change'] = (c.iloc[:,2] - c.iloc[:,1])/c.iloc[:,1]*100

a = c.sort_values(by = 'pct_change', ascending = False).head(10)
b = c.sort_values(by = 'pct_change', ascending =True).head(10)

a


UndefinedVariableError: name 'month' is not defined

In [14]:
b

NameError: name 'b' is not defined