## Setup

In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import het_white
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

## Preprocessing the data

In [18]:
# Read the workbook and give the unnamed first column (the observation dates
# shown in the frame below) an explicit name.
house_prices = (
    pd.read_excel("HousePrices.xlsx")
    .rename(columns={'Unnamed: 0': "date"})
)

In [19]:
# First-difference every non-date column (DataFrame.diff works column by column),
# then re-attach the original date column so each change keeps its timestamp.
house_prices_diff = (
    house_prices
    .drop(columns="date")
    .diff()
    .assign(date=house_prices["date"])
)

In [20]:
# Show the differenced frame; row 0 is all-NaN because a first difference
# needs a previous observation to subtract.
house_prices_diff

Unnamed: 0,PTrpp,PTgdppercapita,PTgfcfhousvol,PTloanhpr,PTmtgrr,PTunemp,date
0,,,,,,,1988-03-01
1,0.014625,0.007131,-0.007566,0.019051,-0.288497,-0.264240,1988-06-01
2,0.013298,0.019992,0.060268,0.006607,-1.148597,-0.293735,1988-09-01
3,0.019661,0.011734,-0.021246,0.056427,-0.697748,-0.295162,1988-12-01
4,0.009872,0.026327,-0.042270,0.016402,-0.632530,-0.117802,1989-03-01
...,...,...,...,...,...,...,...
130,-0.007414,0.135302,0.011201,0.003844,-0.303783,2.213341,2020-09-01
131,0.026296,0.002670,0.018238,0.009344,0.331295,-0.966803,2020-12-01
132,0.022267,-0.022431,0.049357,0.006038,-0.417235,-0.295931,2021-03-01
133,0.025088,0.042137,0.043464,0.008253,-0.198512,0.013170,2021-06-01


In [22]:
# Remove every row containing a missing value; differencing leaves the first
# row entirely NaN, so it cannot be used in the regression.
house_prices_diff = house_prices_diff.dropna(axis="index", how="any")

In [23]:
# List the available columns: the target `PTrpp`, the candidate regressors, and `date`.
house_prices_diff.columns

Index(['PTrpp', 'PTgdppercapita', 'PTgfcfhousvol', 'PTloanhpr', 'PTmtgrr',
       'PTunemp', 'date'],
      dtype='object')

## Modeling

- Running OLS on the first-differenced time series

In [45]:
# Design matrix: every differenced series except the target and the date stamp,
# plus an intercept column added by statsmodels.
# errors="ignore" keeps the selection tolerant of an absent column, exactly like
# the boolean column mask this replaces.
x = sm.add_constant(
    house_prices_diff.drop(columns=['PTrpp', "date"], errors="ignore")
)

# Target as a one-column frame (the results table labels it `PTrpp`).
y = house_prices_diff[['PTrpp']]

# Ordinary least squares with the default (non-robust) covariance.
model = sm.OLS(y, x).fit()

  x = pd.concat(x[::order], 1)


In [46]:
# Render the OLS results table for the first-difference regression.
# NOTE(review): the saved output reports 134 observations, yet the differenced
# frame displayed earlier has 134 rows *including* the all-NaN first row that
# dropna() removes. Presumably this output is stale (the execution counts are
# also out of order) - re-run from a fresh kernel to confirm.
model.summary()

0,1,2,3
Dep. Variable:,PTrpp,R-squared:,0.242
Model:,OLS,Adj. R-squared:,0.212
Method:,Least Squares,F-statistic:,8.173
Date:,"Tue, 28 Feb 2023",Prob (F-statistic):,1.01e-06
Time:,18:57:48,Log-Likelihood:,386.5
No. Observations:,134,AIC:,-761.0
Df Residuals:,128,BIC:,-743.6
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0026,0.001,1.818,0.071,-0.000,0.005
PTgdppercapita,0.0466,0.059,0.787,0.433,-0.071,0.164
PTgfcfhousvol,0.0547,0.034,1.587,0.115,-0.013,0.123
PTloanhpr,0.0136,0.048,0.286,0.776,-0.081,0.108
PTmtgrr,0.0029,0.002,1.687,0.094,-0.000,0.006
PTunemp,-0.0147,0.003,-5.418,0.000,-0.020,-0.009

0,1,2,3
Omnibus:,4.001,Durbin-Watson:,1.039
Prob(Omnibus):,0.135,Jarque-Bera (JB):,4.234
Skew:,0.184,Prob(JB):,0.12
Kurtosis:,3.789,Cond. No.,50.4


- White's test for heteroskedasticity

In [36]:
# White's test for heteroskedasticity on the OLS residuals.
# `het_white` is imported with the other statsmodels imports in the first cell,
# so this cell and the later re-test do not depend on each other's execution.
# It returns four values in this order: LM statistic, LM p-value,
# F statistic, F p-value.
white_test = het_white(model.resid, x)

# Labels paired positionally with the four values returned above.
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']

# A p-value below 0.05 rejects the null hypothesis of homoskedastic errors.
print(dict(zip(labels, white_test)))

{'Test Statistic': 31.617196354079915, 'Test Statistic p-value': 0.04755200600812489, 'F-Statistic': 1.744796518938365, 'F-Test p-value': 0.03600543680901449}


- Running OLS with heteroskedasticity-robust (HC0) standard errors, because White's test rejects homoskedasticity at the 5% level

In [47]:
# Refit the same specification; cov_type="HC0" requests a
# heteroskedasticity-consistent covariance estimate for the standard errors.
# NOTE(review): the Durbin-Watson statistic reported for the plain fit (1.039)
# is well below 2, which usually points to serial correlation. HC0 does not
# address that - cov_type="HAC" may be more appropriate; confirm.
model_robust = sm.OLS(endog=y, exog=x).fit(cov_type="HC0")

In [48]:
# Render the robust-fit results table. The coefficients match the plain fit;
# only the standard errors, test statistics and intervals use the HC0 covariance.
model_robust.summary()

0,1,2,3
Dep. Variable:,PTrpp,R-squared:,0.242
Model:,OLS,Adj. R-squared:,0.212
Method:,Least Squares,F-statistic:,7.595
Date:,"Tue, 28 Feb 2023",Prob (F-statistic):,2.81e-06
Time:,18:58:22,Log-Likelihood:,386.5
No. Observations:,134,AIC:,-761.0
Df Residuals:,128,BIC:,-743.6
Df Model:,5,,
Covariance Type:,HC0,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0026,0.002,1.729,0.084,-0.000,0.006
PTgdppercapita,0.0466,0.068,0.687,0.492,-0.086,0.179
PTgfcfhousvol,0.0547,0.033,1.664,0.096,-0.010,0.119
PTloanhpr,0.0136,0.039,0.354,0.723,-0.062,0.089
PTmtgrr,0.0029,0.002,1.555,0.120,-0.001,0.007
PTunemp,-0.0147,0.003,-4.601,0.000,-0.021,-0.008

0,1,2,3
Omnibus:,4.001,Durbin-Watson:,1.039
Prob(Omnibus):,0.135,Jarque-Bera (JB):,4.234
Skew:,0.184,Prob(JB):,0.12
Kurtosis:,3.789,Cond. No.,50.4


In [49]:
# Names for the four values het_white returns, in order.
labels = [
    'Test Statistic',
    'Test Statistic p-value',
    'F-Statistic',
    'F-Test p-value',
]

# Repeat White's test on the robust fit's residuals.
# NOTE(review): the recorded result is identical to the earlier test. The
# coefficients of both fits are the same, so the residuals presumably are too:
# robust standard errors change inference, not the residuals, and this re-test
# cannot show the heteroskedasticity as "fixed".
white_test = het_white(model_robust.resid, x)

# Print the labelled results as a dict.
print({label: value for label, value in zip(labels, white_test)})

{'Test Statistic': 31.617196354079915, 'Test Statistic p-value': 0.04755200600812489, 'F-Statistic': 1.744796518938365, 'F-Test p-value': 0.03600543680901449}
