In [2]:
import os
from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller



# Data Import and Visualisation
- Import data using pandas, and plot using plotly

In [3]:
# Location of the house-price workbook. The default is the original author's
# local path; set the HOUSE_PRICES_XLSX environment variable to point the
# notebook at the file on another machine without editing this cell.
HOUSE_PRICES_XLSX = Path(
    os.environ.get(
        'HOUSE_PRICES_XLSX',
        r'/Users/dhruvpandit/Mirror/University/Masters/Semester 2/MacroEcon I/HousePrices.xlsx',
    )
)

# Use the column labelled 'Unnamed: 0' as the row index.
# NOTE(review): presumably this is the header-less first column holding the
# observation dates (the index is later plotted on the 'Year' axis) — confirm.
house_df = pd.read_excel(HOUSE_PRICES_XLSX, index_col='Unnamed: 0')

In [28]:
# Line chart of the house-price series (column 'PTrpp') against the frame's index.
price_trace = go.Scatter(
    x=house_df.index,
    y=house_df['PTrpp'],
    mode='lines',
    name='House Prices',
)
layout_options = dict(
    title={'text': 'House Prices From 1988 to 2021 in Portugal', "x": 0.5},
    xaxis_title='Year',
    yaxis_title='Price',
    font=dict(family='Futura', color='#000000', size=14),
)

fig = go.Figure(data=[price_trace])
# update_layout returns the figure, so leaving it as the cell's last
# expression is what renders the chart.
fig.update_layout(**layout_options)


# Modelling The Data

### Cleaning the data
- Define the regressors from the pandas dataframe
- Define the dependent variable (the house price series) from the dataframe

In [8]:
# Regressors for the model, selected from the full frame by column name.
exog_columns = ['PTgdppercapita', 'PTgfcfhousvol', 'PTloanhpr', 'PTmtgrr', 'PTunemp']
house_df_exog = house_df[exog_columns]

# Dependent variable: the 'PTrpp' house-price series.
house_price = house_df['PTrpp']

### Model Fitting
- Add a constant for the MLR model using `sm.add_constant`, passing in your regressors
- Fit the model
- Print the regression summary

In [9]:
# Multiple linear regression of house prices on the selected regressors.
# `sm` (statsmodels.api) comes from the notebook's import cell at the top.
# A constant column is prepended to the regressors so the fitted model
# includes an intercept term (reported as `const` in the summary).
house_df_exog_sm = sm.add_constant(house_df_exog)
model = sm.OLS(house_price, house_df_exog_sm).fit()

In [10]:
# Plain-text regression table for the fitted OLS model.
ols_summary = model.summary()
print(ols_summary)

                            OLS Regression Results                            
Dep. Variable:                  PTrpp   R-squared:                       0.821
Model:                            OLS   Adj. R-squared:                  0.814
Method:                 Least Squares   F-statistic:                     118.7
Date:                Tue, 28 Feb 2023   Prob (F-statistic):           1.67e-46
Time:                        18:52:41   Log-Likelihood:                 214.50
No. Observations:                 135   AIC:                            -417.0
Df Residuals:                     129   BIC:                            -399.6
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const              4.7138      0.270     17.

## Conducting Tests
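- White test for heteroskedasticity
  - Fit the model $Y_i = \hat{\beta}_0 + \hat{\beta}_1 x_{1i} + \dots + \hat{\beta}_k x_{ki} + \hat{u}_i$ and obtain the residuals $\hat{u}_i$
  - Regress the squared residuals on the regressors, their squares and their cross-products $z_1, \dots, z_m$: $\hat{u}_i^2 = \tilde{\beta}_0 + \tilde{\beta}_1 z_{1i} + \dots + \tilde{\beta}_m z_{mi} + \tilde{v}_i$
  - Test overall significance: $H_0 : \tilde{\beta}_1 = \dots = \tilde{\beta}_m = 0$ (homoskedasticity) against $H_a$: at least one $\tilde{\beta}_j \neq 0$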
- Breusch–Pagan test
- Durbin–Watson:
  - Can only test for autocorrelation of order 1 (January related to February, Monday related to Tuesday, etc.)
  - Can only be used on static models, not dynamic ones
  - Alternative used: Breusch–Godfrey


In [11]:
import statsmodels.stats.diagnostic as smd

In [34]:
# White's heteroskedasticity test on the OLS residuals, using the regressor
# matrix that already contains the constant column.
wtest = smd.het_white(model.resid, house_df_exog_sm)

# The first two entries of the result are the LM statistic and its p-value.
lm_stat, lm_pvalue = wtest[0], wtest[1]
print("lm:", lm_stat, "lm_pvalue:", lm_pvalue)

lm: 89.33427593490512 lm_pvalue: 9.68922456485885e-11


In [19]:
# Re-estimate the regression with statsmodels' RLM class, using the
# LeastSquares norm as the M-estimator.
# NOTE(review): with the LeastSquares norm this fit appears to reproduce the
# OLS estimates rather than a robust fit (the `const` row matches the OLS
# table) — confirm whether a robust norm such as HuberT, or
# heteroskedasticity-robust OLS standard errors, was intended here.
least_squares_norm = sm.robust.norms.LeastSquares()
rlm_model = sm.RLM(house_price, house_df_exog_sm, M=least_squares_norm).fit()

rlm_model.summary()

0,1,2,3
Dep. Variable:,PTrpp,No. Observations:,135.0
Model:,RLM,Df Residuals:,129.0
Method:,IRLS,Df Model:,5.0
Norm:,LeastSquares,,
Scale Est.:,mad,,
Cov Type:,H1,,
Date:,"Tue, 28 Feb 2023",,
Time:,19:16:07,,
No. Iterations:,2,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,4.7138,0.270,17.487,0.000,4.185,5.242
PTgdppercapita,0.1198,0.154,0.776,0.438,-0.183,0.422
PTgfcfhousvol,0.0078,0.022,0.355,0.723,-0.035,0.051
PTloanhpr,0.0401,0.033,1.230,0.219,-0.024,0.104
PTmtgrr,-0.0017,0.003,-0.560,0.576,-0.008,0.004
PTunemp,-0.0354,0.003,-10.438,0.000,-0.042,-0.029
