In [1]:

# 1. Packages

import statsmodels.api as sm
import statsmodels.formula.api as smf
import linearmodels.iv.model as lm

In [2]:
# 2. Data
# Pull the "HousePrices" dataset that ships with the R package "AER".
# cache=True asks statsmodels to keep/reuse a local copy instead of
# downloading the data again on every run.
houseprices_object = sm.datasets.get_rdataset(
    dataname="HousePrices",
    package="AER",
    cache=True,
)
# The returned object wraps the data; the DataFrame itself lives in `.data`.
houseprices = houseprices_object.data
# Preview the first rows to check the columns that the models below rely on.
print(houseprices.head())



   price  lotsize  bedrooms  bathrooms  stories driveway recreation fullbase  \
0  42000     5850         3          1        2      yes         no      yes   
1  38500     4000         2          1        1      yes         no       no   
2  49500     3060         3          1        1      yes         no       no   
3  60500     6650         3          1        2      yes        yes       no   
4  61000     6360         2          1        1      yes         no       no   

  gasheat aircon  garage prefer  
0      no     no       1     no  
1      no     no       0     no  
2      no     no       0     no  
3      no     no       0     no  
4      no     no       0     no  


In [4]:
# Optional: uncomment the next line to print the documentation attached to the dataset object.
# print(houseprices_object.__doc__)

In [5]:
# 3. Model
# Baseline multiple linear regression fitted by OLS:
# price explained by lot size and number of bedrooms.
mlr1 = smf.ols(
    formula="price ~ lotsize + bedrooms",
    data=houseprices,
).fit()

In [9]:
# 4. Exogeneity
# TSLS
# Add an intercept column ("const") to the data; prepend=False places it
# after the existing columns rather than in front of them.
mdatac = sm.add_constant(data=houseprices, prepend=False)

# Two-stage least squares for the same price equation as the OLS model:
#   dependent   -> price
#   exog        -> intercept and bedrooms (treated as exogenous)
#   endog       -> lotsize (treated as endogenous)
#   instruments -> driveway and garage
# NOTE(review): driveway is a yes/no string column in the raw data, so this
# relies on linearmodels encoding it as a dummy variable - confirm.
mlr2 = lm.IV2SLS(
    dependent=mdatac["price"],
    exog=mdatac[["const", "bedrooms"]],
    endog=mdatac["lotsize"],
    instruments=mdatac[["driveway", "garage"]],
).fit(cov_type="homoskedastic", debiased=True)

In [10]:
# Wu-Hausman Tests
# Two exogeneity tests computed from the fitted 2SLS results (mlr2). Both
# report the null hypothesis that the endogenous regressor is exogenous, so
# a small p-value is evidence against treating lotsize as exogenous.
# wu_hausman is invoked as a method; wooldridge_regression is read as an
# attribute of the results object.
print(mlr2.wu_hausman())
print(mlr2.wooldridge_regression)

Wu-Hausman test of exogeneity
H0: All endogenous variables are exogenous
Statistic: 50.9308
P-value: 0.0000
Distributed: F(1,542)
Wooldridge's regression test of exogeneity
H0: Endogenous variables are exogenous
Statistic: 50.9046
P-value: 0.0000
Distributed: chi2(1)


In [12]:
# Sargan Over-identifying Restrictions Test
# The 2SLS model uses two instruments (driveway, garage) for one endogenous
# regressor (lotsize), which leaves one over-identifying restriction to test.
# A large p-value means the over-identifying restriction is not rejected.
print(mlr2.sargan)

Sargan's test of overidentification
H0: The model is not overidentified.
Statistic: 0.0477
P-value: 0.8271
Distributed: chi2(1)
