In [1]:
# Clear the interactive namespace (-f: do not ask for confirmation) so the
# notebook starts from a clean state.
%reset -f

In [2]:
import numpy             as np
import statsmodels.api   as sm
import pandas            as pd
import seaborn           as sns

from linearmodels.iv            import IV2SLS
from statsmodels.iolib.summary2 import summary_col
from collections                import OrderedDict
from linearmodels.iv.results    import compare

In [3]:
# Load the data set from SMOKE.csv (path is relative to the working directory).
df = pd.read_csv("SMOKE.csv")

In [4]:
# Inspect the column names of the freshly loaded frame.
df.columns

Index(['educ', 'cigpric', 'white', 'age', 'income', 'cigs', 'restaurn',
       'lincome', 'agesq', 'lcigpric'],
      dtype='object')

In [5]:
# Add an intercept column ('const') for the regressions below; note that this
# rebinds `df` to the augmented frame.
df = sm.add_constant(df)

In [6]:
# Confirm that the 'const' column is now part of the frame.
df.columns

Index(['const', 'educ', 'cigpric', 'white', 'age', 'income', 'cigs',
       'restaurn', 'lincome', 'agesq', 'lcigpric'],
      dtype='object')

In [7]:
# Summary statistics (count, mean, std, quantiles) for every column.
df.describe()

Unnamed: 0,const,educ,cigpric,white,age,income,cigs,restaurn,lincome,agesq,lcigpric
count,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0
mean,1.0,12.47088,60.300411,0.878563,41.237918,19304.832714,8.686493,0.246592,9.687316,1990.135068,4.096032
std,0.0,3.057161,4.738469,0.326837,17.027285,9142.95829,13.721516,0.431295,0.712696,1577.165644,0.082919
min,1.0,6.0,44.004002,0.0,17.0,500.0,0.0,0.0,6.214608,289.0,3.78428
25%,1.0,10.0,58.141498,1.0,28.0,12500.0,0.0,0.0,9.433484,784.0,4.06288
50%,1.0,12.0,61.053001,1.0,38.0,20000.0,0.0,0.0,9.903487,1444.0,4.111743
75%,1.0,13.5,63.179001,1.0,54.0,30000.0,20.0,0.0,10.308952,2916.0,4.145972
max,1.0,18.0,70.128998,1.0,88.0,30000.0,80.0,1.0,10.308952,7744.0,4.250336


# (d) fitting the income equation via OLS

In [9]:
# Part (d): ordinary least squares fit of lincome on cigs, educ, age and
# agesq (plus the intercept column 'const').
Y_d = df.loc[:, ['lincome']]
X_d = df.loc[:, ['const', 'cigs', 'educ', 'age', 'agesq']]

# missing='drop' tells statsmodels to discard rows containing NaN values.
M_OLS      = sm.OLS(endog=Y_d, exog=X_d, missing='drop')
OLS_result = M_OLS.fit()

print(OLS_result.summary())

                            OLS Regression Results                            
Dep. Variable:                lincome   R-squared:                       0.165
Model:                            OLS   Adj. R-squared:                  0.161
Method:                 Least Squares   F-statistic:                     39.61
Date:                Tue, 20 Apr 2021   Prob (F-statistic):           2.68e-30
Time:                        02:13:21   Log-Likelihood:                -798.50
No. Observations:                 807   AIC:                             1607.
Df Residuals:                     802   BIC:                             1630.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.7954      0.170     45.741      0.0

# (e) reduced form

In [10]:
# Part (e): reduced-form regression of cigs on educ, age, agesq, lcigpric
# and restaurn (plus the intercept column 'const').
Y_e = df.loc[:, ['cigs']]
X_e = df.loc[:, ['const', 'educ', 'age', 'agesq', 'lcigpric', 'restaurn']]

# missing='drop' tells statsmodels to discard rows containing NaN values.
M_Red        = sm.OLS(endog=Y_e, exog=X_e, missing='drop')
M_Red_result = M_Red.fit()

print(M_Red_result.summary())

                            OLS Regression Results                            
Dep. Variable:                   cigs   R-squared:                       0.051
Model:                            OLS   Adj. R-squared:                  0.045
Method:                 Least Squares   F-statistic:                     8.610
Date:                Tue, 20 Apr 2021   Prob (F-statistic):           5.86e-08
Time:                        02:16:11   Log-Likelihood:                -3237.0
No. Observations:                 807   AIC:                             6486.
Df Residuals:                     801   BIC:                             6514.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5801     23.696      0.067      0.9

# (f) : 2SLS

In [11]:
# Part (f): two-stage least squares for lincome, with cigs as the endogenous
# regressor and lcigpric / restaurn as its instruments.
# Note: M_2SLS holds the *fitted results*, not the unfitted model.
M_2SLS = IV2SLS(
    df.loc[:, 'lincome'],                           # dependent variable
    df.loc[:, ['const', 'educ', 'age', 'agesq']],   # exogenous regressors
    df.loc[:, ['cigs']],                            # endogenous regressor
    df.loc[:, ['lcigpric', 'restaurn']],            # excluded instruments
).fit()

# `summary` is an attribute on linearmodels results, so it is not called.
print(M_2SLS.summary)


                          IV-2SLS Estimation Summary                          
Dep. Variable:                lincome   R-squared:                     -0.5169
Estimator:                    IV-2SLS   Adj. R-squared:                -0.5245
No. Observations:                 807   F-statistic:                    87.512
Date:                Tue, Apr 20 2021   P-value (F-stat)                0.0000
Time:                        02:18:15   Distribution:                  chi2(4)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const          7.7809     0.2580     30.162     0.0000      7.2753      8.2865
educ           0.0397     0.0155     2.5668     0.01

# check whether the story you compiled is supported by the data

# Apparently, the IV story from the previous questions is supported by the data: in every exogeneity test below, H0 (the endogenous regressor is exogenous) is rejected at the 5% level, since all p-values are below 0.05.

In [12]:
# Wu-Hausman test of exogeneity on the fitted 2SLS results
# (H0: all endogenous variables are exogenous).
M_2SLS.wu_hausman()

Wu-Hausman test of exogeneity
H0: All endogenous variables are exogenous
Statistic: 7.6289
P-value: 0.0059
Distributed: F(1,801)
WaldTestStatistic, id: 0x7fa968231c10

In [13]:
# Durbin test of exogeneity on the fitted 2SLS results
# (H0: all endogenous variables are exogenous).
M_2SLS.durbin()

Durbin test of exogeneity
H0: All endogenous variables are exogenous
Statistic: 7.6135
P-value: 0.0058
Distributed: chi2(1)
WaldTestStatistic, id: 0x7fa999791b10

In [16]:
# Wooldridge's regression test of exogeneity; accessed as an attribute
# (no call parentheses) on the fitted 2SLS results.
M_2SLS.wooldridge_regression

Wooldridge's regression test of exogeneity
H0: Endogenous variables are exogenous
Statistic: 5.5204
P-value: 0.0188
Distributed: chi2(1)
WaldTestStatistic, id: 0x7fa9387327d0

In [17]:
# Wooldridge's score test of exogeneity; accessed as an attribute
# (no call parentheses) on the fitted 2SLS results.
M_2SLS.wooldridge_score

Wooldridge's score test of exogeneity
H0: Endogenous variables are exogenous
Statistic: 5.4268
P-value: 0.0198
Distributed: chi2(1)
WaldTestStatistic, id: 0x7fa9a8570d10

In [18]:
# Shell escape: convert W5_Python.ipynb (presumably this notebook -- confirm
# the file name) to HTML using nbconvert.
!jupyter nbconvert --to html W5_Python.ipynb

[NbConvertApp] Converting notebook W5_Python.ipynb to html
[NbConvertApp] Writing 595006 bytes to W5_Python.html
