In [47]:
import pandas
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.sandbox.regression.gmm import IVGMM
from statsmodels.sandbox.regression.gmm import IV2SLS

df = pandas.read_stata('../Card1995.dta')

In [48]:
#set up data
X = pandas.DataFrame()
X['lwage'] = df['lwage76']*1.0
X['south'] = df['reg76r']*1.0
X['urban'] = df['smsa76r']*1.0
X['black'] = df['black']*1.0
X['exp'] = (1.0*df['age76'] - 6.0).add(-1.0*df['ed76'])
X['exp_sq'] = (X['exp']**2)/100
X['nearc4a'] = df['nearc4a']*1.0
X['nearc4b'] = df['nearc4b']*1.0
X['educ'] = df['ed76']
X = sm.add_constant(X)

In [49]:
#Part a - run stage 1 model (table 11.32)

#set up variables
X_clean = X.dropna()
y = X_clean['lwage']
X_a =  X_clean[['const','south','urban','black','exp','exp_sq','educ']]
I_a = X_clean[['const','south','urban','black','exp','exp_sq','nearc4a','nearc4b']]

#Reduced form stage 1
model_a_s1 = sm.OLS(X_clean['educ'],I_a).fit()
print model_a_s1.summary()

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.476
Model:                            OLS   Adj. R-squared:                  0.475
Method:                 Least Squares   F-statistic:                     390.0
Date:                Mon, 02 Apr 2018   Prob (F-statistic):               0.00
Time:                        21:20:31   Log-Likelihood:                -6261.0
No. Observations:                3010   AIC:                         1.254e+04
Df Residuals:                    3002   BIC:                         1.259e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         16.6573      0.176     94.579      0.0

In [50]:
#Stage 2
model_a = IV2SLS(y,X_a,instrument=I_a).fit()
print model_a.summary()

                          IV2SLS Regression Results                           
Dep. Variable:                  lwage   R-squared:                       0.145
Model:                         IV2SLS   Adj. R-squared:                  0.143
Method:                     Two Stage   F-statistic:                     111.0
                        Least Squares   Prob (F-statistic):          9.63e-127
Date:                Mon, 02 Apr 2018                                         
Time:                        21:20:32                                         
No. Observations:                3010                                         
Df Residuals:                    3003                                         
Df Model:                           6                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.2680      0.687      4.756      0.0

In [51]:
#part b - add nearc2 as instrument
Xb = X.copy()
Xb['nearc2'] = df['nearc2']
Xb_clean = Xb.dropna() 

model_b = IV2SLS(Xb_clean['lwage'],Xb_clean[['const','south','urban','black','exp','exp_sq','educ']], 
                 instrument=Xb_clean[['const','south','urban','black','exp','exp_sq','nearc4a','nearc4b','nearc2']]).fit()
print model_a.summary()

                          IV2SLS Regression Results                           
Dep. Variable:                  lwage   R-squared:                       0.145
Model:                         IV2SLS   Adj. R-squared:                  0.143
Method:                     Two Stage   F-statistic:                     111.0
                        Least Squares   Prob (F-statistic):          9.63e-127
Date:                Mon, 02 Apr 2018                                         
Time:                        21:20:32                                         
No. Observations:                3010                                         
Df Residuals:                    3003                                         
Df Model:                           6                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.2680      0.687      4.756      0.0

In [52]:
#part c - even more insturments, reduced form
Xc = Xb.copy()
Xc['nearc4a*age76'] = df['nearc4a'].multiply(df['age76']*1.0)
Xc['nearc4a*age76^2/100'] = df['nearc4a'].multiply(df['age76']**2/100)
Xc_clean = Xc.dropna()
y_c = Xc_clean['educ']
X_c = Xc_clean[['const','south','urban','black','exp','exp_sq','educ']]
I_c = Xc_clean[['const','south','urban','black','exp','exp_sq',
                             'nearc4a','nearc4b','nearc2','nearc4a*age76','nearc4a*age76^2/100']]

model_c_s1 = sm.OLS(Xc_clean['educ'], I_c ).fit()
print model_c_s1.summary()

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.649
Model:                            OLS   Adj. R-squared:                  0.648
Method:                 Least Squares   F-statistic:                     555.3
Date:                Mon, 02 Apr 2018   Prob (F-statistic):               0.00
Time:                        21:20:33   Log-Likelihood:                -5657.3
No. Observations:                3010   AIC:                         1.134e+04
Df Residuals:                    2999   BIC:                         1.140e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                  18.4171    

In [53]:
#part d - 2SLS
model_d = IV2SLS(Xc_clean['lwage'],Xb_clean[['const','south','urban','black','exp','exp_sq','educ']], 
                 Xc_clean[['const','south','urban','black','exp','exp_sq','nearc4a','nearc4b','nearc2',
                           'nearc4a*age76','nearc4a*age76^2/100']]).fit()
print model_d.summary()

                          IV2SLS Regression Results                           
Dep. Variable:                  lwage   R-squared:                       0.289
Model:                         IV2SLS   Adj. R-squared:                  0.287
Method:                     Two Stage   F-statistic:                     162.3
                        Least Squares   Prob (F-statistic):          4.61e-179
Date:                Mon, 02 Apr 2018                                         
Time:                        21:20:33                                         
No. Observations:                3010                                         
Df Residuals:                    3003                                         
Df Model:                           6                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.5674      0.107     42.653      0.0

In [54]:
#part e - LIML ... Not enough time to implement from scratch in python...

In [55]:
#12.21 part a - just parameters, SE estimates are unimplemented
model_1221_a = LinearIVGMM(y,X_a,I_a)
model_1221_a = LinearIVGMM(y,X_a,I_a).fitgmm(model_1221_a.fitstart())
print '________Linear GMM Parameters________'
print model_1221_a

#results seem to be identical...

________Linear GMM Parameters________
[ 3.26801359 -0.09503551  0.11644814 -0.1017274   0.1193108  -0.23054157
  0.16109165]


In [56]:
#12.21 part b
model_1221_b = LinearIVGMM(y,X_c,I_c)
model_1221_b = LinearIVGMM(y,X_c,I_c).fitgmm(model_1221_b.fitstart())
print '________Linear GMM Parameters________'
print model_1221_b

#results seem to be identical...

________Linear GMM Parameters________
[ 4.56743017 -0.12147865  0.15632193 -0.17966148  0.08764663 -0.22482034
  0.08388591]


In [None]:
#12.21 part c - Also, not implemented yet... in python