In [1]:
import pandas
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

FNAME = '../cps09mar.dta'
df0 = pandas.read_stata(FNAME)

df = df0[ (df0.female == 0) & (df0.hisp == 1) & (df0.race == 1) ]
list(df.columns.values)
maritalstatus = pandas.get_dummies(df['marital']).loc[:,(1,2,3,4,5,6)]

  from pandas.core import datetools


In [2]:
#part a - set up dataset
y = df['lwage']
X1 = df.loc[:,('educ','exp')]
X1['exp_sq'] = (X1['exp']**2)/100
X1 = pandas.concat([X1, maritalstatus], axis=1)

In [3]:
#part a - baseline model
X1 = sm.add_constant(X1)
model1 = sm.GLS(y,X1).fit()
model1.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.247
Model:,GLS,Adj. R-squared:,0.246
Method:,Least Squares,F-statistic:,154.2
Date:,"Wed, 21 Mar 2018",Prob (F-statistic):,9.42e-253
Time:,23:43:50,Log-Likelihood:,-3612.0
No. Observations:,4230,AIC:,7244.0
Df Residuals:,4220,BIC:,7307.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.1913,0.045,26.645,0.000,1.104,1.279
educ,0.0874,0.003,31.748,0.000,0.082,0.093
exp,0.0284,0.003,10.556,0.000,0.023,0.034
exp_sq,-0.0371,0.005,-7.187,0.000,-0.047,-0.027
1,0.1818,0.024,7.649,0.000,0.135,0.228
2,-0.4790,0.569,-0.841,0.400,-1.595,0.637
3,-0.0388,0.057,-0.687,0.492,-0.150,0.072
4,0.2372,0.173,1.367,0.172,-0.103,0.577
5,0.0744,0.043,1.731,0.084,-0.010,0.159

0,1,2,3
Omnibus:,1563.506,Durbin-Watson:,1.771
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34292.216
Skew:,-1.234,Prob(JB):,0.0
Kurtosis:,16.729,Cond. No.,1790.0


In [4]:
#part b - constrained least squares, where b4 = b7 and b8 = b9
X2 = X1.loc[:,('educ','exp','exp_sq', 2, 3)]
X2['restriction_14'] = X1[1].add(X1[4])
X2['restriction_56'] = X1[5].add(X1[6])
X2 = sm.add_constant(X2)
model2 = sm.OLS(y,X2).fit()
model2.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.247
Model:,OLS,Adj. R-squared:,0.246
Method:,Least Squares,F-statistic:,198.1
Date:,"Wed, 21 Mar 2018",Prob (F-statistic):,7.28e-255
Time:,23:44:10,Log-Likelihood:,-3612.4
No. Observations:,4230,AIC:,7241.0
Df Residuals:,4222,BIC:,7292.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.1889,0.045,26.650,0.000,1.101,1.276
educ,0.0875,0.003,31.848,0.000,0.082,0.093
exp,0.0285,0.003,10.610,0.000,0.023,0.034
exp_sq,-0.0372,0.005,-7.222,0.000,-0.047,-0.027
2,-0.4786,0.569,-0.841,0.401,-1.595,0.637
3,-0.0392,0.057,-0.693,0.488,-0.150,0.072
restriction_14,0.1815,0.024,7.638,0.000,0.135,0.228
restriction_56,0.0560,0.037,1.495,0.135,-0.017,0.130

0,1,2,3
Omnibus:,1561.624,Durbin-Watson:,1.771
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34178.956
Skew:,-1.233,Prob(JB):,0.0
Kurtosis:,16.706,Cond. No.,1790.0


In [14]:
#part c, efficient minimum distance efficient minimum distance
#weight by covariance matrix from b)
print '----Covariance Matrix----'
print model2.cov_HC1
model3 = sm.GLS(y,X2,weights=model2.cov_HC2).fit()
#python is being tempermental about printing...
print '----Coeficients----'
print model3.params
print '----SE----'
print model3.HC0_se

----Covariance Matrix----
[[  2.11851309e-03  -1.09785353e-04  -6.59448541e-05   9.97838010e-05
   -4.41341897e-04  -2.53590784e-04  -3.92031954e-05   9.77835568e-05]
 [ -1.09785353e-04   8.55903772e-06   8.22721703e-07   1.95080304e-08
    2.13354748e-06   1.02847671e-05  -4.47941771e-06  -1.31776192e-05]
 [ -6.59448541e-05   8.22721703e-07   7.99316543e-06  -1.47643878e-05
    1.34283809e-05  -2.46156801e-05  -2.98631137e-05  -3.48510742e-05]
 [  9.97838010e-05   1.95080304e-08  -1.47643878e-05   3.01590996e-05
   -2.22888466e-05   3.11725450e-05   4.08705312e-05   5.01708658e-05]
 [ -4.41341897e-04   2.13354748e-06   1.34283809e-05  -2.22888466e-05
    3.43193027e-04   2.66645543e-04   2.57421498e-04   2.51392807e-04]
 [ -2.53590784e-04   1.02847671e-05  -2.46156801e-05   3.11725450e-05
    2.66645543e-04   3.07753032e-03   5.02950280e-04   4.96232869e-04]
 [ -3.92031954e-05  -4.47941771e-06  -2.98631137e-05   4.08705312e-05
    2.57421498e-04   5.02950280e-04   6.24045550e-04   5.2

In [8]:
#part d, e 

#Constraint partial derivative wrt experience greater than 0:
# b2 + 2*exp*b3 > 0
# b2 > -2exp*b3

#we can check to see if the costraint binds with equality (it does not)
print model2.params['exp'] > -2*model2.params['exp_sq']*50

False


In [11]:
#since the constraint binds, the model becomes -2*b3*exp^2 + b3*exp_sq

X3 = X1.loc[:,('educ', 2, 3)]
X3['exp_restriction'] = X1['exp_sq'].add(-2*X1['exp_sq'])
X3['restriction_14'] = X1[1].add(X1[4])
X3['restriction_56'] = X1[5].add(X1[6])
X3 = sm.add_constant(X3)
model3 = sm.OLS(y,X3).fit()
model3.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.227
Model:,OLS,Adj. R-squared:,0.226
Method:,Least Squares,F-statistic:,206.9
Date:,"Wed, 21 Mar 2018",Prob (F-statistic):,5.21e-232
Time:,23:46:35,Log-Likelihood:,-3668.1
No. Observations:,4230,AIC:,7350.0
Df Residuals:,4223,BIC:,7395.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.4312,0.039,36.862,0.000,1.355,1.507
educ,0.0860,0.003,30.934,0.000,0.081,0.091
2,-0.5504,0.577,-0.954,0.340,-1.681,0.580
3,0.0441,0.057,0.778,0.436,-0.067,0.155
exp_restriction,-0.0147,0.002,-8.870,0.000,-0.018,-0.011
restriction_14,0.2740,0.022,12.235,0.000,0.230,0.318
restriction_56,0.1561,0.037,4.247,0.000,0.084,0.228

0,1,2,3
Omnibus:,1452.036,Durbin-Watson:,1.766
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27961.334
Skew:,-1.147,Prob(JB):,0.0
Kurtosis:,15.385,Cond. No.,880.0
