In [50]:
import pandas
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

FNAME = '../cps09mar.dta'
df0 = pandas.read_stata(FNAME)

df = df0[ (df0.female == 0) & (df0.hisp == 1) & (df0.race == 1) ]
list(df.columns.values)
maritalstatus = pandas.get_dummies(df['marital']).loc[:,(1,2,3,4,5,6)]

In [19]:
#part a - set up dataset
y = df['lwage']
X1 = df.loc[:,('educ','exp')]
X1['exp_sq'] = (X1['exp']**2)/100
X1 = pandas.concat([X1, maritalstatus], axis=1)

In [12]:
#part a - baseline model
X1 = sm.add_constant(X1)
model1 = sm.GLS(y,X1).fit()
model1.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.247
Model:,GLS,Adj. R-squared:,0.246
Method:,Least Squares,F-statistic:,154.2
Date:,"Tue, 20 Mar 2018",Prob (F-statistic):,9.42e-253
Time:,21:58:34,Log-Likelihood:,-3612.0
No. Observations:,4230,AIC:,7244.0
Df Residuals:,4220,BIC:,7307.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.1913,0.045,26.645,0.000,1.104,1.279
educ,0.0874,0.003,31.748,0.000,0.082,0.093
exp,0.0284,0.003,10.556,0.000,0.023,0.034
exp_sq,-0.0371,0.005,-7.187,0.000,-0.047,-0.027
1,0.1818,0.024,7.649,0.000,0.135,0.228
2,-0.4790,0.569,-0.841,0.400,-1.595,0.637
3,-0.0388,0.057,-0.687,0.492,-0.150,0.072
4,0.2372,0.173,1.367,0.172,-0.103,0.577
5,0.0744,0.043,1.731,0.084,-0.010,0.159

0,1,2,3
Omnibus:,1563.506,Durbin-Watson:,1.771
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34292.216
Skew:,-1.234,Prob(JB):,0.0
Kurtosis:,16.729,Cond. No.,1790.0


In [32]:
#part b - constrained least squares, where b4 = b7 and b8 = b9
X2 = X1.loc[:,('educ','exp','exp_sq', 2, 3)]
X2['restriction_14'] = X1[1].add(X1[4])
X2['restriction_56'] = X1[5].add(X1[6])
model2 = sm.OLS(y,X2).fit()
model2.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.951
Model:,OLS,Adj. R-squared:,0.951
Method:,Least Squares,F-statistic:,11810.0
Date:,"Tue, 20 Mar 2018",Prob (F-statistic):,0.0
Time:,22:18:47,Log-Likelihood:,-3941.3
No. Observations:,4230,AIC:,7897.0
Df Residuals:,4223,BIC:,7941.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
educ,0.1447,0.002,78.002,0.000,0.141,0.148
exp,0.0651,0.002,26.123,0.000,0.060,0.070
exp_sq,-0.0893,0.005,-17.310,0.000,-0.099,-0.079
2,-0.1771,0.615,-0.288,0.773,-1.383,1.029
3,0.1083,0.061,1.781,0.075,-0.011,0.227
restriction_14,0.1953,0.026,7.609,0.000,0.145,0.246
restriction_56,0.0426,0.041,1.052,0.293,-0.037,0.122

0,1,2,3
Omnibus:,1335.729,Durbin-Watson:,1.799
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19224.345
Skew:,-1.102,Prob(JB):,0.0
Kurtosis:,13.208,Cond. No.,1790.0


In [48]:
#part c, efficient minimum distance efficient minimum distance - weight by covariance matrix from b)

model3 = sm.GLS(y,X2,weights=model2.cov_HC0).fit()
model3.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.951
Model:,GLS,Adj. R-squared:,0.951
Method:,Least Squares,F-statistic:,11810.0
Date:,"Tue, 20 Mar 2018",Prob (F-statistic):,0.0
Time:,22:33:19,Log-Likelihood:,-3941.3
No. Observations:,4230,AIC:,7897.0
Df Residuals:,4223,BIC:,7941.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
educ,0.1447,0.002,78.002,0.000,0.141,0.148
exp,0.0651,0.002,26.123,0.000,0.060,0.070
exp_sq,-0.0893,0.005,-17.310,0.000,-0.099,-0.079
2,-0.1771,0.615,-0.288,0.773,-1.383,1.029
3,0.1083,0.061,1.781,0.075,-0.011,0.227
restriction_14,0.1953,0.026,7.609,0.000,0.145,0.246
restriction_56,0.0426,0.041,1.052,0.293,-0.037,0.122

0,1,2,3
Omnibus:,1335.729,Durbin-Watson:,1.799
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19224.345
Skew:,-1.102,Prob(JB):,0.0
Kurtosis:,13.208,Cond. No.,1790.0


In [44]:
#part d, e 

#Constraint partial derivative wrt experience greater than 0:
# b2 + 2*exp*b3 > 0
# b2 > -2exp*b3

#we can check to see if the costraint binds with equality (it does not)
print model2.params['exp'] > -2*model2.params['exp_sq']*50

False


In [47]:
#since the constraint binds, the model becomes -2*b3*exp^2 + b3*exp_sq

X3 = X1.loc[:,('educ', 2, 3)]
X3['exp_restriction'] = X1['exp_sq'].add(-2*X1['exp_sq'])
X3['restriction_14'] = X1[1].add(X1[4])
X3['restriction_56'] = X1[5].add(X1[6])
model3 = sm.OLS(y,X3).fit()
model3.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.943
Method:,Least Squares,F-statistic:,11770.0
Date:,"Tue, 20 Mar 2018",Prob (F-statistic):,0.0
Time:,22:32:07,Log-Likelihood:,-4258.1
No. Observations:,4230,AIC:,8528.0
Df Residuals:,4224,BIC:,8566.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
educ,0.1761,0.002,115.699,0.000,0.173,0.179
2,-0.2092,0.663,-0.316,0.752,-1.509,1.090
3,0.4594,0.064,7.190,0.000,0.334,0.585
exp_restriction,-0.0386,0.002,-22.125,0.000,-0.042,-0.035
restriction_14,0.4906,0.025,19.752,0.000,0.442,0.539
restriction_56,0.3440,0.042,8.223,0.000,0.262,0.426

0,1,2,3
Omnibus:,955.114,Durbin-Watson:,1.76
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8447.434
Skew:,-0.823,Prob(JB):,0.0
Kurtosis:,9.725,Cond. No.,878.0
