In [67]:
import pandas
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

FNAME = '../cps09mar.dta'
df = pandas.read_stata(FNAME)

In [68]:
list(df.columns.values)
maritalstatus = pandas.get_dummies(df['marital']).loc[:,(1,2,3,4,5,6)]

In [69]:
#part a set up
y = df['lwage']
X1 = df.loc[:,('educ','exp')]
X1['exp_sq'] = X1['exp']**2/100
X1 = pandas.concat([X1, maritalstatus], axis=1)

In [101]:
#part a model
X1 = sm.add_constant(X1)
model1 = sm.GLS(y,X1).fit()
model1.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.249
Model:,GLS,Adj. R-squared:,0.249
Method:,Least Squares,F-statistic:,1868.0
Date:,"Sun, 18 Mar 2018",Prob (F-statistic):,0.0
Time:,21:24:37,Log-Likelihood:,-44860.0
No. Observations:,50742,AIC:,89740.0
Df Residuals:,50732,BIC:,89830.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.9382,0.016,57.226,0.000,0.906,0.970
educ,0.1093,0.001,111.885,0.000,0.107,0.111
exp,0.0303,0.001,35.472,0.000,0.029,0.032
exp_sq,-0.0481,0.002,-28.010,0.000,-0.052,-0.045
1,0.1726,0.007,23.087,0.000,0.158,0.187
2,-0.0205,0.046,-0.444,0.657,-0.111,0.070
3,0.0450,0.025,1.832,0.067,-0.003,0.093
4,-0.0195,0.023,-0.831,0.406,-0.065,0.026
5,0.0334,0.011,3.142,0.002,0.013,0.054

0,1,2,3
Omnibus:,20028.317,Durbin-Watson:,1.744
Prob(Omnibus):,0.0,Jarque-Bera (JB):,518194.773
Skew:,-1.331,Prob(JB):,0.0
Kurtosis:,18.428,Cond. No.,518.0


In [98]:
#constrained least squares b4 = b7 and b8 = b9
X2 = X1.loc[:,('educ','exp','exp_sq', 2, 3)]

X2['r1'] = X1[1].add(X1[4])
X2['r2'] = X1[5].add(X1[6])
#print X2
model2 = sm.OLS(y,X2).fit()
model2.summary()

0,1,2,3
Dep. Variable:,lwage,R-squared:,0.96
Model:,OLS,Adj. R-squared:,0.96
Method:,Least Squares,F-statistic:,173800.0
Date:,"Sun, 18 Mar 2018",Prob (F-statistic):,0.0
Time:,21:00:09,Log-Likelihood:,-46478.0
No. Observations:,50742,AIC:,92970.0
Df Residuals:,50735,BIC:,93030.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
educ,0.1559,0.001,281.903,0.000,0.155,0.157
exp,0.0507,0.001,63.031,0.000,0.049,0.052
exp_sq,-0.0803,0.002,-47.948,0.000,-0.084,-0.077
2,0.0353,0.048,0.741,0.459,-0.058,0.129
3,0.1293,0.025,5.109,0.000,0.080,0.179
r1,0.1846,0.008,23.953,0.000,0.169,0.200
r2,0.0562,0.010,5.406,0.000,0.036,0.077

0,1,2,3
Omnibus:,18869.717,Durbin-Watson:,1.774
Prob(Omnibus):,0.0,Jarque-Bera (JB):,407365.004
Skew:,-1.273,Prob(JB):,0.0
Kurtosis:,16.645,Cond. No.,517.0


In [89]:
#part c, efficient minimum distance efficient minimum distance 

print model2.cov_HC0
model3 = sm.GLS(y,X2,weights=model2.cov_HC0).fit()
model3.summary()

[[  3.28698036e-07  -3.69026338e-07   7.51619439e-07  -1.66450445e-06
   -5.83668907e-07  -9.20579794e-07  -6.50841343e-07]
 [ -3.69026338e-07   8.32773992e-07  -1.79952644e-06  -1.41162013e-06
   -3.04107562e-06  -2.46671789e-06  -2.73251931e-06]
 [  7.51619382e-07  -1.79952644e-06   4.19840808e-06   2.98108353e-06
    5.34867650e-06   4.02226806e-06   4.18326590e-06]
 [ -1.66450457e-06  -1.41162047e-06   2.98108353e-06   1.66969269e-03
    3.62606552e-05   3.65487649e-05   3.57782774e-05]
 [ -5.83669078e-07  -3.04107562e-06   5.34867650e-06   3.62606552e-05
    6.74950134e-04   4.31876506e-05   4.45901569e-05]
 [ -9.20579680e-07  -2.46671789e-06   4.02226760e-06   3.65487649e-05
    4.31876506e-05   5.48057615e-05   4.46921840e-05]
 [ -6.50841343e-07  -2.73251931e-06   4.18326590e-06   3.57782737e-05
    4.45901569e-05   4.46921840e-05   9.80389887e-05]]


0,1,2,3
Dep. Variable:,lwage,R-squared:,0.96
Model:,GLS,Adj. R-squared:,0.96
Method:,Least Squares,F-statistic:,173800.0
Date:,"Sun, 18 Mar 2018",Prob (F-statistic):,0.0
Time:,19:50:51,Log-Likelihood:,-46478.0
No. Observations:,50742,AIC:,92970.0
Df Residuals:,50735,BIC:,93030.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
educ,0.1559,0.001,281.903,0.000,0.155,0.157
exp,0.0507,0.001,63.031,0.000,0.049,0.052
exp_sq,-0.0803,0.002,-47.948,0.000,-0.084,-0.077
2,0.0353,0.048,0.741,0.459,-0.058,0.129
3,0.1293,0.025,5.109,0.000,0.080,0.179
r1,0.1846,0.008,23.953,0.000,0.169,0.200
r2,0.0562,0.010,5.406,0.000,0.036,0.077

0,1,2,3
Omnibus:,18869.717,Durbin-Watson:,1.774
Prob(Omnibus):,0.0,Jarque-Bera (JB):,407365.004
Skew:,-1.273,Prob(JB):,0.0
Kurtosis:,16.645,Cond. No.,517.0


In [94]:
#part 4 find coefficients nonlinear constraints
from scipy.optimize import minimize 

y_matrix  = y.as_matrix()
scipy.optimize.lsq_linear(X2.as_matrix(), y_matrix )


In [97]:
#best way to do this without defining OLS from scratch 
#i.e. if constraint binds, the estimates will be different...


# b4 + 2*exp*b3 > 0
#since, exp > 0 
#b4 + 2*50*exp > 0

X3 = X1.loc[:,('educ', 2, 3)]
X3['exp'] = X1['exp'].add(-100*X1['exp_sq'])
X3['r1'] = X1[1].add(X1[4])
X3['r2'] = X1[5].add(X1[6])
model3 = sm.OLS(y,X3).fit()
model3.summary()


0,1,2,3
Dep. Variable:,lwage,R-squared:,0.957
Model:,OLS,Adj. R-squared:,0.957
Method:,Least Squares,F-statistic:,187400.0
Date:,"Sun, 18 Mar 2018",Prob (F-statistic):,0.0
Time:,19:58:29,Log-Likelihood:,-48406.0
No. Observations:,50742,AIC:,96820.0
Df Residuals:,50736,BIC:,96880.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
educ,0.1795,0.000,421.666,0.000,0.179,0.180
2,0.1422,0.049,2.875,0.004,0.045,0.239
3,0.3181,0.026,12.184,0.000,0.267,0.369
exp,-0.0002,5.17e-06,-40.452,0.000,-0.000,-0.000
r1,0.3705,0.007,50.072,0.000,0.356,0.385
r2,0.2684,0.010,26.273,0.000,0.248,0.288

0,1,2,3
Omnibus:,17540.744,Durbin-Watson:,1.78
Prob(Omnibus):,0.0,Jarque-Bera (JB):,332779.5
Skew:,-1.184,Prob(JB):,0.0
Kurtosis:,15.32,Cond. No.,14600.0


In [None]:
R = [0]
model1.wald_test