In [44]:
# Clear the interactive namespace without prompting, so later cells start from a clean state.
%reset -f

In [45]:
import numpy                 as np
import pandas                as pd
import statsmodels.api       as sm

In [46]:
# Load the data from the Excel workbook 'cps09mar.xlsx' (path is relative to the working directory).
dataset = pd.read_excel('cps09mar.xlsx')

In [47]:
# Display the column labels of the loaded frame.
dataset.columns

Index(['age', 'female', 'hisp', 'education', 'earnings', 'hours', 'week',
       'union', 'uncov', 'region', 'race', 'marital'],
      dtype='object')

In [49]:
# [1]
# Derive the regression variables in place on `dataset`:
#   lwage       = natural log of earnings
#   experience  = age - education - 6
#   experience2 = experience squared, scaled by 1/100
# NOTE(review): lwage is the log of the `earnings` column, while the notebook's
# prose calls the regressand log(wage) -- confirm that earnings is the intended measure.
dataset['lwage'] = np.log(dataset['earnings'])
dataset['experience'] = dataset['age'] - dataset['education'] - 6
dataset['experience2'] = dataset['experience'] ** 2 / 100

In [50]:
# Preview the first rows, including the newly added lwage / experience / experience2 columns.
dataset.head()

Unnamed: 0,age,female,hisp,education,earnings,hours,week,union,uncov,region,race,marital,lwage,experience,experience2
0,52,0,0,12,146000,45,52,0,0,1,1,1,11.891362,34,11.56
1,38,0,0,18,50000,45,52,0,0,1,1,1,10.819778,14,1.96
2,38,0,0,14,32000,40,51,0,0,1,1,1,10.373491,18,3.24
3,41,1,0,13,47000,40,52,0,0,1,1,1,10.757903,22,4.84
4,42,0,0,13,161525,50,52,1,0,1,1,1,11.992415,23,5.29


In [51]:
# Restrict the sample to individuals with fewer than 45 years of experience.
dataset = dataset.loc[dataset['experience'].lt(45)]

Estimate the regression
<font size = 4>$$ \log(wage)=\beta_0+\beta_1\,education+\beta_2\,experience+\beta_3\,experience^2/100+e $$</font>

In [64]:
# Regressand for model [1], kept as a one-column DataFrame.
Y = dataset.loc[:, ['lwage']]
# Regressors for model [1]; add_constant supplies the intercept column.
X = sm.add_constant(dataset.loc[:, ['education', 'experience', 'experience2']])

In [65]:
# Fit model [1] by ordinary least squares, then print the summary table
# followed by the sum of squared residuals.
OLS_model = sm.OLS(endog=Y, exog=X)
OLS_1 = OLS_model.fit()
print(OLS_1.summary(), OLS_1.ssr, sep='\n')

                            OLS Regression Results                            
Dep. Variable:                  lwage   R-squared:                       0.261
Model:                            OLS   Adj. R-squared:                  0.261
Method:                 Least Squares   F-statistic:                     5814.
Date:                Fri, 06 Oct 2023   Prob (F-statistic):               0.00
Time:                        21:27:07   Log-Likelihood:                -45028.
No. Observations:               49477   AIC:                         9.006e+04
Df Residuals:                   49473   BIC:                         9.010e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           8.4178      0.018    479.980      

<font size=4>So now we know that $R^2=0.261$ and $SSR=17882.08$.</font>


In [66]:
# [2]
# Design matrix shared by the two auxiliary regressions:
# an intercept plus experience and experience2.
X2 = sm.add_constant(dataset.loc[:, ['experience', 'experience2']])

In [67]:
# First auxiliary regression: lwage on the experience terms in X2.
OLS_model2 = sm.OLS(endog=Y, exog=X2)
OLS_2 = OLS_model2.fit()
print(OLS_2.summary())
# Keep the residuals of this regression for the residual-on-residual step.
res_2 = OLS_2.resid

                            OLS Regression Results                            
Dep. Variable:                  lwage   R-squared:                       0.037
Model:                            OLS   Adj. R-squared:                  0.037
Method:                 Least Squares   F-statistic:                     946.4
Date:                Fri, 06 Oct 2023   Prob (F-statistic):               0.00
Time:                        21:27:15   Log-Likelihood:                -51570.
No. Observations:               49477   AIC:                         1.031e+05
Df Residuals:                   49474   BIC:                         1.032e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          10.2075      0.011    924.044      

In [68]:
# Dependent variable for the second auxiliary regression
# (education on experience and experience2), as a one-column DataFrame.
Y3 = dataset.loc[:, ['education']]

In [69]:
# Second auxiliary regression: education on the experience terms in X2.
OLS_model3 = sm.OLS(endog=Y3, exog=X2)
OLS_3 = OLS_model3.fit()
print(OLS_3.summary())
# Keep the residuals of this regression for the residual-on-residual step.
res_3 = OLS_3.resid

                            OLS Regression Results                            
Dep. Variable:              education   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     591.3
Date:                Fri, 06 Oct 2023   Prob (F-statistic):          1.71e-254
Time:                        21:27:31   Log-Likelihood:            -1.1860e+05
No. Observations:               49477   AIC:                         2.372e+05
Df Residuals:                   49474   BIC:                         2.372e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          14.3906      0.043    336.111      

In [71]:
# Regress the residuals of the lwage regression (res_2) on the residuals of the
# education regression (res_3) plus an intercept, then print the summary table
# followed by the sum of squared residuals.
OLS_model4 = sm.OLS(endog=res_2, exog=sm.add_constant(res_3))
OLS_4 = OLS_model4.fit()
print(OLS_4.summary(), OLS_4.ssr, sep='\n')

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.232
Model:                            OLS   Adj. R-squared:                  0.232
Method:                 Least Squares   F-statistic:                 1.498e+04
Date:                Fri, 06 Oct 2023   Prob (F-statistic):               0.00
Time:                        21:28:35   Log-Likelihood:                -45028.
No. Observations:               49477   AIC:                         9.006e+04
Df Residuals:                   49475   BIC:                         9.008e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       5.876e-15      0.003   2.17e-12      1.0

<font size=4>From the result, we know that the estimated coefficient is $0.1244$, $R^2=0.232$, and $SSR=17882.08$.</font>
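<font size=4>Thus, the estimate is the same as in [1] and the SSR is also the same, but $R^2$ is lower than in [1], because the total sum of squares is now that of the residualized log wage rather than of log wage itself.</font>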