In [1]:
# Clear all user-defined names from the kernel namespace (-f: do not ask for confirmation).
%reset -f

In [75]:
import numpy             as np
import statsmodels.api   as sm
import pandas            as pd
import seaborn           as sns

from statsmodels.discrete.discrete_model import LogitResults

from statsmodels.iolib.summary2 import summary_col

In [3]:
# Load the dataset; the path is relative, so the CSV must sit in the working directory.
df = pd.read_csv("mroz9.csv")

In [4]:
# Add an intercept column ('const') to the frame so the models below can
# estimate a constant term, then list the columns to confirm it is present.
df = sm.add_constant(df)
df.columns

Index(['const', 'inlf', 'kidslt6', 'kidsge6', 'age', 'educ', 'unem', 'exper',
       'nwifeinc', 'lnwage', 'expersq'],
      dtype='object')

In [5]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,const,inlf,kidslt6,kidsge6,age,educ,unem,exper,nwifeinc,lnwage,expersq
0,1.0,1,1,0,32,12,5.0,14,10.91006,1.210154,196
1,1.0,1,0,2,30,12,11.0,5,19.499981,0.328512,25
2,1.0,1,1,3,35,12,5.0,15,12.03991,1.514138,225
3,1.0,1,0,3,34,12,5.0,6,6.799996,0.092123,36
4,1.0,1,1,2,31,14,9.5,7,20.100058,1.524272,49


In [7]:
# Summary statistics for every column. Note that lnwage has fewer non-missing
# observations (428) than the other columns (753); it is not used as a regressor below.
df.describe()

Unnamed: 0,const,inlf,kidslt6,kidsge6,age,educ,unem,exper,nwifeinc,lnwage,expersq
count,753.0,753.0,753.0,753.0,753.0,753.0,753.0,753.0,753.0,428.0,753.0
mean,1.0,0.568393,0.237716,1.353254,42.537849,12.286853,8.623506,10.63081,20.128964,1.190173,178.038513
std,0.0,0.49563,0.523959,1.319874,8.072574,2.280246,3.114934,8.06913,11.634797,0.723198,249.630849
min,1.0,0.0,0.0,0.0,30.0,5.0,3.0,0.0,-0.029057,-2.054164,0.0
25%,1.0,0.0,0.0,0.0,36.0,12.0,7.5,4.0,13.02504,0.816509,16.0
50%,1.0,1.0,0.0,1.0,43.0,12.0,7.5,9.0,17.700001,1.247574,81.0
75%,1.0,1.0,0.0,2.0,49.0,13.0,11.0,15.0,24.466,1.603571,225.0
max,1.0,1.0,3.0,8.0,60.0,17.0,14.0,45.0,96.0,3.218876,2025.0


In [None]:
# Design matrix (intercept plus six regressors) and dependent variable `inlf`,
# shared by the OLS, Probit and Logit fits below.
X = df[['const', 'nwifeinc', 'educ', 'exper', 'expersq', 'age', 'kidslt6']]
Y = df[['inlf']]

# OLS coefficients and Marginal effect table
## Since the OLS coefficients are themselves the marginal effects, there is no need to output a separate marginal-effect table

In [82]:
# Linear probability model: regress Y on X by OLS, requesting the
# heteroskedasticity-robust HC1 covariance for the standard errors.
ols_mod = sm.OLS(Y, X)
ols_res = ols_mod.fit(cov_type='HC1')

# Print the full regression summary as plain text.
print(ols_res.summary())

                            OLS Regression Results                            
Dep. Variable:                   inlf   R-squared:                       0.263
Model:                            OLS   Adj. R-squared:                  0.257
Method:                 Least Squares   F-statistic:                     72.28
Date:                Mon, 24 May 2021   Prob (F-statistic):           5.53e-71
Time:                        12:26:20   Log-Likelihood:                -424.38
No. Observations:                 753   AIC:                             862.8
Df Residuals:                     746   BIC:                             895.1
Df Model:                           6                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.6509      0.140      4.634      0.0

# Probit coefficients and Marginal effect table

In [83]:
# Probit model on the same Y and X, estimated by maximum likelihood with
# the HC1 robust covariance for the standard errors.
probit_mod = sm.Probit(Y, X)
probit_res = probit_mod.fit(cov_type='HC1')

# Print the coefficient table; these are index coefficients, not marginal effects.
print(probit_res.summary())

Optimization terminated successfully.
         Current function value: 0.533394
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:                   inlf   No. Observations:                  753
Model:                         Probit   Df Residuals:                      746
Method:                           MLE   Df Model:                            6
Date:                Mon, 24 May 2021   Pseudo R-squ.:                  0.2199
Time:                        12:26:30   Log-Likelihood:                -401.65
converged:                       True   LL-Null:                       -514.87
Covariance Type:                  HC1   LLR p-value:                 4.369e-46
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.4634      0.449      1.031      0.302      -0.417       1.344
nwifeinc      -0.0118      0.

In [86]:
# Marginal effects of the probit fit using get_margeff() defaults; the summary
# reports them as method 'dydx' evaluated 'overall', with no row for the constant.
probit_me = probit_res.get_margeff()
print(probit_me.summary())

       Probit Marginal Effects       
Dep. Variable:                   inlf
Method:                          dydx
At:                           overall
                dy/dx    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
nwifeinc      -0.0036      0.002     -2.253      0.024      -0.007      -0.000
educ           0.0387      0.007      5.271      0.000       0.024       0.053
exper          0.0368      0.005      7.092      0.000       0.027       0.047
expersq       -0.0006      0.000     -3.205      0.001      -0.001      -0.000
age           -0.0167      0.002     -7.713      0.000      -0.021      -0.012
kidslt6       -0.2652      0.031     -8.446      0.000      -0.327      -0.204


# Logit coefficients and Marginal effect table

In [87]:
# Logit model on the same Y and X, estimated by maximum likelihood with
# the HC1 robust covariance for the standard errors.
logit_mod = sm.Logit(Y, X)
logit_res = logit_mod.fit(cov_type='HC1')

# Print the coefficient table; these are index coefficients, not marginal effects.
print(logit_res.summary())

Optimization terminated successfully.
         Current function value: 0.533983
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                   inlf   No. Observations:                  753
Model:                          Logit   Df Residuals:                      746
Method:                           MLE   Df Model:                            6
Date:                Mon, 24 May 2021   Pseudo R-squ.:                  0.2191
Time:                        12:26:43   Log-Likelihood:                -402.09
converged:                       True   LL-Null:                       -514.87
Covariance Type:                  HC1   LLR p-value:                 6.756e-46
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.7502      0.758      0.990      0.322      -0.736       2.236
nwifeinc      -0.0210      0.

In [88]:
# Marginal effects of the logit fit using get_margeff() defaults; the summary
# reports them as method 'dydx' evaluated 'overall', with no row for the constant.
logit_me = logit_res.get_margeff()
print(logit_me.summary())

        Logit Marginal Effects       
Dep. Variable:                   inlf
Method:                          dydx
At:                           overall
                dy/dx    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
nwifeinc      -0.0038      0.002     -2.350      0.019      -0.007      -0.001
educ           0.0389      0.007      5.224      0.000       0.024       0.054
exper          0.0364      0.005      7.050      0.000       0.026       0.047
expersq       -0.0006      0.000     -3.200      0.001      -0.001      -0.000
age           -0.0165      0.002     -7.572      0.000      -0.021      -0.012
kidslt6       -0.2625      0.032     -8.220      0.000      -0.325      -0.200


In [89]:
# Joint Wald test that experience does not matter in the logit model, i.e. that
# the coefficients on exper and expersq are both zero. Each restriction is
# stated directly; this imposes exactly the same pair of linear restrictions as
# writing '(exper = expersq), (exper = 0)', but the null is readable at a glance.
hypotheses = '(exper = 0), (expersq = 0)'
# NOTE: despite the variable name, the result printed below is a chi-square
# Wald statistic, not an F statistic.
f_test = logit_res.wald_test(hypotheses)

print(f_test)

<Wald test (chi2): statistic=[[90.87993652]], p-value=1.843624736087094e-20, df_denom=2>


# GLM coefficient comparison table

In [90]:
# Extra footer rows for the comparison table: each entry maps a row label to a
# formatter that is applied to a fitted results object.
info_dict = {
    'R2': lambda res: f"{res.rsquared:.2f}",
    'Pseudo R2': lambda res: f"{res.prsquared:.2f}",
    'No. observations': lambda res: f"{int(res.nobs):d}",
}

# Side-by-side coefficient table for the three fits, with significance stars
# and rows listed in the same order as the regressors in X.
results_table = summary_col(
    results=[ols_res, probit_res, logit_res],
    model_names=['OLS', 'Probit', 'Logit'],
    regressor_order=['const', 'nwifeinc', 'educ',
                     'exper', 'expersq', 'age',
                     'kidslt6'],
    info_dict=info_dict,
    stars=True,
    float_format='%0.4f',
)

results_table.add_title('GLM coefficient comparison')

print(results_table)

           GLM coefficient comparison
                    OLS       Probit     Logit   
-------------------------------------------------
const            0.6509***  0.4634     0.7502    
                 (0.1405)   (0.4493)   (0.7581)  
nwifeinc         -0.0033**  -0.0118**  -0.0210** 
                 (0.0015)   (0.0053)   (0.0091)  
educ             0.0373***  0.1287***  0.2176*** 
                 (0.0072)   (0.0256)   (0.0441)  
exper            0.0391***  0.1221***  0.2037*** 
                 (0.0058)   (0.0187)   (0.0319)  
expersq          -0.0006*** -0.0019*** -0.0031***
                 (0.0002)   (0.0006)   (0.0010)  
age              -0.0169*** -0.0553*** -0.0922***
                 (0.0023)   (0.0078)   (0.0135)  
kidslt6          -0.2655*** -0.8809*** -1.4682***
                 (0.0317)   (0.1158)   (0.2023)  
R-squared        0.2633                          
R-squared Adj.   0.2573                          
R2               0.26                            
Pseudo R2   

# GLM ME table
## Note: there is no built-in marginal-effect comparison table, so I build one by hand below

In [91]:
# Marginal-effect comparison across the three models.
# For OLS the slope coefficients are used directly as the marginal effects.
# The intercept is dropped by name rather than by position, so the table does
# not silently depend on 'const' being the first column of the design matrix.
ols_slopes = ols_res.params.drop('const')
me_OLS_col = ols_slopes.to_numpy()
p_OLS_col  = ols_res.pvalues.drop('const').to_numpy()
me_Pro_col = probit_me.margeff
p_Pro_col  = probit_me.pvalues
me_Log_col = logit_me.margeff
p_Log_col  = logit_me.pvalues

# Row labels come from the fitted parameter index instead of a hand-typed list,
# so they cannot drift out of sync with the regressors actually in the model.
# Assumes the probit/logit marginal-effect arrays follow the same regressor
# order as the fitted parameters with the constant excluded (as the printed
# marginal-effect summaries show) -- verify if the design matrix changes.
df1 = pd.DataFrame({
    'vari': list(ols_slopes.index),
    'OLS ME': me_OLS_col.round(4),
    'OLS p-val': p_OLS_col.round(2),
    'Probit ME': me_Pro_col.round(4),
    'Probit p-val': p_Pro_col.round(2),
    'Logit ME': me_Log_col.round(4),
    'Logit p-val': p_Log_col.round(2),
})

In [92]:
# Display the assembled marginal-effect comparison table.
df1

Unnamed: 0,vari,OLS ME,OLS p-val,Probit ME,Probit p-val,Logit ME,Logit p-val
0,nwifeinc,-0.0033,0.03,-0.0036,0.02,-0.0038,0.02
1,educ,0.0373,0.0,0.0387,0.0,0.0389,0.0
2,exper,0.0391,0.0,0.0368,0.0,0.0364,0.0
3,expersq,-0.0006,0.0,-0.0006,0.0,-0.0006,0.0
4,age,-0.0169,0.0,-0.0167,0.0,-0.0165,0.0
5,kidslt6,-0.2655,0.0,-0.2652,0.0,-0.2625,0.0


In [93]:
# Remove any previous HTML export, then convert Week10_Python.ipynb to HTML with nbconvert.
!rm -rf Week10_Python.html
!jupyter nbconvert --to html Week10_Python.ipynb

[NbConvertApp] Converting notebook Week10_Python.ipynb to html
[NbConvertApp] Writing 609519 bytes to Week10_Python.html
