# logit/probit-регрессии: предельные значения

In [39]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

# Do not show FutureWarning messages
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## labour force equation

Для датасета [mroz_Greene](http://www.stern.nyu.edu/~wgreene/Text/Edition7/TableF5-1.csv)
рассмотрим регрессию **LFP на WA, log(FAMINC), WE, KL6, K618, CIT, UN**
трёх спецификаций:

- LPM
- logit
- probit

In [40]:
# Load the mroz_Greene dataset from the CSV file hosted on GitHub.
# Cells that are empty or contain only one or two spaces are read as missing.
df = pd.read_csv(
    'https://raw.githubusercontent.com/artamonoff/econometrica/main/econometrica2/data-csv/TableF5-1.csv',
    na_values=(' ', '', '  '),
)
# Alternative: load the same dataset from a local file
# df = pd.read_csv('TableF5-1.csv', na_values=(' ', '', '  '))

## Спецификация и подгонка

In [41]:
# Linear probability model (LPM): OLS regression of LFP on the regressors,
# fitted with the HC3 covariance estimator for the standard errors
res_lpm_hc = smf.ols(
    formula='LFP~WA+np.log(FAMINC)+WE+KL6+K618+CIT+UN',
    data=df,
).fit(cov_type='HC3')

In [42]:
# Logit model of LFP on the same set of regressors;
# fit() prints the optimizer's convergence report
res_logit = smf.logit(
    formula='LFP~WA+np.log(FAMINC)+WE+KL6+K618+CIT+UN',
    data=df,
).fit()

Optimization terminated successfully.
         Current function value: 0.614104
         Iterations 5


In [43]:
# Probit model of LFP on the same set of regressors;
# fit() prints the optimizer's convergence report
res_probit = smf.probit(
    formula='LFP~WA+np.log(FAMINC)+WE+KL6+K618+CIT+UN',
    data=df,
).fit()

Optimization terminated successfully.
         Current function value: 0.614282
         Iterations 5


In [44]:
# Side-by-side comparison of the three fitted models. In addition to the
# coefficient table, request the number of observations, the log-likelihood
# and the Akaike information criterion for each model.
print(
    summary_col(
        [res_lpm_hc, res_logit, res_probit],
        model_names=['Robust LPM', 'logit', 'probit'],
        float_format="%.3f",
        stars=True,
        info_dict={
            'Observations': lambda x: '{0:d}'.format(int(x.nobs)),
            'Log Likelihood': lambda x: '{:.3f}'.format(x.llf),
            'Akaike Inf. Crit.': lambda x: '{:.3f}'.format(x.aic),
        },
    )
)


                  Robust LPM   logit     probit 
------------------------------------------------
CIT               -0.048     -0.214    -0.126   
                  (0.037)    (0.176)   (0.107)  
Intercept         0.079      -1.856    -1.108   
                  (0.362)    (1.679)   (1.014)  
K618              -0.018     -0.095    -0.057   
                  (0.014)    (0.067)   (0.040)  
KL6               -0.302***  -1.443*** -0.868***
                  (0.034)    (0.194)   (0.112)  
R-squared         0.130                         
R-squared Adj.    0.122                         
UN                -0.004     -0.017    -0.011   
                  (0.006)    (0.026)   (0.016)  
WA                -0.013***  -0.063*** -0.038***
                  (0.002)    (0.013)   (0.008)  
WE                0.038***   0.179***  0.108*** 
                  (0.008)    (0.040)   (0.024)  
np.log(FAMINC)    0.075**    0.341**   0.205**  
                  (0.038)    (0.172)   (0.104)  
Observations      7

## Предельные значения
Для вычисления предельных значений нужно указать параметр `at` метода `get_margeff`:
* `mean` для вычисления предельного значения в средней точке
* `overall` для вычисления среднего по выборке предельного значения

[Документация](https://www.statsmodels.org/dev/generated/statsmodels.discrete.discrete_model.LogitResults.get_margeff.html) 

### Предельные значения для каждого регрессора в средней точке для logit модели

In [45]:
# Marginal effects of the logit model, evaluated at the mean point (at="mean")
margeff_logit = res_logit.get_margeff(at="mean")
# Full report; as the last expression it becomes the cell output
margeff_logit.summary()
# Brief report (alternative)
# margeff_logit.summary_frame()

0,1
Dep. Variable:,LFP
Method:,dydx
At:,mean

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
WA,-0.0154,0.003,-5.006,0.0,-0.021,-0.009
np.log(FAMINC),0.0835,0.042,1.982,0.048,0.001,0.166
WE,0.0439,0.01,4.45,0.0,0.025,0.063
KL6,-0.3527,0.048,-7.395,0.0,-0.446,-0.259
K618,-0.0232,0.016,-1.416,0.157,-0.055,0.009
CIT,-0.0522,0.043,-1.211,0.226,-0.137,0.032
UN,-0.0043,0.006,-0.675,0.5,-0.017,0.008


### Средние по выборке предельные значения для каждого регрессора для logit модели

In [46]:
# Marginal effects of the logit model, averaged over the sample (at="overall")
margeff_logit = res_logit.get_margeff(at="overall")
# Full report; as the last expression it becomes the cell output
margeff_logit.summary()
# Brief report (alternative)
# margeff_logit.summary_frame()

0,1
Dep. Variable:,LFP
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
WA,-0.0134,0.003,-5.324,0.0,-0.018,-0.008
np.log(FAMINC),0.0727,0.036,2.001,0.045,0.001,0.144
WE,0.0382,0.008,4.667,0.0,0.022,0.054
KL6,-0.3074,0.036,-8.642,0.0,-0.377,-0.238
K618,-0.0202,0.014,-1.422,0.155,-0.048,0.008
CIT,-0.0455,0.037,-1.215,0.224,-0.119,0.028
UN,-0.0037,0.005,-0.676,0.499,-0.014,0.007


### Предельные значения для каждого регрессора в средней точке для probit модели

In [47]:
# Marginal effects of the probit model, evaluated at the mean point (at="mean")
margeff_probit = res_probit.get_margeff(at="mean")
# Full report; as the last expression it becomes the cell output
margeff_probit.summary()
# Brief report (alternative)
# margeff_probit.summary_frame()

0,1
Dep. Variable:,LFP
Method:,dydx
At:,mean

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
WA,-0.0149,0.003,-5.053,0.0,-0.021,-0.009
np.log(FAMINC),0.0805,0.041,1.973,0.049,0.001,0.161
WE,0.0425,0.009,4.504,0.0,0.024,0.061
KL6,-0.3405,0.044,-7.738,0.0,-0.427,-0.254
K618,-0.0223,0.016,-1.407,0.16,-0.053,0.009
CIT,-0.0492,0.042,-1.17,0.242,-0.132,0.033
UN,-0.0041,0.006,-0.67,0.503,-0.016,0.008


### Средние по выборке предельные значения для каждого регрессора для probit модели

In [48]:
# Marginal effects of the probit model, averaged over the sample (at="overall")
margeff_probit = res_probit.get_margeff(at="overall")
# Full report; as the last expression it becomes the cell output
margeff_probit.summary()
# Brief report (alternative)
# margeff_probit.summary_frame()

0,1
Dep. Variable:,LFP
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
WA,-0.0134,0.003,-5.314,0.0,-0.018,-0.008
np.log(FAMINC),0.072,0.036,1.987,0.047,0.001,0.143
WE,0.038,0.008,4.686,0.0,0.022,0.054
KL6,-0.3044,0.035,-8.818,0.0,-0.372,-0.237
K618,-0.0199,0.014,-1.412,0.158,-0.048,0.008
CIT,-0.044,0.038,-1.173,0.241,-0.118,0.03
UN,-0.0037,0.005,-0.671,0.502,-0.014,0.007
