# probit-регрессия: z-тест

In [4]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_params # table of coefficient test results
from scipy.stats import norm # critical values of the Gaussian distribution

# Do not show FutureWarnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
# Load the loan-application dataset from the course repository.
# Empty cells and cells holding one or two spaces are treated as missing values.
DATA_URL = 'https://raw.githubusercontent.com/artamonoff/econometrica/main/econometrica2/data-csv/loanapp.csv'
BLANK_MARKERS = (' ', '', '  ')
df = pd.read_csv(DATA_URL, na_values=BLANK_MARKERS)
# To read the data from a local file instead:
# df = pd.read_csv('loanapp.csv')
df

Unnamed: 0,occ,loanamt,action,msa,suffolk,appinc,typur,unit,married,dep,...,approve,mortno,mortperf,mortlat1,mortlat2,chist,multi,loanprc,thick,white
0,1,89,1,1120,0,72,0,1.0,0.0,0.0,...,1,0,1,0,0,1,0.0,0.754237,0.0,1
1,1,128,3,1120,0,74,0,1.0,1.0,1.0,...,0,0,1,0,0,1,0.0,0.800000,1.0,1
2,1,128,1,1120,0,84,3,1.0,0.0,0.0,...,1,0,1,0,0,1,0.0,0.895105,1.0,1
3,1,66,1,1120,0,36,0,1.0,1.0,0.0,...,1,0,1,0,0,0,0.0,0.600000,0.0,1
4,1,120,1,1120,0,59,8,1.0,1.0,0.0,...,1,0,1,0,0,1,0.0,0.895522,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1984,1,158,1,1120,0,96,0,1.0,1.0,0.0,...,1,0,1,0,0,1,0.0,0.897727,0.0,1
1985,1,35,1,1120,0,169,1,1.0,1.0,0.0,...,1,1,0,0,0,1,0.0,0.111111,0.0,1
1986,2,225,1,1120,0,49,0,2.0,1.0,0.0,...,1,0,1,0,0,1,1.0,1.000000,0.0,1
1987,1,98,1,1120,1,110,1,1.0,0.0,0.0,...,1,1,0,0,0,1,0.0,0.455814,0.0,1


## Спецификация и подгонка модели
Для датасета `loanapp`
рассмотрим probit-регрессию **approve на appinc, mortno, unem, dep, male, married, yjob, self**

In [6]:
# Model specification: probit of approve on the regressors listed below
regressors = ['appinc', 'mortno', 'unem', 'dep', 'male', 'married', 'yjob', 'self']
formula = 'approve~' + '+'.join(regressors)
mod = smf.probit(formula=formula, data=df)
res = mod.fit() # fit the model
res.summary() # estimation report

Optimization terminated successfully.
         Current function value: 0.363578
         Iterations 6


0,1,2,3
Dep. Variable:,approve,No. Observations:,1971.0
Model:,Probit,Df Residuals:,1962.0
Method:,MLE,Df Model:,8.0
Date:,"Fri, 17 Nov 2023",Pseudo R-squ.:,0.02895
Time:,15:52:23,Log-Likelihood:,-716.61
converged:,True,LL-Null:,-737.98
Covariance Type:,nonrobust,LLR p-value:,9.865e-07

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.1418,0.109,10.512,0.000,0.929,1.355
appinc,-0.0005,0.000,-1.375,0.169,-0.001,0.000
mortno,0.4071,0.087,4.703,0.000,0.237,0.577
unem,-0.0308,0.016,-1.909,0.056,-0.062,0.001
dep,-0.0828,0.035,-2.355,0.019,-0.152,-0.014
male,0.0200,0.099,0.201,0.841,-0.175,0.215
married,0.2208,0.086,2.552,0.011,0.051,0.390
yjob,-0.0007,0.035,-0.020,0.984,-0.069,0.068
self,-0.1583,0.107,-1.483,0.138,-0.367,0.051


## z-тест
Пусть уровень значимости $\alpha=0.1=10\%$

In [None]:
# Coefficient table for the z-test.
# use_t=False labels the statistic columns 'z' / 'P>|z|' (the default use_t=True
# prints 't' / 'P>|t|'); alpha=0.1 makes the interval bounds 90%, matching the
# 10% significance level used in this section (the default alpha=0.05 gives 95%).
summary_params(res, alpha=0.1, use_t=False)

### критическое значение ($z_{cr}$) стандартного гауссова распределения

In [None]:
# Two-sided test: the critical value is the (1 - alpha/2) quantile of the
# standard normal distribution.
sign_level = 0.1
upper_quantile_level = 1 - sign_level/2
norm.ppf(upper_quantile_level)

### Какие коэффициенты значимы? 

Отвергаем нулевую гипотезу при $|z|>z_{cr}$, `коэффициент значим`

Не отвергаем нулевую гипотезу при $|z|<z_{cr}$, `коэффициент незначим`

### Вывод о значимости коэффициентов, используя критическое значение

значимы: `mortno`, `unem`, `dep`, `married`

## Элементы z-теста

In [7]:
# z-statistics of all coefficients, rounded to 3 decimal places
res.tvalues.round(decimals=3)

Intercept    10.512
appinc       -1.375
mortno        4.703
unem         -1.909
dep          -2.355
male          0.201
married       2.552
yjob         -0.020
self         -1.483
dtype: float64

In [8]:
# P-values of the z-statistics, rounded to 4 decimal places
res.pvalues.round(decimals=4)

Intercept    0.0000
appinc       0.1691
mortno       0.0000
unem         0.0563
dep          0.0185
male         0.8408
married      0.0107
yjob         0.9840
self         0.1382
dtype: float64

### Какие коэффициенты значимы? 

Отвергаем нулевую гипотезу при $P<\alpha$, `коэффициент значим`

Не отвергаем нулевую гипотезу при $P>\alpha$, `коэффициент незначим`

### Вывод о значимости коэффициентов, используя $P$-значение

значимы: `mortno`, `unem`, `dep`, `married`