# logit-модель: подгонка

In [7]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Не показывать FutureWarnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [8]:
# Load the loanapp dataset from the GitHub-hosted CSV.
# Empty and whitespace-only fields are parsed as missing values (NaN).
df = pd.read_csv(
    'https://raw.githubusercontent.com/artamonoff/econometrica/main/econometrica2/data-csv/loanapp.csv',
    na_values=(' ', '', '  '),
)
# Offline alternative: read a local copy of the file instead
# (same na_values so that blanks are parsed identically)
# df = pd.read_csv('loanapp.csv', na_values=(' ', '', '  '))
df

Unnamed: 0,occ,loanamt,action,msa,suffolk,appinc,typur,unit,married,dep,...,approve,mortno,mortperf,mortlat1,mortlat2,chist,multi,loanprc,thick,white
0,1,89,1,1120,0,72,0,1.0,0.0,0.0,...,1,0,1,0,0,1,0.0,0.754237,0.0,1
1,1,128,3,1120,0,74,0,1.0,1.0,1.0,...,0,0,1,0,0,1,0.0,0.800000,1.0,1
2,1,128,1,1120,0,84,3,1.0,0.0,0.0,...,1,0,1,0,0,1,0.0,0.895105,1.0,1
3,1,66,1,1120,0,36,0,1.0,1.0,0.0,...,1,0,1,0,0,0,0.0,0.600000,0.0,1
4,1,120,1,1120,0,59,8,1.0,1.0,0.0,...,1,0,1,0,0,1,0.0,0.895522,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1984,1,158,1,1120,0,96,0,1.0,1.0,0.0,...,1,0,1,0,0,1,0.0,0.897727,0.0,1
1985,1,35,1,1120,0,169,1,1.0,1.0,0.0,...,1,1,0,0,0,1,0.0,0.111111,0.0,1
1986,2,225,1,1120,0,49,0,2.0,1.0,0.0,...,1,0,1,0,0,1,1.0,1.000000,0.0,1
1987,1,98,1,1120,1,110,1,1.0,0.0,0.0,...,1,1,0,0,0,1,0.0,0.455814,0.0,1


## Спецификация и подгонка модели
Для датасета `loanapp`
рассмотрим logit-регрессию **approve на appinc, mortno, unem, dep, male, married, yjob, self**

Спецификация: 
$$P(approve=1)=\Lambda(\beta_0+\beta_1appinc+\beta_2mortno+\beta_3unem+\beta_4dep+\beta_5male+\beta_6married+\beta_7yjob+\beta_8self)$$

Альтернативная спецификация:
$$logit(P(approve=1))=\beta_0+\beta_1appinc+\beta_2mortno+\beta_3unem+\beta_4dep+\beta_5male+\beta_6married+\beta_7yjob+\beta_8self$$

Здесь $$logit(P(approve=1))=\log\frac{P(approve=1)}{1-P(approve=1)}=\log\frac{P(approve=1)}{P(approve=0)}$$


In [9]:
# Model specification: logit of `approve` on the eight regressors
mod = smf.logit(
    'approve~appinc+mortno+unem+dep+male+married+yjob+self',
    df,
)
# Fit the model; the fit prints the optimizer's termination message
res = mod.fit()

Optimization terminated successfully.
         Current function value: 0.363490
         Iterations 7


In [13]:
res.summary() # summary report of the fitted logit model

0,1,2,3
Dep. Variable:,approve,No. Observations:,1971.0
Model:,Logit,Df Residuals:,1962.0
Method:,MLE,Df Model:,8.0
Date:,"Fri, 17 Nov 2023",Pseudo R-squ.:,0.02919
Time:,15:58:44,Log-Likelihood:,-716.44
converged:,True,LL-Null:,-737.98
Covariance Type:,nonrobust,LLR p-value:,8.485e-07

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.9315,0.199,9.689,0.000,1.541,2.322
appinc,-0.0010,0.001,-1.472,0.141,-0.002,0.000
mortno,0.7868,0.172,4.571,0.000,0.449,1.124
unem,-0.0549,0.029,-1.866,0.062,-0.113,0.003
dep,-0.1608,0.065,-2.486,0.013,-0.288,-0.034
male,0.0300,0.186,0.161,0.872,-0.334,0.394
married,0.4246,0.162,2.615,0.009,0.106,0.743
yjob,-0.0065,0.065,-0.099,0.921,-0.134,0.121
self,-0.2804,0.197,-1.426,0.154,-0.666,0.105


In [11]:
# estimated coefficients of the fitted model
res.params

Intercept    1.931493
appinc      -0.001002
mortno       0.786834
unem        -0.054946
dep         -0.160826
male         0.029972
married      0.424632
yjob        -0.006462
self        -0.280392
dtype: float64

In [12]:
# estimated coefficients of the fitted model, rounded to 3 decimal places
res.params.round(3)

Intercept    1.931
appinc      -0.001
mortno       0.787
unem        -0.055
dep         -0.161
male         0.030
married      0.425
yjob        -0.006
self        -0.280
dtype: float64

## Интерпретация
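- при увеличении дохода (`appinc`) на единицу отношение шансов уменьшается на 0.1\%
- для людей без ипотечной кредитной истории отношение шансов больше на 78.7\%, чем для людей с ипотечной кредитной историей
- при увеличении безработицы в отрасли (`unem`) на единицу отношение шансов уменьшается на 5.5\%
- при увеличении количества иждивенцев (`dep`) на единицу отношение шансов уменьшается на 16.1\%
- для мужчин отношение шансов больше на 3\%, чем для женщин
- для женатых (замужних) людей отношение шансов больше на 42.5\%, чем для холостых
- при увеличении стажа на текущей работе (`yjob`) на единицу отношение шансов уменьшается на 0.6\%
- для самозанятых людей отношение шансов меньше на 28\%, чем для несамозанятых

**Замечание.** Под «отношением шансов» здесь понимается $P(approve=1)/P(approve=0)$. Проценты выше получены по приближению $100\cdot\hat\beta_j\%$, которое достаточно точно только при малых $|\hat\beta_j|$. Точное относительное изменение шансов равно $100\cdot(e^{\hat\beta_j}-1)\%$: например, для `mortno` $e^{0.787}-1\approx 1.20$, т.е. шансы больше примерно на 120\%, а для `married` $e^{0.425}-1\approx 0.53$, т.е. примерно на 53\%.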

### Описание переменных датасета `loanapp`

Переменные, используемые в модели:
	
- `approve` - бинарная, 1 если кредитная заявка одобрена 
- `appinc` - доход заявителя  (в \$1000)
- `mortno` -  бинарная, 1 если нет ипотечной кредитной истории 
- `unem` - уровень безработицы в отрасли в \% 
- `dep` - количество иждивенцев 
- `male` - бинарная, 1 если заявитель мужчина 
- `married` - бинарная, 1 если заявитель состоит в браке 
- `yjob` - стаж на текущей работе (в годах) 
- `self` - бинарная, 1 если самозанятый 