# logit/probit-регрессия: Качество подгонки

In [None]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Не показывать FutureWarnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the data set; 'Filename.csv' is a placeholder path to replace.
df = pd.read_csv(filepath_or_buffer='Filename.csv')

## Спецификация и подгонка

In [None]:
# Template cell: the `formula` argument is left blank and must be filled in
# before this cell can run.
# Logit specification built from the formula and the data frame `df`.
mod = smf.logit(formula = , data = df)
# Probit alternative (uncomment to use it instead of the logit line above):
# mod = smf.probit(formula = , data = df)
# Fit the model; `res` is what the goodness-of-fit cells below read from.
res = mod.fit()

## Качество подгонки. Базовые показатели
McFadden's $R^2$
$$ 
	R^2_{pseudo}=1-\frac{\log L_{full}}{\log L_{null}}
$$

In [None]:
# McFadden's pseudo R^2 as reported by the fitted results object.
res.prsquared

McFadden’s Adjusted $R^2$
$$ 
	R^2_{adj}=1-\frac{\log L_{full}-k}{\log L_{null}}
$$

In [None]:
# McFadden's adjusted pseudo R^2: the full-model log-likelihood is penalised
# by k before being compared with the null-model log-likelihood.
# NOTE(review): k is taken from res.df_model here -- confirm whether the
# intended k should also count the intercept.
ll_full_penalised = res.llf - res.df_model
1 - ll_full_penalised / res.llnull

Cox & Snell $R^2$
$$
	R^2_{C\& S}=1-\left(\frac{L_{null}}{L_{full}}\right)^{2/n}=1-\left(\frac{\exp(\log L_{null})}{\exp(\log L_{full})}\right)^{2/n}=
	1-\exp\left(\frac{2}{n}(\log L_{null}-\log L_{full})\right)=1-\exp\left(-\frac{LR_{overall}}{n}\right)
$$

In [None]:
# Cox & Snell R^2 = 1 - exp(-LR / n), with LR read from res.llr and
# n from res.nobs.
lr_per_obs = res.llr / res.nobs
1 - np.exp(-lr_per_obs)

Nagelkerke / Cragg & Uhler $R^2$
$$
	R^2_{N,C\& U}=\frac{1-\left(\frac{L_{null}}{L_{full}}\right)^{2/n}}{1-L_{null}^{2/n}}=
	\frac{1-\exp\left(-\frac{LR}{n}\right)}{1-\exp(2\log L_{null}/n)}
$$

In [None]:
# Nagelkerke / Cragg & Uhler R^2: the Cox & Snell value divided by
# 1 - exp(2 * logL_null / n), as in the formula above.
n_obs = res.nobs
cox_snell = 1 - np.exp(-res.llr / n_obs)
cox_snell_bound = 1 - np.exp(2 * res.llnull / n_obs)
cox_snell / cox_snell_bound

Efron's $R^2$
$$
	R^2_{Efron}=1-\frac{\sum(y_i-\hat{P}_i)^2}{\sum(y_i-\bar{y})^2}=1-\frac{\sum(y_i-\hat{P}_i)^2}{n Var(y)}
$$

In [None]:
# Efron's R^2: one minus the ratio of the squared response residuals to
# n * Var(y) computed from the model's endogenous variable.
sq_resid_sum = np.sum(res.resid_response ** 2)
total_variation = res.nobs * np.var(mod.endog)
1 - sq_resid_sum / total_variation

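McKelvey & Zavoina's $R^2$
\begin{align*}
	R^2_{logit}&=\frac{Var(\hat{y}^*)}{Var(\hat{y}^*)+\pi^2/3} & R^2_{probit}&=\frac{Var(\hat{y}^*)}{Var(\hat{y}^*)+1}
\end{align*}
где $\hat{y}^*=x'\hat{\beta}$ — оценённое значение латентной переменной (линейный предиктор), а не предсказанная вероятность $\hat{P}$.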

In [None]:
# Predicted probabilities. Not used in the McKelvey & Zavoina measure below;
# kept defined in case later cells rely on `y_prob` (TODO confirm).
y_prob = res.predict(mod.exog, transform=False)

# McKelvey & Zavoina's R^2 is defined on the latent scale: it needs the
# variance of the fitted linear predictor x'b, not of the predicted
# probabilities. `res.fittedvalues` holds that linear predictor.
y_latent = np.asarray(res.fittedvalues)

# logit: variance of the latent error term is pi^2 / 3
np.var(y_latent)/(np.var(y_latent)+np.pi**2/3)

# probit: variance of the latent error term is 1
# np.var(y_latent)/(np.var(y_latent)+1)