In [3]:
import statsmodels.discrete as sd
import statsmodels.api as sm
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import datasets
iris = datasets.load_iris()

Regression where underlying variable has only 2 possible values = Binary classification.
Same case as logistic regression, MLE estimator.

        $P(Y=1|X) = \phi(X^{T} \beta)$; $\phi$ cdf
        
        $Y^{*} = X^{T} \beta + \epsilon$, $Y = 1_{Y*>0}$

Assumption: iid (normally) error terms.

McFaddens pseudo R²: $1 - \frac{ln \hat{L} (M_{Full})}{ln \hat{L} (M_{Intercept})}$ likelihood of model of interest/the one where all coeffs except intercept one are restricted to 0.
R² of McKelvey and Zaloinas, AIC,...
        
If many observations per cell, Berkson's minimum $\chi^{2}$ method.

http://blog.yhat.com/posts/logistic-regression-and-python.html

In [10]:
df_iris = pd.DataFrame({'sepal length': iris.data[:,0], 'sepal width': iris.data[:,1], 'petal length': iris.data[:,2], 'petal width': iris.data[:,3], 'target': iris.target})
df_iris['target binary'] = np.where(df_iris['target']>0, 1, 0)
spector_data = sm.datasets.spector.load_pandas()
spector_data.exog = sm.tools.add_constant(spector_data.exog)

#Endog: our binary variable, exog explaining factors.
#missing: none by default, can take drop
#can replace fit by fit_regularized

In [49]:
# Logit Model
logit_mod = sd.discrete_model.Logit(spector_data.endog, spector_data.exog, missing='none')
logit_res = logit_mod.fit()
print(logit_res.summary())

print("\nConfidence intervals", logit_res.conf_int())

print("\nOdd ratios")

params = logit_res.params
conf = logit_res.conf_int()
conf['Odd Ratio'] = params
conf.columns = ['2.5%', '97.5%', 'Odd Ratio']
print(np.exp(conf))

l_pred = logit_res.predict(spector_data.exog)
#print(l_pred)

Optimization terminated successfully.
         Current function value: 0.402801
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                  GRADE   No. Observations:                   32
Model:                          Logit   Df Residuals:                       28
Method:                           MLE   Df Model:                            3
Date:                Wed, 18 Sep 2019   Pseudo R-squ.:                  0.3740
Time:                        15:55:10   Log-Likelihood:                -12.890
converged:                       True   LL-Null:                       -20.592
Covariance Type:            nonrobust   LLR p-value:                  0.001502
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -13.0213      4.931     -2.641      0.008     -22.687      -3.356
GPA            2.8261      1.

In [53]:
# Probit Model
probit_mod = sd.discrete_model.Probit(spector_data.endog, spector_data.exog, missing='none')
probit_res = probit_mod.fit()
print(probit_res.summary())

print("\nConfidence intervals", probit_res.conf_int())

print("\nOdd ratios")
params = probit_res.params
conf = probit_res.conf_int()
conf['Odd Ratio'] = params
conf.columns = ['2.5%', '97.5%', 'Odd Ratio']
print(np.exp(conf))

l_pred = probit_res.predict(spector_data.exog)
#print(l_pred)

Optimization terminated successfully.
         Current function value: 0.400588
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:                  GRADE   No. Observations:                   32
Model:                         Probit   Df Residuals:                       28
Method:                           MLE   Df Model:                            3
Date:                Wed, 18 Sep 2019   Pseudo R-squ.:                  0.3775
Time:                        15:57:27   Log-Likelihood:                -12.819
converged:                       True   LL-Null:                       -20.592
Covariance Type:            nonrobust   LLR p-value:                  0.001405
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -7.4523      2.542     -2.931      0.003     -12.435      -2.469
GPA            1.6258      0.

           2.5%      97.5%  Odd Ratio
const  0.000004   0.084655   0.000580
GPA    1.304507  19.802233   5.082534
TUCE   0.893425   1.241290   1.053090
PSI    1.297033  13.364276   4.163401
