In [None]:
import os
import sys
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
import statsmodels.api as sm

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
sns.set()

In [None]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, the inverse of ``logit``.

    Evaluated as ``exp(-log(1 + exp(-x)))`` using ``np.logaddexp`` so that
    very negative inputs underflow quietly to 0 instead of overflowing inside
    ``np.exp`` and emitting a RuntimeWarning.

    Parameters
    ----------
    x : scalar or ndarray
        Log-odds value(s).

    Returns
    -------
    numpy scalar or ndarray
        Value(s) in [0, 1], with the same shape as ``x``.
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), computed without overflow.
    return np.exp(-np.logaddexp(0, -x))

def logit(x):
    """Return the log-odds ``log(x / (1 - x))`` of ``x``.

    ``x`` may be a scalar or a numpy array; the result has the same shape.
    """
    odds = x / (1 - x)
    return np.log(odds)

## Logistic regression

$\mathsf{logit}(p) = \beta_0 + \beta_1 x$

In [None]:
# Simulate a noisy step function: y = 1 when x >= cutoff, with roughly 5% of
# the labels flipped, then fit and plot a one-feature logistic regression.
SEED = 0  # fixed so the data, fit and figure are reproducible on Restart & Run All
rng = np.random.default_rng(SEED)

n = 200
cutoff = 30

# Integer feature drawn uniformly from [0, 100).
X = rng.integers(low=0, high=100, size=n)

# XOR with the noise mask flips the label wherever `noise` is True.
noise = rng.binomial(n=1, p=0.05, size=n).astype(bool)
mask = (X >= cutoff) ^ noise

y = mask.astype(int)

# fit logistic regression model
# NOTE: scikit-learn's LogisticRegression is L2-regularised by default, so the
# coefficients are close to, but not exactly, the maximum-likelihood estimates.
reg = LogisticRegression()
reg.fit(X.reshape(-1, 1), y)
m = reg.coef_.item()
b = reg.intercept_.item()

print(m, b)

# logit(p) = mx + b => p = sigmoid(mx + b)
fig, ax = plt.subplots(figsize=(12, 4))

ax.scatter(X, y, s=5, label='data')
ax.vlines(cutoff, 0, 1, linestyle='--', alpha=0.5, lw=2, color='r', label='cutoff')

# Evaluate the fitted probability curve over the observed range of X.
xs = np.linspace(X.min(), X.max(), 100)
ax.plot(xs, sigmoid(m * xs + b), lw=2, color='r', label='model')

ax.set(xlabel='x', ylabel='P(y = 1)', title='Logistic regression fit')
ax.legend()
plt.show()

In [None]:
# Cross-check with statsmodels: fit a Logit model on X with an added
# constant (intercept) column and display the fit summary.
exog = sm.add_constant(X)
sm_fit = sm.Logit(y, exog).fit()
sm_fit.summary()

In [None]:
# Standard errors and z statistics for the fitted logistic regression,
# computed from the inverse of X^T V X. Method from:
# https://stats.stackexchange.com/questions/89484/how-to-compute-the-standard-errors-of-a-logistic-regressions-coefficients

# Fitted parameters ordered as [intercept, slope] to match the columns of the
# design matrix below. Defined first because it is printed immediately.
logitParams = np.insert(reg.coef_, 0, reg.intercept_)
print("Logit params:\n", np.round(logitParams, 4))

# Per-observation class probabilities; columns are P(y=0) and P(y=1).
predProbs = reg.predict_proba(X.reshape(-1, 1))

# Design matrix -- a column of 1's (intercept) followed by the feature column
X_design = np.hstack([np.ones((X.shape[0], 1)), X.reshape(-1, 1)])

# Diagonal matrix of each observation's predicted variance p * (1 - p)
# (np.prod replaces np.product, which was removed in NumPy 2.0).
V = np.diagflat(np.prod(predProbs, axis=1))

# Covariance matrix
covLogit = np.linalg.inv(X_design.T @ V @ X_design)
print("Covariance matrix:\n", np.round(covLogit, 3))

# Standard errors
stdErrors = np.sqrt(np.diag(covLogit))
print("Standard errors:\n", np.round(stdErrors, 3))

# z statistic = coefficient / s.e. (its square is the Wald chi-square statistic)
print("z statistics:\n", np.round(logitParams / stdErrors, 3))