In [1]:
# library imports 

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss
import numpy as np

In [2]:
# synthetic binary-classification data for the logistic-regression fits
import random
from sklearn.datasets import make_classification

# rng: this only seeds Python's `random` module (used later to draw the
# initial beta for the hand-written gradient descent). NumPy / scikit-learn
# randomness is controlled through the explicit `random_state` arguments below.
random.seed(0)

# keyword arguments forwarded verbatim to make_classification
regression_params = {
    'n_samples': 1000,
    'n_features': 5,
    'n_informative': 3,
    'n_redundant': 0,
    'n_repeated': 0,
    'n_classes': 2,
    'n_clusters_per_class': 1,
    'random_state': 0,
    'class_sep': .7,
}

X, y = make_classification(**regression_params)

# add constant: append a column of ones as the last column of X
X = np.concatenate([X, np.ones(shape=(len(X), 1))], axis=1)

# train / test splits
# random_state is fixed so the split (and therefore every fitted coefficient
# and test metric computed from it) is the same after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=regression_params['random_state']
)

# statsmodels

In [3]:
import statsmodels.api as sm

In [4]:
# X_train already ends with the column of ones appended to X, which serves as the intercept term here
sm_model = sm.Logit(exog=X_train, endog=y_train)

In [5]:
# fit the logit model by maximum likelihood; maxiter only raises the optimiser's iteration cap
results = sm_model.fit(maxiter=10000)

Optimization terminated successfully.
         Current function value: 0.136023
         Iterations 9


In [6]:
# coefficient table and fit statistics for the statsmodels fit
print(results.summary())

                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                  750
Model:                          Logit   Df Residuals:                      744
Method:                           MLE   Df Model:                            5
Date:                Tue, 06 Jul 2021   Pseudo R-squ.:                  0.8038
Time:                        00:19:23   Log-Likelihood:                -102.02
converged:                       True   LL-Null:                       -519.84
Covariance Type:            nonrobust   LLR p-value:                2.254e-178
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1            -5.9049      0.594     -9.948      0.000      -7.068      -4.742
x2             0.0429      0.196      0.218      0.827      -0.342       0.428
x3            -7.5644      0.676    -11.185      0.0

In [7]:
# out-of-sample log loss of the statsmodels fit; normalize=False reports the
# total over the test rows instead of the per-row mean
sm_test_probs = results.predict(X_test)
log_loss(y_test, sm_test_probs, normalize=False)

30.988019406777205

# sklearn

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
# scikit-learn estimator with all-default settings
# NOTE(review): scikit-learn's LogisticRegression is L2-penalised by default, which would
# explain coefficients smaller in magnitude than the statsmodels / gradient-descent fits —
# confirm whether a penalised fit is intended for this comparison.
sk_model = LogisticRegression()

In [10]:
# leave out the trailing constant column; the sklearn model reports its own intercept_ separately
sk_model = sk_model.fit(X_train[:, :-1], y_train)

In [11]:
# fitted coefficients for the five generated features (the constant column was excluded from the fit)
sk_model.coef_

array([[-3.7297326 ,  0.06227502, -5.05295708,  0.03704701,  3.33401777]])

In [12]:
# intercept estimated by the sklearn model itself
sk_model.intercept_

array([4.12888085])

In [13]:
# class-1 probabilities on the test features (constant column dropped, as in the fit),
# scored as total (not mean) log loss
sk_test_probs = sk_model.predict_proba(X_test[:, :-1])[:, 1]
log_loss(y_test, sk_test_probs, normalize=False)

35.37877426988723

# pure python

In [15]:
# reshape the training targets into an (n, 1) column vector so they line up
# element-for-element with the (n, 1) predictions produced when beta is a
# column vector; X_train is used as-is
y_train = y_train.reshape(-1, 1)

In [16]:
# functions

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    Evaluated as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so that
    large negative inputs do not overflow ``np.exp`` (the naive form emits an
    overflow RuntimeWarning there, or raises under ``np.errstate(over='raise')``).

    Parameters
    ----------
    x : scalar or array-like of numbers
        Input value(s); plain Python sequences are accepted as well as arrays.

    Returns
    -------
    NumPy float or ndarray with the same shape as ``x``, values in [0, 1].
    """
    # log(1 + exp(-x)) computed without ever forming exp(-x) on its own
    log_denominator = np.logaddexp(0, np.negative(x))
    return np.exp(-log_denominator)

def predict(x,beta):
    """Return the predicted probabilities ``sigmoid(x . beta)``.

    The last axis of ``x`` must have the same length as the first axis of
    ``beta``; otherwise the assertion below fails.
    """
    n_columns = x.shape[-1]
    n_coefficients = beta.shape[0]
    assert n_columns == n_coefficients

    linear_term = np.dot(x, beta)
    return sigmoid(linear_term)

def gradient_step(beta,grad,step_size=0.01):
    """Take one descent step: return ``beta`` moved against ``grad``.

    ``step_size`` scales the gradient before it is subtracted (default 0.01).
    """
    update = step_size * grad
    return beta - update

def gradient(x,beta,y_true):
    """Gradient of the mean log loss with respect to ``beta``.

    Computes ``x^T (predict(x, beta) - y_true) / len(x)``.

    Parameters
    ----------
    x : array whose rows are observations.
    beta : coefficient array accepted by ``predict``.
    y_true : observed targets, one per row of ``x``. If its shape differs from
        the prediction's shape but it holds the same number of elements (e.g. a
        flat ``(n,)`` vector versus ``(n, 1)`` predictions), it is reshaped to
        match so the subtraction is element-wise.

    Returns
    -------
    Array with the same leading dimension as the columns of ``x``.
    """
    y_pred = predict(x,beta)

    # Without this alignment a (n,) target against (n, 1) predictions would
    # broadcast into an (n, n) residual and silently yield a wrong gradient.
    y_true = np.asarray(y_true)
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)

    residual = y_pred - y_true
    return np.dot(np.transpose(x), residual) / len(x)

In [23]:
# initial parameters: one uniform draw from Python's `random` module per
# coefficient (the generated features plus the constant column), as a column vector
n_coefficients = regression_params['n_features'] + 1
initial_draws = [random.random() for _ in range(n_coefficients)]
beta = np.array(initial_draws).reshape(-1, 1)

# fixed-step gradient descent over the full training set for a fixed number of passes
epochs = 100000
for epoch in range(epochs):
    grad = gradient(x=X_train, beta=beta, y_true=y_train)
    beta = gradient_step(beta=beta, grad=grad, step_size=0.1)

# display the fitted coefficients
beta

array([[-5.90482279],
       [ 0.04287568],
       [-7.56436576],
       [ 0.07196776],
       [ 5.11770353],
       [ 6.04713554]])

In [24]:
# out-of-sample log loss of the gradient-descent fit, total (not mean) over the test rows
gd_test_probs = predict(X_test, beta)
log_loss(y_test, gd_test_probs, normalize=False)

30.988029580241474