In [1]:
# library imports 

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss
import numpy as np

In [2]:
# synthetic two-class data for the logistic-regression comparison
import random
from sklearn.datasets import make_classification

# seed Python's `random` module; it is used further down to draw the initial
# coefficients of the hand-written gradient descent
random.seed(0)

# keyword arguments forwarded verbatim to make_classification
regression_params = {
    'n_samples': 1000,
    'n_features': 5,
    'n_informative': 3,
    'n_redundant': 0,
    'n_repeated': 0,
    'n_classes': 2,
    'n_clusters_per_class': 1,
    'random_state': 0,
    'class_sep': .7,
}

X,y = make_classification(**regression_params)

# add constant: a trailing column of ones that acts as the intercept term
X = np.concatenate([X,np.ones(shape=(len(X),1))],axis=1)

# train / test splits
# random_state is fixed (same seed as the data generation) so the split, and
# therefore every coefficient and metric computed from it, is identical on
# each Restart & Run All; without it the split changes on every execution
X_train,X_test,y_train,y_test = train_test_split(
    X,
    y,
    random_state=regression_params['random_state'],
)

# statsmodels

In [3]:
import statsmodels.api as sm

In [4]:
# X_train already includes the column of ones appended to X during data
# preparation, so the coefficient of that last column is the intercept
sm_model = sm.Logit(endog=y_train,exog=X_train)

In [5]:
# only maxiter is overridden; every other fit_regularized option keeps its default
results = sm_model.fit_regularized(maxiter=10000)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.14009426893867571
            Iterations: 41
            Function evaluations: 41
            Gradient evaluations: 41


In [6]:
# print the text summary of the fitted statsmodels results
print(results.summary())

                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                  750
Model:                          Logit   Df Residuals:                      744
Method:                           MLE   Df Model:                            5
Date:                Sat, 28 Jan 2023   Pseudo R-squ.:                  0.7979
Time:                        13:03:50   Log-Likelihood:                -105.07
converged:                       True   LL-Null:                       -519.82
Covariance Type:            nonrobust   LLR p-value:                4.811e-177
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1            -5.7761      0.574    -10.055      0.000      -6.902      -4.650
x2             0.0247      0.190      0.130      0.897      -0.349       0.398
x3            -7.7236      0.691    -11.178      0.0

In [7]:
# summed (normalize=False) log loss of the statsmodels fit on the held-out split
sm_test_proba = results.predict(X_test)
log_loss(y_test, sm_test_proba, normalize=False)

27.4905309020301

In [8]:
# ROC AUC of the statsmodels fit on the held-out split
sm_test_proba = results.predict(X_test)
roc_auc_score(y_test, sm_test_proba)

0.9866803278688525

# sklearn

In [9]:
from sklearn.linear_model import LogisticRegression

In [10]:
# C is sklearn's inverse regularisation strength; a very large value makes the
# penalty negligible so the coefficients are comparable with the statsmodels fit
sk_model = LogisticRegression(C=1e9)

In [11]:
# drop the last column (the appended constant of ones); the intercept of this
# model is read from sk_model.intercept_ instead
sk_model = sk_model.fit(X_train[:,0:-1],y_train)

In [12]:
# fitted coefficients of the five features (the intercept is stored separately)
sk_model.coef_

array([[-5.77607452,  0.02470264, -7.72351765,  0.04881045,  5.1807721 ]])

In [13]:
# fitted intercept; the counterpart of the constant-column coefficient in the other fits
sk_model.intercept_

array([5.94364052])

In [14]:
# summed log loss of the sklearn fit on the held-out split; the constant column
# is dropped and column 1 of predict_proba is passed as the positive-class score
sk_test_proba = sk_model.predict_proba(X_test[:, :-1])[:, 1]
log_loss(y_test, sk_test_proba, normalize=False)

27.49055168355615

In [15]:
# ROC AUC of the sklearn fit on the held-out split (constant column dropped)
sk_test_proba = sk_model.predict_proba(X_test[:, :-1])[:, 1]
roc_auc_score(y_test, sk_test_proba)

0.9866803278688525

# pure python (gradient descent with NumPy)

In [16]:
# reshape the labels into a column vector so they line up element-wise with the
# (n_samples, 1) predictions produced from the (n_coefficients, 1) beta below;
# X_train is used as-is
y_train = y_train.reshape(-1,1)

In [17]:
# functions

def sigmoid(x):
    """Apply the logistic function 1 / (1 + exp(-x)) element-wise.

    Parameters
    ----------
    x : numpy.ndarray or torch.Tensor
        Values to transform.

    Returns
    -------
    Same container type as ``x`` holding the transformed values.

    Raises
    ------
    NotImplementedError
        If ``x`` is neither a NumPy array nor a torch tensor.
    """
    if isinstance(x,np.ndarray):
        # exp(-x) overflows to inf for very negative x; the quotient is still
        # the correct 0.0, so only the spurious overflow warning is silenced
        with np.errstate(over="ignore"):
            return 1/(1+np.exp(-x))

    # torch is imported lazily: this function is defined (and used with NumPy
    # arrays) before the notebook imports torch, and the NumPy path must keep
    # working even when torch is not installed
    try:
        import torch
    except ImportError:
        torch = None

    if torch is not None and isinstance(x,torch.Tensor):
        # torch.sigmoid keeps gradients finite where the hand-written
        # 1/(1+exp(-x)) yields NaN once exp overflows
        return torch.sigmoid(x)

    raise NotImplementedError("Invalid type.")

def predict(x, beta):
    """Return sigmoid(x . beta), the predicted probabilities for ``x``.

    The last dimension of ``x`` must equal the first dimension of ``beta``;
    this is enforced with an assertion before the product is taken.
    """
    n_columns = x.shape[-1]
    n_coefficients = beta.shape[0]
    assert n_columns == n_coefficients

    linear_predictor = np.dot(x, beta)
    return sigmoid(linear_predictor)

def gradient_step(beta, grad, step_size=0.01):
    """Take one gradient-descent step and return the updated coefficients.

    The result is ``beta`` moved against ``grad`` scaled by ``step_size``;
    neither argument is modified.
    """
    update = step_size * grad
    return beta - update

def gradient(x,beta,y_true):
    """Average gradient of the log loss with respect to ``beta``.

    Computes ``x^T (predict(x, beta) - y_true) / len(x)``.

    Parameters
    ----------
    x : array of explanatory variables, one row per observation.
    beta : coefficients accepted by ``predict`` for ``x``.
    y_true : observed labels, one per observation; may be flat or a column
        vector, it is reshaped to the shape of the predictions.

    Returns
    -------
    Array with the same shape as ``beta`` for the 1-D / column-vector layouts
    used in this notebook.

    Raises
    ------
    ValueError
        If ``y_true`` does not contain exactly one label per prediction.
    """
    y_pred = predict(x,beta)

    # align the labels with the predictions: subtracting a flat (n,) vector
    # from (n, 1) predictions (or vice versa) would silently broadcast to an
    # (n, n) matrix and produce a wrong-shaped gradient
    y_true = np.asarray(y_true).reshape(y_pred.shape)

    return np.dot(np.transpose(x),(y_pred-y_true))/len(x)

In [18]:
# random starting point: one coefficient per feature plus one for the constant column
n_coefficients = regression_params['n_features'] + 1
initial_draws = [random.random() for _ in range(n_coefficients)]
beta = np.array(initial_draws).reshape(-1, 1)

# gradient descent over the whole training set with a fixed step size
epochs = 100000
for epoch in range(epochs):
    grad = gradient(x=X_train, beta=beta, y_true=y_train)
    beta = gradient_step(beta=beta, grad=grad, step_size=0.1)

# display the fitted coefficients (constant-column coefficient last)
beta

array([[-5.77601708],
       [ 0.0247123 ],
       [-7.72345363],
       [ 0.04882082],
       [ 5.1807325 ],
       [ 5.94360082]])

In [19]:
# summed log loss of the gradient-descent fit on the held-out split
gd_test_proba = predict(X_test, beta)
log_loss(y_test, gd_test_proba, normalize=False)

27.49056043808065

In [20]:
# ROC AUC of the gradient-descent fit on the held-out split
gd_test_proba = predict(X_test, beta)
roc_auc_score(y_test, gd_test_proba)

0.9866803278688525

# pytorch

In [21]:
import torch.nn
import torch
from torch.optim import SGD

In [22]:
class Logistic(torch.nn.Module):
    """Logistic regression with a single (n_features, 1) coefficient parameter.

    No separate bias term is created; an intercept has to be supplied as a
    constant column in the inputs.
    """

    def __init__(self, n_features):
        """Create the coefficient parameter from uniform random draws."""
        super().__init__()

        # (n_features, 1) tensor drawn with torch.rand
        initial_beta = torch.rand(n_features, 1)
        self.__beta = torch.nn.Parameter(initial_beta)

    @property
    def beta(self):
        """The coefficient parameter (read-only accessor)."""
        return self.__beta

    def forward(self, x):
        """Return sigmoid(x @ beta) as a float64 tensor.

        x is expected as (n_samples, n_features); both operands are cast to
        float64 before the product is taken.
        """
        features = x.double()
        weights = self.beta.double()
        probabilities = sigmoid(features @ weights)
        return probabilities.double()

    def predict(self, x):
        """Return sigmoid(x . beta) computed with NumPy on the detached coefficients."""
        weights = self.beta.detach().numpy()
        return sigmoid(np.dot(x, weights))

In [23]:
# make training sets tensors
# torch.as_tensor accepts both ndarrays and existing tensors, so this cell can
# be re-run without the copy-construct warning torch.tensor emits when it is
# handed a tensor; dtypes are inherited from the inputs
X_train = torch.as_tensor(X_train)
y_train = torch.as_tensor(y_train).reshape(-1,1)

In [24]:
# seed torch so the torch.rand initial coefficients are the same on every run
torch.manual_seed(0)

# +1 for intercept
torch_model = Logistic(regression_params['n_features']+1)

epochs = 100000
optimizer = SGD(torch_model.parameters(),lr=0.2)
criterion = torch.nn.BCELoss()

# the targets never change, so cast them to float64 once instead of on every
# one of the `epochs` iterations
y_target = y_train.double()

# training loop: every step uses the whole training set
for epoch in range(epochs):
    y_pred = torch_model(X_train)
    loss = criterion(y_pred,y_target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [25]:
# show the learned parameters; Logistic registers a single beta tensor
list(torch_model.parameters())

[Parameter containing:
 tensor([[-5.7750],
         [ 0.0247],
         [-7.7222],
         [ 0.0488],
         [ 5.1798],
         [ 5.9427]], requires_grad=True)]

In [26]:
# summed log loss of the torch fit on the held-out split
torch_test_proba = torch_model.predict(X_test)
log_loss(y_test, torch_test_proba, normalize=False)

27.4910053291642

In [27]:
# ROC AUC of the torch fit on the held-out split
torch_test_proba = torch_model.predict(X_test)
roc_auc_score(y_test, torch_test_proba)

0.9866803278688525