In [1]:
# library imports 

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss
import numpy as np

In [2]:
# regression data
import random
from sklearn.datasets import make_classification

# rng
random.seed(0)

regression_params = {
     'n_samples':1000
    ,'n_features':5
    ,'n_informative':3
    ,'n_redundant':0
    ,'n_repeated':0
    ,'n_classes' : 4
    ,'n_clusters_per_class':1
    ,'random_state':0
    ,'class_sep' : .2
}

X,y = make_classification(**regression_params)

# add constant
X = np.concatenate([X,np.ones(shape=(len(X),1))],axis=1)

# train / test splits
X_train,X_test,y_train,y_test = train_test_split(X,y)

# statsmodels

In [3]:
import statsmodels.api as sm

In [4]:
sm_model = sm.MNLogit(endog=y_train,exog=X_train)

In [5]:
result = sm_model.fit_regularized(maxiter=1000)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 1.2892789813406902
            Iterations: 41
            Function evaluations: 41
            Gradient evaluations: 41


In [6]:
print(result.summary())

                          MNLogit Regression Results                          
Dep. Variable:                      y   No. Observations:                  750
Model:                        MNLogit   Df Residuals:                      732
Method:                           MLE   Df Model:                           15
Date:                Sat, 28 Jan 2023   Pseudo R-squ.:                 0.06928
Time:                        18:00:27   Log-Likelihood:                -966.96
converged:                       True   LL-Null:                       -1038.9
Covariance Type:            nonrobust   LLR p-value:                 3.825e-23
       y=1       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1            -0.0112      0.119     -0.095      0.924      -0.244       0.221
x2            -0.6398      0.121     -5.290      0.000      -0.877      -0.403
x3            -0.0125      0.160     -0.078      0.9

In [7]:
# accuracy
(np.argmax(result.predict(X_test),axis=1)== y_test).mean()

0.4

In [8]:
# roc auc
roc_auc_score(y_test,result.predict(X_test),multi_class='ovr')

0.6568321405043528

# sklearn

In [9]:
from sklearn.linear_model import LogisticRegression

In [10]:
sk_model = LogisticRegression(multi_class='ovr')

In [11]:
sk_model = sk_model.fit(X_train[:,0:-1],y_train)

In [12]:
sk_model.coef_

array([[-0.30672041,  0.52210748,  0.23327633, -0.11700328, -0.0084696 ],
       [-0.35207948, -0.28023505,  0.26558992,  0.09148067, -0.106462  ],
       [ 0.44906086, -0.47887949, -0.92485383, -0.00975547, -0.00448302],
       [ 0.17785134,  0.23713595,  0.44564524,  0.03422481,  0.11514281]])

In [13]:
sk_model.intercept_

array([-1.14577753, -1.25937846, -1.10287189, -1.21661989])

In [14]:
# accuracy
(sk_model.predict(X_test[:,0:-1]) == y_test).mean()

0.412

In [15]:
# roc auc
roc_auc_score(y_test,sk_model.predict_proba(X_test[:,0:-1]),multi_class='ovr')

0.6563500989505626

# pure python

In [16]:
# turn back into numpy arrays

X_train = X_train
y_train = y_train.reshape(-1,1)

In [17]:
# functions

def sigmoid(x):
    return 1/(1+np.exp(-x))

def predict(x,beta):
    return sigmoid(np.dot(x,beta))

def gradient_step(beta,grad,step_size=0.01):
    return beta - step_size*grad

def gradient(x,beta,y_true):
    y_pred = predict(x,beta)
    
    return np.dot(np.transpose(x),(y_pred-y_true))/len(x)

In [18]:
betas = np.zeros(shape=(regression_params['n_features']+1,regression_params['n_classes']))
for _class in np.unique(y):
    betas[:,_class] = [random.random() for _ in range(len(betas))]
betas

array([[0.84442185, 0.78379859, 0.28183784, 0.81021724],
       [0.7579544 , 0.30331273, 0.7558042 , 0.90216595],
       [0.42057158, 0.47659695, 0.618369  , 0.31014757],
       [0.25891675, 0.58338204, 0.25050634, 0.72983175],
       [0.51127472, 0.90811289, 0.90974626, 0.89883829],
       [0.40493414, 0.50468686, 0.98278548, 0.68398393]])

In [19]:
epochs = 100000
for _class in np.unique(y):
    beta = betas[:,_class].reshape(-1,1)
    for epoch in range(epochs):
        grad = gradient(x=X_train,beta=beta,y_true=(y_train==_class).astype('int'))
        beta = gradient_step(beta=beta,grad=grad,step_size=0.1)
    betas[:,_class] = beta.flat[:]

In [20]:
betas

array([[-0.31091651, -0.35520343,  0.45528877,  0.17895338],
       [ 0.52808986, -0.28184945, -0.48642954,  0.24007559],
       [ 0.23928012,  0.26980223, -0.94443672,  0.45329399],
       [-0.11803808,  0.09220572, -0.00951351,  0.03412835],
       [-0.00850577, -0.10740909, -0.00478774,  0.11630634],
       [-1.14816671, -1.26070482, -1.10516299, -1.21876915]])

In [21]:
# accuracy
(np.argmax(predict(X_test,betas),axis=1) == y_test).mean()

0.412

In [22]:
# roc auc
roc_auc_score(y_test,(predict(X_test,betas) /predict(X_test,betas).sum(axis=1).reshape(-1,1)),multi_class='ovr')

0.656529271565547

# pytorch

In [23]:
import torch
import torch.nn

In [24]:
class Classifier(torch.nn.Module):
    def __init__(self,n_inputs,n_outputs):
        super(Classifier,self).__init__()
        self.__clf = torch.nn.Linear(n_inputs,n_outputs)
        self.__sigmoid = torch.nn.Sigmoid()
        
    def forward(self,x):
        return self.sigmoid(self.clf(x.float()))
    
    @property
    def clf(self):
        return self.__clf
    
    @property
    def sigmoid(self):
        return self.__sigmoid

In [25]:
X_train = torch.tensor(X_train[:,0:-1])
y_train = torch.tensor(y_train)

In [26]:

torch_model = Classifier(regression_params['n_features'],regression_params['n_classes'])

criterion = torch.nn.NLLLoss()
optimizer = torch.optim.SGD(torch_model.parameters(),lr=0.01)


# training loop 
for epoch in range(epochs):
    y_pred = torch_model(X_train)
    loss = criterion(y_pred,y_train.flatten().long())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [27]:
torch_model.clf.weight

Parameter containing:
tensor([[-0.6348,  1.0120,  0.8013, -0.0873, -0.0165],
        [-0.3554, -0.0453,  0.4924,  0.0785, -0.0525],
        [ 0.4145,  0.0818, -0.6722,  0.0043,  0.0438],
        [ 1.1969,  0.9229,  0.7268, -0.0063,  0.1360]], requires_grad=True)

In [28]:
torch_model.clf.bias

Parameter containing:
tensor([5.1848, 5.2895, 5.3621, 5.0276], requires_grad=True)

In [29]:
(np.argmax(torch_model(torch.tensor(X_test[:,0:-1])).detach().numpy(),axis=1) == y_test).mean()

0.364

In [30]:
roc_auc_score(y_test,(torch_model(torch.tensor(X_test[:,0:-1])).detach().numpy() / torch_model(torch.tensor(X_test[:,0:-1])).detach().numpy().sum(axis=1).reshape(-1,1)),multi_class='ovr')

0.6355223286560547