In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset with pandas-backed fields (as_frame=True) and show
# which keys the returned container exposes.
iris = load_iris(as_frame=True)
list(iris.keys())

['data',
 'target',
 'frame',
 'target_names',
 'DESCR',
 'feature_names',
 'filename',
 'data_module']

In [3]:
# Feature matrix as a plain ndarray; labels are taken as-is from the dataset.
X = iris["data"].to_numpy()
y = iris["target"]

In [4]:
# Scratch check: the shape of np.unique's result gives the number of distinct
# values (this is how the classifier below counts classes).
test = np.arange(1, 5, dtype=np.int8)
print(np.unique(test).shape)

(4,)


In [5]:
# Seeded shuffle followed by a 90/10 train/test split.
np.random.seed(42)

X = np.array(X)
y = np.array(y)

indices = np.random.permutation(len(X))
print(f"List of indices:\n{indices}")

# Apply the same permutation to features and labels so rows stay aligned.
X_shuffled, y_shuffled = X[indices], y[indices]

rows, cols = X_shuffled.shape

# The first 90% of the shuffled rows train the model; the rest is held out.
rows_90 = np.int_(rows * 0.90)

X_train, X_test = np.split(X_shuffled, [rows_90])
y_train, y_test = np.split(y_shuffled, [rows_90])

List of indices:
[ 73  18 118  78  76  31  64 141  68  82 110  12  36   9  19  56 104  69
  55 132  29 127  26 128 131 145 108 143  45  30  22  15  65  11  42 146
  51  27   4  32 142  85  86  16  10  81 133 137  75 109  96 105  66   0
 122  67  28  40  44  60 123  24  25  23  94  39  95 117  47  97 113  33
 138 101  62  84 148  53   5  93 111  49  35  80  77  34 114   7  43  70
  98 120  83 134 135  89   8  13 119 125   3  17  38  72 136   6 112 100
   2  63  54 126  50 115  46 139  61 147  79  59  91  41  58  90  48  88
 107 124  21  57 144 129  37 140   1  52 130 103  99 116  87  74 121 149
  20  71 106  14  92 102]


In [12]:
class SoftmaxRegression:
    """Multinomial (softmax) regression trained with mini-batch gradient descent.

    Parameters
    ----------
    epochs : int, default=150
        Number of full passes over the training data. Must be > 0.
    lr : float, default=0.01
        Step size applied to every gradient update. Must be > 0.
    batch_size : int, default=32
        Maximum number of samples per mini-batch. Must be > 0.

    Attributes
    ----------
    weights : ndarray of shape (features, num_class), or None before fit
    bias : ndarray of shape (num_class,), or None before fit
    classes_ : ndarray of the sorted distinct labels seen by fit, or None
    samples, features, num_class : int
        Dimensions recorded by the last call to fit.

    Notes
    -----
    Initial parameters and the per-epoch shuffling come from NumPy's global
    random state, so call ``np.random.seed`` beforehand for repeatable runs.
    """

    def __init__(self, epochs=150, lr=0.01, batch_size=32):
        # `<= 0` rather than `< 0`: zero epochs trains nothing, a zero learning
        # rate never moves the parameters, and a zero batch size would divide
        # by zero when fit() computes the number of batches.
        if epochs <= 0:
            raise ValueError("epoch must be positive")

        if lr <= 0:
            raise ValueError("learning rate must be positive")

        if batch_size <= 0:
            raise ValueError("Batch size must be positive")

        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        self.weights = None
        self.bias = None
        self.classes_ = None

    def fit(self, X, y):
        """Learn weights and bias from features ``X`` and labels ``y``.

        Parameters
        ----------
        X : array-like of shape (samples, features)
        y : array-like of shape (samples,)
            Class labels. Any set of distinct values is accepted; they are
            mapped internally to column indices 0..num_class-1.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``y`` disagree on the number of
            samples, or if the training set is empty.

        Prints the sample-weighted mean training loss once per epoch.
        """
        # Normalise inputs so DataFrames / Series / lists index positionally.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")

        if X.shape[0] != y.shape[0]:
            raise ValueError(f"Features must have the same amount of samples than labels: {X.shape[0]} != {y.shape[0]}")

        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set")

        self.samples, self.features = X.shape
        # return_inverse re-encodes the labels as indices into classes_, so
        # the one-hot step works even when labels are not exactly 0..K-1.
        self.classes_, y = np.unique(y, return_inverse=True)
        self.num_class = self.classes_.shape[0]
        self.weights = np.random.rand(self.features, self.num_class).astype(np.float32)
        self.bias = np.random.rand(self.num_class).astype(np.float32)

        # Ceiling division: the last batch may be smaller than batch_size.
        batches = (self.samples + self.batch_size - 1) // self.batch_size
        for epoch in range(self.epochs):
            # Reshuffle every epoch; fancy indexing copies, so the caller's
            # arrays are never reordered.
            indices = np.random.permutation(self.samples)
            X, y = X[indices], y[indices]

            total_loss = 0.0
            for batch in range(batches):
                start = batch * self.batch_size
                end = min((batch + 1) * self.batch_size, self.samples)

                X_batch, y_batch = X[start:end], y[start:end]
                N = X_batch.shape[0]

                probs = self.predict(X_batch)
                # one_hot_encoding is the module-level helper defined next to
                # this class.
                y_batch_one_hot = one_hot_encoding(y_batch, self.num_class)

                loss = self.loss_functions(probs, y_batch_one_hot)

                # Gradient of the mean cross-entropy w.r.t. the scores is
                # (probs - one_hot); propagate it to weights and bias.
                dl_dz = probs - y_batch_one_hot
                dl_dw = (X_batch.T @ dl_dz) / N
                dl_db = np.mean(dl_dz, axis=0)

                self.weights -= self.lr * dl_dw
                self.bias -= self.lr * dl_db

                # Weight each batch loss by its size so a short final batch
                # does not skew the epoch average.
                total_loss += loss * N

            avg_loss = total_loss / self.samples
            print(f"Epoch: {epoch+1}/{self.epochs} Training loss: {avg_loss:.4f}")

    def predict(self, X):
        """Return class probabilities of shape (n_samples, num_class).

        Raises
        ------
        RuntimeError
            If called before the model has any weights.
        """
        if self.weights is None:
            # Fail loudly instead of printing and then crashing on np.dot.
            raise RuntimeError("Model must train before predicting")
        score = np.dot(X, self.weights) + self.bias
        return self.softmax(score)

    def predict_class(self, X):
        """Return the most probable label for every row of ``X``."""
        winners = np.argmax(self.predict(X), axis=1)
        classes = getattr(self, "classes_", None)
        # Without a label table (weights assigned by hand) fall back to the
        # raw column indices.
        return winners if classes is None else classes[winners]

    def loss_functions(self, probs, y_real_one_hot):
        """Mean cross-entropy between ``probs`` and one-hot targets."""
        N = y_real_one_hot.shape[0]
        # The 1e-8 offset keeps log() finite when a probability is exactly 0.
        loss = -np.sum(y_real_one_hot * np.log(probs + 1e-8)) / N
        return loss

    def softmax(self, z):
        """Row-wise softmax of a 2-D score array ``z``."""
        # Subtracting each row's maximum avoids overflow in exp() and leaves
        # the result unchanged.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def one_hot_encoding(x, num_classes):
    """Return a one-hot integer matrix for the class indices in ``x``.

    Parameters
    ----------
    x : array-like of int
        Class indices in ``[0, num_classes)``. Lists and column-shaped arrays
        are accepted; the input is flattened before encoding.
    num_classes : int
        Number of columns in the result.

    Returns
    -------
    ndarray of shape (len(x), num_classes), dtype int
        Row ``i`` has a single 1 in column ``x[i]``.

    Raises
    ------
    IndexError
        If any index lies outside ``[0, num_classes)``. Negative values are
        rejected explicitly because NumPy would otherwise wrap them around to
        the last columns.
    """
    labels = np.asarray(x).ravel()
    encoded_arr = np.zeros((labels.size, num_classes), dtype=int)
    if labels.size == 0:
        return encoded_arr
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(
            f"class indices must be in [0, {num_classes}), "
            f"got values between {labels.min()} and {labels.max()}"
        )
    encoded_arr[np.arange(labels.size), labels] = 1
    return encoded_arr
        

In [13]:
# Train the hand-written model with its default hyper-parameters, spelled out.
# fit() draws from NumPy's global random state, so the printed losses depend
# on that state when this cell runs.
SR = SoftmaxRegression(epochs=150, lr=0.01, batch_size=32)
SR.fit(X_train, y_train)

Epoch: 1/150 Training loss: 2.9184
Epoch: 2/150 Training loss: 1.3592
Epoch: 3/150 Training loss: 1.0128
Epoch: 4/150 Training loss: 0.9425
Epoch: 5/150 Training loss: 0.9142
Epoch: 6/150 Training loss: 0.8896
Epoch: 7/150 Training loss: 0.8694
Epoch: 8/150 Training loss: 0.8466
Epoch: 9/150 Training loss: 0.8310
Epoch: 10/150 Training loss: 0.8133
Epoch: 11/150 Training loss: 0.7941
Epoch: 12/150 Training loss: 0.7834
Epoch: 13/150 Training loss: 0.7602
Epoch: 14/150 Training loss: 0.7512
Epoch: 15/150 Training loss: 0.7344
Epoch: 16/150 Training loss: 0.7232
Epoch: 17/150 Training loss: 0.7087
Epoch: 18/150 Training loss: 0.6985
Epoch: 19/150 Training loss: 0.6935
Epoch: 20/150 Training loss: 0.6790
Epoch: 21/150 Training loss: 0.6673
Epoch: 22/150 Training loss: 0.6603
Epoch: 23/150 Training loss: 0.6506
Epoch: 24/150 Training loss: 0.6468
Epoch: 25/150 Training loss: 0.6355
Epoch: 26/150 Training loss: 0.6290
Epoch: 27/150 Training loss: 0.6270
Epoch: 28/150 Training loss: 0.6146
E

In [14]:
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score
from sklearn.linear_model import LogisticRegression

In [15]:
# Evaluate the hand-written model on the held-out rows: raw counts first,
# then the matrix computed with normalize='true'.
y_pred = SR.predict_class(X_test)
cm_counts = confusion_matrix(y_test, y_pred)
cm_normalized = confusion_matrix(y_test, y_pred, normalize='true')
print(f"Confusion matrix:\n{cm_counts}")
print(f"Confusion matrix normalized:\n{cm_normalized}")

Confusion matrix:
[[2 0 0]
 [0 6 0]
 [0 0 7]]
Confusion matrix normalized:
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


# Softmax regression with scikit-learn

In [20]:
# Reference model: scikit-learn's LogisticRegression on the same training split,
# capped at the same number of iterations as the hand-written model's epochs.
Softmax_reg = LogisticRegression(max_iter=150, random_state=42)
Softmax_reg.fit(X_train, y_train)

In [21]:
# Evaluate the scikit-learn model on the same held-out rows for comparison.
y_pred_sk = Softmax_reg.predict(X_test)
cm_sk_counts = confusion_matrix(y_test, y_pred_sk)
cm_sk_normalized = confusion_matrix(y_test, y_pred_sk, normalize='true')
print(f"Confusion matrix:\n{cm_sk_counts}")
print(f"Confusion matrix normalized:\n{cm_sk_normalized}")

Confusion matrix:
[[2 0 0]
 [0 6 0]
 [0 1 6]]
Confusion matrix normalized:
[[1.         0.         0.        ]
 [0.         1.         0.        ]
 [0.         0.14285714 0.85714286]]
