In [1]:
import numpy as np

from dataset import get_2D_normalised, get_dimensionlly_reduced



In [2]:
class SoftmaxClassifier:
    def __init__(self, num_classes, input_size, learning_rate=0.1):
        """
        Constructor for the SoftmaxClassifier class.
        :param num_classes: The number of classes in the classification problem.
        :param input_size: The number of input features in the dataset.
        :param learning_rate: The learning rate used in gradient descent.
        """
        self.num_classes = num_classes
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.W = np.random.randn(input_size, num_classes) / np.sqrt(input_size)
        self.b = np.zeros((1, num_classes))

    def softmax(self, Z):
        """
        Softmax function for a given matrix of input logits.
        :param Z: A matrix of input logits, with shape (m, num_classes).
        :return: A matrix of output probabilities, with shape (m, num_classes).
        """
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / np.sum(expZ, axis=1, keepdims=True)

    def predict(self, X):
        """
        Predict the class labels for a given matrix of input features.
        :param X: A matrix of input features, with shape (m, input_size).
        :return: A vector of predicted class labels, with shape (m,).
        """
        logits = np.dot(X, self.W) + self.b
        probs = self.softmax(logits)
        return np.argmax(probs, axis=1)

    def train(self, X, y, num_epochs=10, batch_size=32):
        """
        Train the softmax classifier using gradient descent.
        :param X: A matrix of input features, with shape (m, input_size).
        :param y: A vector of class labels, with shape (m,).
        :param num_epochs: The number of epochs to train for.
        :param batch_size: The batch size to use for gradient descent.
        """
        num_batches = int(np.ceil(len(X) / batch_size))
        for epoch in range(num_epochs):
            for i in range(num_batches):
                batch_start = i * batch_size
                batch_end = (i + 1) * batch_size
                X_batch = X[batch_start:batch_end]
                y_batch = y[batch_start:batch_end]
                dW, db = self.compute_gradients(X_batch, y_batch)
                self.W -= self.learning_rate * dW
                self.b -= self.learning_rate * db
                
    def compute_gradients(self, X_batch, y_batch):
        logits = np.dot(X_batch, self.W) + self.b
        probs = self.softmax(logits)
        delta = probs
        delta[range(len(X_batch)), y_batch] -= 1
        dW = np.dot(X_batch.T, delta) / len(X_batch)
        db = np.mean(delta, axis=0)
        return dW, db
                
    def train_early(self, X, y, num_epochs=100, batch_size=32, learning_rate=0.01, reg_strength=0.0, early_stopping_patience=5, validation_frac=0.1):
        """
        Train the SoftmaxClassifier on the given training data using mini-batch gradient descent with early stopping.

        :param X: The training data, a Numpy array of shape (num_examples, input_size).
        :param y: The training labels, a Numpy array of shape (num_examples,).
        :param num_epochs: The number of epochs to train for.
        :param batch_size: The mini-batch size to use for training.
        :param learning_rate: The learning rate to use for gradient descent.
        :param reg_strength: The L2 regularization strength to use for training.
        :param early_stopping_patience: The number of epochs to wait without improvement in the validation accuracy before stopping training.
        :param validation_frac: The fraction of the training data to use for validation.
        """

        # Split the data into training and validation sets
        num_examples = X.shape[0]
        num_train = int(num_examples * (1 - validation_frac))
        train_indices = np.arange(num_train)
        val_indices = np.arange(num_train, num_examples)
        X_train, y_train = X[train_indices], y[train_indices]
        X_val, y_val = X[val_indices], y[val_indices]

        # Initialize the weights and biases
        self.W = np.random.randn(self.input_size, self.num_classes) / np.sqrt(self.input_size)
        self.b = np.zeros(self.num_classes)

        # Initialize the best validation accuracy and the number of epochs since the last improvement
        best_val_acc = 0.0
        epochs_since_last_improvement = 0

        # Train the model for the specified number of epochs
        for epoch in range(num_epochs):
            # Shuffle the training data
            np.random.shuffle(train_indices)

            # Split the training data into mini-batches
            for i in range(0, num_train, batch_size):
                batch_indices = train_indices[i:i+batch_size]
                X_batch, y_batch = X_train[batch_indices], y_train[batch_indices]

                # Compute the gradients of the loss with respect to the weights and biases
                dW, db = self.compute_gradients(X_batch, y_batch)

                # Update the weights and biases using gradient descent
                self.W -= learning_rate * (dW + reg_strength * self.W)
                self.b -= learning_rate * db

            # Compute the training and validation accuracies for this epoch
            train_acc = np.mean(self.predict(X_train) == y_train)
            val_acc = np.mean(self.predict(X_val) == y_val)

            # Print the training and validation accuracies for this epoch
            print("Epoch {}: training accuracy = {:.4f}, validation accuracy = {:.4f}".format(epoch+1, train_acc, val_acc))

            # Check if the validation accuracy has improved
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                epochs_since_last_improvement = 0
            else:
                epochs_since_last_improvement += 1

            # Check if training should be stopped due to lack of improvement in the validation accuracy
            if epochs_since_last_improvement >= early_stopping_patience:
                print("Validation accuracy has not improved for {} epochs. Stopping training...".format(epochs_since_last_improvement))
                break

        print("Training complete. Best validation accuracy = {:.4f}".format(best_val_acc))

# Without PCA

In [7]:
(x_train, y_train), (x_test, y_test) = get_2D_normalised()

In [17]:
softmax_clf = SoftmaxClassifier(num_classes=10, input_size=3072)
softmax_clf.train_early(x_train, y_train, num_epochs=50, batch_size=32, early_stopping_patience=10)

Epoch 1: training accuracy = 0.2047, validation accuracy = 0.1996
Epoch 2: training accuracy = 0.3327, validation accuracy = 0.3208
Epoch 3: training accuracy = 0.3442, validation accuracy = 0.3366
Epoch 4: training accuracy = 0.2966, validation accuracy = 0.2884
Epoch 5: training accuracy = 0.3165, validation accuracy = 0.3018
Epoch 6: training accuracy = 0.3630, validation accuracy = 0.3360
Epoch 7: training accuracy = 0.3190, validation accuracy = 0.3092
Epoch 8: training accuracy = 0.3399, validation accuracy = 0.3230
Epoch 9: training accuracy = 0.3262, validation accuracy = 0.3162
Epoch 10: training accuracy = 0.3474, validation accuracy = 0.3278
Epoch 11: training accuracy = 0.3224, validation accuracy = 0.3100
Epoch 12: training accuracy = 0.3597, validation accuracy = 0.3400
Epoch 13: training accuracy = 0.4030, validation accuracy = 0.3784
Epoch 14: training accuracy = 0.3848, validation accuracy = 0.3570
Epoch 15: training accuracy = 0.3693, validation accuracy = 0.3516
Epoc

In [18]:
test_preds = softmax_clf.predict(x_test)

accuracy = np.mean(test_preds == y_test)
print("Test accuracy:", accuracy)

Test accuracy: 0.3596


# With PCA

In [6]:
(x_train_pca, y_train_pca), (x_test_pca, y_test_pca) = get_dimensionlly_reduced(components=1024, needed=1024)

In [7]:
softmax_pca = SoftmaxClassifier(num_classes=10, input_size=1024)
softmax_pca.train_early(x_train_pca, y_train_pca, num_epochs=50, batch_size=32, early_stopping_patience=10)

Epoch 1: training accuracy = 0.3196, validation accuracy = 0.3036
Epoch 2: training accuracy = 0.3592, validation accuracy = 0.3396
Epoch 3: training accuracy = 0.3654, validation accuracy = 0.3370
Epoch 4: training accuracy = 0.3213, validation accuracy = 0.2996
Epoch 5: training accuracy = 0.3650, validation accuracy = 0.3300
Epoch 6: training accuracy = 0.3599, validation accuracy = 0.3332
Epoch 7: training accuracy = 0.3574, validation accuracy = 0.3264
Epoch 8: training accuracy = 0.3580, validation accuracy = 0.3266
Epoch 9: training accuracy = 0.3893, validation accuracy = 0.3540
Epoch 10: training accuracy = 0.3618, validation accuracy = 0.3230
Epoch 11: training accuracy = 0.3665, validation accuracy = 0.3262
Epoch 12: training accuracy = 0.3214, validation accuracy = 0.3078
Epoch 13: training accuracy = 0.3575, validation accuracy = 0.3244
Epoch 14: training accuracy = 0.3696, validation accuracy = 0.3188
Epoch 15: training accuracy = 0.3575, validation accuracy = 0.3242
Epoc

In [9]:
test_preds_pca = softmax_pca.predict(x_test_pca)

accuracy_pca = np.mean(test_preds_pca == y_test_pca)
print("Test accuracy:", accuracy_pca)

Test accuracy: 0.3427
