In [183]:
import numpy as np

class BinaryLogisticRegression:
    """Binary logistic regression trained with mini-batch stochastic gradient descent.

    A label equal to 1 denotes the positive class; any other label value
    (0, -1, ...) is treated as the negative class, both when training and
    when computing the loss.
    """

    def __init__(self, n_features, batch_size, conv_threshold, alpha=0.03):
        """Initialize the binary logistic regression model.
        @param n_features: Number of features in the dataset, an integer.
        @param batch_size: Batch size for training, an integer.
        @param conv_threshold: Convergence threshold for training, a float.
        @param alpha: Learning rate used for each gradient step, a float (default 0.03).
        @return: None
        """
        self.n_features = n_features
        self.weights = np.zeros(n_features + 1)  # extra element for bias
        self.alpha = alpha
        self.batch_size = batch_size
        self.conv_threshold = conv_threshold

    def sigmoid(self, z):
        '''
        Perform sigmoid operation without overflowing for large |z|
        @params:
            z: the input to which sigmoid will be applied
        @return:
            an array with sigmoid applied elementwise (a scalar for scalar input).
        '''
        z = np.asarray(z, dtype=float)
        # exp(-|z|) lies in (0, 1], so it cannot overflow however large |z| is.
        decay = np.exp(-np.abs(z))
        # z >= 0 uses 1 / (1 + e^-z); z < 0 uses the equivalent e^z / (1 + e^z).
        result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
        # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as they are.
        return result[()]

    def train(self, X, Y):
        '''
        Trains the model using stochastic gradient descent
        @params:
            X: a 2D Numpy array where each row contains an example, padded by 1 column for the bias
            Y: a 1D Numpy array containing the corresponding labels for each example
        @return:
            num_epochs: integer representing the number of epochs taken to reach convergence
        '''
        n_examples = X.shape[0]
        # The gradient (prediction - target) needs targets in {0, 1}. Map labels the
        # same way loss() does (1 -> positive, anything else -> negative) so that
        # {0, 1} and {-1, 1} encodings train identically.
        targets = (np.asarray(Y) == 1).astype(float)

        epochs = 0
        # Loss before any update; afterwards each epoch's loss is carried forward
        # instead of being recomputed at the start of the next epoch.
        previous_loss = self.loss(X, targets)

        while True:
            epochs += 1
            # shuffle examples and targets with the same permutation
            indices = np.arange(n_examples)
            np.random.shuffle(indices)
            X = X[indices]
            targets = targets[indices]

            for start in range(0, n_examples, self.batch_size):
                X_batch = X[start:start + self.batch_size]
                targets_batch = targets[start:start + self.batch_size]
                # summed gradient of the log loss over the batch
                residual = self.sigmoid(X_batch @ self.weights) - targets_batch
                grad = X_batch.T @ residual
                # Average over the actual batch length: the last batch of an epoch
                # may hold fewer than batch_size examples.
                self.weights -= (self.alpha * grad) / len(X_batch)

            epoch_loss = self.loss(X, targets)
            if abs(epoch_loss - previous_loss) < self.conv_threshold:
                return epochs
            previous_loss = epoch_loss

    def loss(self, X, Y):
        '''
        Returns the total log loss on some dataset (X, Y), divided by the number of examples.
        @params:
            X: 2D Numpy array where each row contains an example, padded by 1 column for the bias
            Y: 1D Numpy array containing the corresponding labels for each example
        @return:
            A float number which is the average loss of the model on the dataset
        @raises:
            ZeroDivisionError: if the dataset contains no examples
        '''
        n_examples = X.shape[0]
        if n_examples == 0:
            raise ZeroDivisionError("cannot average the loss over an empty dataset")

        # label 1 -> +1, every other label -> -1
        signs = np.where(np.asarray(Y) == 1, 1.0, -1.0)
        margins = signs * (X @ self.weights)
        # log(1 + exp(-m)) written as logaddexp(0, -m) stays finite for large |m|
        total_loss = np.sum(np.logaddexp(0.0, -margins))
        return total_loss / n_examples

    def predict(self, X):
        '''
        Compute predictions based on the learned weights and examples X
        @params:
            X: a 2D Numpy array where each row contains an example, padded by 1 column for the bias
        @return:
            A 1D Numpy array with one element for each row in X containing the predicted class.
        '''
        # probability of the positive class for every row of X
        probabilities = self.sigmoid(X @ self.weights)
        return np.where(probabilities >= 0.5, 1, 0)

    def accuracy(self, X, Y):
        '''
        Outputs the accuracy of the trained model on a given testing dataset X and labels Y.
        @params:
            X: a 2D Numpy array where each row contains an example, padded by 1 column for the bias
            Y: a 1D Numpy array containing the corresponding labels for each example
        @return:
            a float number indicating accuracy (between 0 and 1)
        '''
        # fraction of rows whose predicted class (0 or 1) equals the given label
        return np.mean(self.predict(X) == Y)


In [184]:
class AllPairsLogisticRegression:
    """Multi-class classifier built from one binary classifier per pair of classes.

    Each pairwise classifier votes for one of its two classes; the class with
    the most votes is the prediction.
    """

    def __init__(self, n_classes, binary_classifier_class, n_features, batch_size, conv_threshold):
        """
        Initialize the all-pairs logistic regression model.
        @param n_classes: Number of classes in the dataset, an integer.
        @param binary_classifier_class: Class for binary logistic regression, a class object.
        @param n_features: Number of features in the dataset, an integer.
        @param batch_size: Batch size for training the binary classifiers, an integer.
        @param conv_threshold: Convergence threshold for training, a float.
        @return: None
        """
        self.n_classes = n_classes
        # maps (class_i, class_j) with class_i < class_j to its trained binary classifier
        self.classifiers = {}
        self.n_features = n_features
        self.batch_size = batch_size
        self.conv_threshold = conv_threshold
        self.binary_classifier_class = binary_classifier_class

    def train(self, X, Y):
        """
        Train the all-pairs logistic regression model by training binary classifiers
        for each pair of classes in the dataset.
        @param X: Input features of the dataset, a numpy array of shape (n_samples, n_features).
        @param Y: Labels of the dataset, a numpy array of shape (n_samples,).
        @return: None
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        for class_i in range(self.n_classes):
            is_class_i = Y == class_i
            for class_j in range(class_i + 1, self.n_classes):
                # keep only the examples belonging to either class of the pair
                in_pair = is_class_i | (Y == class_j)
                if not in_pair.any():
                    # neither class occurs in the data: nothing to train on
                    continue
                SX = X[in_pair]
                # class_i -> 1 and class_j -> 0, matching the 1/0 outputs that
                # predict() turns into votes for class_i / class_j
                SY = is_class_i[in_pair].astype(int)
                classifier = self.binary_classifier_class(
                    n_features=self.n_features,
                    batch_size=self.batch_size,
                    conv_threshold=self.conv_threshold
                )
                classifier.train(SX, SY)
                self.classifiers[(class_i, class_j)] = classifier

    def predict(self, X):
        """
        Predict the class labels for the input data using the trained classifiers.
        @param X: Input features to classify, a numpy array of shape (n_samples, n_features).
        @return: Predicted class labels, a numpy array of shape (n_samples,).
        """
        n_samples = X.shape[0]
        votes = np.zeros((n_samples, self.n_classes), dtype=int)
        for (class_i, class_j), classifier in self.classifiers.items():
            predictions = classifier.predict(X)
            # a prediction of 1 is a vote for class_i, a prediction of 0 for class_j
            votes[:, class_i] += (predictions == 1)
            votes[:, class_j] += (predictions == 0)
        # argmax returns the lowest class index when several classes tie
        return np.argmax(votes, axis=1)

    def accuracy(self, X, Y):
        """
        Calculate the accuracy of the model on the input data and labels.
        @param X: Input features of the dataset, a numpy array of shape (n_samples, n_features).
        @param Y: True labels of the dataset, a numpy array of shape (n_samples,).
        @return: Accuracy of the model as a float between 0 and 1.
        """
        predictions = self.predict(X)
        correct_predictions = np.sum(predictions == Y)
        return correct_predictions / len(Y)

In [185]:
import pytest
import random
# Sets random seed for testing purposes
random.seed(0)
np.random.seed(0)

# BINARY LOGISTIC REGRESSION
# test model with 1 predictor, batch size of 1 and conv threshold of 1e-3 (only 2 classes bc binary)
test_model1 = BinaryLogisticRegression(n_features=1, batch_size=1, conv_threshold=1e-3)

# test data with bias term (second column is the constant 1)
x_bias = np.array([[1, 1], [2, 1], [3, 1], [4, 1], [5, 1], [1.2, 1]])
# labels
y = np.array([0, 0, 1, 1, 1, 0])

# calc init loss: with all-zero weights every example contributes log(2) ~= 0.693
initial_loss = test_model1.loss(x_bias, y)
assert initial_loss == pytest.approx(0.693, 0.001)

# checking that weights have the correct shape (1 feature + bias)
assert test_model1.weights.shape == (2,)

# train model
test_model1.train(x_bias, y)

# test model by inputting training data --> accuracy should be 100%
x_bias_test = np.array([[1, 1], [2, 1], [3, 1], [4, 1], [5, 1], [1.2, 1]])
predictions = test_model1.predict(x_bias_test)
expected_predictions = np.array([0, 0, 1, 1, 1, 0])
assert np.all(predictions == expected_predictions)
accuracy = test_model1.accuracy(x_bias_test, expected_predictions)
assert accuracy == pytest.approx(1.0, 0.01)

# input new unseen testing data --> accuracy should also be 100%
x_bias_test = np.array([[1.5, 1], [3.5, 1]])
predictions = test_model1.predict(x_bias_test)
expected_predictions = np.array([0, 1])
assert np.all(predictions == expected_predictions)
accuracy = test_model1.accuracy(x_bias_test, expected_predictions)
assert accuracy == pytest.approx(1.0, 0.01)

# testing the gradient calculation manually [as implemented in the code]:
# zero weights give a prediction of 0.5, so for label 0 the gradient is 0.5 * x
weights = np.zeros(2)
x_sample = np.array([1, 1])
y_sample = 0
z = np.dot(weights, x_sample)
prediction = 1 / (1 + np.exp(-z))
gradient = (prediction - y_sample) * x_sample
test_gradient = np.array([0.5, 0.5])
assert gradient == pytest.approx(test_gradient, 0.01)

# testing case with one positive data point and testing to see behavior of weights
test_model2 = BinaryLogisticRegression(n_features=1, batch_size=1, conv_threshold=1e-2)
x_train = np.array([[1, 1]])
y_train = np.array([1])
test_model2.train(x_train, y_train)
# the assertions must inspect the model trained above (test_model2)
assert test_model2.weights[0] > 0 # positive
assert test_model2.weights[1] > 0 # bias also positive