In [9]:
from enum import Enum
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.utils import resample

In [10]:
class Regularization(Enum):
    """Weight penalty selected by the cost and gradient functions in this file."""

    # No penalty term is added.
    NONE = 0
    # Penalty on the sum of absolute weight values.
    L1 = 1
    # Penalty on half the sum of squared weights.
    L2 = 2
    # Blend of the L1 and L2 penalties, mixed by `elastic_ratio`.
    ELASTIC_NET = 3

In [11]:
# LOGISTIC REGRESSION

def sigmoid(x: np.ndarray) -> np.ndarray:
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The result is built from ``exp(-|x|)``, which is always in ``(0, 1]``, so
    inputs of large magnitude cannot overflow ``np.exp`` (the naive formula
    overflows for large negative ``x``).

    Args:
        x: Scalar or array of real values.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form; for x < 0 the algebraically equal
    # form exp(x) / (1 + exp(x)) is used so the exponent is never positive.
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # `[()]` turns a 0-d result back into a NumPy scalar (as the naive formula
    # returned for scalar input) and leaves n-d arrays unchanged.
    return out[()]


def predict(X: np.ndarray, weights: np.ndarray) -> np.ndarray:
    """Return the sigmoid of the linear scores ``X @ weights``.

    Args:
        X: Feature matrix.
        weights: Weight vector multiplied with ``X``.

    Returns:
        The sigmoid applied element-wise to ``X @ weights``.
    """
    linear_scores = X @ weights
    probabilities = sigmoid(linear_scores)
    return probabilities


def compute_cost(X: np.ndarray,
                 y: np.ndarray,
                 w: np.ndarray,
                 regularization: Regularization = Regularization.NONE,
                 lambda_: float = 0.01,
                 elastic_ratio: float = 0.5) -> np.ndarray:
    """Return the summed cross-entropy of ``predict(X, w)`` against ``y``, plus a penalty.

    The loss is a sum over samples (it is not divided by the sample count).
    Predictions are clipped away from 0 and 1 so the logarithms stay finite.

    Args:
        X: Feature matrix.
        y: Target labels.
        w: Weight vector.
        regularization: Which penalty to add; any value other than L1, L2 or
            ELASTIC_NET adds none.
        lambda_: Multiplier applied to the penalty.
        elastic_ratio: Share of the L1 term in the elastic-net penalty; the
            L2 term gets ``1 - elastic_ratio``.

    Returns:
        The scalar cost.
    """
    eps = 1e-10
    probs = np.clip(predict(X, w), eps, 1 - eps)
    log_likelihood = np.sum(y * np.log(probs) + (1 - y) * np.log(1 - probs))
    data_cost = -log_likelihood

    if regularization == Regularization.L1:
        return data_cost + lambda_ * np.sum(np.abs(w))

    if regularization == Regularization.L2:
        return data_cost + lambda_ * np.sum(w ** 2) / 2

    if regularization == Regularization.ELASTIC_NET:
        abs_part = elastic_ratio * np.sum(np.abs(w))
        square_part = (1 - elastic_ratio) * np.sum(w ** 2) / 2
        return data_cost + lambda_ * (abs_part + square_part)

    return data_cost


def compute_gradient(X: np.ndarray,
                     y: np.ndarray,
                     w: np.ndarray,
                     regularization: Regularization = Regularization.NONE,
                     lambda_=0.01,
                     elastic_ratio=0.5) -> np.ndarray:
    """Return the gradient of the summed cross-entropy, plus the penalty gradient.

    Args:
        X: Feature matrix.
        y: Target labels.
        w: Weight vector.
        regularization: Which penalty gradient to add; any value other than
            L1, L2 or ELASTIC_NET adds none.
        lambda_: Multiplier applied to the penalty gradient.
        elastic_ratio: Share of the L1 part in the elastic-net gradient; the
            L2 part gets ``1 - elastic_ratio``.

    Returns:
        ``X.T @ (sigmoid(X @ w) - y)`` with the selected penalty gradient added.
    """
    residual = sigmoid(X @ w) - y
    grad = X.T @ residual

    if regularization == Regularization.L1:
        # sign(w) is used as the (sub)gradient of |w|.
        penalty_grad = lambda_ * np.sign(w)
    elif regularization == Regularization.L2:
        penalty_grad = lambda_ * w
    elif regularization == Regularization.ELASTIC_NET:
        abs_part = elastic_ratio * np.sign(w)
        square_part = (1 - elastic_ratio) * w
        penalty_grad = lambda_ * (abs_part + square_part)
    else:
        return grad

    grad += penalty_grad
    return grad


def compute_confusion_matrix(X: np.ndarray, y: np.ndarray, w: np.ndarray, threshold=0.5) -> tuple[int, int, int, int]:
    """Count confusion-matrix cells for ``predict(X, w)`` cut at ``threshold``.

    A sample is predicted positive when its predicted value is
    ``>= threshold``. Only labels equal to 1 or 0 are counted.

    Returns:
        ``(true_positives, false_positives, false_negatives, true_negatives)``.
    """
    predicted_positive = predict(X, w) >= threshold
    predicted_negative = ~predicted_positive
    actual_positive = y == 1
    actual_negative = y == 0

    tp = np.sum(predicted_positive & actual_positive)
    fp = np.sum(predicted_positive & actual_negative)
    fn = np.sum(predicted_negative & actual_positive)
    tn = np.sum(predicted_negative & actual_negative)

    return tp, fp, fn, tn


def compute_accuracy(X: np.ndarray, y: np.ndarray, w: np.ndarray, threshold=0.5) -> float:
    """Return the fraction of samples whose thresholded prediction equals ``y``.

    A sample is predicted positive when ``predict(X, w) >= threshold``; the
    number of matches is divided by ``y.shape[0]``.
    """
    predicted_positive = predict(X, w) >= threshold
    n_correct = np.sum(predicted_positive == y)
    n_samples = y.shape[0]
    return n_correct / n_samples


def compute_precision(X: np.ndarray, y: np.ndarray, w: np.ndarray, threshold=0.5) -> float:
    """Return precision, TP / (TP + FP), of the thresholded predictions.

    Args:
        X: Feature matrix.
        y: Target labels.
        w: Weight vector.
        threshold: Cut-off passed to ``compute_confusion_matrix``.

    Returns:
        The precision, or 0.0 when no sample is predicted positive (the ratio
        would otherwise be 0/0 and yield NaN).
    """
    true_positives, false_positives, _, _ = compute_confusion_matrix(X, y, w, threshold)

    predicted_positives = true_positives + false_positives
    if predicted_positives == 0:
        return 0.0

    return true_positives / predicted_positives


def compute_recall(X: np.ndarray, y: np.ndarray, w: np.ndarray, threshold=0.5) -> float:
    """Return recall, TP / (TP + FN), of the thresholded predictions.

    Args:
        X: Feature matrix.
        y: Target labels.
        w: Weight vector.
        threshold: Cut-off passed to ``compute_confusion_matrix``.

    Returns:
        The recall, or 0.0 when ``y`` contains no positive sample (the ratio
        would otherwise be 0/0 and yield NaN).
    """
    true_positives, _, false_negatives, _ = compute_confusion_matrix(X, y, w, threshold)

    actual_positives = true_positives + false_negatives
    if actual_positives == 0:
        return 0.0

    return true_positives / actual_positives


def compute_f1_score(X: np.ndarray, y: np.ndarray, w: np.ndarray, threshold=0.5) -> float:
    """Return the F1 score of the thresholded predictions.

    Uses F1 = 2*TP / (2*TP + FP + FN), which is algebraically equal to
    2 * precision * recall / (precision + recall) but needs only one
    confusion matrix and has a single denominator to guard.

    Args:
        X: Feature matrix.
        y: Target labels.
        w: Weight vector.
        threshold: Cut-off passed to ``compute_confusion_matrix``.

    Returns:
        The F1 score, or 0.0 when there are no true positives, false
        positives or false negatives (the ratio would otherwise be NaN).
    """
    true_positives, false_positives, false_negatives, _ = compute_confusion_matrix(X, y, w, threshold)

    denominator = 2 * true_positives + false_positives + false_negatives
    if denominator == 0:
        return 0.0

    return 2 * true_positives / denominator


def cross_validate_logistic_regression(X: np.ndarray,
                                       y: np.ndarray,
                                       lambdas: list[float],
                                       alpha: float,
                                       no_iterations: int,
                                       regularization: Regularization = Regularization.NONE,
                                       elastic_ratio=0.5,
                                       k=5) -> float:
    """Pick the regularization strength with the lowest k-fold validation loss.

    For every candidate in ``lambdas`` a model is trained on each training
    fold and scored on the held-out fold. The score is the per-sample
    cross-entropy WITHOUT the penalty term: the penalty scales with the
    candidate itself, so including it would make scores incomparable across
    candidates and favour small values regardless of fit.

    Args:
        X: Feature matrix, indexed by row.
        y: Target labels, indexed like ``X``.
        lambdas: Candidate regularization strengths; must be non-empty.
        alpha: Learning rate passed to ``train``.
        no_iterations: Number of gradient steps passed to ``train``.
        regularization: Penalty type used during training.
        elastic_ratio: L1 share used during training with ELASTIC_NET.
        k: Number of folds.

    Returns:
        The candidate with the lowest mean validation loss; the first
        candidate wins ties.

    Raises:
        IndexError: If ``lambdas`` is empty.
    """
    best_lambda = lambdas[0]
    best_cost = float('inf')

    # The splitter is seeded, so every candidate would get the same folds
    # anyway; build them once instead of once per candidate.
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    folds = list(kf.split(X))

    for lambda_ in lambdas:
        total_cost = 0.0

        for train_index, val_index in folds:
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # NOTE(review): initial weights come from NumPy's global RNG, so the
            # selection is reproducible only if the caller seeds it.
            w = np.random.randn(X_train.shape[1])
            w = train(X_train, y_train, w, alpha, no_iterations, regularization, lambda_, elastic_ratio)[0]

            # Score on the data term only, normalised by fold size so folds
            # of unequal length contribute equally.
            fold_cost = compute_cost(X_val, y_val, w, Regularization.NONE)
            total_cost += fold_cost / len(val_index)

        avg_cost = total_cost / k
        if avg_cost < best_cost:
            best_cost = avg_cost
            best_lambda = lambda_

    return best_lambda


def train(X: np.ndarray,
          y: np.ndarray,
          w: np.ndarray,
          alpha: float,
          no_iterations: int,
          regularization: Regularization = Regularization.NONE,
          lambda_=0.01,
          elastic_ratio=0.5
          ) -> tuple[
    np.ndarray, list[float], list[float], list[float], list[float], list[float]]:
    """Run ``no_iterations`` gradient-descent steps and record training metrics.

    Each step computes the gradient on all of ``X`` and ``y``, moves the
    weights by ``-alpha * gradient``, then evaluates the updated weights on
    the same data. The ``w`` passed in is not modified in place.

    Returns:
        ``(w, costs, accuracies, precisions, recalls, f1_scores)`` where each
        list holds one entry per step.
    """
    cost_log, accuracy_log, precision_log, recall_log, f1_log = [], [], [], [], []
    penalty_args = (regularization, lambda_, elastic_ratio)

    for _ in range(no_iterations):
        w = w - alpha * compute_gradient(X, y, w, *penalty_args)

        cost_log.append(compute_cost(X, y, w, *penalty_args))
        accuracy_log.append(compute_accuracy(X, y, w))
        precision_log.append(compute_precision(X, y, w))
        recall_log.append(compute_recall(X, y, w))
        f1_log.append(compute_f1_score(X, y, w))

    return w, cost_log, accuracy_log, precision_log, recall_log, f1_log

def bagging_logistic_regression(X_train: np.ndarray,
                                y_train: np.ndarray,
                                X_test: np.ndarray,
                                y_test: np.ndarray,
                                n_estimators: int,
                                alpha: float,
                                no_iterations: int,
                                regularization: Regularization = Regularization.NONE,
                                lambda_: float = 0.01,
                                elastic_ratio: float = 0.5) -> np.ndarray:
    """Train a bagged ensemble and report which test samples it classifies correctly.

    Each of the ``n_estimators`` members is trained from zero weights on a
    resample (with replacement) of the training data. The members' predicted
    values on ``X_test`` are averaged and cut at 0.5.

    Returns:
        A boolean array that is True where the ensemble's 0/1 label equals
        ``y_test``. NOTE(review): this is a correctness mask, not the
        predicted labels - confirm that callers expect that.
    """
    member_probs = []

    for _ in range(n_estimators):
        X_boot, y_boot = resample(X_train, y_train, replace=True)

        initial_w = np.zeros(X_boot.shape[1])
        fitted_w = train(X_boot, y_boot, initial_w, alpha, no_iterations, regularization, lambda_, elastic_ratio)[0]

        member_probs.append(predict(X_test, fitted_w))

    mean_probs = np.mean(member_probs, axis=0)
    ensemble_labels = (mean_probs >= 0.5).astype(int)

    return ensemble_labels == y_test

def test(X: np.ndarray, y: np.ndarray, w: np.ndarray) -> tuple[float, float, float, float, float]:
    """Evaluate weights ``w`` on ``(X, y)`` with the default metric settings.

    The cost is computed with the default (no) regularization.

    Returns:
        ``(cost, accuracy, precision, recall, f1_score)``.
    """
    return (
        compute_cost(X, y, w),
        compute_accuracy(X, y, w),
        compute_precision(X, y, w),
        compute_recall(X, y, w),
        compute_f1_score(X, y, w),
    )

In [12]:
# TRAIN AND TEST ALGORITHM

def evaluate_algorithm(X_train,
                       y_train,
                       X_test,
                       y_test,
                       w,
                       alpha,
                       no_iterations,
                       regularization: Regularization = Regularization.NONE,
                       lambda_=0.01,
                       elastic_ratio=0.5
                       ):
    """Train on the training set, print test metrics and plot the training curves.

    The weights returned by ``train`` are evaluated once on the test set; the
    printed cost is the test cost divided by the number of test rows. One
    figure per training metric (cost, accuracy, precision, recall, F1 score)
    is then shown, each plotted against the iteration index.

    NOTE(review): the labels read "Stochastic Gradient Descent", while
    ``train`` computes each gradient on the full training set - confirm the
    wording is intended.
    """
    w, costs_train, accuracies_train, precisions_train, recalls_train, f1_scores_train = train(
        X_train, y_train, w, alpha, no_iterations, regularization, lambda_, elastic_ratio
    )

    cost_test, accuracy_test, precision_test, recall_test, f1_score_test = test(X_test, y_test, w)

    print(f'Stochastic Gradient Descent average cost on test: {cost_test / X_test.shape[0]}')
    print(f'Stochastic Gradient Descent accuracy on test: {accuracy_test}')
    print(f'Stochastic Gradient Descent precision on test: {precision_test}')
    print(f'Stochastic Gradient Descent recall on test: {recall_test}')
    print(f'Stochastic Gradient Descent F1 Score on test: {f1_score_test}')

    # One figure per metric recorded on the training set, in this order.
    training_curves = (
        (costs_train, 'Cost'),
        (accuracies_train, 'Accuracy'),
        (precisions_train, 'Precision'),
        (recalls_train, 'Recall'),
        (f1_scores_train, 'F1 Score'),
    )

    for series, y_label in training_curves:
        plt.figure()
        plt.plot(series)
        plt.xlabel('Iterations')
        plt.ylabel(y_label)
        plt.title('Stochastic Gradient Descent')
        plt.show()