# Logistic regression and key insights

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Binary logistic regression trained with batch gradient descent
class GDLogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    tolerance : float
        Early-stopping threshold: training stops once the absolute change of
        the bias gradient and of *every* weight-gradient component between
        two consecutive iterations is below this value.
    regularization : {None, "ridge", "lasso", "elasticnet"}
        Penalty applied to the weights (the bias is never penalised). Any
        other value means no penalty.
    alpha : float
        Penalty strength; the penalty gradient is scaled by
        ``alpha / n_samples``.
    l1_ratio : float
        Share of the L1 term in the elastic-net penalty (0 = pure L2,
        1 = pure L1). Only used when ``regularization == "elasticnet"``.
    max_iter : int
        Maximum number of gradient steps.

    Attributes set by ``fit``
    -------------------------
    weights, bias : learned parameters.
    train_losses, val_losses : per-iteration binary cross-entropy (data term
        only, the penalty is not included).
    """

    def __init__(self, learning_rate=0.01, tolerance=1e-8, regularization=None, alpha=0.01, l1_ratio=0.5, max_iter=100):
        self.learning_rate = learning_rate
        self.tolerance = tolerance
        self.regularization = regularization
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.max_iter = max_iter

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise."""
        # Clipping keeps np.exp inside the float64 range; beyond +/-500 the
        # sigmoid is already indistinguishable from 0 or 1.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def binary_crossentropy(self, y_true, y_pred):
        """Return the mean binary cross-entropy between labels and probabilities."""
        # Keep probabilities strictly inside (0, 1) so log() stays finite.
        y_pred = np.clip(y_pred, 1e-10, 1 - 1e-10)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def _penalty_gradient(self, n_samples):
        """Return the regularization term added to the weight gradient."""
        scale = self.alpha / n_samples
        if self.regularization == "ridge":
            return scale * self.weights
        if self.regularization == "lasso":
            return scale * np.sign(self.weights)
        if self.regularization == "elasticnet":
            l1_part = self.l1_ratio * np.sign(self.weights)
            l2_part = (1 - self.l1_ratio) * self.weights
            return scale * (l1_part + l2_part)
        return 0.0

    def fit(self, X, y, X_val=None, y_val=None):
        """Fit weights and bias on ``(X, y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of n_samples labels in {0, 1}
        X_val, y_val : optional validation set; when both are given the
            validation loss is recorded after every update in ``val_losses``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-vector y cannot broadcast against predictions.
        y = np.asarray(y, dtype=float).ravel()
        track_validation = X_val is not None and y_val is not None
        if track_validation:
            X_val = np.asarray(X_val, dtype=float)
            y_val = np.asarray(y_val, dtype=float).ravel()

        n_samples, n_features = X.shape

        self.bias = 0.0
        self.weights = np.zeros(n_features)

        previous_db = 0.0
        previous_dw = np.zeros(n_features)

        self.train_losses = []
        self.val_losses = []

        for _ in range(self.max_iter):
            probabilities = self.sigmoid(X @ self.weights + self.bias)
            error = probabilities - y

            # Gradients of the mean cross-entropy (plus penalty on weights).
            db = np.sum(error) / n_samples
            dw = X.T @ error / n_samples + self._penalty_gradient(n_samples)

            self.bias -= self.learning_rate * db
            self.weights -= self.learning_rate * dw

            # Training loss is evaluated on the predictions made before
            # this step's update.
            self.train_losses.append(self.binary_crossentropy(y, probabilities))

            if track_validation:
                val_probabilities = self.sigmoid(X_val @ self.weights + self.bias)
                self.val_losses.append(self.binary_crossentropy(y_val, val_probabilities))

            # Stop once no gradient component changed by more than the
            # tolerance. The comparison must be element-wise *before* the
            # reduction; reducing first compares a boolean to the tolerance.
            db_change = np.abs(db - previous_db)
            dw_change = np.abs(dw - previous_dw)
            if db_change < self.tolerance and np.all(dw_change < self.tolerance):
                break

            previous_db = db
            previous_dw = dw

    def predict(self, X_test):
        """Return the predicted class (0 or 1) for each row of ``X_test``."""
        X_test = np.asarray(X_test, dtype=float)
        probabilities = self.sigmoid(X_test @ self.weights + self.bias)
        # Probabilities of exactly 0.5 are assigned to class 1.
        return np.where(probabilities < 0.5, 0, 1)

    def evaluate(self, X_test, y_test):
        """Return accuracy, precision, recall and F1 for the positive class.

        A metric whose denominator is zero (no predicted positives, no actual
        positives, or precision + recall == 0) is reported as 0.0 instead of
        NaN.
        """
        y_pred = self.predict(X_test)
        y_true = np.asarray(y_test).ravel()

        true_positives = np.sum((y_pred == 1) & (y_true == 1))
        predicted_positives = np.sum(y_pred == 1)
        actual_positives = np.sum(y_true == 1)

        accuracy = np.mean(y_pred == y_true)
        precision = true_positives / predicted_positives if predicted_positives else 0.0
        recall = true_positives / actual_positives if actual_positives else 0.0
        if precision + recall:
            f1_score = 2 * (precision * recall) / (precision + recall)
        else:
            f1_score = 0.0

        return {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1_score
        }

In [3]:
import numpy as np

class SoftmaxRegression:
    """Multinomial (softmax) logistic regression fitted with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    tolerance : float
        Early-stopping threshold: training stops once no bias- or
        weight-gradient component changed by more than this value between
        two consecutive iterations.
    max_iter : int
        Maximum number of gradient steps.
    regularization : {None, "ridge", "lasso", "elasticnet"}
        Penalty applied to the weights (the bias is never penalised). Any
        other value means no penalty.
    alpha : float
        Penalty strength; penalty terms are scaled by ``alpha / n_samples``.
    l1_ratio : float
        Share of the L1 term in the elastic-net penalty (0 = pure L2,
        1 = pure L1).

    Attributes set by ``fit``
    -------------------------
    classes_ : sorted unique labels seen during ``fit``.
    weights : array of shape (n_features, n_classes).
    bias : array of shape (n_classes,).
    losses : per-iteration objective (cross-entropy plus penalty).
    """

    def __init__(self, learning_rate=0.1, tolerance=1e-4, max_iter=1000,
                 regularization=None, alpha=0.01, l1_ratio=0.5):
        self.learning_rate = learning_rate
        self.tolerance = tolerance
        self.max_iter = max_iter
        self.regularization = regularization
        self.alpha = alpha
        self.l1_ratio = l1_ratio

    def softmax(self, predictions):
        """Return row-wise softmax probabilities for a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing.
        shifted = predictions - np.max(predictions, axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_true, y_pred):
        """Return the mean categorical cross-entropy for one-hot ``y_true``."""
        y_pred = np.clip(y_pred, 1e-10, 1 - 1e-10)  # Avoiding log(0)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def one_hot_encode(self, y, n_classes):
        """Return a (len(y), n_classes) one-hot matrix.

        ``y`` must contain integer class indices in ``[0, n_classes)``.
        """
        y = np.asarray(y).ravel()
        one_hot = np.zeros((y.size, n_classes))
        one_hot[np.arange(y.size), y] = 1
        return one_hot

    def _penalty_gradient(self, n_samples):
        """Return the regularization term added to the weight gradient."""
        scale = self.alpha / n_samples
        if self.regularization == "ridge":
            return scale * self.weights
        if self.regularization == "lasso":
            return scale * np.sign(self.weights)
        if self.regularization == "elasticnet":
            l1_term = self.l1_ratio * np.sign(self.weights)
            l2_term = (1 - self.l1_ratio) * self.weights
            return scale * (l1_term + l2_term)
        return 0.0

    def _penalty_loss(self, n_samples):
        """Return the penalty value whose gradient is ``_penalty_gradient``."""
        scale = self.alpha / n_samples
        l1_norm = np.sum(np.abs(self.weights))
        # The 1/2 factor makes d/dw of the L2 term equal to w, matching the
        # gradient above (and making elastic-net with l1_ratio=0 equal ridge).
        half_l2_norm = 0.5 * np.sum(self.weights ** 2)
        if self.regularization == "ridge":
            return scale * half_l2_norm
        if self.regularization == "lasso":
            return scale * l1_norm
        if self.regularization == "elasticnet":
            return scale * (self.l1_ratio * l1_norm + (1 - self.l1_ratio) * half_l2_norm)
        return 0.0

    def fit(self, X, y):
        """Fit weights and bias on ``(X, y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of n_samples class labels. Labels may be arbitrary
            sortable values; they are mapped to column indices internally
            and mapped back by ``predict``.
        """
        X = np.asarray(X, dtype=float)
        # return_inverse yields, for each sample, the index of its label in
        # the sorted unique labels, so labels need not be 0..K-1.
        self.classes_, class_indices = np.unique(np.asarray(y).ravel(), return_inverse=True)
        n_classes = self.classes_.size
        n_samples, n_features = X.shape

        one_hot_y = self.one_hot_encode(class_indices, n_classes)

        self.bias = np.zeros(n_classes)
        self.weights = np.zeros((n_features, n_classes))

        previous_db = np.zeros(n_classes)
        previous_dw = np.zeros((n_features, n_classes))

        self.losses = []

        for _ in range(self.max_iter):
            probabilities = self.softmax(X @ self.weights + self.bias)
            error = probabilities - one_hot_y

            db = np.sum(error, axis=0) / n_samples
            dw = X.T @ error / n_samples + self._penalty_gradient(n_samples)

            # Record the objective before stepping so that the data term and
            # the penalty term are evaluated at the same weights.
            data_loss = self.cross_entropy_loss(one_hot_y, probabilities)
            self.losses.append(data_loss + self._penalty_loss(n_samples))

            self.bias -= self.learning_rate * db
            self.weights -= self.learning_rate * dw

            db_change = np.abs(db - previous_db)
            dw_change = np.abs(dw - previous_dw)
            if np.all(db_change < self.tolerance) and np.all(dw_change < self.tolerance):
                break

            previous_db = db
            previous_dw = dw

    def predict(self, X_test):
        """Return the most probable class label for each row of ``X_test``."""
        X_test = np.asarray(X_test, dtype=float)
        probabilities = self.softmax(X_test @ self.weights + self.bias)
        most_prob_class = np.argmax(probabilities, axis=1)
        # If parameters were set without calling fit, no label mapping
        # exists; fall back to the raw column indices.
        classes = getattr(self, "classes_", None)
        if classes is None:
            return most_prob_class
        return classes[most_prob_class]

    def evaluate(self, X_test, y_test):
        """Return the classification accuracy on ``(X_test, y_test)``."""
        y_pred = self.predict(X_test)
        accuracy = np.mean(y_pred == np.asarray(y_test).ravel())
        return accuracy
