<a href="https://colab.research.google.com/github/inderpreetsingh01/ml_machine_coding/blob/main/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Binary classification
# Batch Gradient Descent
# Sigmoid activation
# Cross-Entropy Loss

In [1]:
import numpy as np

In [None]:
# Feature	Included
# Sigmoid Activation	✅
# Gradient Descent	✅
# Binary Classification	✅
# Log Loss & Accuracy Metrics	✅
# Predict Probabilities	✅
# Predict Class Labels	✅

class LogisticRegression:
    """
    Basic Logistic Regression using Batch Gradient Descent.
    No Regularization, No Scaling, No Early Stopping.

    Parameters
    ----------
    lr : float
        Learning rate (step size) applied to every gradient update.
    n_iters : int
        Number of full-batch gradient descent steps run by ``fit``.

    Attributes
    ----------
    weights : np.ndarray or None
        Per-feature coefficients, shape (n_features,); ``None`` until ``fit``.
    bias : float or None
        Intercept term; ``None`` until ``fit``.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        """
        Sigmoid activation function, 1 / (1 + exp(-z)).

        The input is clipped to [-500, 500] first: exp(500) is still finite
        in float64, so values inside that range are unchanged while large
        negative inputs no longer overflow ``np.exp``.
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """
        Train Logistic Regression model.

        X: array-like, shape (n_samples, n_features)
        y: array-like of 0/1 labels with n_samples entries. It is flattened
           so that a column vector of shape (n_samples, 1) does not broadcast
           against the predictions into an (n_samples, n_samples) matrix.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            y_pred = self._sigmoid(np.dot(X, self.weights) + self.bias)

            # Gradients of the mean cross-entropy loss
            residual = y_pred - y
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Update weights
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """
        Predict probabilities for class 1.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return self._sigmoid(linear_model)

    def predict(self, X, threshold=0.5):
        """
        Predict binary class labels (1 where probability >= threshold).
        """
        y_pred_proba = self.predict_proba(X)
        return (y_pred_proba >= threshold).astype(int)

    def evaluate(self, X, y_true):
        """
        Evaluate model using accuracy and log loss.

        Returns a dict with keys "accuracy" (at the 0.5 threshold) and
        "log_loss" (mean binary cross-entropy).
        """
        y_true = np.asarray(y_true).ravel()
        # One forward pass serves both the probabilities and the labels.
        y_pred_proba = self.predict_proba(X)
        y_pred_labels = (y_pred_proba >= 0.5).astype(int)
        accuracy = np.mean(y_true == y_pred_labels)
        # The 1e-15 offset keeps log() finite when a probability saturates.
        log_loss = -np.mean(
            y_true * np.log(y_pred_proba + 1e-15) + (1 - y_true) * np.log(1 - y_pred_proba + 1e-15)
        )
        return {"accuracy": accuracy, "log_loss": log_loss}


# ==== Synthetic Test Case ====
# Gaussian 2-D points labelled by a known linear rule (2*x1 - x2 - 0.5 > 0),
# so the fitted model can be sanity-checked against the generating rule.
np.random.seed(42)
n_samples = 100
X = np.random.randn(n_samples, 2)
true_weights, bias = np.array([2, -1]), -0.5
linear_combination = X.dot(true_weights) + bias
y = np.where(linear_combination > 0, 1, 0)  # Generate binary labels

print("==== Training Basic Logistic Regression ====")
model = LogisticRegression(n_iters=1000, lr=0.1)
model.fit(X, y)
metrics = model.evaluate(X, y)  # scored on the training data itself
print("Weights:", model.weights)
print("Bias:", model.bias)
print("Metrics:", metrics)

In [None]:
# L1 and L2 regularization
class LogisticRegression:
    """
    Logistic Regression with L1 (Lasso) and L2 (Ridge) Regularization.

    Parameters
    ----------
    lr : float
        Learning rate for batch gradient descent.
    n_iters : int
        Number of gradient descent steps.
    l1_lambda : float
        Strength of the L1 penalty (adds ``l1_lambda * sign(w)`` to the gradient).
    l2_lambda : float
        Strength of the L2 penalty (adds ``l2_lambda * w`` to the gradient).

    The bias term is not regularized.
    """

    def __init__(self, lr=0.01, n_iters=1000, l1_lambda=0.0, l2_lambda=0.0):
        self.lr = lr
        self.n_iters = n_iters
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        """
        Sigmoid activation. The input is clipped to [-500, 500] so that
        ``np.exp`` cannot overflow; values inside that range are unchanged.
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """
        Fit weights and bias by batch gradient descent on the regularized loss.

        X: array-like, shape (n_samples, n_features)
        y: array-like of 0/1 labels; flattened to shape (n_samples,) so a
           column vector does not broadcast into an (n, n) residual.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            y_pred = self._sigmoid(np.dot(X, self.weights) + self.bias)

            residual = y_pred - y
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Add Regularization (weights only; the bias is left unpenalized)
            dw += self.l2_lambda * self.weights  # L2
            dw += self.l1_lambda * np.sign(self.weights)  # L1 subgradient (0 at w == 0)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of X."""
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return self._sigmoid(linear_model)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= threshold."""
        return (self.predict_proba(X) >= threshold).astype(int)

    def evaluate(self, X, y_true):
        """
        Return {"accuracy", "log_loss"} for X against y_true.

        Accuracy uses the 0.5 threshold; log loss is the unpenalized mean
        binary cross-entropy.
        """
        y_true = np.asarray(y_true).ravel()
        # Single forward pass shared by both metrics.
        y_pred_proba = self.predict_proba(X)
        y_pred_labels = (y_pred_proba >= 0.5).astype(int)
        accuracy = np.mean(y_true == y_pred_labels)
        # The 1e-15 offset keeps log() finite for saturated probabilities.
        log_loss = -np.mean(
            y_true * np.log(y_pred_proba + 1e-15) + (1 - y_true) * np.log(1 - y_pred_proba + 1e-15)
        )
        return {"accuracy": accuracy, "log_loss": log_loss}


# ==== Synthetic Test Case ====
# Same linearly-labelled Gaussian data as before, now fitted with both penalties.
np.random.seed(42)
n_samples = 100
X = np.random.randn(n_samples, 2)
true_weights, bias = np.array([2, -1]), -0.5
linear_combination = X.dot(true_weights) + bias
y = np.where(linear_combination > 0, 1, 0)

print("==== Logistic Regression with L1 & L2 Regularization ====")
model = LogisticRegression(l1_lambda=0.1, l2_lambda=0.1, n_iters=1000, lr=0.1)
model.fit(X, y)
metrics = model.evaluate(X, y)  # scored on the training data itself
print("Weights:", model.weights)
print("Bias:", model.bias)
print("Metrics:", metrics)

In [None]:
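# ✅ Why Feature Scaling?
# Speeds up convergence of gradient descent.
# Prevents features with large magnitudes from dominating the gradient.
# Especially important for Logistic Regression trained with gradient descent and
# L1/L2 penalties, because the penalty on a weight depends on its feature's scale.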

In [None]:
class LogisticRegression:
    """
    Logistic Regression with:
    - L1 & L2 Regularization
    - Feature Scaling (Standardization)

    Parameters
    ----------
    lr : float
        Learning rate for batch gradient descent.
    n_iters : int
        Number of gradient descent steps.
    l1_lambda, l2_lambda : float
        Strengths of the L1 / L2 penalties applied to the weights (not the bias).

    The learned ``weights`` and ``bias`` live in the standardized feature
    space; ``predict_proba`` re-applies the training mean/std to new inputs.
    """

    def __init__(self, lr=0.01, n_iters=1000, l1_lambda=0.0, l2_lambda=0.0):
        self.lr = lr
        self.n_iters = n_iters
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.weights = None
        self.bias = None
        self.scaler_mean = None
        self.scaler_std = None

    def _sigmoid(self, z):
        """
        Sigmoid activation. The input is clipped to [-500, 500] so that
        ``np.exp`` cannot overflow; values inside that range are unchanged.
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def _scale_features(self, X, fit=True):
        """
        Standardize features (Z-score normalization).

        With ``fit=True`` the per-column mean and standard deviation are
        computed from X and stored; with ``fit=False`` the stored statistics
        are reused, so new data is scaled the same way as the training data.
        """
        X = np.asarray(X, dtype=float)
        if fit:
            self.scaler_mean = np.mean(X, axis=0)
            self.scaler_std = np.std(X, axis=0)
            self.scaler_std[self.scaler_std == 0] = 1  # Avoid division by zero
        return (X - self.scaler_mean) / self.scaler_std

    def fit(self, X, y):
        """
        Standardize X, then fit weights and bias by batch gradient descent.

        y is flattened to shape (n_samples,) so a column vector does not
        broadcast against the predictions.
        """
        X_scaled = self._scale_features(X, fit=True)
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X_scaled.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            y_pred = self._sigmoid(np.dot(X_scaled, self.weights) + self.bias)

            residual = y_pred - y
            dw = (1 / n_samples) * np.dot(X_scaled.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Regularization (weights only)
            dw += self.l2_lambda * self.weights
            dw += self.l1_lambda * np.sign(self.weights)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return the class-1 probability for each row of (unscaled) X."""
        X_scaled = self._scale_features(X, fit=False)
        linear_model = np.dot(X_scaled, self.weights) + self.bias
        return self._sigmoid(linear_model)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= threshold."""
        return (self.predict_proba(X) >= threshold).astype(int)

    def evaluate(self, X, y_true):
        """
        Return {"accuracy", "log_loss"} for X against y_true.

        Accuracy uses the 0.5 threshold; log loss is the unpenalized mean
        binary cross-entropy.
        """
        # Coerce so that list labels support the ``1 - y_true`` arithmetic below.
        y_true = np.asarray(y_true).ravel()
        # Scale and run the forward pass once; labels are derived from it.
        y_pred_proba = self.predict_proba(X)
        y_pred_labels = (y_pred_proba >= 0.5).astype(int)
        accuracy = np.mean(y_true == y_pred_labels)
        log_loss = -np.mean(
            y_true * np.log(y_pred_proba + 1e-15) + (1 - y_true) * np.log(1 - y_pred_proba + 1e-15)
        )
        return {"accuracy": accuracy, "log_loss": log_loss}


# ==== Synthetic Test Case ====
# Features are stretched by 10x so that the built-in standardization matters.
np.random.seed(42)
n_samples = 100
X = 10 * np.random.randn(n_samples, 2)  # Scaled features to test scaling
true_weights, bias = np.array([2, -1]), -0.5
linear_combination = X.dot(true_weights) + bias
y = np.where(linear_combination > 0, 1, 0)

print("==== Logistic Regression with L1/L2 Regularization and Feature Scaling ====")
model = LogisticRegression(l1_lambda=0.1, l2_lambda=0.1, n_iters=1000, lr=0.1)
model.fit(X, y)
metrics = model.evaluate(X, y)  # scored on the training data itself
print("Weights:", model.weights)
print("Bias:", model.bias)
print("Metrics:", metrics)

In [None]:
def test_logistic_regression():
    """
    Smoke-test ``LogisticRegression`` on linearly separable synthetic data.

    Labels follow y = 1 if 2*x1 - x2 - 0.5 > 0 else 0, with features
    stretched by 10x so feature scaling is exercised too. The model is
    trained and scored on the same data; the test asserts accuracy >= 0.95
    and log loss < 0.2.
    """
    np.random.seed(42)
    X = 10 * np.random.randn(100, 2)  # Test scaling as well
    true_weights, bias = np.array([2, -1]), -0.5
    scores = X.dot(true_weights) + bias
    y = (scores > 0).astype(int)

    hyperparams = {"lr": 0.1, "n_iters": 1000, "l1_lambda": 0.1, "l2_lambda": 0.1}
    model = LogisticRegression(**hyperparams)
    model.fit(X, y)
    metrics = model.evaluate(X, y)

    # ✅ Test assertions
    assert metrics['accuracy'] >= 0.95, f"Expected high accuracy, got {metrics['accuracy']}"
    assert metrics['log_loss'] < 0.2, f"Expected low log loss, got {metrics['log_loss']}"
    print("✅ Logistic Regression Test Passed:", metrics)

In [None]:
# Loss function calculation and early stopping
class LogisticRegression:
    """
    Logistic Regression with:
    - L1 & L2 Regularization
    - Feature Scaling
    - Binary Cross-Entropy Loss Calculation
    - Early Stopping

    Parameters
    ----------
    lr : float
        Learning rate for batch gradient descent.
    n_iters : int
        Maximum number of gradient descent steps.
    l1_lambda, l2_lambda : float
        Strengths of the L1 / L2 penalties applied to the weights (not the bias).
    early_stopping : bool
        If True, stop once the training objective has failed to improve by
        more than ``tol`` for ``patience`` consecutive iterations.
    tol : float
        Minimum decrease of the objective that counts as an improvement.
    patience : int
        Number of consecutive non-improving iterations tolerated.
    """

    def __init__(self, lr=0.01, n_iters=1000, l1_lambda=0.0, l2_lambda=0.0,
                 early_stopping=False, tol=1e-4, patience=10):
        self.lr = lr
        self.n_iters = n_iters
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.early_stopping = early_stopping
        self.tol = tol
        self.patience = patience
        self.weights = None
        self.bias = None
        self.scaler_mean = None
        self.scaler_std = None

    def _sigmoid(self, z):
        """
        Sigmoid activation. The input is clipped to [-500, 500] so that
        ``np.exp`` cannot overflow; values inside that range are unchanged.
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def _scale_features(self, X, fit=True):
        """
        Z-score standardize X. ``fit=True`` computes and stores the column
        mean/std; ``fit=False`` reuses the stored statistics.
        """
        X = np.asarray(X, dtype=float)
        if fit:
            self.scaler_mean = np.mean(X, axis=0)
            self.scaler_std = np.std(X, axis=0)
            self.scaler_std[self.scaler_std == 0] = 1  # constant columns: avoid 0-division
        return (X - self.scaler_mean) / self.scaler_std

    def _binary_cross_entropy(self, y_true, y_pred):
        """Mean binary cross-entropy; the 1e-15 offset keeps log() finite."""
        y_true = np.asarray(y_true, dtype=float).ravel()
        return -np.mean(
            y_true * np.log(y_pred + 1e-15) + (1 - y_true) * np.log(1 - y_pred + 1e-15)
        )

    def _compute_loss(self, y_true, y_pred):
        """
        Training objective: Binary Cross Entropy Loss + Regularization,
        using the current ``self.weights`` for the penalty terms.
        """
        bce_loss = self._binary_cross_entropy(y_true, y_pred)
        l1_penalty = self.l1_lambda * np.sum(np.abs(self.weights))
        l2_penalty = self.l2_lambda * 0.5 * np.sum(self.weights ** 2)
        return bce_loss + l1_penalty + l2_penalty

    def fit(self, X, y):
        """
        Standardize X and fit by batch gradient descent, optionally stopping
        early when the regularized training objective stops improving.
        """
        X_scaled = self._scale_features(X, fit=True)
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X_scaled.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        best_loss = np.inf
        patience_counter = 0

        for i in range(self.n_iters):
            y_pred = self._sigmoid(np.dot(X_scaled, self.weights) + self.bias)

            # Calculate Loss for Early Stopping *before* the update, so the
            # predictions and the penalty terms both refer to the same weights.
            if self.early_stopping:
                loss = self._compute_loss(y, y_pred)
                if loss + self.tol < best_loss:
                    best_loss = loss
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter >= self.patience:
                        print(f"Early stopping at iteration {i}, Loss: {loss:.4f}")
                        break

            # Gradients
            residual = y_pred - y
            dw = (1 / n_samples) * np.dot(X_scaled.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Regularization Gradients (weights only)
            dw += self.l2_lambda * self.weights
            dw += self.l1_lambda * np.sign(self.weights)

            # Update
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return the class-1 probability for each row of (unscaled) X."""
        X_scaled = self._scale_features(X, fit=False)
        linear_model = np.dot(X_scaled, self.weights) + self.bias
        return self._sigmoid(linear_model)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= threshold."""
        return (self.predict_proba(X) >= threshold).astype(int)

    def evaluate(self, X, y_true):
        """
        Return {"accuracy", "log_loss"} for X against y_true.

        "log_loss" is the plain mean binary cross-entropy. The regularization
        penalty belongs to the training objective, not to a predictive
        metric, so it is excluded here.
        """
        y_true = np.asarray(y_true).ravel()
        y_pred_proba = self.predict_proba(X)
        y_pred_labels = (y_pred_proba >= 0.5).astype(int)
        accuracy = np.mean(y_true == y_pred_labels)
        log_loss = self._binary_cross_entropy(y_true, y_pred_proba)
        return {"accuracy": accuracy, "log_loss": log_loss}

In [None]:
# multiclass classification problem

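# ✅ Key Notes:
# Uses one-hot encoding of the labels internally for gradient computation.
# Loss is printed every 100 iterations (and on the last one) for monitoring.
# Numerically stable softmax: the row maximum is subtracted before exp() to avoid overflow.
# With two classes, softmax is equivalent to sigmoid logistic regression, so the
# same classifier handles both binary and multi-class problems.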

class SoftmaxRegression:
    """
    Multiclass Logistic Regression (Softmax Classifier)
    - Supports L2 Regularization
    - Feature Scaling (Standardization)

    Parameters
    ----------
    lr : float
        Learning rate for batch gradient descent.
    n_iters : int
        Number of gradient descent steps.
    l2_lambda : float
        Strength of the L2 penalty on the weights (bias is not penalized).
    verbose : bool
        If True (default), print the training loss every 100 iterations and
        on the final iteration.

    Class labels must be integers 0..n_classes-1; the number of classes is
    taken as ``max(y) + 1``.
    """
    def __init__(self, lr=0.1, n_iters=1000, l2_lambda=0.0, verbose=True):
        self.lr = lr
        self.n_iters = n_iters
        self.l2_lambda = l2_lambda
        self.verbose = verbose
        self.weights = None
        self.bias = None
        self.scaler_mean = None
        self.scaler_std = None

    def _softmax(self, z):
        """Row-wise softmax; the row max is subtracted first for stability."""
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  # Stability
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _scale_features(self, X, fit=True):
        """
        Z-score standardize X. ``fit=True`` computes and stores the column
        mean/std; ``fit=False`` reuses the stored statistics.
        """
        X = np.asarray(X, dtype=float)
        if fit:
            self.scaler_mean = np.mean(X, axis=0)
            self.scaler_std = np.std(X, axis=0)
            self.scaler_std[self.scaler_std == 0] = 1  # constant columns: avoid 0-division
        return (X - self.scaler_mean) / self.scaler_std

    def _compute_loss(self, y_true, y_pred):
        """
        Cross-Entropy Loss with L2 Regularization.

        y_true: integer class indices, shape (n_samples,)
        y_pred: predicted probabilities, shape (n_samples, n_classes)
        """
        y_true = np.asarray(y_true).astype(int).ravel()
        n_samples = y_true.shape[0]
        # Pick each sample's probability for its true class.
        log_probs = -np.log(y_pred[np.arange(n_samples), y_true] + 1e-15)
        ce_loss = np.mean(log_probs)
        l2_penalty = self.l2_lambda * 0.5 * np.sum(self.weights ** 2)
        return ce_loss + l2_penalty

    def fit(self, X, y):
        """
        Standardize X and fit the softmax classifier by batch gradient descent.

        X: array-like, shape (n_samples, n_features)
        y: array-like of integer class indices with n_samples entries
        """
        X_scaled = self._scale_features(X, fit=True)
        y = np.asarray(y).astype(int).ravel()
        n_samples, n_features = X_scaled.shape
        n_classes = int(np.max(y)) + 1

        self.weights = np.zeros((n_features, n_classes))
        self.bias = np.zeros(n_classes)

        # One-hot encoding of y; it never changes, so build it once.
        y_one_hot = np.zeros((n_samples, n_classes))
        y_one_hot[np.arange(n_samples), y] = 1

        for i in range(self.n_iters):
            logits = np.dot(X_scaled, self.weights) + self.bias
            y_pred = self._softmax(logits)

            # Optional: Loss monitoring. Reported before the update so that the
            # predictions and the L2 penalty refer to the same weights.
            if self.verbose and (i % 100 == 0 or i == self.n_iters - 1):
                loss = self._compute_loss(y, y_pred)
                print(f"Iteration {i}, Loss: {loss:.4f}")

            # Gradients
            residual = y_pred - y_one_hot
            dw = (1 / n_samples) * np.dot(X_scaled.T, residual)
            db = (1 / n_samples) * np.sum(residual, axis=0)

            # L2 Regularization
            dw += self.l2_lambda * self.weights

            # Update weights & bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return class probabilities, shape (n_samples, n_classes)."""
        X_scaled = self._scale_features(X, fit=False)
        logits = np.dot(X_scaled, self.weights) + self.bias
        return self._softmax(logits)

    def predict(self, X):
        """Return the most probable class index for each row of X."""
        y_pred = self.predict_proba(X)
        return np.argmax(y_pred, axis=1)

    def evaluate(self, X, y_true):
        """Return {"accuracy": fraction of rows whose predicted class equals y_true}."""
        y_pred = self.predict(X)
        accuracy = np.mean(np.asarray(y_true).ravel() == y_pred)
        return {"accuracy": accuracy}

In [None]:
np.random.seed(42)

# Synthetic dataset (3 classes)
# Labels are drawn independently of the features, so this run only checks
# that training executes end to end.
n_samples, n_features, n_classes = 300, 2, 3
X = 2 * np.random.randn(n_samples, n_features)
y = np.random.choice(n_classes, size=n_samples)

model = SoftmaxRegression(l2_lambda=0.01, n_iters=1000, lr=0.1)
model.fit(X, y)
metrics = model.evaluate(X, y)  # scored on the training data itself
print("Weights:\n", model.weights)
print("Bias:", model.bias)
print("Metrics:", metrics)

In [None]:
# multilabel classification
import numpy as np

class MultiLabelLogisticRegression:
    """
    Multilabel Logistic Regression using Sigmoid per class
    - Each label is treated as a binary classification task
    - Supports L2 Regularization
    - Feature Scaling included

    Parameters
    ----------
    lr : float
        Learning rate for batch gradient descent.
    n_iters : int
        Number of gradient descent steps.
    l2_lambda : float
        Strength of the L2 penalty on the weights (bias is not penalized).
    verbose : bool
        If True (default), print the training loss every 100 iterations and
        on the final iteration.
    """

    def __init__(self, lr=0.1, n_iters=1000, l2_lambda=0.0, verbose=True):
        self.lr = lr
        self.n_iters = n_iters
        self.l2_lambda = l2_lambda
        self.verbose = verbose
        self.weights = None
        self.bias = None
        self.scaler_mean = None
        self.scaler_std = None

    def _sigmoid(self, z):
        """
        Element-wise sigmoid. The input is clipped to [-500, 500] so that
        ``np.exp`` cannot overflow; values inside that range are unchanged.
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def _scale_features(self, X, fit=True):
        """
        Z-score standardize X. ``fit=True`` computes and stores the column
        mean/std; ``fit=False`` reuses the stored statistics.
        """
        X = np.asarray(X, dtype=float)
        if fit:
            self.scaler_mean = np.mean(X, axis=0)
            self.scaler_std = np.std(X, axis=0)
            self.scaler_std[self.scaler_std == 0] = 1  # constant columns: avoid 0-division
        return (X - self.scaler_mean) / self.scaler_std

    def _compute_loss(self, y_true, y_pred):
        """
        Binary Cross Entropy averaged over every (sample, label) entry,
        plus the L2 penalty on the current weights.
        """
        y_true = np.asarray(y_true, dtype=float)
        bce_loss = -np.mean(y_true * np.log(y_pred + 1e-15) +
                            (1 - y_true) * np.log(1 - y_pred + 1e-15))
        l2_penalty = self.l2_lambda * 0.5 * np.sum(self.weights ** 2)
        return bce_loss + l2_penalty

    def fit(self, X, Y):
        """
        X: shape (n_samples, n_features)
        Y: shape (n_samples, n_classes), binary 0/1
        """
        X_scaled = self._scale_features(X, fit=True)
        Y = np.asarray(Y, dtype=float)
        n_samples, n_features = X_scaled.shape
        n_classes = Y.shape[1]

        self.weights = np.zeros((n_features, n_classes))
        self.bias = np.zeros(n_classes)

        for i in range(self.n_iters):
            linear = np.dot(X_scaled, self.weights) + self.bias
            y_pred = self._sigmoid(linear)

            # Report the loss before the update so the predictions and the
            # L2 penalty refer to the same weights.
            if self.verbose and (i % 100 == 0 or i == self.n_iters - 1):
                loss = self._compute_loss(Y, y_pred)
                print(f"Iteration {i}, Loss: {loss:.4f}")

            # Gradients (one independent logistic model per label column)
            residual = y_pred - Y
            dw = (1 / n_samples) * np.dot(X_scaled.T, residual)
            db = (1 / n_samples) * np.sum(residual, axis=0)

            # L2 regularization
            dw += self.l2_lambda * self.weights

            # Update
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return per-label probabilities, shape (n_samples, n_classes)."""
        X_scaled = self._scale_features(X, fit=False)
        logits = np.dot(X_scaled, self.weights) + self.bias
        return self._sigmoid(logits)

    def predict(self, X, threshold=0.5):
        """Return a 0/1 matrix: 1 where a label's probability >= threshold."""
        proba = self.predict_proba(X)
        return (proba >= threshold).astype(int)

    def evaluate(self, X, Y_true, threshold=0.5):
        """
        Return {"accuracy"}: the fraction of individual (sample, label)
        entries predicted correctly, not the exact-match ratio per sample.
        """
        Y_pred = self.predict(X, threshold)
        accuracy = np.mean(Y_pred == np.asarray(Y_true))
        return {"accuracy": accuracy}

In [None]:
np.random.seed(42)

# 100 samples, 5 features, 3 labels
# Labels are random and unrelated to X, so this only exercises the API.
X = np.random.randn(100, 5)
Y = np.random.randint(low=0, high=2, size=(100, 3))  # multilabel ground truth

model = MultiLabelLogisticRegression(n_iters=1000, lr=0.1)
model.fit(X, Y)

proba = model.predict_proba(X)  # per-label probabilities
preds = model.predict(X)        # thresholded 0/1 predictions
metrics = model.evaluate(X, Y)
print("Metrics:", metrics)

In [None]:
def compute_auc_riemann(y_true, y_scores, method="left", n_thresholds=100):
    """
    Approximate ROC-AUC with a Riemann sum over a fixed threshold grid.

    Parameters
    ----------
    y_true : array-like of 0/1 labels.
    y_scores : array-like of scores. Thresholds are spread evenly over
        [0, 1], so scores are expected to be probabilities in that range.
    method : "left" uses the TPR at the left end of each FPR interval; any
        other value uses the right end.
    n_thresholds : number of evenly spaced thresholds (default 100).

    Returns
    -------
    float
        The approximate area under the ROC curve. Rates are defined as 0
        when a class is absent, so the result is 0.0 if there are no
        negatives.
    """
    y_true = np.asarray(y_true).ravel()
    y_scores = np.asarray(y_scores, dtype=float).ravel()

    # Sorted scores per class let every threshold be answered by binary search.
    pos_scores = np.sort(y_scores[y_true == 1])
    neg_scores = np.sort(y_scores[y_true == 0])

    # Walk thresholds from high to low: both FPR and TPR are then
    # non-decreasing, which is the order in which the ROC curve is traced.
    # Sorting by FPR alone leaves points with tied FPR in arbitrary order and
    # can pick the wrong TPR as the height at a step.
    thresholds = np.linspace(0, 1, n_thresholds)[::-1]

    # searchsorted(..., side="left") counts scores strictly below the
    # threshold; the remainder are predicted positive (score >= threshold).
    tp = pos_scores.size - np.searchsorted(pos_scores, thresholds, side="left")
    fp = neg_scores.size - np.searchsorted(neg_scores, thresholds, side="left")

    tpr = tp / pos_scores.size if pos_scores.size else np.zeros(thresholds.size)
    fpr = fp / neg_scores.size if neg_scores.size else np.zeros(thresholds.size)

    widths = np.diff(fpr)
    heights = tpr[:-1] if method == "left" else tpr[1:]
    return float(np.sum(widths * heights))


np.random.seed(0)
y_true = np.array([0, 1, 1, 0, 1, 0, 1])
y_scores = np.array([0.1, 0.8, 0.65, 0.2, 0.95, 0.4, 0.9])

# NOTE(review): this cell previously called `compute_roc_auc`, which is not
# defined in this notebook; it now uses the Riemann-sum AUC defined above.
auc = compute_auc_riemann(y_true, y_scores)
print(f"AUC Score: {auc:.4f}")

In [None]:
def _roc_points(y_true, y_scores):
    """Return (fpr, tpr) arrays tracing the exact ROC curve from (0, 0)."""
    y_true = np.asarray(y_true).ravel()
    y_scores = np.asarray(y_scores, dtype=float).ravel()
    if y_true.size == 0:
        return np.zeros(1), np.zeros(1)

    # Visit samples from the highest score to the lowest; each distinct score
    # acts as a threshold.
    order = np.argsort(-y_scores, kind="mergesort")
    sorted_scores = y_scores[order]
    sorted_true = y_true[order]

    # Emit one ROC point per run of tied scores (at the run's last sample),
    # since tied samples cross the threshold together.
    last_of_run = np.r_[np.nonzero(np.diff(sorted_scores))[0], sorted_true.size - 1]
    tp = np.cumsum(sorted_true == 1)[last_of_run]
    fp = np.cumsum(sorted_true == 0)[last_of_run]

    n_pos, n_neg = tp[-1], fp[-1]
    # A missing class gives an all-zero rate instead of dividing by zero.
    tpr = tp / n_pos if n_pos else np.zeros(tp.size)
    fpr = fp / n_neg if n_neg else np.zeros(fp.size)
    return np.r_[0.0, fpr], np.r_[0.0, tpr]


def compute_auc_trapezoid(y_true, y_scores):
    """
    ROC-AUC by the trapezoidal rule over the exact ROC curve.

    y_true: array-like of 0/1 labels
    y_scores: array-like of scores (higher means more likely positive)

    The ROC points are computed inside this block; the previous version
    relied on a `compute_roc_auc` helper that is not defined in this notebook.
    The area is summed explicitly rather than with `np.trapz`, which newer
    NumPy releases deprecate in favour of `np.trapezoid`.

    Returns a float; 0.0 when either class is absent or the input is empty.
    """
    fpr, tpr = _roc_points(y_true, y_scores)
    return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if the denominator is 0."""
    return float(numerator) / float(denominator) if denominator else 0.0


def _trapezoid_roc_auc(y_true, y_proba):
    """Area under the exact ROC curve (trapezoidal rule); 0.0 if a class is absent."""
    y_proba = np.asarray(y_proba, dtype=float).ravel()
    if y_true.size == 0:
        return 0.0

    # Sweep thresholds from the highest score down; tied scores move together.
    order = np.argsort(-y_proba, kind="mergesort")
    sorted_scores = y_proba[order]
    sorted_true = y_true[order]
    last_of_run = np.r_[np.nonzero(np.diff(sorted_scores))[0], sorted_true.size - 1]
    tp = np.cumsum(sorted_true == 1)[last_of_run]
    fp = np.cumsum(sorted_true == 0)[last_of_run]
    if tp[-1] == 0 or fp[-1] == 0:
        return 0.0

    # Prepend the (0, 0) origin so the curve starts where nothing is predicted positive.
    tpr = np.r_[0.0, tp / tp[-1]]
    fpr = np.r_[0.0, fp / fp[-1]]
    return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))


def binary_classification_metrics(y_true, y_pred, y_proba=None):
    """
    Compute accuracy, precision, recall and F1 for binary 0/1 labels.

    y_true: array-like of true 0/1 labels
    y_pred: array-like of predicted 0/1 labels
    y_proba: optional array-like of positive-class scores; when given, the
        result also contains "roc_auc".

    Returns a dict with keys "accuracy", "precision", "recall", "f1_score"
    and, if ``y_proba`` is provided, "roc_auc". Ratios with a zero
    denominator are reported as 0.0; accuracy of an empty input is NaN.

    The ROC-AUC is computed inside this block; the previous version called
    `roc_curve`, which is never imported in this notebook.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    # Confusion-matrix counts
    TP = int(np.sum((y_true == 1) & (y_pred == 1)))
    TN = int(np.sum((y_true == 0) & (y_pred == 0)))
    FP = int(np.sum((y_true == 0) & (y_pred == 1)))
    FN = int(np.sum((y_true == 1) & (y_pred == 0)))

    n_samples = len(y_true)
    accuracy = (TP + TN) / n_samples if n_samples else float("nan")
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)

    metrics = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

    # ROC-AUC (only if probabilities provided)
    if y_proba is not None:
        metrics["roc_auc"] = _trapezoid_roc_auc(y_true, y_proba)

    return metrics


# Example binary test
# Seven hand-written samples; labels are obtained by thresholding the scores at 0.5.
np.random.seed(0)
y_true = np.array([0, 1, 1, 0, 1, 0, 1])
y_proba = np.array([0.1, 0.8, 0.65, 0.2, 0.95, 0.4, 0.9])
y_pred = np.where(y_proba >= 0.5, 1, 0)

metrics = binary_classification_metrics(y_true, y_pred, y_proba=y_proba)
print(metrics)

In [None]:
def top_k_accuracy(y_true, y_pred_proba, k=3):
    """
    Fraction of samples whose true class is among the k highest-scored classes.

    y_true: shape (n_samples,), integer class indices
    y_pred_proba: shape (n_samples, n_classes)
    k: number of top classes to consider; must be >= 1. If k exceeds the
       number of classes, every class counts as a top-k prediction.

    Returns a float in [0, 1]; 0.0 for empty input.

    Raises
    ------
    ValueError
        If k < 1. Previously k=0 sliced ``[:, -0:]``, which selects *all*
        classes and silently reported an accuracy of 1.0.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    y_true = np.asarray(y_true).ravel()
    if y_true.size == 0:
        return 0.0

    # argsort is ascending, so the last k columns hold the top-k class indices.
    top_k_preds = np.argsort(np.asarray(y_pred_proba), axis=1)[:, -k:]
    hits = (top_k_preds == y_true[:, np.newaxis]).any(axis=1)
    return float(hits.mean())

# Example multiclass test
# Each row's largest probability sits on its true class.
y_true = np.array([0, 1, 2])
y_pred_proba = np.array(
    [
        [0.7, 0.2, 0.1],
        [0.1, 0.8, 0.1],
        [0.2, 0.2, 0.6],
    ]
)
print("Top-1 Accuracy:", top_k_accuracy(y_true, y_pred_proba, k=1))
print("Top-2 Accuracy:", top_k_accuracy(y_true, y_pred_proba, k=2))
