# Logistic Regression

In [5]:
#------------ No New Package --------------
import numpy as np
#---------------------------------------------------------

class LogisticRegression:
    """
    Logistic Regression classifier using Stochastic Gradient Descent (SGD)
    for binary classification problems.
    """

    def __init__(self, learning_rate=0.01, n_epochs=100, batch_size=32):
        """
        Initialize the Logistic Regression model.

        Parameters:
        -----------
        learning_rate : float
            The step size for updating model parameters
        n_epochs : int
            Number of passes through the training data
        batch_size : int
            Number of training examples to use in each gradient update
        """
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.w = None  # weights
        self.b = None  # bias

    def sigmoid(self, z):
        """
        Compute the sigmoid activation function.

        Formula:
        σ(z) = 1 / (1 + e^(-z))
        where z = wx + b

        Parameters:
        -----------
        z : array-like
            Input values

        Returns:
        --------
        array-like
            Sigmoid of input values
        """
        return 1/(1+ np.exp(-z))

    def initialize_parameters(self, n_features):
        """
        Initialize model parameters using Xavier initialization.

        Formula for Xavier initialization:
        w ~ N(0, sqrt(2/n_features))

        Parameters:
        -----------
        n_features : int
            Number of input features
        """
        # Xavier initialization for better convergence
        self.w = np.random.randn(n_features) * np.sqrt(2.0 / n_features)
        self.b = 0

    def compute_loss(self, y_true, y_pred):
        """
        Compute binary cross-entropy loss.

        Formula:
        L = -1/N * Σ(y * log(ŷ) + (1-y) * log(1-ŷ))
        where:
        - y is true label
        - ŷ is predicted probability
        - N is number of samples

        Parameters:
        -----------
        y_true : array-like
            True binary labels  
        y_pred : array-like
            Predicted probabilities

        Returns:
        --------
        float
            Average binary cross-entropy loss
        """
     
        loss = -np.mean(y_true * np.log(y_pred + 1e-15) + (1 - y_true) * np.log(1 - y_pred + 1e-15))
        return loss

    def compute_gradients(self, X_batch, y_batch, y_pred):
        """
        Compute gradients for weights and bias.

        Formulas:
        ∂L/∂w = 1/N * X^T * (ŷ - y)
        ∂L/∂b = 1/N * Σ(ŷ - y)
        where:
        - X is input features
        - y is true labels
        - ŷ is predicted probabilities
        - N is batch size

        Parameters:
        -----------
        X_batch : array-like
            Input features for current batch
        y_batch : array-like
            True labels for current batch
        y_pred : array-like
            Predicted probabilities for current batch

        Returns:
        --------
        tuple
            Gradients for weights and bias
        """
        n = y_batch.shape[0]
        error = y_pred - y_batch 
        dw = (1/n)* np.dot(X_batch.T, error)
        db = (1/n) * np.sum(error)
        return dw, db


    def fit(self, X, y):
        """
        Train the logistic regression model using mini-batch SGD.

        Process:
        1. Initialize parameters
        2. For each epoch:
            a. Shuffle data
            b. Split into mini-batches
            c. For each mini-batch:
                - Compute forward pass (sigmoid)
                - Compute gradients
                - Update parameters

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values
        """
        # Initialize Parameters
        n_samples, n_features = X.shape
        self.initialize_parameters(n_features)

        # Epoch Loop
        for epoch in range(self.n_epochs):
            # Shuffle the data
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            # Mini-batch training
            for i in range(0, n_samples, self.batch_size):
                
                # Get Batch Data
                X_batch = X[i:i + self.batch_size]
                y_batch = y[i:i + self.batch_size]

                # Forward pass
                z = np.dot(X_batch, self.w) + self.b
                y_pred = self.sigmoid(z)

                # Compute gradients
                dw, db = self.compute_gradients(X_batch, y_batch, y_pred)

                # Update parameters
                self.w -= self.learning_rate * dw
                self.b -= self.learning_rate * db       

            # Calculate and trace the loss
            pred = self.sigmoid(np.dot(X, self.w) + self.b)
            loss = self.compute_loss(y, pred)
            print(f"Epoch {epoch + 1}/{self.n_epochs}, Loss: {loss}")


    def predict_proba(self, X):
        """
        Predict class probabilities for input samples.

        Formula:
        P(y=1|x) = σ(wx + b)
        where σ is the sigmoid function

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples

        Returns:
        --------
        array-like of shape (n_samples,)
            Predicted probabilities
        """
        z = np.dot(X, self.w) + self.b
        return self.sigmoid(z)

    def predict(self, X, threshold=0.5):
        """
        Predict class labels for input samples.

        Formula:
        y = 1 if P(y=1|x) >= threshold else 0

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples
        threshold : float
            Classification threshold

        Returns:
        --------
        array-like of shape (n_samples,)
            Predicted class labels (0 or 1)
        """
        y_prob = self.predict_proba(X)
        return (y_prob >= threshold).astype(int)

In [6]:
# Test Code
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Generate synthetic data
np.random.seed(42)  # For reproducibility
n_samples = 1000
n_features = 10

# Generate random features
X = np.random.randn(n_samples, n_features)

# Generate binary target labels (0 or 1)
# Let's create labels based on a linear combination of features plus some noise
weights = np.random.randn(n_features)
bias = np.random.randn(1)
y_prob = 1 / (1 + np.exp(-(np.dot(X, weights) + bias)))  # Sigmoid to get probabilities
y = (y_prob > 0.5).astype(int).flatten()  # Convert probabilities to binary labels

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and fit the Logistic Regression model
model = LogisticRegression(learning_rate=0.01, n_epochs=100, batch_size=32)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Output the results
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:")
print(conf_matrix)


Epoch 1/100, Loss: 0.7710158504566229
Epoch 2/100, Loss: 0.7272705759876165
Epoch 3/100, Loss: 0.6873079361502843
Epoch 4/100, Loss: 0.6508780694633292
Epoch 5/100, Loss: 0.6177744178187602
Epoch 6/100, Loss: 0.5877167692359556
Epoch 7/100, Loss: 0.5604594821965195
Epoch 8/100, Loss: 0.535702362205049
Epoch 9/100, Loss: 0.5132625705296096
Epoch 10/100, Loss: 0.4928812983677808
Epoch 11/100, Loss: 0.47434697959347305
Epoch 12/100, Loss: 0.45746430459759746
Epoch 13/100, Loss: 0.44208171557269454
Epoch 14/100, Loss: 0.4280448218921059
Epoch 15/100, Loss: 0.41518947933501144
Epoch 16/100, Loss: 0.40339282918858976
Epoch 17/100, Loss: 0.39255358824001746
Epoch 18/100, Loss: 0.38256620562995663
Epoch 19/100, Loss: 0.37334290006042165
Epoch 20/100, Loss: 0.3648011974227194
Epoch 21/100, Loss: 0.35688138169906103
Epoch 22/100, Loss: 0.3495218492253023
Epoch 23/100, Loss: 0.3426588713511623
Epoch 24/100, Loss: 0.336255078502004
Epoch 25/100, Loss: 0.3302583139781538
Epoch 26/100, Loss: 0.32463

# Softmax Regression (Multinomial Logistic Regression)

In [28]:
import numpy as np

class SoftmaxRegression:
    """
    Softmax Regression classifier using Stochastic Gradient Descent (SGD)
    for multiclass classification problems.
    """

    def __init__(self, learning_rate=0.01, n_epochs=100, batch_size=32):
        """
        Initialize the Softmax Regression model.

        Parameters:
        -----------
        learning_rate : float
            The step size for updating model parameters
        n_epochs : int
            Number of passes through the training data
        batch_size : int
            Number of training examples to use in each gradient update
        """
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.W = None  # weights
        self.b = None  # biases

    def softmax(self, z):
        """
        Compute the softmax function.

        Formula:
        softmax(z) = exp(z) / Σ exp(z)
        where z = Wx + b

        Parameters:
        -----------
        z : array-like of shape (n_samples, n_classes)
            Input values

        Returns:
        --------
        array-like of shape (n_samples, n_classes)
            Softmax probabilities for each class
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def initialize_parameters(self, n_features, n_classes):
        """
        Initialize model parameters using Xavier initialization.

        Parameters:
        -----------
        n_features : int
            Number of input features
        n_classes : int
            Number of output classes
        """
        # Xavier initialization for weights
        self.W = np.random.randn(n_features, n_classes) * np.sqrt(2.0 / n_features)
        self.b = np.zeros((1, n_classes))

    def compute_loss(self, y_true, y_pred):
        """
        Compute cross-entropy loss for multiclass classification.

        Formula:
        L = -1/N * Σ Σ(y * log(ŷ))
        where:
        - y is true label
        - ŷ is predicted probability
        - N is number of samples

        Parameters:
        -----------
        y_true : array-like of shape (n_samples, n_classes)
            True one-hot encoded labels
        y_pred : array-like of shape (n_samples, n_classes)
            Predicted probabilities

        Returns:
        --------
        float
            Average cross-entropy loss
        """
        m = y_true.shape[0]
        loss = -np.sum(y_true * np.log(y_pred + 1e-15)) / m
        return loss

    def compute_gradients(self, X_batch, y_batch, y_pred):
        """
        Compute gradients for weights and biases.

        Formulas:
        ∂L/∂W = 1/N * X^T * (ŷ - y)
        ∂L/∂b = 1/N * Σ(ŷ - y)

        Parameters:
        -----------
        X_batch : array-like
            Input features for current batch
        y_batch : array-like
            True labels for current batch
        y_pred : array-like
            Predicted probabilities for current batch

        Returns:
        --------
        tuple
            Gradients for weights and biases
        """
        m = X_batch.shape[0]
        dW = (1 / m) * np.dot(X_batch.T, (y_pred - y_batch))
        db = (1 / m) * np.sum(y_pred - y_batch, axis=0, keepdims=True)
        return dW, db

    def fit(self, X, y):
        """
        Train the softmax regression model using mini-batch SGD.

        Process:
        1. Initialize parameters
        2. For each epoch:
            a. Shuffle data
            b. Split into mini-batches
            c. For each mini-batch:
                - Compute forward pass (softmax)
                - Compute gradients
                - Update parameters

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values (class labels)
        """
        # Initialize Parameters
        n_samples, n_features = X.shape
        n_classes = y.shape[1]
        
        self.initialize_parameters(n_features, n_classes)

        # Epoch Loop
        for epoch in range(self.n_epochs):
            # Shuffle the data
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            # Mini-batch training
            for i in range(0, n_samples, self.batch_size):
                # Get Batch Data
                X_batch = X[i:i + self.batch_size]
                y_batch = y[i:i + self.batch_size]

                # Forward pass
                z = np.dot(X_batch, self.W) + self.b
                y_pred = self.softmax(z)

                # Compute gradients
                dW, db = self.compute_gradients(X_batch, y_batch, y_pred)

                # Update parameters
                self.W -= self.learning_rate * dW
                self.b -= self.learning_rate * db

            # Calculate and trace the loss
        pred = self.softmax(np.dot(X, self.W) + self.b)
        loss = self.compute_loss(y, pred)
        print(f"Epoch {epoch + 1}/{self.n_epochs}, Loss: {loss}")


    def predict_proba(self, X):
        """
        Predict class probabilities for input samples.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples

        Returns:
        --------
        array-like of shape (n_samples, n_classes)
            Predicted probabilities for each class
        """
        z = np.dot(X, self.W) + self.b
        return self.softmax(z)

    def predict(self, X):
        """
        Predict class labels for input samples.

        Formula:
        y = argmax(P(y|x))

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples

        Returns:
        --------
        array-like of shape (n_samples,)
            Predicted class labels
        """
        y_proba = self.predict_proba(X)
        return np.argmax(y_proba, axis=1)


In [29]:
# Test Code
import numpy as np
import requests
import gzip
import os
from urllib.request import urlretrieve
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix

# URLs to download the Fashion MNIST dataset
BASE_URL = "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"
FILENAMES = {
    "train_images": "train-images-idx3-ubyte.gz",
    "train_labels": "train-labels-idx1-ubyte.gz",
    "test_images": "t10k-images-idx3-ubyte.gz",
    "test_labels": "t10k-labels-idx1-ubyte.gz",
}

# Download and extract Fashion MNIST dataset
def download_and_load_mnist(filename, num_items, item_size, reshape_dims=None):
    if not os.path.exists(filename):
        urlretrieve(BASE_URL + filename.split("/")[-1], filename)

    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=item_size)
        if reshape_dims:
            return data.reshape(num_items, *reshape_dims)
        else:
            return data

# Load datasets
def load_fashion_mnist():
    train_images = download_and_load_mnist("train-images-idx3-ubyte.gz", 60000, 16, (28, 28))
    train_labels = download_and_load_mnist("train-labels-idx1-ubyte.gz", 60000, 8)
    test_images = download_and_load_mnist("t10k-images-idx3-ubyte.gz", 10000, 16, (28, 28))
    test_labels = download_and_load_mnist("t10k-labels-idx1-ubyte.gz", 10000, 8)

    # Reshape and normalize images
    train_images = train_images.reshape(60000, 28*28) / 255.0
    test_images = test_images.reshape(10000, 28*28) / 255.0

    return train_images, train_labels, test_images, test_labels

# One-hot encoding for labels
def one_hot_encode(labels, num_classes=10):
    encoder = OneHotEncoder(categories=[range(num_classes)], sparse_output=False)
    return encoder.fit_transform(labels.reshape(-1, 1))

# Load data
X_train, y_train, X_test, y_test = load_fashion_mnist()
y_train_encoded = one_hot_encode(y_train)
y_test_encoded = one_hot_encode(y_test)

# Initialize and train the Softmax Regression model
model = SoftmaxRegression(learning_rate=0.1, n_epochs=50, batch_size=64)
model.fit(X_train, y_train_encoded)

# Predict on the test set
y_pred_proba = model.predict_proba(X_test)
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)


Epoch 50/50, Loss: 0.41327039373727276
Accuracy: 0.8346
Confusion Matrix:
[[815   2  37  89   3   2  40   0  12   0]
 [  3 952   6  31   4   0   2   0   2   0]
 [ 20   3 864  18  69   1  15   1   9   0]
 [ 21   8  29 906  18   0  12   0   6   0]
 [  0   0 228  59 676   0  32   0   5   0]
 [  0   0   0   1   0 935   0  41   4  19]
 [153   2 237  85 111   1 385   0  26   0]
 [  0   0   0   0   0  43   0 911   0  46]
 [  6   1  18  14   1   4   6   6 944   0]
 [  0   0   0   0   0  16   0  25   1 958]]
