# Logistic Regression

In [11]:
#------------ No New Package --------------
import numpy as np
#---------------------------------------------------------

class LogisticRegression:
    """
    Logistic Regression classifier using Stochastic Gradient Descent (SGD)
    for binary classification problems.
    """

    def __init__(self, learning_rate=0.01, n_epochs=100, batch_size=32):
        """
        Initialize the Logistic Regression model.

        Parameters:
        -----------
        learning_rate : float
            The step size for updating model parameters
        n_epochs : int
            Number of passes through the training data
        batch_size : int
            Number of training examples to use in each gradient update
        """
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.w = None  # weights
        self.b = None  # bias

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
        """
        Compute the sigmoid activation function.

        Formula:
        σ(z) = 1 / (1 + e^(-z))
        where z = wx + b

        Parameters:
        -----------
        z : array-like
            Input values

        Returns:
        --------
        array-like
            Sigmoid of input values
        """
        # ===== Insert your code here =====

    def initialize_parameters(self, n_features):
        self.w = np.random.randn(n_features) * np.sqrt(2 / n_features)
        self.b = 0

        """
        Initialize model parameters using Xavier initialization.

        Formula for Xavier initialization:
        w ~ N(0, sqrt(2/n_features))

        Parameters:
        -----------
        n_features : int
            Number of input features
        """
        # Xavier initialization for better convergence
        # ===== Insert your code here =====

    def compute_loss(self, y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

        """
        Compute binary cross-entropy loss.

        Formula:
        L = -1/N * Σ(y * log(ŷ) + (1-y) * log(1-ŷ))
        where:
        - y is true label
        - ŷ is predicted probability
        - N is number of samples

        Parameters:
        -----------
        y_true : array-like
            True binary labels
        y_pred : array-like
            Predicted probabilities

        Returns:
        --------
        float
            Average binary cross-entropy loss
        """
        # ===== Insert your code here =====

    def compute_gradients(self, X_batch, y_batch, y_pred):
        m = X_batch.shape[0]
        dw = np.dot(X_batch.T, (y_pred - y_batch)) / m
        db = np.sum(y_pred - y_batch) / m
        return dw, db
        """
        Compute gradients for weights and bias.

        Formulas:
        ∂L/∂w = 1/N * X^T * (ŷ - y)
        ∂L/∂b = 1/N * Σ(ŷ - y)
        where:
        - X is input features
        - y is true labels
        - ŷ is predicted probabilities
        - N is batch size

        Parameters:
        -----------
        X_batch : array-like
            Input features for current batch
        y_batch : array-like
            True labels for current batch
        y_pred : array-like
            Predicted probabilities for current batch

        Returns:
        --------
        tuple
            Gradients for weights and bias
        """
        # ===== Insert your code here =====

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.initialize_parameters(n_features)

        """
        Train the logistic regression model using mini-batch SGD.

        Process:
        1. Initialize parameters
        2. For each epoch:
            a. Shuffle data
            b. Split into mini-batches
            c. For each mini-batch:
                - Compute forward pass (sigmoid)
                - Compute gradients
                - Update parameters

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values
        """
        # Initialize Parameters
        # ===== Insert your code here =====

        # Epoch Loop
        for epoch in range(self.n_epochs):
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]
            # Shuffle the data
            # ===== Insert your code here =====

            # Mini-batch training
            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i+self.batch_size]
                y_batch = y_shuffled[i:i+self.batch_size]
    
                y_pred = self.sigmoid(np.dot(X_batch, self.w) + self.b)
                dw, db = self.compute_gradients(X_batch, y_batch, y_pred)
    
                self.w -= self.learning_rate * dw
                self.b -= self.learning_rate * db
                # Get Batch Data
                # ===== Insert your code here =====

                # Forward pass
                # ===== Insert your code here =====

                # Compute gradients
                # ===== Insert your code here =====

                # Update parameters
                # ===== Insert your code here =====

            # Calculate and trace the loss
            # ===== Insert your code here =====


    def predict_proba(self, X):
        return self.sigmoid(np.dot(X, self.w) + self.b)
        """
        Predict class probabilities for input samples.

        Formula:
        P(y=1|x) = σ(wx + b)
        where σ is the sigmoid function

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples

        Returns:
        --------
        array-like of shape (n_samples,)
            Predicted probabilities
        """
        # ===== Insert your code here =====

    def predict(self, X, threshold=0.5):
        return (self.predict_proba(X) >= threshold).astype(int)

        """
        Predict class labels for input samples.

        Formula:
        y = 1 if P(y=1|x) >= threshold else 0

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples
        threshold : float
            Classification threshold

        Returns:
        --------
        array-like of shape (n_samples,)
            Predicted class labels (0 or 1)
        """
        # ===== Insert your code here =====

In [12]:
# Test Code
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Generate synthetic data
np.random.seed(42)  # For reproducibility
n_samples = 1000
n_features = 10

# Generate random features
X = np.random.randn(n_samples, n_features)

# Generate binary target labels (0 or 1)
# Let's create labels based on a linear combination of features plus some noise
weights = np.random.randn(n_features)
bias = np.random.randn(1)
y_prob = 1 / (1 + np.exp(-(np.dot(X, weights) + bias)))  # Sigmoid to get probabilities
y = (y_prob > 0.5).astype(int).flatten()  # Convert probabilities to binary labels

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and fit the Logistic Regression model
model = LogisticRegression(learning_rate=0.01, n_epochs=100, batch_size=32)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Output the results
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 0.98
Confusion Matrix:
[[ 63   2]
 [  2 133]]


# Softmax Regression (Multinomial Logistic Regression)

In [15]:
import numpy as np

class SoftmaxRegression:
    """
    Softmax Regression classifier using Stochastic Gradient Descent (SGD)
    for multiclass classification problems.
    """

    def __init__(self, learning_rate=0.01, n_epochs=100, batch_size=32):
        """
        Initialize the Softmax Regression model.

        Parameters:
        -----------
        learning_rate : float
            The step size for updating model parameters
        n_epochs : int
            Number of passes through the training data
        batch_size : int
            Number of training examples to use in each gradient update
        """
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.W = None  # weights
        self.b = None  # biases

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)
        """
        Compute the softmax function.

        Formula:
        softmax(z) = exp(z) / Σ exp(z)
        where z = Wx + b

        Parameters:
        -----------
        z : array-like of shape (n_samples, n_classes)
            Input values

        Returns:
        --------
        array-like of shape (n_samples, n_classes)
            Softmax probabilities for each class
        """
        # ===== Insert your code here =====

    def initialize_parameters(self, n_features, n_classes):
        self.W = np.random.randn(n_features, n_classes) * np.sqrt(2 / n_features)
        self.b = np.zeros((1, n_classes))

        """
        Initialize model parameters using Xavier initialization.

        Parameters:
        -----------
        n_features : int
            Number of input features
        n_classes : int
            Number of output classes
        """
        # Xavier initialization for weights
        # ===== Insert your code here =====

    def compute_loss(self, y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

        """
        Compute cross-entropy loss for multiclass classification.

        Formula:
        L = -1/N * Σ Σ(y * log(ŷ))
        where:
        - y is true label
        - ŷ is predicted probability
        - N is number of samples

        Parameters:
        -----------
        y_true : array-like of shape (n_samples, n_classes)
            True one-hot encoded labels
        y_pred : array-like of shape (n_samples, n_classes)
            Predicted probabilities

        Returns:
        --------
        float
            Average cross-entropy loss
        """
        # ===== Insert your code here =====

    def compute_gradients(self, X_batch, y_batch, y_pred):
        m = X_batch.shape[0]
        dW = np.dot(X_batch.T, (y_pred - y_batch)) / m
        db = np.sum(y_pred - y_batch, axis=0, keepdims=True) / m
        return dW, db

        """
        Compute gradients for weights and biases.

        Formulas:
        ∂L/∂W = 1/N * X^T * (ŷ - y)
        ∂L/∂b = 1/N * Σ(ŷ - y)

        Parameters:
        -----------
        X_batch : array-like
            Input features for current batch
        y_batch : array-like
            True labels for current batch
        y_pred : array-like
            Predicted probabilities for current batch

        Returns:
        --------
        tuple
            Gradients for weights and biases
        """
        # ===== Insert your code here =====

    def fit(self, X, y):
        """
        Train the softmax regression model using mini-batch SGD.

        Process:
        1. Initialize parameters
        2. For each epoch:
            a. Shuffle data
            b. Split into mini-batches
            c. For each mini-batch:
                - Compute forward pass (softmax)
                - Compute gradients
                - Update parameters

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values (class labels)
        """
        n_samples, n_features = X.shape
        n_classes = y.shape[1]
        self.initialize_parameters(n_features, n_classes)

        for epoch in range(self.n_epochs):
            # Shuffle the data at the beginning of each epoch
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            # Mini-batch training
            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i + self.batch_size]
                y_batch = y_shuffled[i:i + self.batch_size]

                # Forward pass
                y_pred = self.softmax(np.dot(X_batch, self.W) + self.b)

                # Compute gradients
                dW, db = self.compute_gradients(X_batch, y_batch, y_pred)

                # Update parameters
                self.W -= self.learning_rate * dW
                self.b -= self.learning_rate * db

            # Calculate and trace the loss for the entire dataset after each epoch
            y_pred_full = self.predict_proba(X)
            loss = self.compute_loss(y, y_pred_full)
            print(f"Epoch {epoch + 1}, Loss: {loss}")


            


    def predict_proba(self, X):
        return self.softmax(np.dot(X, self.W) + self.b)

        """
        Predict class probabilities for input samples.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples

        Returns:
        --------
        array-like of shape (n_samples, n_classes)
            Predicted probabilities for each class
        """
        # ===== Insert your code here =====

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

        """
        Predict class labels for input samples.

        Formula:
        y = argmax(P(y|x))

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input samples

        Returns:
        --------
        array-like of shape (n_samples,)
            Predicted class labels
        """
        # ===== Insert your code here =====


In [17]:
# Test Code
import numpy as np
import requests
import gzip
import os
from urllib.request import urlretrieve
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix

# URLs to download the Fashion MNIST dataset
BASE_URL = "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"
FILENAMES = {
    "train_images": "train-images-idx3-ubyte.gz",
    "train_labels": "train-labels-idx1-ubyte.gz",
    "test_images": "t10k-images-idx3-ubyte.gz",
    "test_labels": "t10k-labels-idx1-ubyte.gz",
}

# Download and extract Fashion MNIST dataset
def download_and_load_mnist(filename, num_items, item_size, reshape_dims=None):
    if not os.path.exists(filename):
        urlretrieve(BASE_URL + filename.split("/")[-1], filename)

    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=item_size)
        if reshape_dims:
            return data.reshape(num_items, *reshape_dims)
        else:
            return data

# Load datasets
def load_fashion_mnist():
    train_images = download_and_load_mnist("train-images-idx3-ubyte.gz", 60000, 16, (28, 28))
    train_labels = download_and_load_mnist("train-labels-idx1-ubyte.gz", 60000, 8)
    test_images = download_and_load_mnist("t10k-images-idx3-ubyte.gz", 10000, 16, (28, 28))
    test_labels = download_and_load_mnist("t10k-labels-idx1-ubyte.gz", 10000, 8)

    # Reshape and normalize images
    train_images = train_images.reshape(60000, 28*28) / 255.0
    test_images = test_images.reshape(10000, 28*28) / 255.0

    return train_images, train_labels, test_images, test_labels

# One-hot encoding for labels
def one_hot_encode(labels, num_classes=10):
    encoder = OneHotEncoder(categories=[range(num_classes)], sparse=False)
    return encoder.fit_transform(labels.reshape(-1, 1))

# Load data
X_train, y_train, X_test, y_test = load_fashion_mnist()
y_train_encoded = one_hot_encode(y_train)
y_test_encoded = one_hot_encode(y_test)

# Initialize and train the Softmax Regression model
model = SoftmaxRegression(learning_rate=0.1, n_epochs=50, batch_size=64)
model.fit(X_train, y_train_encoded)

# Predict on the test set
y_pred_proba = model.predict_proba(X_test)
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'