In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score

class LogisticRegressionGD:
    """Two-feature binary logistic regression trained by batch gradient descent.

    The model is
    ``P(y = 1 | x1, x2) = sigmoid(theta_0 + theta_1 * x1 + theta_2 * x2)``.

    Attributes set by ``fit``:
    - theta_0, theta_1, theta_2: Learned intercept and feature weights.
    - n_iter_: Number of gradient-descent updates that were performed.
    - converged_: True when the last parameter change was within
      ``tolerance``; False when the loop stopped for any other reason
      (e.g. ``max_iterations`` was reached).
    """

    # Logits are clipped to +/- this bound before exponentiation so that
    # np.exp never overflows. Within the bound the result is unchanged; beyond
    # it the sigmoid is already indistinguishable from 0 or 1.
    _MAX_ABS_LOGIT = 500.0

    def __init__(self, learning_rate=0.1, tolerance=1e-6, max_iterations=60000):
        """
        Initialize the Logistic Regression model with gradient descent.
        Args:
        - learning_rate: Step size for gradient descent.
        - tolerance: Threshold for parameter change to stop iterations.
        - max_iterations: Maximum number of iterations to avoid infinite loop.
        """
        self.learning_rate = learning_rate
        self.tolerance = tolerance
        self.max_iterations = max_iterations
        self.theta_0 = None
        self.theta_1 = None
        self.theta_2 = None
        self.n_iter_ = 0
        self.converged_ = False

    def sigmoid(self, z):
        """
        Numerically safe sigmoid function 1 / (1 + exp(-z)).
        Args:
        - z: Scalar or array-like of logits.
        Returns:
        - Sigmoid of z, element-wise.
        """
        # Clipping avoids the overflow in np.exp(-z) for very negative z.
        z = np.clip(z, -self._MAX_ABS_LOGIT, self._MAX_ABS_LOGIT)
        return 1 / (1 + np.exp(-z))

    def predict_proba(self, X):
        """
        Predict probabilities for a 2-D input whose first two columns are
        feature 1 and feature 2.
        Args:
        - X: Array-like indexed as X[:, 0] (feature 1) and X[:, 1] (feature 2).
        Returns:
        - Probabilities of y = 1, one per row of X.
        """
        X = np.asarray(X, dtype=float)
        # Delegate so both prediction entry points share a single formula.
        return self.predict(X[:, 0], X[:, 1])

    def compute_loss(self, y, y_pred):
        """
        Compute the mean cross-entropy loss.
        Args:
        - y: True labels (0 or 1).
        - y_pred: Predicted probabilities of y = 1.
        Returns:
        - Mean binary cross-entropy.
        """
        y = np.asarray(y, dtype=float)
        epsilon = 1e-15  # To prevent log(0)
        y_pred = np.clip(np.asarray(y_pred, dtype=float), epsilon, 1 - epsilon)
        return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

    def fit(self, x1, x2, y):
        """
        Fit the logistic regression model using gradient descent.

        Training always starts from the fixed point
        (theta_0, theta_1, theta_2) = (-1, 1.5, 0.5) and stops when the
        Euclidean norm of the parameter update is no larger than
        ``tolerance`` or after ``max_iterations`` updates. The parameters
        after the first update and a final status line are printed.

        Args:
        - x1: Feature 1 (array-like).
        - x2: Feature 2 (array-like, same shape as x1).
        - y: Target labels, 0 or 1 (array-like, same shape as x1).
        Raises:
        - ValueError: If the inputs are empty or their shapes differ.
        """
        # Accept plain Python sequences as well as NumPy arrays.
        x1 = np.asarray(x1, dtype=float)
        x2 = np.asarray(x2, dtype=float)
        y = np.asarray(y, dtype=float)
        if not (x1.shape == x2.shape == y.shape):
            raise ValueError(
                f"x1, x2 and y must have the same shape; got {x1.shape}, {x2.shape}, {y.shape}"
            )
        if y.size == 0:
            raise ValueError("Cannot fit on empty training data.")

        # Initialize parameters
        self.theta_0 = -1
        self.theta_1 = 1.5
        self.theta_2 = 0.5

        N = len(y)
        iteration = 0
        change = np.inf  # Track change in parameters

        while iteration < self.max_iterations and change > self.tolerance:
            # Compute predictions
            f_theta = self.theta_0 + self.theta_1 * x1 + self.theta_2 * x2
            P = self.sigmoid(f_theta)

            # Gradients of the mean cross-entropy loss
            delta = P - y
            grad_theta_0 = np.sum(delta) / N
            grad_theta_1 = np.sum(delta * x1) / N
            grad_theta_2 = np.sum(delta * x2) / N

            # Candidate parameters after one gradient step
            new_theta_0 = self.theta_0 - self.learning_rate * grad_theta_0
            new_theta_1 = self.theta_1 - self.learning_rate * grad_theta_1
            new_theta_2 = self.theta_2 - self.learning_rate * grad_theta_2

            # Euclidean norm of the update, used as the stopping criterion
            change = np.sqrt(
                (new_theta_0 - self.theta_0) ** 2
                + (new_theta_1 - self.theta_1) ** 2
                + (new_theta_2 - self.theta_2) ** 2
            )

            # Update parameters for next iteration
            self.theta_0, self.theta_1, self.theta_2 = new_theta_0, new_theta_1, new_theta_2

            # Print parameters after first iteration
            if iteration == 0:
                print(
                    f"After 1 iteration: θ0 = {self.theta_0:.4f}, θ1 = {self.theta_1:.4f}, θ2 = {self.theta_2:.4f}"
                )

            iteration += 1

        self.n_iter_ = iteration
        self.converged_ = bool(change <= self.tolerance)

        # Only claim convergence when the tolerance was actually met.
        if self.converged_:
            print(f"Converged after {iteration} iterations with parameter change {change:.6f}.")
        else:
            print(
                f"Stopped after {iteration} iterations without converging "
                f"(parameter change {change:.6f})."
            )

    def predict(self, x1, x2):
        """
        Predict probabilities for the input features.
        Args:
        - x1: Feature 1.
        - x2: Feature 2.
        Returns:
        - Probabilities of y = 1.
        """
        x1 = np.asarray(x1, dtype=float)
        x2 = np.asarray(x2, dtype=float)
        f_theta = self.theta_0 + self.theta_1 * x1 + self.theta_2 * x2
        return self.sigmoid(f_theta)

    def get_params(self):
        """
        Get the learned parameters.
        Returns:
        - Tuple (theta_0, theta_1, theta_2); all None before fit is called.
        """
        return self.theta_0, self.theta_1, self.theta_2

    def evaluate(self, y_true, y_pred):
        """
        Evaluate predictions using accuracy, precision, and recall.
        Args:
        - y_true: True class labels.
        - y_pred: Predicted class labels (e.g. probabilities thresholded at
          0.5), not raw probabilities.
        Returns:
        - Tuple (accuracy, precision, recall).
        """
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        return accuracy, precision, recall


# Table 1 (training set): each row is (x1, x2, label).
train_data = np.array(
    [
        [0.346, 0.780, 0],
        [0.303, 0.439, 0],
        [0.358, 0.729, 0],
        [0.602, 0.863, 1],
        [0.790, 0.753, 1],
        [0.611, 0.965, 1],
    ]
)
# Transposing turns the three columns into three rows that unpack directly.
x1_train, x2_train, y_train = train_data.T

# Table 2 (test set): same column layout as the training table.
test_data = np.array(
    [
        [0.959, 0.382, 0],
        [0.750, 0.306, 0],
        [0.395, 0.760, 0],
        [0.823, 0.764, 1],
        [0.761, 0.874, 1],
        [0.844, 0.435, 1],
    ]
)
x1_test, x2_test, y_test = test_data.T

# Build the model and run gradient descent on the training set.
model = LogisticRegressionGD(
    learning_rate=0.1,
    tolerance=1e-4,
    max_iterations=600000,
)
model.fit(x1_train, x2_train, y_train)

# Report the fitted intercept and weights.
theta_0, theta_1, theta_2 = model.get_params()
print(f"Final parameters: θ0 = {theta_0:.4f}, θ1 = {theta_1:.4f}, θ2 = {theta_2:.4f}")

# Score the held-out set: probabilities of at least 0.5 are labelled class 1.
proba_test = model.predict(x1_test, x2_test)
predicted_classes = (proba_test >= 0.5).astype(int)
accuracy, precision, recall = model.evaluate(y_test, predicted_classes)

# Cross-entropy of the fitted model on the data it was trained on.
loss_train = model.compute_loss(
    y_train,
    model.predict(x1_train, x2_train),
)

print(f"Training Loss: {loss_train:.4f}")
print(f"Test Set Evaluation:\nAccuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")


After 1 iteration: θ0 = -1.0032, θ1 = 1.5054, θ2 = 0.5020
Converged after 83655 iterations with parameter change 0.000100.
Final parameters: θ0 = -20.4000, θ1 = 30.1154, θ2 = 7.2970
Training Loss: 0.0086
Test Set Evaluation:
Accuracy: 0.6667, Precision: 0.6000, Recall: 1.0000
