In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [39]:
%%writefile ../../src/models/logistic_regression.py
import numpy as np

from src.evaluation.classification_metrics import ClassificationMetrics

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    The loss is the mean binary cross-entropy plus an L2 penalty on the
    weights (the bias is never regularized).

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient-descent update.
    num_iterations : int
        Maximum number of gradient-descent iterations.
    tolerance : float
        Training stops as soon as the loss decreases by less than this amount
        between two consecutive iterations.
    lambda_reg : float
        L2 regularization strength.

    Attributes set by ``fit``
    -------------------------
    weights : numpy.ndarray of shape (n_features,)
    bias : float
    """

    # Added inside the logarithms of the loss so log(0) is never evaluated.
    _LOG_EPSILON = 1e-5

    def __init__(self, learning_rate=0.01, num_iterations=1000, tolerance=1e-4, lambda_reg=0.1):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.tolerance = tolerance
        self.lambda_reg = lambda_reg

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)), element-wise."""
        # Clip z to prevent overflow in np.exp
        z = np.clip(z, -250, 250)
        return 1 / (1 + np.exp(-z))

    def _loss(self, predictions, y):
        """Return the L2-regularized mean cross-entropy for the current weights."""
        n_samples = len(y)
        eps = self._LOG_EPSILON
        log_likelihood = np.sum(
            y * np.log(predictions + eps) + (1 - y) * np.log(1 - predictions + eps)
        )
        l2_penalty = (self.lambda_reg / (2 * n_samples)) * np.sum(np.square(self.weights))
        return (-1 / n_samples) * log_likelihood + l2_penalty

    def fit(self, X, y):
        """Fit weights and bias to the training data, printing the final loss.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of n_samples labels in {0, 1}; a column vector of
            shape (n_samples, 1) is flattened.

        Raises
        ------
        ValueError
            If X is not 2-D, if there are no samples, or if X and y disagree
            on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y: a (n, 1) column would otherwise broadcast
        # `predictions - y` into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s).")
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: {n_samples} != {y.shape[0]}."
            )

        self.weights = np.zeros(X.shape[1])
        self.bias = 0
        previous_loss = float('inf')

        for i in range(self.num_iterations):
            predictions = self.predict_proba(X)
            cost = self._loss(predictions, y)

            # Stop once the improvement falls below the tolerance. Note that
            # this also triggers when the loss goes UP (negative difference).
            if previous_loss - cost < self.tolerance:
                print(f"Convergence reached at iteration {i}.")
                break
            previous_loss = cost

            # Gradients of the regularized loss (bias excluded from the penalty)
            error = predictions - y
            dw = (1 / n_samples) * np.dot(X.T, error) + (self.lambda_reg / n_samples) * self.weights
            db = (1 / n_samples) * np.sum(error)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
        else:
            # The loop ended without `break` (possibly after zero iterations):
            # evaluate the loss of the final parameters so the report below is
            # always defined and never one update stale.
            cost = self._loss(self.predict_proba(X), y)

        print(f"Final loss: {cost}")

    def predict(self, X):
        """Return a list of hard class labels (1 if probability > 0.5 else 0)."""
        return [1 if p > 0.5 else 0 for p in self.predict_proba(X)]

    def predict_proba(self, X):
        """Return the predicted probability of the positive class per sample."""
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

Writing ../../src/models/logistic_regression.py


In [17]:
import numpy as np
from src.data.load_dataset import load_spambase

from sklearn.model_selection import train_test_split

In [18]:
# Load the dataset returned by load_spambase() and hold out 20% of it for
# testing; the fixed random_state keeps the split reproducible.
X, y = load_spambase()

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [34]:
# `LogisticRegression` was never imported anywhere in this notebook, so this
# cell raised NameError on a fresh kernel. Import it from the module that the
# %%writefile cell creates.
from src.models.logistic_regression import LogisticRegression

# Same hyperparameters as the `best_params` dict recorded further down;
# lambda_reg is left at the class default of 0.1.
log_reg = LogisticRegression(learning_rate=0.00001, num_iterations=500000, tolerance=1e-8)
log_reg.fit(X_train, y_train)

Final loss: 0.3882142077655066


In [35]:
# Accuracy = fraction of test samples whose predicted label equals the true one.
y_pred = log_reg.predict(X_test)
is_correct = y_pred == y_test
accuracy = np.mean(is_correct)
accuracy

0.8935939196525515

In [36]:
# Hyperparameters stored as "model_params" in the experiment config.
best_params = dict(
    learning_rate=1e-5,
    num_iterations=500_000,
    tolerance=1e-8,
    lambda_reg=0.1,
)

In [37]:
import os
import json

from src.config import EXPERIMENT_DIR

# Single source of truth for the experiment name: it is used both inside the
# config and as the name of the directory the config is written to.
experiment_name = "custom_logistic_regression_w_best_params"

experiment_config = {
    "name": experiment_name,
    "description": "Custom logistic regression with best hyperparameters.",
    # Previously pointed at "src.models.random_forest.RandomForestClassifier",
    # a copy-paste leftover: this experiment (and `best_params`) belongs to
    # the custom logistic regression model.
    "model_class": "src.models.logistic_regression.LogisticRegression",
    "model_params": best_params,
    "metrics": [
        "accuracy",
        "f1",
        "roc_auc",
    ],
}

# Create <EXPERIMENT_DIR>/<experiment_name>/ if needed and write config.json.
experiment_dir = os.path.join(EXPERIMENT_DIR, experiment_name)
os.makedirs(experiment_dir, exist_ok=True)
exp_config_path = os.path.join(experiment_dir, 'config.json')
with open(exp_config_path, 'w') as file:
    json.dump(experiment_config, file, indent=4)