In [5]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
import pandas as pd

# Logistic Regression from Scratch
class LogisticRegressionScratch:
    def __init__(self, learning_rate=0.05, max_iter=5000, regularization=None, lambda_=0.01, tol=1e-2):  # Fixed _init_ method
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.regularization = regularization
        self.lambda_ = lambda_  # Changed lambda to lambda_
        self.tol = tol  # Stopping tolerance for gradient magnitude
        self.theta = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def compute_loss(self, X, y):
        m = len(y)
        h = self.sigmoid(X @ self.theta)
        loss = -(1/m) * (y @ np.log(h) + (1 - y) @ np.log(1 - h))
        if self.regularization == 'l2':
            loss += (self.lambda_ / (2 * m)) * np.sum(np.square(self.theta[1:]))
        return loss

    def gradient(self, X, y):
        m = len(y)
        h = self.sigmoid(X @ self.theta)
        gradient = (1/m) * (X.T @ (h - y))
        if self.regularization == 'l2':
            gradient[1:] += (self.lambda_ / m) * self.theta[1:]
        return gradient
    
    def fit(self, X, y):
        m, n = X.shape
        self.theta = np.zeros(n)
        for i in range(self.max_iter):
            grad = self.gradient(X, y)
            grad_magnitude = np.linalg.norm(grad)
            if grad_magnitude < self.tol:  # Stop if gradient is small enough
                print(f"Converged after {i+1} iterations.")
                break
            self.theta -= self.learning_rate * grad
            
            if i % 500 == 0:  # Print loss value every 100 iterations
                print(f"Iteration {i}, Loss: {round(self.compute_loss(X, y),3)}")


    def predict_proba(self, X):
        return self.sigmoid(X @ self.theta)

    def predict(self, X):
        return (self.predict_proba(X) >= 0.5).astype(int)

X = pd.read_excel("../coffeeDataSynthesized.xlsx", "dataset")
y = np.where(X["type"] == "robusta", 0, 1)
y = pd.Series(y)

X = X[['width', 'height', 'depth', 'weight']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44)

# Train Logistic Regression from Scratch
model_scratch = LogisticRegressionScratch(learning_rate=0.05, max_iter=5000,  lambda_=0.1, tol=1e-2)
start_time = time.time()
model_scratch.fit(X_train, y_train)
scratch_time = time.time() - start_time

# Predictions and accuracy
y_train_pred_scratch = model_scratch.predict(X_train)
y_test_pred_scratch = model_scratch.predict(X_test)

train_accuracy_scratch = accuracy_score(y_train, y_train_pred_scratch)
test_accuracy_scratch = accuracy_score(y_test, y_test_pred_scratch)

# Measure training time for Scikit-learn Logistic Regression
start_time = time.time()
model_sklearn = LogisticRegression( C=10, max_iter=5000, solver='lbfgs', tol=1e-2)
model_sklearn.fit(X_train, y_train)  # Exclude intercept term
sklearn_time = time.time() - start_time

# Predictions and accuracy
y_train_pred_sklearn = model_sklearn.predict(X_train)
y_test_pred_sklearn = model_sklearn.predict(X_test)

train_accuracy_sklearn = accuracy_score(y_train, y_train_pred_sklearn)
test_accuracy_sklearn = accuracy_score(y_test, y_test_pred_sklearn)
# Report Results


print("Logistic Regression using Scikit-learn")
print(f"Train Accuracy: {train_accuracy_sklearn:.4f}")
print(f"Test Accuracy: {test_accuracy_sklearn:.4f}")
print(f"Runtime: {sklearn_time:.4f} seconds")

print("Logistic Regression from Scratch")
print(f"Train Accuracy: {train_accuracy_scratch:.4f}")
print(f"Test Accuracy: {test_accuracy_scratch:.4f}")
print(f"Runtime: {scratch_time:.4f} seconds\n")


Iteration 0, Loss: 0.692
Iteration 500, Loss: 0.635
Iteration 1000, Loss: 0.612
Iteration 1500, Loss: 0.598
Iteration 2000, Loss: 0.589
Iteration 2500, Loss: 0.583
Iteration 3000, Loss: 0.578
Iteration 3500, Loss: 0.575
Iteration 4000, Loss: 0.571
Converged after 4371 iterations.
Logistic Regression using Scikit-learn
Train Accuracy: 0.7634
Test Accuracy: 0.8388
Runtime: 0.0128 seconds
Logistic Regression from Scratch
Train Accuracy: 0.6488
Test Accuracy: 0.6529
Runtime: 4.0333 seconds



In [6]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_test_pred_scratch)

# Print the confusion matrix
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[ 48  72]
 [ 12 110]]


In [7]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_test_pred_sklearn)

# Print the confusion matrix
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[ 91  29]
 [ 10 112]]


In [8]:
from sklearn.metrics import accuracy_score, roc_auc_score, average_precision_score, recall_score, precision_score, f1_score

# Assuming y_true and y_pred are our true labels and predicted labels
# For probabilities, use y_pred_proba for AUROC and average precision.
model_scratch = LogisticRegressionScratch(learning_rate=0.05, max_iter=5000, regularization='l2', lambda_=0.1, tol=1e-2)

# Example: Classification accuracy
sk_accuracy = accuracy_score(y_test, y_test_pred_sklearn)
# Example: Recall, Precision, and F1-score
sk_recall = recall_score(y_test, y_test_pred_sklearn)
sk_precision = precision_score(y_test, y_test_pred_sklearn)
sk_f1 = f1_score(y_test, y_test_pred_sklearn)
sk_auroc = roc_auc_score(y_test, y_test_pred_sklearn)



sc_accuracy = accuracy_score(y_test, y_test_pred_scratch)
# Example: Recall, Precision, and F1-score
sc_recall = recall_score(y_test, y_test_pred_scratch)
sc_precision = precision_score(y_test, y_test_pred_scratch)
sc_f1 = f1_score(y_test, y_test_pred_scratch)
sc_auroc = roc_auc_score(y_test, y_test_pred_scratch)

# Print all metrics
print(f"Our Accuracy: {sc_accuracy:.4f}")
print(f"Our Recall: {sc_recall:.4f}")
print(f"Our Precision: {sc_precision:.4f}")
print(f"Our F1-score: {sc_f1:.4f}"),
print(f"Our AUROC: {sc_auroc:.4f}")
print()
print(f"Sklearn Accuracy: {sk_accuracy:.4f}")
print(f"Sklearn Recall: {sk_recall:.4f}")
print(f"Sklearn Precision: {sk_precision:.4f}")
print(f"Sklearn F1-score: {sk_f1:.4f}"),
print(f"Sklearn AUROC: {sk_auroc:.4f}")





Our Accuracy: 0.6529
Our Recall: 0.9016
Our Precision: 0.6044
Our F1-score: 0.7237
Our AUROC: 0.6508

Sklearn Accuracy: 0.8388
Sklearn Recall: 0.9180
Sklearn Precision: 0.7943
Sklearn F1-score: 0.8517
Sklearn AUROC: 0.8382
