In [1]:
# Imports

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score 
from sklearn.linear_model import LogisticRegression
from scipy.special import expit



In [2]:
# Load input data and labels

dataset = datasets.load_breast_cancer()

x, y = dataset.data, dataset.target

X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [3]:
# Helper function to evaluate model performance
def evaluate(y_true, y_pred):
    print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
    print(f"Precision: {precision_score(y_true, y_pred)}")
    print(f"Recall: {recall_score(y_true, y_pred)}")
    print(f"F1 Score: {f1_score(y_true, y_pred)}")
    print(f"Confusion Matrix: \n{confusion_matrix(y_true, y_pred)}")

In [9]:
# Implement Logistic Regression using sklearn

regressor = LogisticRegression(max_iter=5000)
regressor.fit(X_train, Y_train)

# Predict
predictions = regressor.predict(X_test)

# Evaluate
evaluate(Y_test, predictions)

Accuracy: 0.956140350877193
Precision: 0.9459459459459459
Recall: 0.9859154929577465
F1 Score: 0.9655172413793103
Confusion Matrix: 
[[39  4]
 [ 1 70]]


In [19]:
# Implementing Logistic Regressor class from scratch

class Logistic_regressor():

    def __init__(self, n_iter=5000, learning_rate=0.01, l2_reg = 1e-5):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.l2_reg = l2_reg
        self.weights = None
        self.bias = None
        self.losses = []

    def sigmoid(self, x):
        # return 1 / (1 + np.exp(-x))
        return expit(x)

    def binary_cross_entropy_loss(self, y_true, y_pred, weights, l2_reg):
        epsilon = 1e-10  

        bce_loss = -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))

        l2_term = l2_reg * np.sum(weights ** 2) / len(y_true)  

        total_loss = bce_loss + l2_term

        return total_loss
    
    def fit(self, X, Y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        for _ in range(self.n_iter):
            A = self.sigmoid(np.dot(X, self.weights) + self.bias)
            self.losses.append(self.binary_cross_entropy_loss(Y, A, self.weights, self.l2_reg))
            dz = (A - Y) / (A * (1 - A) + 1e-10) # Derivative of loss function
            dw = (1 / n_samples) * (np.dot(X.T, dz))
            db = (1 / n_samples) * np.sum(dz)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
    
    def predict(self, X):
        threshold = 0.5
        y_hat = np.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(y_hat)
        y_predicted_cls = [1 if i > threshold else 0 for i in y_predicted]
        return np.asarray(y_predicted_cls)


In [20]:
my_regressor = Logistic_regressor(n_iter=5000, learning_rate=0.01, l2_reg=1e-5)
my_regressor.fit(X_train, Y_train)
my_predictions = my_regressor.predict(X_test)

# Evaluate
evaluate(Y_test, my_predictions)

Accuracy: 0.9649122807017544
Precision: 0.958904109589041
Recall: 0.9859154929577465
F1 Score: 0.9722222222222222
Confusion Matrix: 
[[40  3]
 [ 1 70]]
