# Logistic Regression from Scratch

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer

In [6]:
class LogisticRegressionScratch:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
    
    def sigmoid(self, z):
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features) # initialize weights to zeros
        self.bias = 0 # initialize bias to 0

        # perform gradient descent for specified no of iterations
        for _ in range(self.n_iters):
            model = np.dot(X, self.weights) + self.bias # linear model prediction // dot product of features and weights+bias
            y_pred = self.sigmoid(model) # apply sigmoid to get probabilities

            # compute gradient for weights and biases
            dw = (1/n_samples) * np.dot(X.T, (y_pred-y)) # gradient wrt weight
            db = (1/n_samples) * np.sum(y_pred-y) # gradient wrt bias

            # update weights and bias using gradient
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
    
    def predict(self, X):
        # make predictions using linear model and sigmoid function
        model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(model)

        # convert probabilities to binary class labels
        return [1 if i > 0.5 else 0 for i in y_pred]

In [7]:
# load dataset
data = load_breast_cancer()
X = data.data
y = data.target

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# train log reg from scratch
log_reg_scratch = LogisticRegressionScratch(learning_rate=0.01, n_iters=1000)
log_reg_scratch.fit(X_train, y_train)  # train the model
y_pred_scratch = log_reg_scratch.predict(X_test)  # make predictions

# calculate accuracy for scratch implementation
accuracy_scratch = accuracy_score(y_test, y_pred_scratch)
print(accuracy_scratch)

0.9473684210526315


In [9]:
# train log reg using sklearn
log_reg_sklearn = SklearnLogisticRegression(max_iter=1000)
log_reg_sklearn.fit(X_train, y_train)
y_pred_sklearn = log_reg_sklearn.predict(X_test)

# calculate accuracy for sklearn implementation
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(accuracy_sklearn)

0.956140350877193


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
