# Logistic Regression

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hmatalonga/data-science-bits/blob/master/notebooks/logistic_regression.ipynb)

A from-scratch implementation of the logistic regression algorithm, trained with batch gradient descent using NumPy.

**Acknowledgements:** This code was originally inspired by the [MLfromscratch](https://github.com/python-engineer/MLfromscratch) repository.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import warnings

# Apply seaborn's default plotting theme.
sns.set_theme()
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides NumPy RuntimeWarnings
# (e.g. overflow in np.exp) — confirm that silencing them is intended.
warnings.filterwarnings('ignore')

In [2]:
# Set seed for reproducibility.
# SEED is the single source of truth: it seeds NumPy's global RNG here and
# can be passed as `random_state` wherever a seed is needed.
SEED = 42
np.random.seed(SEED)

In [3]:
class LogisticRegression:
  """Binary logistic-regression classifier trained with batch gradient descent.

  Parameters
  ----------
  learning_rate : float, default=0.001
      Step size applied to every gradient update.
  n_iters : int, default=1000
      Number of full-batch gradient-descent iterations performed by ``fit``.

  Attributes
  ----------
  weights : numpy.ndarray or None
      One coefficient per feature; ``None`` until ``fit`` has been called.
  bias : float or None
      Intercept term; ``None`` until ``fit`` has been called.
  """

  def __init__(self, learning_rate=0.001, n_iters=1000):
    self.learning_rate = learning_rate
    self.n_iters = n_iters
    self.weights = None
    self.bias = None

  def fit(self, X, y):
    """Learn ``weights`` and ``bias`` from the training data.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features. Converted to a float ndarray.
    y : array-like of shape (n_samples,)
        Binary targets encoded as 0 / 1.

    Returns
    -------
    LogisticRegression
        ``self``, so calls can be chained.
    """
    # Accept lists/tuples as well as ndarrays; `.shape` and `.T` need an array.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples, n_features = X.shape

    # init parameters
    self.weights = np.zeros(shape=n_features)
    self.bias = 0.0

    # gradient descent
    for _ in range(self.n_iters):
      # residual between predicted probabilities and targets,
      # computed once and shared by both gradients
      error = self._predict_proba(X) - y

      # compute gradients
      dw = (1 / n_samples) * np.dot(X.T, error)
      db = (1 / n_samples) * np.sum(error)

      # update parameters
      self.weights -= self.learning_rate * dw
      self.bias -= self.learning_rate * db

    return self

  def predict(self, X):
    """Return the predicted class (0 or 1) for every row of ``X``.

    A sample is labelled 1 when its predicted probability is strictly
    greater than 0.5, otherwise 0.
    """
    return np.where(self._predict_proba(X) > 0.5, 1, 0)

  def _predict_proba(self, X):
    """Return the sigmoid of the linear score ``X @ weights + bias``."""
    return self._sigmoid(np.dot(X, self.weights) + self.bias)

  def _sigmoid(self, x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    ``np.exp`` is only ever evaluated on non-positive values, so large
    magnitude scores cannot overflow; they saturate cleanly to 0 or 1.
    """
    x = np.asarray(x, dtype=float)
    e = np.exp(-np.abs(x))  # always in [0, 1]
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x))
    return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

In [4]:
# Load the breast-cancer dataset; return_X_y=True yields the feature
# matrix and the target vector directly instead of a Bunch object.
X, y = load_breast_cancer(return_X_y=True)

In [5]:
# Hold out 20% of the samples as a test set; random_state=SEED makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=SEED)

In [6]:
# Train on the training split (fit returns the estimator, so the call chains).
log_reg = LogisticRegression(learning_rate=0.01, n_iters=1000).fit(X_train, y_train)

# Predict class labels for the held-out test split.
y_pred = log_reg.predict(X_test)

# Print per-class precision / recall / F1 against the true test labels.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      1.00      0.93        43
           1       1.00      0.92      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.96      0.95       114
weighted avg       0.95      0.95      0.95       114

