# NB_170124T0838_logistic_regression_scratch

# 0.Goals

- implementation logistic regeression from scratch

# 1.Imports

In [8]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# 2.Implementation of the class Logistic regression

In [6]:
class LogisticRegressionScratch(object):
    def __init__(self, learning_rate: float = 0.001, epoch: int = 1000):
        self.lr = learning_rate
        self.epoch = epoch
        self.W = None
        self.b = None

    def fit(self, X: np.ndarray, Y: np.ndarray):
        # intialize model
        n_samples, n_features = X.shape

        self.W = np.zeros(n_features)
        self.b = 0

        for _ in range(self.epoch):
            # forward
            liner_model = np.dot(X, self.W) + self.b
            Y_hat = 1 / (1 + np.exp(-liner_model))

            # compute gradients backward
            dw = (1 / n_samples) * np.dot(X.T, (Y_hat - Y))
            db = (1 / n_samples) * np.sum(Y_hat - Y)

            # update step
            self.W -= self.lr * dw
            self.b -= self.lr * db

            # print result
            if _ % 10 == 0:
                print(f" iteration: {_}, accuracy: {self.accuracy(X, Y)}")

    def predict(self, X: np.ndarray) -> np.ndarray:
        linear_model = np.dot(X, self.W) + self.b
        Y_hat = 1 / (1 + np.exp(-linear_model))
        Y_hat_cls = [1 if x > 0.5 else 0 for x in Y_hat]
        return np.array(Y_hat_cls)

    def accuracy(self, X, Y):
        accuracy_result = np.sum(Y == self.predict(X)) / len(Y)
        return accuracy_result

# 3.Load data

In [11]:
dataset_breast_cancer = datasets.load_breast_cancer()
dataset_breast_cancer

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [13]:
X, Y = dataset_breast_cancer["data"], dataset_breast_cancer["target"]
X, Y

(array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [14]:
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=123
)

# 4.Use the model

- fit the model

In [22]:
regressor = LogisticRegressionScratch(epoch=300)
regressor.fit(X_train, Y_train)

 iteration: 0, accuracy: 0.3758241758241758
 iteration: 10, accuracy: 0.654945054945055
 iteration: 20, accuracy: 0.3758241758241758
 iteration: 30, accuracy: 0.9076923076923077
 iteration: 40, accuracy: 0.7582417582417582
 iteration: 50, accuracy: 0.8307692307692308
 iteration: 60, accuracy: 0.6659340659340659
 iteration: 70, accuracy: 0.7846153846153846
 iteration: 80, accuracy: 0.6659340659340659
 iteration: 90, accuracy: 0.7296703296703296
 iteration: 100, accuracy: 0.7098901098901099
 iteration: 110, accuracy: 0.7098901098901099
 iteration: 120, accuracy: 0.7142857142857143
 iteration: 130, accuracy: 0.7252747252747253
 iteration: 140, accuracy: 0.7318681318681318
 iteration: 150, accuracy: 0.7934065934065934
 iteration: 160, accuracy: 0.8505494505494505
 iteration: 170, accuracy: 0.8505494505494505
 iteration: 180, accuracy: 0.8527472527472527
 iteration: 190, accuracy: 0.8527472527472527
 iteration: 200, accuracy: 0.8527472527472527
 iteration: 210, accuracy: 0.9054945054945055


  Y_hat = 1 / (1 + np.exp(-liner_model))
  Y_hat = 1 / (1 + np.exp(-linear_model))


- predict with the model

In [24]:
Y_hat = regressor.predict(X_test)
Y_hat

  Y_hat = 1 / (1 + np.exp(-linear_model))


array([1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0])

In [26]:
print("LR classification accuracy:", regressor.accuracy(X_test, Y_test))

LR classification accuracy: 0.8333333333333334


  Y_hat = 1 / (1 + np.exp(-linear_model))


# 5.For practice

In [None]:
# imports
# class logistic regression
# import data
# fit
# predict

In [34]:
import numpy as np
import torch

In [35]:
class LogisticRegressionScratch01(object):
    def __init__(self, lr: float = 0.001, epoch: int = 1000):
        self.lr = lr
        self.epoch = epoch
        self.W = None
        self.b = None

    def fit(self, X: np.ndarray, Y: np.ndarray):
        n_samples, n_features = X.shape

        # predict
        Y_hat = self.predict(X)
        # compute gradients
        dw = (1 / n_samples) * np.dot(X.T, (Y_hat - Y))
        db = (1 / n_samples) * np.sum(Y_hat - Y)
        # update weights
        self.W -= self.lr * dw
        self.b -= self.b * db

    def predict(self, X: np.ndarray) -> np.ndarray:
        liner_argument = np.dot(self.W, X) + self.b
        predict_result = 1 / (1 + np.exp(-1 * liner_argument))
        return predict_result

    def predict_cls(self, X: np.ndarray) -> np.ndarray:
        Y_hat = self.predict(X)
        Y_hat_cls = np.array([1 if x >= 0.5 else 0 for x in Y_hat])
        return Y_hat_cls

    def accuracy(self, X, Y):
        Y_hat_cls = self.predict_cls(X)
        accuracy_result = np.sum(
            [1 if Y_hat_cls[i] == Y[i] else 0 for i in range(len(Y_hat))]
        ) / len(Y_hat)
        return accuracy_result