# Logistic Regression From Scratch

In [2]:
from typing import Any

import numpy as np
from numpy.typing import NDArray

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [3]:
raw_x, raw_y = make_classification(n_features=10,n_samples=1000, random_state=3442, n_classes=4, n_informative=4)

In [4]:
raw_x.shape, raw_y.shape

((1000, 10), (1000,))

In [5]:
raw_x[:5]

array([[-1.49145922, -1.36105207, -0.39836706, -1.05390468, -0.22661572,
         2.00852865, -1.59656552, -1.19994206,  0.20283687, -1.20928586],
       [ 2.75266804, -2.44971701, -4.00242896, -0.65984341,  0.27017922,
        -0.8008154 , -0.56313084,  1.07736921, -1.43049395,  2.84724508],
       [-1.78716186, -0.96396832,  0.36593077,  0.98423091,  1.3221235 ,
        -0.56834005, -1.67309031,  0.13677196,  2.32924393, -1.41223455],
       [-0.97497049,  1.16181039,  0.21046438,  0.88443167,  0.78531221,
        -1.2282034 , -1.0789787 ,  1.79818595, -0.96636793,  0.13870262],
       [ 0.67415307,  2.36344234,  2.20219647, -1.21472874,  0.76833011,
         2.04722019,  2.86588265,  0.09576459, -1.62056968, -0.31048037]])

In [6]:
x_train, x_test, y_train, y_test = train_test_split(raw_x, raw_y, random_state=3453, test_size=.2)
(x_train.shape, y_train.shape), (x_test.shape, y_test.shape)

(((800, 10), (800,)), ((200, 10), (200,)))

In [7]:
class LogisticRegression:
    """implement logistic regression from scratch"""

    def __init__(self, learning_rate: float = 0.001, epochs: int = 500, classes: int = 2) -> None:
        """initializing model & hyper parameters"""
        
        self.learning_rate = learning_rate
        self.epochs = epochs  
        self.no_of_classes = classes
        self.bias = None 
        self.weight= None

    def linear_equation(self, x: NDArray[np.float64], w: NDArray[np.float64], b: float | NDArray) -> NDArray:
        """
            equation: z = w . x + b
        """
        if x.shape[1] != w.shape[0]:
            raise ValueError("X and W are mismatched column count")

        return np.dot(x, w) + b
        
    @staticmethod
    def sigmoid(z: NDArray) -> NDArray:
        """
            equation: sigmoid(z) = 1 / 1 + e ^ -z
        """
        result = [1/(1+np.exp(-each)) for each in z]
        return np.array(result)


    @staticmethod
    def softmax(z: NDArray[float]) -> NDArray:
        """
            equation: softmax(z) = e^z / sum(e^z)
        """
        sum_of_exp = sum([np.exp(each) for each in z])
        result = [np.round(np.exp(each)/sum_of_exp, 5) for each in z]
        return np.array(result)

    
    @staticmethod
    def get_dw(y_true: NDArray[int], y_prob: NDArray[float], x: NDArray[float]) -> float:
        """generate partial derivative of loss with respect to weight
        
            Equation:
                dw = 1/n . y_prob_i - y_true_i . x_i
        """
        error = y_prob - y_true
        dw = []
        for each in x.T:
            result = np.mean([np.round(a*b, 3) for a, b in zip(error, each)])
            dw.append(result)
        return np.array(dw)

    @staticmethod
    def get_db(y_true: NDArray[int], y_prob: NDArray[float]) -> float:
        """generate partial derivative of loss with respect to bias"""
        error = y_prob - y_true
        return np.mean(error)
        
    def update_params(self, y_prob: NDArray, y_true: NDArray, x: NDArray) -> tuple[float, float]:
        """
        Returns:
            tuple[float, float] : (weight, bias)
        """
        new_weight = self.weight - (self.learning_rate * self.get_dw(y_prob=y_prob, y_true=y_true, x=x))
        new_bias = self.bias - (self.learning_rate * self.get_db(y_prob=y_prob, y_true=y_true))
        return (new_weight, new_bias)
        

    def fit(self, x: NDArray[np.float64], y: NDArray[np.int16]) -> Any:
        """train the model

        Args:
            x: (ndarray) : Feature matrix with shape (n_samples, n_features)
            y: (ndarray) : Target matrix with shape (n_samples,)

        Return:
            Any

        Raise:
            ValueError : if x and y have mismatched length

        """
        if x.shape[0] != y.shape[0]:
            raise ValueError(f"x and y have mismatched shape x: {x.shape[0]} y:{x.shape[0]}")

        self.weight = np.random.randn(x.shape[1]) * .01 if self.no_of_classes <= 2 else np.random.randn(x.shape[1], self.no_of_classes)
        self.bias = 0 if self.no_of_classes <= 2 else np.zeros(self.no_of_classes)

        # for _ in range(self.epochs):
        #     logits = self.linear_equation(x=x, w=self.weight, b=self.bias)
            
        #     prob = self.sigmoid(logits)

        #     # self.forward_propagation()
        #     # self.backward_propagation()

        #     self.weight, self.bias = self.update_params(x=x, y_prob=prob, y_true=y)
        logits = self.linear_equation(x=x, w=self.weight, b=self.bias)
        prob = self.softmax(logits)
        print("training completed")

    def predict(self, x: np.ndarray) -> Any:
        pred_logits = self.linear_equation(x=x, w=self.weight, b=self.bias)
        pred_prob = self.sigmoid(pred_logits)

        result = list(map(lambda x: 1 if x > .5 else 0, pred_prob))
        return np.array(result)

In [15]:
model = LogisticRegression(classes=4)

p = model.fit(x=x_train, y=y_train)
p[0]

[[1.80e-04 0.00e+00 1.29e-03 3.00e-05]
 [2.00e-05 0.00e+00 4.00e-05 0.00e+00]
 [0.00e+00 1.00e-05 2.00e-05 0.00e+00]
 ...
 [4.30e-04 0.00e+00 6.80e-04 0.00e+00]
 [4.00e-05 0.00e+00 0.00e+00 0.00e+00]
 [0.00e+00 1.00e-05 0.00e+00 0.00e+00]]
--------------------------------------------------
training completed


array([1.80e-04, 0.00e+00, 1.29e-03, 3.00e-05])

In [16]:
model.weight

array([[ 0.9598751 ,  0.08524095,  0.25602445,  0.98297979],
       [ 0.45308351, -0.12973527, -0.82842919,  0.50777622],
       [-1.30475974,  1.89011479,  0.20936792, -0.94777063],
       [ 0.92853219, -0.11910786,  0.8650303 , -1.56942173],
       [ 0.26220022,  0.86355589, -0.17935383, -0.17878888],
       [ 0.82995202, -1.28151344, -2.75796018, -0.23509009],
       [-0.48379702,  1.83144581,  0.13339017,  0.26945298],
       [ 0.79315503, -0.58266278,  1.17098889, -0.04930467],
       [-1.43194209, -0.86334359,  0.45613484, -0.29042935],
       [-0.52867257,  0.46569203,  0.07400278,  0.97001726]])

In [14]:
model.bias

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
y_pred = model.predict(x=x_test)
y_pred

In [None]:
y_test

In [None]:
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
print(f"TN : {tn}\nFP : {fp}\nFN : {fn}\nTP : {tp}")

In [None]:
test_acc = accuracy_score(y_test, y_pred) * 100
print(f"Test Accuracy is {test_acc}")

In [None]:
y_pred_train = model.predict(x_train)
train_acc = accuracy_score(y_train, y_pred_train) * 100
print(f"Train Accuracy is {train_acc}")