In [1]:
from numpy.typing import NDArray
from typing import Any


import pandas as pd
import numpy as np
from sklearn.datasets import make_classification

In [2]:
raw_x, raw_y = make_classification(n_features=10, random_state=3442)

In [3]:
raw_x

array([[ 3.89409590e+00, -2.29426788e-01,  7.24439817e-01,
         2.06125577e+00, -5.19063405e+00,  2.76266395e+00,
         5.10975745e-01,  1.13617223e+00, -8.30594598e-01,
         1.73716220e+00],
       [ 9.88300777e-01, -6.17885394e-01, -6.79869860e-01,
         5.43060295e-01, -1.29439127e+00,  6.61617037e-01,
         5.64940740e-01,  2.63358160e-02,  2.65520676e-01,
         9.14965811e-02],
       [ 8.81405632e-01, -3.23072333e-01, -1.21999907e+00,
         1.55099730e+00,  7.50019880e-02, -1.52633336e+00,
         1.76545314e+00,  5.03409995e-01, -1.13444713e+00,
        -3.67738507e-01],
       [-5.30541766e-01, -1.57287038e+00,  1.26266661e-01,
        -3.56053101e-01,  6.20488433e-01, -2.27144053e-01,
        -2.35088298e+00, -1.55865986e+00,  2.34388853e-01,
        -6.37722247e-01],
       [-2.70509833e+00, -7.51492046e-02, -1.02661646e-01,
        -2.43413661e+00,  2.45062002e+00,  6.94362663e-02,
         8.47877472e-01,  6.73212808e-01, -1.23729699e+00,
        -6.

In [4]:
X = pd.DataFrame(raw_x)
Y = pd.DataFrame(raw_y)

In [5]:
X.shape, Y.shape

((100, 10), (100, 1))

In [6]:
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,3.894096,-0.229427,0.72444,2.061256,-5.190634,2.762664,0.510976,1.136172,-0.830595,1.737162
1,0.988301,-0.617885,-0.67987,0.54306,-1.294391,0.661617,0.564941,0.026336,0.265521,0.091497
2,0.881406,-0.323072,-1.219999,1.550997,0.075002,-1.526333,1.765453,0.50341,-1.134447,-0.367739
3,-0.530542,-1.57287,0.126267,-0.356053,0.620488,-0.227144,-2.350883,-1.55866,0.234389,-0.637722
4,-2.705098,-0.075149,-0.102662,-2.434137,2.45062,0.069436,0.847877,0.673213,-1.237297,-0.640184


In [7]:
class LogisticRegression:
    """implement logistic regression from scratch"""

    def __init__(self, learning_rate: float = 0.001, epochs: int = 500) -> None:
        """initializing model & hyper parameters"""
        
        self.learning_rate = learning_rate
        self.epochs = epochs  
        self.bias = None 
        self.weight= None

    @staticmethod
    def linear_equation(x: NDArray[np.float64], w: NDArray[np.float64], b: float) -> float:
        if x.shape[1] != w.shape[0]:
            raise ValueError("X and W are mismatched column count")
        logits = np.dot(x, w) + b
        return  logits.reshape(x.shape[0], -1)
        
    @staticmethod
    def sigmoid(z: float) -> float:
        return (1/(1+np.exp(-z)))

    @staticmethod
    def bce(y_true: NDArray, y_pred: NDArray) -> float:
        summation = 0
        for y_t, y_p in zip(y_true, y_pred):
            # log_loss = (y * log(y_cap)) + (1-y) * (1-log(Y_cap)) 
            summation += (y_t * y_p) + ((1 - y_t) * (1 - np.log(y_p)))
            
        return np.mean(summation)

    def update_params(self, loss: float) -> tuple[float, float]:
        """
        Returns:
            tuple[float, float] : (weight, bias)
        """

        

    def fit(self, x: NDArray[np.float64], y: NDArray[np.int16]) -> Any:
        """train the model

        Args:
            x: (ndarray) : Feature matrix with shape (n_samples, n_features)
            y: (ndarray) : Target matrix with shape (n_samples,)

        Return:
            Any

        Raise:
            ValueError : if x and y have mismatched length

        """
        if x.shape[0] != y.shape[0]:
            raise ValueError(f"x and y have mismatched shape x: {x.shape[0]} y:{x.shape[0]}")

        self.weight = np.random.randn(x.shape[1]) * .01
        self.bias = 0

        for _ in range(self.epochs):
            logits = self.linear_equation(x=x, w=self.weight, b=self.bias)
            prob = [self.sigmoid(each) for each in logits]
            
        print(f"Binary loss : {self.bce(y, prob)}") 
        return prob

    def predict(x: np.ndarray, y: np.ndarray) -> Any:
        raise NotImplementedError
        

In [8]:
test = LogisticRegression()

In [9]:
test.fit(x=raw_x, y=raw_y)

Binary loss : 108.412189387927


[array([0.48733201]),
 array([0.49993994]),
 array([0.5000872]),
 array([0.49816329]),
 array([0.49841591]),
 array([0.49744082]),
 array([0.50566729]),
 array([0.4890173]),
 array([0.4982802]),
 array([0.49968759]),
 array([0.49762729]),
 array([0.50330385]),
 array([0.49593841]),
 array([0.4921248]),
 array([0.4994989]),
 array([0.50368093]),
 array([0.50326919]),
 array([0.50051852]),
 array([0.50343459]),
 array([0.50053639]),
 array([0.50031542]),
 array([0.50476568]),
 array([0.50345]),
 array([0.49897651]),
 array([0.49572363]),
 array([0.49998169]),
 array([0.49630966]),
 array([0.50045031]),
 array([0.50524527]),
 array([0.50172775]),
 array([0.50101266]),
 array([0.49607708]),
 array([0.49948343]),
 array([0.50349183]),
 array([0.50065855]),
 array([0.49645487]),
 array([0.50416717]),
 array([0.49604998]),
 array([0.5038229]),
 array([0.49366845]),
 array([0.49397662]),
 array([0.49846627]),
 array([0.50766476]),
 array([0.4985674]),
 array([0.49449822]),
 array([0.50367124])