# Logistic Regression

In [1]:
from numpy.typing import NDArray
import numpy as np

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score as auc

In [3]:
class LogisticRegression:
    def __init__(self):
        self.w = None
        self.b = None

    def _sigmoid(self, x):
        val = 1 / (1 + np.exp(-x))
        return np.clip(val, 1e-6, 1e6)

    def fit(
        self, x: NDArray, y: NDArray, lr=0.001, num_round=10, l1=0.0, l2=0.0
    ) -> None:
        """Fit a logistic regression model

        Parameters
        ----------
        x : NDArray
            training data
        y : NDArray
            training label
        lr : float, optional
            learning rate, by default 0.01
        num_round : int, optional
            number of rounds of training
        l1 : float, optional
            l1 regularization weight, by default 0.0
        l2 : float, optional
            l2 regularization weight, by default 0.0
        """
        w = np.random.rand(x.shape[1] + 1, 1)
        x = np.concatenate([x, np.ones((x.shape[0], 1))], axis=1)
        
        tmp = self._sigmoid(x @ w) # * (n, 1)
        gradient = - np.sum(x * (y - tmp), axis=0).reshape(-1,1)

        for i in range(num_round):
            w = w - lr * (gradient + l2 * w + l1 * np.sign(w))

        self.w = w[:-1, :]
        self.b = w[-1:, :]

    def predict(self, x: NDArray) -> NDArray:
        """Use logistic regression to predict

        Parameters
        ----------
        x : NDArray
            testing data

        Returns
        -------
        NDArray
            predicted p(y=1)
        """
        return self._sigmoid(x @ self.w + self.b)

In [4]:
x, y = load_breast_cancer(return_X_y=True)

In [5]:
scaler = MinMaxScaler()

x_train = scaler.fit_transform(x[:-25, :])
y_train = y[:-25].reshape(-1,1)

x_test = scaler.transform(x[-25:, :])
y_test = y[-25:].reshape(-1,1)

In [9]:
lr = LogisticRegression()
lr.fit(x_train, y_train, l1=0, l2=0.1)

In [10]:
auc(y_train, lr.predict(x_train))

0.9790888722927558

In [11]:
y_pred = lr.predict(x_test)
auc(y_test, y_pred)

1.0