In [10]:
import pandas as pd
import numpy as np

### Создадим свою логистическую регрессию

In [80]:
from sklearn.base import BaseEstimator, RegressorMixin
class MyLogisticRegression(BaseEstimator, RegressorMixin):
    
    def __init__(self, fit_intercept = True):
        self.fit_intercept = fit_intercept
        self.threshold = 0.5
        
    @staticmethod
    def default_log_loss(y, p):
        # обрежем значения, чтобы предотвратить log(0)
        eps = 1e-15
        p = np.clip(p, eps, 1 - eps)
        return -1 * np.mean(y * np.log(p) + (1 - y)* np.log(1 - p))
        
    def fit(self, X, y, learning_rate = 1e-4, tol = 1e-7, max_iter = 10000, mu = 0.9, loss_func = default_log_loss):
        if loss_func is None:
            loss_func = default_log_loss
        
        n_objects, self.n_features = X.shape
        self.coef = np.zeros(self.n_features)
        self.intercept = 0
        
        def loss_difference(curr_loss, previous_loss):
            return abs(curr_loss - previous_loss) / max(1, previous_loss)
        self.loss = loss_func(y, self.predict_proba(X))
        self.last_loss = 1e6
        
        v_coef = np.zeros(self.n_features)
        v_intercept = 0
        
        while loss_difference(self.loss, self.last_loss) > tol and max_iter>0:
            coef_lookahead = self.coef + mu * v_coef
            intercept_lookahead = self.intercept + mu * v_intercept

            predictions = self.predict_proba(X, coef_lookahead, intercept_lookahead)
            dw = (1.0 / n_objects) * (X.T @ (predictions - y))
            db = (1.0 / n_objects) * np.sum(predictions - y)

            v_coef = mu * v_coef - learning_rate * dw
            v_intercept = mu * v_intercept - learning_rate * db

            self.coef += v_coef
            self.intercept += v_intercept

            self.last_loss = self.loss
            predictions_proba = self.predict_proba(X)
            self.loss = loss_func(y, predictions_proba)
            max_iter -= 1
        return self

    def predict_proba(self, X, coef = None, intercept = None):
        def sigmoid(logit):
            return 1.0 / (1.0 + np.exp(-logit))
        if coef is None:
            coef = self.coef
        if intercept is None:
            intercept = self.intercept
        logit = X @ coef + intercept
        sigm_output = sigmoid(logit)
        return sigm_output
        
    def predict(self, X):
        return (self.predict_proba(X) >= self.threshold).astype(int)

## Проверим качество работы логистической регрессии

In [81]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data
y = data.target

### Собственная реализация

In [82]:
from sklearn.model_selection import cross_val_score
my_log_reg = MyLogisticRegression()
print(f'Accuracy: {np.mean(cross_val_score(my_log_reg, X, y, cv = 7, scoring = 'accuracy'))}')

Accuracy: 0.9174517141996815


### Встроенная в sklearn

In [83]:
from sklearn.model_selection import cross_val_score
log_reg = LogisticRegression(max_iter = 10000)
print(f'Accuracy: {np.mean(cross_val_score(log_reg, X, y, cv = 7, scoring = 'accuracy'))}')

Accuracy: 0.9561018626059276
