In [83]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

In [84]:
iris = pd.read_csv('iris.csv')
iris = iris[(iris['variety'] == 'Versicolor') | (iris['variety'] == 'Virginica')]
iris['variety_num'] = iris['variety_num'].replace(2, 0)
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,variety,variety_num
50,7.0,3.2,4.7,1.4,Versicolor,1
51,6.4,3.2,4.5,1.5,Versicolor,1
52,6.9,3.1,4.9,1.5,Versicolor,1
53,5.5,2.3,4.0,1.3,Versicolor,1
54,6.5,2.8,4.6,1.5,Versicolor,1
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica,0
146,6.3,2.5,5.0,1.9,Virginica,0
147,6.5,3.0,5.2,2.0,Virginica,0
148,6.2,3.4,5.4,2.3,Virginica,0


In [85]:
X, y = iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']], iris['variety_num']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)

In [86]:
class MyLogisticRegression:
  def __init__(self, random_state=42):
      self.coef_ = None
      self.bias_ = None
      self.losses_ = None
      self.random_state = random_state
    
  def fit(self, X, y, max_iter=100, learning_rate=0.1):
    np.random.seed(self.random_state)
    self.coef_ = np.random.randn(X.shape[1])
    np.random.seed(self.random_state)
    self.bias_ = np.random.randn(1).item()
    self.losses_ = []
    for i in range(max_iter):
      grad_coefs, grad_bias = self.grad(X, y)
      self.coef_ = np.array(self.coef_ - learning_rate * grad_coefs)
      self.bias_ = np.array(self.bias_ - learning_rate * grad_bias)
      loss = self.bce_loss(X, y)
      self.losses_.append(loss)
    return self
  
  def grad(self, X, y):
    probas = self.predict_proba(X)
    delta = probas - y
    grads = np.mean(X.T * delta, axis=1)
    return grads, np.mean(delta)
  
  def bce_loss(self, X, y):
    probas = self.predict_proba(X)
    filter_ones = y == 1
    loss = - 1 * (np.sum(np.log(probas[filter_ones])) + np.sum(np.log(1 - probas[~filter_ones]))) / len(y)
    return loss

  def predict_proba(self, X):
    return 1 / (1 + np.exp(-(X.dot(self.coef_) + self.bias_))) 

  def predict(self, X):
    probas = self.predict_proba(X)
    return (probas > 0.5).astype(int)

In [79]:
lr = MyLogisticRegression().fit(X_train, y_train)

In [87]:
#На обучаемой выборке
accuracy_score(y_train, lr.predict(X_train))

0.9466666666666667

In [93]:
#На тестовой выборке
accuracy_score(y_test, lr.predict(X_test))

0.88

Смотрим результат логистической регрессии из библиотеки sklearn

In [90]:
logReg = LogisticRegression().fit(X_train, y_train)
accuracy_score(y_train, lr.predict(X_train))

0.9466666666666667

In [91]:
accuracy_score(y_test, lr.predict(X_test))

0.88