In [5]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X = iris.data  
y = iris.target

y = y[y>=1]
ind = np.where(y>=1)
X = X[ind]

scaler = StandardScaler()
X = scaler.fit_transform(X)

X = np.c_[np.ones(len(X)),X]

x_min, x_max = X.min() - 0.5, X.max() + 0.5
y_min, y_max = y.min() - 0.5, y.max() + 0.5

h = 0.01
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
x_t = np.c_[xx.ravel(),yy.ravel()]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def logloss(y, y_proba):
    logloss_2 = np.sum(np.log(y_proba[y == 2] + 1e-30))
    logloss_1 = np.sum(np.log(1 - y_proba[y == 1] + 1e-30))
    logloss_total = - (logloss_1 + logloss_2) / len(y)

def gr_logloss(X, W, y):
    y_proba = sigmoid(X @ W)
    grad = X.T @ (y_proba - y)
    return grad

def visualize(W):
    x_t_ones = np.c_[np.ones(x_t.shape[0]), x_t]
    y_proba = sigmoid(x_t_ones @ W)
    Z = np.where(y_proba >= 0.5, 2, 1)
    
    Z = Z.reshape(xx.shape)

    plt.figure(1, figsize=(8,8))
    plt.pcolormesh(xx,yy,Z,cmap=plt.cm.Paired)

    plt.scatter(X[y == 1][:, 1], X[y == 1][:, 2], edgecolors='k')
    plt.scatter(X[y == 2][:, 1], X[y == 2][:, 2], edgecolors='k')

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks()
    plt.show()

eps = 0.0001
learn_rate = 0.01
n = 250

np.random.seed(8)
W = np.random.randn(X.shape[1])
next_W = W

for i in range(n):
    cur_W = next_W
    
    next_W = cur_W - learn_rate * gr_logloss(X, W, y)
    
    if np.linalg.norm(cur_W - next_W) <= eps:
        break

    print(np.linalg.norm(cur_W - next_W))

    if i % 70 == 0:
        print (f"Итерация: {i}")
        y_proba = sigmoid(X @ next_W)
        y_class = np.where(y_proba >= 0.5, 2, 1)
        accuracy = (y_class == y).sum()/len(y)
        print(f"Logloss {logloss(y, y_proba)}")
        print(f"Accuracy {accuracy}")
        print("__________________________________________")

        # visualize(next_W)

1.5951866970991586
Итерация: 0
Logloss None
Accuracy 0.63
__________________________________________
1.5951866970991588
1.5951866970991588
1.5951866970991588
1.5951866970991584
1.5951866970991586
1.5951866970991586
1.5951866970991586
1.5951866970991586
1.595186697099159
1.595186697099159
1.5951866970991595
1.595186697099159
1.5951866970991586
1.5951866970991586
1.5951866970991586
1.5951866970991586
1.5951866970991575
1.5951866970991575
1.5951866970991575
1.5951866970991575
1.5951866970991575
1.5951866970991575
1.5951866970991568
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.5951866970991584
1.595186697099158
1.595186697099158
1.595186697099158
1.595186697099158
1.595186697099158
1.5951866970991546
1.5951866970991546
1.5951866970991546
1.595186697099

In [159]:
cur_W = next_W
print(cur_W)

[247.95109464 155.09822061 -92.91377843 187.33792273 188.54239691]


In [162]:
y_proba = sigmoid(X @ W)
print(y_proba[:5])

[0.83418819 0.9650226  0.90501757 0.90886348 0.73806801]


In [163]:
grad = X.T @ (y_proba - y)
print(grad)

[-97.19995683 -60.3948776   35.67325809 -74.00951853 -74.8387798 ]


In [164]:
next_W = cur_W - learn_rate * grad
print(next_W)

[248.92309421 155.70216938 -93.27051101 188.07801791 189.29078471]


In [165]:
y_proba = sigmoid(X @ next_W)
y_class = np.where(y_proba >= 0.5, 2, 1)
accuracy = (y_class == y).sum()/len(y)
print(accuracy)

1.0
