In [261]:
import pandas as pd
import re
import numpy as np
from scipy.optimize import minimize
from sklearn.linear_model import LogisticRegression

In [262]:
data = pd.read_csv('iris.data', header = None)

In [263]:
data.columns = ['sepal_length','sepal_width', 'petal_length', 'petal_width', 'class']

In [264]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [265]:
# Split labels of the form "<word>-<word>" (e.g. "Iris-setosa") into two
# columns: the part before the dash and the part after it.
reg_exp = r'(\w+)-(\w+)'
new_columns = (
    data['class']
    .str.extract(reg_exp)
    .rename(columns={0: 'iris', 1: 'iris_class'})
)

In [266]:
data = data.join(new_columns)

In [267]:
data = data[data.iris_class != 'setosa']

In [268]:
data = data.reset_index()

In [269]:
def get_class(row):
    """Encode the species of a single row as a binary target.

    Returns 1 when ``row['iris_class']`` equals ``'versicolor'`` and 0 for
    every other value.
    """
    return 1 if row['iris_class'] == 'versicolor' else 0

In [270]:
data['is_versicolor'] = data.apply(get_class, axis = 1)

In [271]:
# Discard the raw label, its two regex-derived parts and the old index column.
data = data.drop(columns=['class', 'iris', 'index', 'iris_class'])

In [272]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,is_versicolor
0,7.0,3.2,4.7,1.4,1
1,6.4,3.2,4.5,1.5,1
2,6.9,3.1,4.9,1.5,1
3,5.5,2.3,4.0,1.3,1
4,6.5,2.8,4.6,1.5,1


$$J(\theta) = -\sum_{i=1}^{N} \Big[\, y_i\log h_\theta(x_i) + (1 - y_i)\log\big(1 - h_\theta(x_i)\big) \Big], \qquad h_\theta(x) = \frac{1}{1 + e^{-\theta^\top x}}$$

In [273]:
def loss(w, matrix, epsilon):
    """Return the summed binary cross-entropy of a logistic model.

    Parameters
    ----------
    w : array-like, shape (n_features + 1,)
        Model weights; ``w[0]`` multiplies the constant 1 that is prepended
        to every sample.
    matrix : array-like, shape (n_samples, n_features + 1)
        One sample per row: the feature values followed by the 0/1 label in
        the last column.
    epsilon : float
        Guard against ``log(0)``. A predicted probability that is exactly 0
        is replaced by ``epsilon`` and one that is exactly 1 by
        ``1 - epsilon``; all other probabilities are used unchanged.

    Returns
    -------
    float
        ``-sum(y * log(p) + (1 - y) * log(1 - p))`` over all samples, or 0
        when ``matrix`` contains no samples.
    """
    matrix = np.asarray(matrix, dtype=float)
    if matrix.size == 0:
        return 0

    features, y = matrix[:, :-1], matrix[:, -1]
    # Prepend the bias column once instead of growing every row with np.append.
    design = np.column_stack([np.ones(len(matrix)), features])
    scores = np.dot(design, np.asarray(w, dtype=float))

    # exp() overflows to inf for very negative scores; the resulting
    # probability of exactly 0 is handled by the epsilon guard below.
    with np.errstate(over='ignore'):
        sigma = 1 / (1 + np.exp(-scores))

    # Only exact saturation is adjusted, mirroring an if/elif on each sample.
    sigma = np.where(sigma == 0, epsilon,
                     np.where(sigma == 1, sigma - epsilon, sigma))

    return -np.sum(y * np.log(sigma) + (1 - y) * np.log(1 - sigma))

In [274]:
data_array = data.values

In [275]:
w_init = np.array([1, 1, 1, 1, 1])

In [277]:
loss(w_init, data_array, 10 ** (-5))

907.0000287020378

In [278]:
def get_grad(w, matrix):
    """Return the gradient of the summed cross-entropy loss w.r.t. ``w``.

    Parameters
    ----------
    w : array-like, shape (n_features + 1,)
        Model weights; ``w[0]`` multiplies the constant 1 that is prepended
        to every sample.
    matrix : array-like, shape (n_samples, n_features + 1)
        One sample per row: the feature values followed by the 0/1 label in
        the last column.

    Returns
    -------
    numpy.ndarray, shape (n_features + 1,)
        ``sum_i (sigmoid(w . x_i) - y_i) * x_i`` where ``x_i`` is the sample
        with a leading 1. The sum is not divided by the number of samples.
        Works for any number of features, not only four.
    """
    w = np.asarray(w, dtype=float)
    matrix = np.asarray(matrix, dtype=float)
    if matrix.size == 0:
        return np.zeros(len(w))

    features, y = matrix[:, :-1], matrix[:, -1]
    # Prepend the bias column once for all samples.
    design = np.column_stack([np.ones(len(matrix)), features])

    # exp() may overflow to inf for very negative scores; 1 / (1 + inf) is
    # then exactly 0, which is the correct limiting probability.
    with np.errstate(over='ignore'):
        sigma = 1 / (1 + np.exp(-np.dot(design, w)))

    return np.dot(design.T, sigma - y)

In [279]:
w_init = np.array([1, 1, 1, 1, 1])
get_grad(w_init, data_array)

array([ 49.99997102, 329.39984385, 148.69992836, 277.59989154,
       101.29996629])

In [None]:
## Обучение логистической регрессии методом градиентного спуска

In [283]:
# Plain batch gradient descent: start from all-ones weights and take 1000
# fixed-size steps against the gradient returned by get_grad.
learning_rate = 0.1
w_init = np.ones(5, dtype=int)

for step in range(1000):
    w_init = w_init - learning_rate * get_grad(w_init, data_array)

In [282]:
loss(w_init, data_array, 10 ** (-5))

48.56415792122623

In [284]:
data.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'is_versicolor'],
      dtype='object')

In [285]:
def log_reg(row, w=None):
    """Predict the binary class for a single row with a logistic model.

    Parameters
    ----------
    row : mapping
        Must provide ``'sepal_length'``, ``'sepal_width'``,
        ``'petal_length'`` and ``'petal_width'`` (e.g. a DataFrame row).
    w : array-like of length 5, optional
        Weights ordered as (bias, sepal_length, sepal_width, petal_length,
        petal_width). When omitted, the notebook-level ``w_init`` is read at
        call time, so ``data.apply(log_reg, axis=1)`` keeps working. Passing
        the weights explicitly removes the dependence on which training cell
        ran last.

    Returns
    -------
    int
        1 when the predicted probability is at least 0.5, otherwise 0.
    """
    if w is None:
        # Backward-compatible fallback to the global weight vector.
        w = w_init

    X = np.array([
        1,
        row['sepal_length'],
        row['sepal_width'],
        row['petal_length'],
        row['petal_width'],
    ])
    h_x = np.dot(w, X)
    sigma_x = 1 / (1 + np.e ** (-h_x))
    return 1 if sigma_x >= 0.5 else 0

In [286]:
data['log_reg_grad'] = data.apply(log_reg, axis = 1)

In [289]:
data[data['is_versicolor'] != data['log_reg_grad']]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,is_versicolor,log_reg_grad
33,6.0,2.7,5.1,1.6,1,0
83,6.3,2.8,5.1,1.5,0,1


Получается, что алгоритм ошибся в 2 случаях из 100.

Обучение логистической регрессии методом nesterov momentum

In [319]:
w_init = np.array([1, 1, 1, 1, 1])
loss(w_init, data_array, 10 ** (-5))

907.0000287020378

In [320]:
# Nesterov momentum: the gradient is evaluated at the look-ahead point
# w - gamma * v rather than at the current weights.
learning_rate = 0.05
gamma = 0.9
w_init = np.ones(5, dtype=int)
v_t_m1 = np.full(5, 0.01)

for step in range(1000):
    lookahead = w_init - gamma * v_t_m1
    # The new velocity immediately becomes the "previous" one for the next step.
    v_t_m1 = gamma * v_t_m1 + learning_rate * get_grad(lookahead, data_array)
    w_init = w_init - v_t_m1

In [321]:
loss(w_init, data_array, 10 ** (-5))

27.699595833393822

In [322]:
data['log_reg_nesterov'] = data.apply(log_reg, axis = 1)

In [323]:
data[data['is_versicolor'] != data['log_reg_nesterov']]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,is_versicolor,log_reg_grad,log_reg_nesterov
33,6.0,2.7,5.1,1.6,1,0,0
83,6.3,2.8,5.1,1.5,0,1,1


И здесь алгоритм ошибается на тех же примерах.

Обучение логистической регрессии методом rmsprop

In [324]:
w_init = np.array([1, 1, 1, 1, 1])
loss(w_init, data_array, 10 ** (-5))

907.0000287020378

In [333]:
# RMSprop: every weight gets its own step size, obtained by dividing the
# learning rate by the root of a running average of squared gradients.
w_init = np.array([1, 1, 1, 1, 1])

learning_rate = 0.1
gamma = 0.9        # decay factor of the running average
xi = 10 ** (-8)    # keeps the denominator away from zero

E_g2_t_m1 = np.array([0.01, 0.01, 0.01, 0.01, 0.01])

for i in range(1000):
    # The gradient is a full pass over the data, so evaluate it once per
    # iteration and reuse it for both the average and the update.
    grad = get_grad(w_init, data_array)
    g2_t = grad * grad
    E_g2_t = gamma * E_g2_t_m1 + (1 - gamma) * g2_t
    w_init = w_init - learning_rate * (1 / (E_g2_t + xi) ** 0.5) * grad

    E_g2_t_m1 = E_g2_t

In [334]:
loss(w_init, data_array, 10 ** (-5))

9.720618441647124

In [335]:
data['log_rmsprop'] = data.apply(log_reg, axis = 1)

In [336]:
data[data['is_versicolor'] != data['log_rmsprop']]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,is_versicolor,log_reg_grad,log_reg_nesterov,log_rmsprop
33,6.0,2.7,5.1,1.6,1,0,0,0
79,7.2,3.0,5.8,1.6,0,0,0,1
83,6.3,2.8,5.1,1.5,0,1,1,1


In [None]:
# Здесь к уже известным случаям (33 и 83) добавляется ещё один ошибочно распознанный случай — 79.

## Проверка полученных результатов с помощью встроенной функции логистической регрессии

In [175]:
# Build the design matrix from the four measurement columns only.
# Selecting the columns explicitly (and copying) avoids two problems of
# aliasing `data` and deleting the label from it: `data` keeps its
# 'is_versicolor' column, and the prediction columns added to `data` by the
# earlier cells are not passed to the model as features.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = data[feature_columns].copy()
y = data['is_versicolor']
X.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,7.0,3.2,4.7,1.4
1,6.4,3.2,4.5,1.5
2,6.9,3.1,4.9,1.5
3,5.5,2.3,4.0,1.3
4,6.5,2.8,4.6,1.5


In [177]:
model = LogisticRegression()

In [178]:
model.fit(X, y)

LogisticRegression()

In [186]:
model.coef_

array([[ 0.39443136,  0.51327025, -2.93075043, -2.4170433 ]])

In [290]:
predictions = model.predict_proba(X)

In [190]:
model.score(X, y)

0.96

Итак, получены следующие значения точности на обучающей выборке (отдельная тестовая выборка не использовалась):
  * градиентный спуск: 98 %;
  * Nesterov momentum: 98 %;
  * RMSprop: 97 %;
  * LogisticRegression: 96 %.