## Функции потерь и оптимизация

Прочитать про методы оптимизации для нейронных сетей https://habr.com/post/318970/

Реализовать самостоятельно логистическую регрессию

Обучить ее методом градиентного спуска

Методом nesterov momentum

Методом rmsprop

В качестве dataset’а взять Iris, оставив 2 класса:
Iris Versicolor
Iris Virginica

In [61]:
import pandas as pd
import numpy as np
from sklearn import datasets

In [134]:
iris = datasets.load_iris()
print(iris.DESCR) # смотрим что представялет из себя датасет
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.info() # нет ли пустых
df.head(20)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


In [135]:
# смотрим что из себя предствляет целевая переменная, отбираем Iris Versicolor Iris Virginica
print(iris.target_names)
df['class'] = iris.target
df = df [ df['class'].isin([1,2]) ]


['setosa' 'versicolor' 'virginica']


In [136]:
# подготовим данные для обучения моделям
y = df['class'] - 1
x = df.drop(['class'], axis=1)

In [137]:
theta = np.random.normal(size=(5))
theta

array([-0.25300401, -1.00214626, -0.99247957,  0.55589981,  1.27497644])

In [138]:
def predict(theta, x):
    predict = theta[0] + theta[1] * x.values[:, 0] + theta[2] * x.values[:, 1] + theta[3] * x.values[:, 2] + theta[4] * x.values[:, 3]
    sigm = 1. / (1 + np.exp(-predict))
    return sigm

In [139]:
# Uрадиентный спуск
lr = 0.01
for _ in range(1000):
    sigm = predict(theta, x)
    theta[0] -= lr * np.sum(sigm - y)/len(sigm)
    theta[1] -= lr * np.sum((sigm - y) * x.values[:, 0])/len(sigm)
    theta[2] -= lr * np.sum((sigm - y) * x.values[:, 1])/len(sigm)
    theta[3] -= lr * np.sum((sigm - y) * x.values[:, 2])/len(sigm)
    theta[4] -= lr * np.sum((sigm - y) * x.values[:, 3])/len(sigm)
    loss = - np.mean(np.log(sigm) * y + np.log(1 - sigm) * (1 - y))
print(theta)

[-0.41048097 -1.02866829 -1.12391919  1.43331181  1.88542922]


In [140]:
# Nesterov momentum
lr = 0.01
vel_pred = np.zeros(5)
vel = np.zeros(5)
gamma = 0.975
for _ in range(1000):
    sigm = predict(theta, x)
    loss = - np.mean(np.log(sigm) * y + np.log(1 - sigm) * (1 - y))
    sigm = predict(theta - gamma * vel_pred, x)
    vel = np.dot(gamma, vel_pred) + lr * np.sum(np.dot((sigm - y),x))/len(sigm)
    theta -= vel
    vel_pred = vel
print(theta)

[-0.40983144 -1.02801876 -1.12326966  1.43396134  1.88607875]


In [141]:
# Rmsprop 
lr = 0.01
e_sq_grad = np.zeros(5)
grad = np.zeros(5)
rmsp_loss_hist = []
gamma = 0.975
eps = 0.00000001
for _ in range(1000):
    sigm = predict(theta, x)
    loss = - np.mean(np.log(sigm) * y + np.log(1 - sigm) * (1 - y))
    grad = np.sum(np.dot((sigm - y), x))/len(sigm)
    e_sq_grad = gamma * e_sq_grad + (1 - gamma)  * grad ** 2    
    theta -= lr * grad / np.sqrt(e_sq_grad + eps)
print(theta)

[-0.41479852 -1.03298585 -1.12823674  1.42899426  1.88111167]


Итог. Во всех трех моделях коэффициенты рассчитаны почти одинаково