# Домашнее задание по теме "Функции потерь и оптимизация"

<pre>
ФИО: Никифоров Владимир
</pre>

* Прочитать про методы оптимизации для нейронных сетей https://habr.com/post/318970/
* Реализовать самостоятельно логистическую регрессию
    * Обучить ее методом градиентного спуска
    * Методом nesterov momentum
    * Методом rmsprop
* В качестве dataset'а взять Iris, оставив 2 класса:
    * Iris Versicolor
    * Iris Virginica

In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
from jupyterthemes import jtplot
jtplot.style()

In [2]:
iris = datasets.load_iris()
print(iris.target_names)

['setosa' 'versicolor' 'virginica']


In [3]:
# Сделаем в виде пандас-фрейма
df = pd.concat([pd.DataFrame(data=iris.data,columns=['c1','c2','c3','c4']),pd.DataFrame(data=iris.target,columns=['target'])],axis=1)
# Оставим лишь нужные нам классы Versicolor и Virginica, т.е. где target != 0
df = df[df['target']!=0]
# Для бинарной классификации превратим классы 1 и 2 в классы 0 и 1
df['target'] = df['target'] - 1
df.head()

Unnamed: 0,c1,c2,c3,c4,target
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0


In [4]:
X, Y = df['c1'].values, df['target'].values

In [5]:
X, Y

(array([7. , 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. ,
        6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6,
        6.8, 6.7, 6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6,
        5.5, 5.5, 6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8,
        7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8,
        6.4, 6.5, 7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1,
        6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7,
        6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]))

In [6]:
EPOCHS = 200
LEARNING_RATE = 0.01

Наш sigmoid:

$${\displaystyle \sigma (x)={\frac {1}{1+e^{-x}}}.}$$

In [7]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

Наш loss = cost_function:

$$J = -\sum_{i=1}^{N} y_i\log (h_\theta(x_i)) + (1 - y_i)\log(1 - h_\theta(x_i))$$

In [8]:
def cost_function(Y_label, Y_pred):
    return - np.sum((Y_label * np.log(Y_pred)) + ((1. - Y_label) * np.log(1. - Y_pred)))

In [9]:
def der_theta0(X, y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += sigmoid((theta0 + theta1*X[i] - y[i]))
    return total_cost / (len(X))    

In [10]:
def der_theta1(X, y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += sigmoid((theta0 + theta1*X[i] - y[i]) * X[i])
    return total_cost / (len(X))  

In [11]:
theta0 = 1
theta1 = 1
for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0, theta1)
    dt1 = der_theta1(X, Y, theta0, theta1)
    
    theta0 = theta0 - LEARNING_RATE * dt0
    theta1 -= LEARNING_RATE * dt1
    
    print("t0:", theta0, "t1:", theta1, "cost:", cost_function(Y, sigmoid(theta0 + theta1*X)))

t0: 0.9900138491267563 t1: 0.9900000000000038 cost: 343.4246778235372
t0: 0.9800287000787925 t1: 0.9800000000000089 cost: 339.96396280217334
t0: 0.9700446257828749 t1: 0.9700000000000156 cost: 336.5037701297334
t0: 0.9600617044959436 t1: 0.9600000000000246 cost: 333.04413749570085
t0: 0.950080020194522 t1: 0.9500000000000367 cost: 329.5851053215628
t0: 0.9400996629922982 t1: 0.9400000000000528 cost: 326.12671695902765
t0: 0.9301207295878589 t1: 0.9300000000000743 cost: 322.6690189026184
t0: 0.9201433237446799 t1: 0.9200000000001031 cost: 319.21206101752057
t0: 0.9101675568056156 t1: 0.9100000000001415 cost: 315.755896783835
t0: 0.9001935482442679 t1: 0.9000000000001926 cost: 312.3005835583707
t0: 0.8902214262557658 t1: 0.890000000000261 cost: 308.8461828551859
t0: 0.8802513283896378 t1: 0.8800000000003524 cost: 305.39276064619366
t0: 0.8702834022276218 t1: 0.8700000000004744 cost: 301.94038768320513
t0: 0.8603178061094214 t1: 0.8600000000006374 cost: 298.48913984289834
t0: 0.8503547099

In [12]:
# Минимум при t0: 0.06673209535344779 t1: 0.001641070705943447 cost: 69.36209373947258
theta0 = 0.06673209535344779
theta1 = 0.001641070705943447

Y_pred = sigmoid(theta0 + theta1*X)
Y_pred = [0 if y < 0.5 else 1 for y in Y_pred]

In [13]:
# Полученная точность так себе :)
accuracy_score(y_true=Y, y_pred=Y_pred)

0.5