In [11]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import math

In [2]:
N = 100
X = np.random.uniform(low=0, high=100, size=N)
Y = 2*X + 1 + np.random.normal(scale=5, size=N)

In [3]:
EPOCHS = 20
LEARNING_RATE = 0.0001

def cost_function(X, y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (theta0 + theta1*X[i] - y[i]) ** 2
    return total_cost / (2 * len(X))

def der_theta0(X, y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (theta0 + theta1*X[i] - y[i])
    return total_cost / (len(X))   

def der_theta1(X, y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (theta0 + theta1*X[i] - y[i]) * X[i]
    return total_cost / (len(X))  

In [4]:
theta0 = 1
theta1 = 1
for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0, theta1)
    dt1 = der_theta1(X, Y, theta0, theta1)
    
    theta0 = theta0 - LEARNING_RATE * dt0
    theta1 -= LEARNING_RATE * dt1
    
    print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

t0: 1.0044180480443585 t1: 1.2692598165353348 cost: 740.9557321317149
t0: 1.0076453450616598 t1: 1.4664952244838436 cost: 403.8768449778698
t0: 1.0100004063840913 t1: 1.6109720993519419 cost: 223.01057354618487
t0: 1.0117165473837304 t1: 1.7168028698300524 cost: 125.96323477390156
t0: 1.0129646736884894 t1: 1.7943250180365429 cost: 73.8905828391873
t0: 1.0138699752221674 t1: 1.8511108434594594 cost: 45.94997830678943
t0: 1.0145241547403263 t1: 1.8927071242097677 cost: 30.957895549245517
t0: 1.0149943853438217 t1: 1.9231769256034736 cost: 22.913595272060082
t0: 1.0153298719387733 t1: 1.945496479866181 cost: 18.59726350243064
t0: 1.0155666575655546 t1: 1.9618459041192726 cost: 16.281246111312615
t0: 1.015731144078406 t1: 1.9738221539437213 cost: 15.03853650304028
t0: 1.0158426710752095 t1: 1.9825950137986215 cost: 14.371731167292275
t0: 1.0159154049869583 t1: 1.9890213614187344 cost: 14.013938527205328
t0: 1.0159597228529411 t1: 1.9937288675229021 cost: 13.821952689932841
t0: 1.015983225

In [8]:
#Nesterov Momentum

EPOCHS = 50
LEARNING_RATE = 0.0001
MOMENTUM = 0.9

#скидываем значения коэфициентов
theta0 = 1
theta1 = 1

#задаем начальные значения для средних
vt0 = 0
vt1 = 0

for _ in range(EPOCHS):
    #рассчитываем производные по коэфициентам
    dt0 = der_theta0(X, Y, theta0, theta1)
    dt1 = der_theta1(X, Y, theta0, theta1)
    
    #обновляем скользящее среднее
    vt0 = MOMENTUM * vt0 + (1-MOMENTUM) * LEARNING_RATE * dt0
    vt1 = MOMENTUM * vt1 + (1-MOMENTUM) * LEARNING_RATE * dt1
    
    #обновляем коэффициенты
    theta0 = theta0 - vt0
    theta1 = theta1 - vt1 
    
    #выводим значения целевой функции
    print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))
    

t0: 1.000441804804436 t1: 1.0269259816535334 cost: 1297.616271479059
t0: 1.0012693264225934 t1: 1.0773651027093787 cost: 1168.8048002991375
t0: 1.0024216874189185 t1: 1.1476168513396614 cost: 1000.7388199435861
t0: 1.0038353363410275 t1: 1.2338207982964777 cost: 812.5530256549616
t0: 1.005446022325896 t1: 1.3320758515120135 cost: 622.3059076577537
t0: 1.0071905902281209 t1: 1.4385486729441748 cost: 445.30870063240246
t0: 1.0090085662725676 t1: 1.5495694413897714 cost: 293.05440287337217
t0: 1.0108435128295172 t1: 1.6617136650990902 cost: 172.7404184245545
t0: 1.01264413921984 t1: 1.7718692507975753 cost: 87.31973824440387
t0: 1.0143651632870776 t1: 1.8772885094375753 cost: 35.97464399164005
t0: 1.0159679256610892 t1: 1.975625213899716 cost: 14.884458316529132
t0: 1.0174207650559712 t1: 2.06495721218835 cost: 18.15363612624576
t0: 1.0186991684985194 t1: 2.1437954356885913 cost: 38.775746649325846
t0: 1.019785715010421 t1: 2.211080422015497 cost: 69.52895037954693
t0: 1.0206698349365027 

In [14]:
#RMSProp

EPOCHS = 50
LEARNING_RATE = 0.1 #через подбор пришлось увеличить сорость обучения
MOMENTUM = 0.9

#сбрасываем значения параметров
theta0 = 1
theta1 = 1

#задаем начальные значения для накопленных средних
eg0 = 0
eg1 = 0

for _ in range(EPOCHS):
    #расчет производных
    dt0 = der_theta0(X, Y, theta0, theta1)
    dt1 = der_theta1(X, Y, theta0, theta1)
    
    #скользящее экспоненциальное среднее квадротов производных
    eg0 = MOMENTUM * eg0 + (1-MOMENTUM) * (dt0 ** 2.0)
    eg1 = MOMENTUM * eg1 + (1-MOMENTUM) * (dt1 ** 2.0)
    
    #обновление весов
    theta0 = theta0 - LEARNING_RATE * dt0 / (math.sqrt(eg0+10**(-8)))
    theta1 = theta1 - LEARNING_RATE * dt1 / (math.sqrt(eg1+10**(-8))) 
    
    #выводим значение целевой функции
    print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

t0: 1.3162277660087376 t1: 1.3162277660168358 cost: 641.9009055071758
t0: 1.4998149586483354 t1: 1.500589205401656 cost: 345.4451675728814
t0: 1.6274892375489383 t1: 1.6295227664720873 cost: 193.93843606357734
t0: 1.721263015068636 t1: 1.7249969364323507 cost: 111.35350710621533
t0: 1.7912108064018317 t1: 1.7970800093284378 cost: 65.68842802591483
t0: 1.8431813738737957 t1: 1.8516193773895528 cost: 40.676129702307755
t0: 1.8811374419854834 t1: 1.892572145615293 cost: 27.294304976707593
t0: 1.9080520227261584 t1: 1.9228994095378735 cost: 20.367369813216566
t0: 1.9262925289670534 t1: 1.9449514921816293 cost: 16.92271380834832
t0: 1.9377943324733848 t1: 1.9606440737907913 cost: 15.286426391295068
t0: 1.9441437989416104 t1: 1.9715441010932278 cost: 14.547374006644157
t0: 1.9466246686594262 t1: 1.9789185916738574 cost: 14.230973381396009
t0: 1.946252543337904 t1: 1.9837707284301374 cost: 14.102489264632084
t0: 1.9438078011688162 t1: 1.9868735217706575 cost: 14.052384133836556
t0: 1.93986991