In [64]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D

In [65]:
def calc_mae(y, y_pred):
    err = np.mean(np.abs(y - y_pred))
    return err

def calc_mse(y, y_pred):
    err = np.mean((y - y_pred)**2)
    return err

In [66]:
X = np.array([[ 1,  1],
              [ 1,  1],
              [ 1,  2],
              [ 1,  5],
              [ 1,  3],
              [ 1,  0],
              [ 1,  5],
              [ 1, 10],
              [ 1,  1],
              [ 1,  2]])

y = [45, 55, 50, 55, 60, 35, 75, 80, 50, 60]

1. Подберите скорость обучения (eta) и количество итераций

In [67]:
def gradient_descent(eta, iterations, W, X=X, y=y):
    n = X.shape[1]
    err = np.inf
    for i in range(iterations):
        y_pred = np.dot(X, W)
        err_new = calc_mse(y, y_pred)
        if err_new < err:
            err = err_new
            for k in range(W.shape[0]):
                W[k] -= eta * (1/n * 2 * X[:, k] @ (y_pred - y))
        else:
            return i-1, W, err
    return i, W, err

In [70]:
min_err = np.inf
eta1 = ()
iter1 = ()

for eta in [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]:
    for iters in [1e1, 1e2, 1e3, 1e4, 1e5, 1e6]:
        W = np.array([1, 0.5])
        num, wi, err = gradient_descent(eta, int(iters), W)
        if err < min_err:
            min_err = err
            eta1 = eta
            iter1 = iters

print(f'\n\nОптимальная альфа = {eta1} и оптимальное кол-во итераций = {iter1}. Минимальная ошибка = {min_err} ')



Оптимальная альфа = 0.01 и оптимальное кол-во итераций = 1000.0. Минимальная ошибка = 43.96875000000021 


In [74]:
n = X.shape[0]

eta = eta1
n_iter = int(iter1)

W = np.array([1, 0.5])

for i in range(n_iter+1):
    y_pred = np.dot(X, W)
    err = calc_mse(y, y_pred)
    for k in range(W.shape[0]):
        W[k] -= eta * (1/n * 2 * X[:, k] @ (y_pred - y))
    if i % 100 == 0:
        eta /= 1.1
        print(f'Iteration #{i}: W_new = {W}, MSE = {round(err, 2)}')

Iteration #0: W_new = [2.08 4.27], MSE = 3047.75
Iteration #100: W_new = [26.92778597  7.10095078], MSE = 201.46
Iteration #200: W_new = [36.55234278  5.3556858 ], MSE = 78.6
Iteration #300: W_new = [40.78354544  4.58842244], MSE = 52.71
Iteration #400: W_new = [42.77180005  4.22788312], MSE = 46.47
Iteration #500: W_new = [43.76430815  4.04790708], MSE = 44.77
Iteration #600: W_new = [44.28770223  3.95299763], MSE = 44.25
Iteration #700: W_new = [44.57781114  3.90039085], MSE = 44.08
Iteration #800: W_new = [44.74605536  3.86988236], MSE = 44.02
Iteration #900: W_new = [44.84771779  3.85144744], MSE = 43.99
Iteration #1000: W_new = [44.91148301  3.8398846 ], MSE = 43.98


2*. В этом коде мы избавляемся от итераций по весам, но тут есть ошибка, исправьте ее

In [85]:
n = X.shape[0]

eta = 1e-2 
n_iter = 100

W = np.array([1, 0.5])

for i in range(n_iter):
    y_pred = np.dot(X, W)
    err = calc_mse(y, y_pred)
    
    # надо транспонировать
    
    W -= eta * (1/n * 2 * np.dot(X.T, y_pred - y))
    
    if i % 10 == 0:
        print(f'Iteration #{i}: W_new = {W}, MSE = {round(err,2)}')

Iteration #0: W_new = [2.08 4.27], MSE = 3047.75
Iteration #10: W_new = [ 7.0011236 10.6169007], MSE = 738.65
Iteration #20: W_new = [10.3486292  10.10603105], MSE = 622.03
Iteration #30: W_new = [13.38789582  9.55618391], MSE = 525.24
Iteration #40: W_new = [16.16088505  9.05336203], MSE = 444.66
Iteration #50: W_new = [18.69110735  8.59454545], MSE = 377.58
Iteration #60: W_new = [20.99981865  8.17589626], MSE = 321.72
Iteration #70: W_new = [23.10641138  7.79389815], MSE = 275.22
Iteration #80: W_new = [25.02858024  7.44534246], MSE = 236.5
Iteration #90: W_new = [26.78247081  7.12730145], MSE = 204.27


3*. Вместо того, чтобы задавать количество итераций, задайте другое условие останова алгоритма - когда веса перестают изменяться меньше определенного порога  𝜖 .