Функция:

$$y = 6x_{1}-2x_{3}-3x_{1}^{2}-2x_{2}^{2}-x_{3}^{2}$$

Рассматриваемая функция и ее частные производные

Также функцию можно привести к такому виду:

$$y = -3(x_1-1)^2 - 2x_2^2 - (x_3+1)^2 + 4$$

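При каждом фиксированном y < 4 поверхность уровня этой функции — эллипсоид с центром в точке (1, 0, -1); при y = 4 она вырождается в эту точку, а при y > 4 пуста.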

In [None]:
def f(x1, x2, x3):
    """Evaluate y = 6*x1 - 2*x3 - 3*x1^2 - 2*x2^2 - x3^2."""
    return 6 * x1 - 2 * x3 - 3 * x1 ** 2 - 2 * x2 ** 2 - x3 ** 2


def d_x1(x1, x2, x3):
    """Partial derivative of f with respect to x1 (x2 and x3 are unused)."""
    return 6 - 6 * x1


def d_x2(x1, x2, x3):
    """Partial derivative of f with respect to x2 (x1 and x3 are unused)."""
    return -4 * x2


def d_x3(x1, x2, x3):
    """Partial derivative of f with respect to x3 (x1 and x2 are unused)."""
    return -2 - 2 * x3

Визуализация функции. Цвет — это y, значение функции.

Можно заметить, что функция неограниченно убывает при удалении от точки (1, 0, -1) и поэтому не имеет минимума, но имеет максимум в точке (1, 0, -1).

In [None]:
# Source: https://stackoverflow.com/questions/66261066/4d-density-plot-in-python

import plotly.graph_objects as go
import numpy as np


def get_function_figure():
    """Build a plotly volume rendering of f sampled on a 30x30x30 grid.

    The grid spans -10..10 along the first two axes and -14..10 along the
    third. The colour of the rendered volume encodes the value of f.

    Returns:
        A plotly ``go.Figure`` containing a single ``go.Volume`` trace.
    """
    grid_x, grid_y, grid_z = np.mgrid[-10:10:30j, -10:10:30j, -14:10:30j]

    volume = go.Volume(
        x=grid_x.flatten(),
        y=grid_y.flatten(),
        z=grid_z.flatten(),
        value=f(grid_x, grid_y, grid_z).flatten(),
        isomin=-100,  # lower end of the colour range
        isomax=2,  # upper end of the colour range
        opacity=0.1,  # needs to be small to see through all surfaces
        surface_count=17,  # needs to be large for good volume rendering
    )
    return go.Figure(data=volume)

# Build the volume plot of f and display it.
get_function_figure().show()

С помощью этой функции мы будем изображать на графике путь градиентного спуска.

In [None]:
def plot_function_with_points(x1_list, x2_list, x3_list):
    """Display the volume plot of the function with a 3D scatter trace on top.

    Args:
        x1_list: Coordinates of the points along the first axis.
        x2_list: Coordinates of the points along the second axis.
        x3_list: Coordinates of the points along the third axis.
    """
    figure = get_function_figure()
    figure_with_points = figure.add_scatter3d(x=x1_list, y=x2_list, z=x3_list)
    figure_with_points.show()


Градиентный спуск с фиксированным шагом

In [None]:
def gradient_descent_with_fixed_step(step, max_steps=50, x1=0, x2=0, x3=0, eps=1e-4):
    """Run gradient descent with a constant step size and plot the visited points.

    Every iteration moves the current point against the gradient:
    ``x <- x - step * grad(x)``.

    Args:
        step: Step size applied to the gradient on every iteration.
        max_steps: Maximum number of iterations.
        x1: First coordinate of the starting point.
        x2: Second coordinate of the starting point.
        x3: Third coordinate of the starting point.
        eps: Threshold for the squared gradient norm. The loop stops once the
            gradient used for the latest update is below it, and the
            zero-based iteration index is printed.

    Returns:
        None. The visited points are passed to ``plot_function_with_points``.
    """
    x1_list = []
    x2_list = []
    x3_list = []
    for current_step in range(max_steps):
        # All three partial derivatives are taken at the same (old) point
        # before any coordinate is modified.
        grad_x1 = d_x1(x1, x2, x3)
        grad_x2 = d_x2(x1, x2, x3)
        grad_x3 = d_x3(x1, x2, x3)

        x1 = x1 - step * grad_x1
        x2 = x2 - step * grad_x2
        # Bug fix: x3 used to be computed from x1 instead of its own value.
        x3 = x3 - step * grad_x3

        x1_list.append(x1)
        x2_list.append(x2)
        x3_list.append(x3)

        if grad_x1 ** 2 + grad_x2 ** 2 + grad_x3 ** 2 < eps:
            print(current_step)
            break
    plot_function_with_points(x1_list, x2_list, x3_list)


In [None]:
# Fixed-step run with step 0.005; all other arguments keep their defaults (start at (0, 0, 0)).
gradient_descent_with_fixed_step(step=0.005)

Output hidden; open in https://colab.research.google.com to view.

Градиентный спуск с дробным шагом

In [None]:
def gradient_descent_with_sequence_step(step_base, max_steps=50, x1=0, x2=0, x3=0, eps=1e-4):
    """Run gradient descent with a decaying step size and plot the visited points.

    On iteration ``k`` (counted from 1) the step size is ``step_base / k`` and
    the point is moved against the gradient: ``x <- x - step_k * grad(x)``.

    Args:
        step_base: Step size of the first iteration.
        max_steps: Maximum number of iterations.
        x1: First coordinate of the starting point.
        x2: Second coordinate of the starting point.
        x3: Third coordinate of the starting point.
        eps: Threshold for the squared gradient norm. The loop stops once the
            gradient used for the latest update is below it.

    Returns:
        None. The visited points are passed to ``plot_function_with_points``.
    """
    x1_list = []
    x2_list = []
    x3_list = []
    for current_step_number in range(1, max_steps + 1):
        # All three partial derivatives are taken at the same (old) point
        # before any coordinate is modified.
        grad_x1 = d_x1(x1, x2, x3)
        grad_x2 = d_x2(x1, x2, x3)
        grad_x3 = d_x3(x1, x2, x3)

        current_step = step_base / current_step_number

        x1 = x1 - current_step * grad_x1
        x2 = x2 - current_step * grad_x2
        # Bug fix: x3 used to be computed from x1 instead of its own value.
        x3 = x3 - current_step * grad_x3

        x1_list.append(x1)
        x2_list.append(x2)
        x3_list.append(x3)

        if grad_x1 ** 2 + grad_x2 ** 2 + grad_x3 ** 2 < eps:
            # NOTE(review): this prints the last step size, not the iteration
            # number -- confirm which one was intended.
            print(current_step)
            break
    plot_function_with_points(x1_list, x2_list, x3_list)


In [None]:
# Decaying-step run: the step on iteration k is 0.07 / k; other arguments keep their defaults.
gradient_descent_with_sequence_step(step_base=0.07)

Output hidden; open in https://colab.research.google.com to view.

Наискорейший спуск

Для рассматриваемой функции размер шага при наискорейшем градиентном спуске будет равен бесконечности, так как функция неограниченно убывает при удалении от точки максимума (1, 0, -1).
Это показано в приложенном PDF-файле с решением вручную.

In [None]:
def find_step_with_maximum_descent(grad_x1, grad_x2, grad_x3, max_step=100):
    """Return the step size for steepest descent on the studied function.

    The result is always positive infinity; all arguments are accepted for
    interface purposes and are currently ignored.

    Args:
        grad_x1: Partial derivative with respect to x1 (unused).
        grad_x2: Partial derivative with respect to x2 (unused).
        grad_x3: Partial derivative with respect to x3 (unused).
        max_step: Upper bound for the step (unused).

    Returns:
        ``float('inf')``.
    """
    # Bug fix: the literal used typographic quotes, which is a SyntaxError.
    return float('inf')


def fastest_gradient_descent(max_steps=50, x1=0, x2=0, x3=0, eps=1e-4, max_step=100):
    """Run steepest descent and plot the visited points.

    On every iteration the step size is obtained from
    ``find_step_with_maximum_descent`` and the point is moved against the
    gradient: ``x <- x - step * grad(x)``.

    Args:
        max_steps: Maximum number of iterations.
        x1: First coordinate of the starting point.
        x2: Second coordinate of the starting point.
        x3: Third coordinate of the starting point.
        eps: Threshold for the squared gradient norm. The loop stops once the
            gradient used for the latest update is below it.
        max_step: Upper bound for the step, forwarded to
            ``find_step_with_maximum_descent``.

    Returns:
        None. The visited points are passed to ``plot_function_with_points``.
    """
    x1_list = []
    x2_list = []
    x3_list = []
    for current_step_number in range(max_steps):
        # All three partial derivatives are taken at the same (old) point
        # before any coordinate is modified.
        grad_x1 = d_x1(x1, x2, x3)
        grad_x2 = d_x2(x1, x2, x3)
        grad_x3 = d_x3(x1, x2, x3)

        # max_step was previously accepted by this function but never passed on.
        current_step = find_step_with_maximum_descent(
            grad_x1, grad_x2, grad_x3, max_step=max_step
        )

        # NOTE(review): if the step finder returns an infinite step, these
        # updates yield inf/nan coordinates -- confirm this is intended.
        x1 = x1 - current_step * grad_x1
        x2 = x2 - current_step * grad_x2
        # Bug fix: x3 used to be computed from x1 instead of its own value.
        x3 = x3 - current_step * grad_x3

        x1_list.append(x1)
        x2_list.append(x2)
        x3_list.append(x3)

        if grad_x1 ** 2 + grad_x2 ** 2 + grad_x3 ** 2 < eps:
            # NOTE(review): this prints the last step size, not the iteration
            # number -- confirm which one was intended.
            print(current_step)
            break
    plot_function_with_points(x1_list, x2_list, x3_list)


Градиентный спуск с шагом по формуле циклического косинусного отжига

In [None]:
from math import cos, pi

def gradient_descent_cosine(step_min, step_max, steps_in_cycle=10, max_steps=50, x1=0, x2=0, x3=0, eps=1e-4):
    """Run gradient descent with a cyclic cosine step schedule and plot the path.

    On iteration ``k`` (counted from 0) the step size is
    ``step_min + 0.5 * (step_max - step_min) * (1 + cos(pi * t))`` with
    ``t = (k % (steps_in_cycle + 1)) / steps_in_cycle``. Within one cycle the
    step therefore goes from ``step_max`` (t = 0) down to ``step_min``
    (t = 1), after which the cycle restarts.

    Args:
        step_min: Smallest step size of a cycle.
        step_max: Largest step size of a cycle.
        steps_in_cycle: Divisor of the cycle position; a cycle consists of
            ``steps_in_cycle + 1`` iterations.
        max_steps: Maximum number of iterations.
        x1: First coordinate of the starting point.
        x2: Second coordinate of the starting point.
        x3: Third coordinate of the starting point.
        eps: Threshold for the squared gradient norm. The loop stops once the
            gradient used for the latest update is below it.

    Returns:
        None. The visited points are passed to ``plot_function_with_points``.
    """
    x1_list = []
    x2_list = []
    x3_list = []

    multiplier = 0.5 * (step_max - step_min)

    for current_step_number in range(max_steps):
        # All three partial derivatives are taken at the same (old) point
        # before any coordinate is modified.
        grad_x1 = d_x1(x1, x2, x3)
        grad_x2 = d_x2(x1, x2, x3)
        grad_x3 = d_x3(x1, x2, x3)

        # Take the remainder modulo steps_in_cycle + 1 so that the cosine
        # argument covers the whole range from 0 to 1 inclusive.
        cos_argument = current_step_number % (steps_in_cycle + 1) / steps_in_cycle

        current_step = step_min + multiplier * (1 + cos(cos_argument * pi))

        x1 = x1 - current_step * grad_x1
        x2 = x2 - current_step * grad_x2
        # Bug fix: x3 used to be computed from x1 instead of its own value.
        x3 = x3 - current_step * grad_x3

        x1_list.append(x1)
        x2_list.append(x2)
        x3_list.append(x3)

        if grad_x1 ** 2 + grad_x2 ** 2 + grad_x3 ** 2 < eps:
            # NOTE(review): this prints the last step size, not the iteration
            # number -- confirm which one was intended.
            print(current_step)
            break
    plot_function_with_points(x1_list, x2_list, x3_list)

In [None]:
# Cosine-schedule run with the step cycling between 0.01 and 0.001; other arguments keep their defaults.
gradient_descent_cosine(step_max=0.01, step_min=0.001)

Output hidden; open in https://colab.research.google.com to view.

Градиентный спуск в sklearn
Генерация данных:

In [None]:
import numpy as np

# Number of samples to generate.
count = 100

# Feature matrix of shape (count, 3): np.random.random output scaled by 10.
# No seed is set, so the values differ between runs.
X = np.random.random((count, 3)) * 10
X

array([[3.60697445, 8.37056143, 7.12099966],
       [9.93748702, 2.71482279, 3.39045489],
       [2.30249381, 4.05635028, 1.38161827],
       [6.21597305, 8.91906255, 9.38347817],
       [2.61897792, 1.66668233, 5.21860965],
       [1.56536282, 0.2763781 , 9.981863  ],
       [3.20442533, 8.42530676, 7.02564541],
       [3.5272869 , 7.46216878, 3.55410668],
       [9.75791055, 9.03159288, 1.54100367],
       [0.03616006, 2.13403795, 1.65724634],
       [2.74519161, 2.51272692, 9.80899731],
       [5.17527666, 2.3402638 , 8.79911438],
       [3.43706982, 2.05655039, 1.99292567],
       [6.43006591, 2.10852989, 3.45450614],
       [2.54880711, 8.50768844, 4.94600352],
       [6.34859125, 7.99785177, 8.71573887],
       [5.58151187, 4.27863571, 6.55857701],
       [1.38878293, 6.49218906, 5.69351446],
       [1.95498353, 4.5827368 , 3.41102991],
       [2.48506154, 2.58646347, 4.90892395],
       [9.05014232, 7.22397418, 5.59587903],
       [7.33109004, 6.79762342, 2.21097339],
       [9.

In [None]:
# NOTE(review): `e` is not used anywhere in the visible code -- presumably
# intended as noise for the targets; confirm or remove.
e = np.random.rand(1,count)
# Targets: f evaluated on the three columns of X, one value per sample.
y = f(X[:, 0],X[:, 1],X[:, 2])
y

array([-222.47218022, -269.65264257,  -39.66953115, -344.53509709,
        -48.09003442, -117.71298998, -216.96104545, -147.26935067,
       -395.69903123,  -14.95615636, -134.5990973 , -155.27511736,
        -31.23433101, -113.19126888, -183.31289355, -304.14913281,
       -152.71628667, -125.55363001,  -60.19611852,  -50.91119208,
       -338.29160006, -218.97382197, -253.47376448,  -36.15998409,
        -51.47045357, -282.96659932,  -86.98061224, -311.98581358,
       -278.43069308, -343.65852929, -172.37378244, -162.11445005,
       -136.05158136, -334.62086666,  -43.15162571, -369.15948817,
       -280.93239648, -275.2463652 , -115.40551787, -231.99665545,
       -329.56526594, -168.13557411, -196.93260211, -163.63933142,
        -25.5574703 , -161.08743973, -107.64347018, -221.23286287,
       -240.94553228, -197.52805239,  -95.06798262,  -81.51614131,
       -260.12941376, -117.79149027, -374.77507004,  -40.77324778,
       -154.66719595, -185.1302601 , -248.69281598,   -4.22593

In [None]:
Обучение модели:

In [None]:
from sklearn.linear_model import SGDRegressor

# Linear regressor fitted by stochastic gradient descent; tol sets the stopping tolerance.
sgd = SGDRegressor(tol=0.0001)
sgd.fit(X, y)
# Show the fitted intercept and the coefficients of the three features.
sgd.intercept_, sgd.coef_

(array([34.27955331]), array([-20.94776749, -14.60150544,  -6.76223035]))