# Решение задачи оптимизации

Решите простую задачу безусловной оптимизации в двумерном пространстве:  
$$f(\boldsymbol x) = -8x_1 - 16x_2 + x_1^2 + 4x_2^2$$
Используйте два метода:
 - аналитически (функция квадратичная, выпуклая)
 - методом градиентного спуска, используя один из методов оптимизации torch.optim

# Аналитически 

$\nabla f(x) = \big(\frac{\partial f(x)}{\partial x_1}, \frac{\partial f(x)}{\partial x_2} \big) = (-8 + 2x_1, -16 + 8x_2)$

$\frac{\partial f(x)}{\partial x_1} = -8 + 2x_1 = 0 \implies x_1 = 4$

$\frac{\partial f(x)}{\partial x_2} = -16 + 8x_2 = 0 \implies x_2 = 2$

$H = \nabla^2 f(x) =
\begin{pmatrix} 
\frac{\partial^2 f}{\partial x_1^2} & \frac{\partial^2f}{\partial x_1 \partial x_2}\\
\frac{\partial^2 f}{\partial x_2 \partial x_1} & \frac{\partial^2f}{\partial x_2^2}\\
\end{pmatrix} =
\begin{pmatrix} 
2 & 0\\
0 & 8\\
\end{pmatrix}$

$H$ положительно определена $\implies$ функция строго выпукла, и стационарная точка $(4, 2)$ — её единственный глобальный минимум: $f^* = f(4, 2) = -32$

In [1]:
import torch

In [2]:
def f(x1, x2):
    """Evaluate the objective f(x) = -8*x1 - 16*x2 + x1**2 + 4*x2**2.

    The arguments only need to support the arithmetic operators used
    below, so plain numbers and torch scalars both work.
    """
    # Accumulate the terms left to right, in the same order as the formula.
    value = -8 * x1
    value = value - 16 * x2
    value = value + x1 ** 2
    value = value + 4 * x2 ** 2
    return value

# Метод градиентного спуска

In [3]:
# Starting point (3, 3); both coordinates are tracked by autograd.
x1 = torch.tensor(3.0, dtype=torch.float32, requires_grad=True)
x2 = torch.tensor(3.0, dtype=torch.float32, requires_grad=True)

for i in range(101):

    # Forward pass at the current point, then backward pass to populate
    # x1.grad and x2.grad.
    y = f(x1, x2)
    y.backward()

    # Log every fifth iteration, before the point is moved, so the printed
    # coordinates, loss and gradients all refer to the same point.
    if not i % 5:
        print(f'iteration {i}, x1 = {round(x1.item(), 7)}, x2 = {round(x2.item(), 7)}, '
              f'loss = {round(y.item(), 7)}, grad x1 = {round(x1.grad.item(), 7)}, grad x2 = {round(x2.grad.item(), 7)}')

    # The parameter update must not be recorded by autograd.
    with torch.no_grad():
        # Gradient-descent step with a fixed step size of 0.05.
        x1.sub_(0.05 * x1.grad)
        x2.sub_(0.05 * x2.grad)

    # Clear the gradients so the next backward() starts from zero
    # instead of adding to the previous values.
    x1.grad.zero_()
    x2.grad.zero_()

iteration 0, x1 = 3.0, x2 = 3.0, loss = -27.0, grad x1 = -2.0, grad x2 = 8.0
iteration 5, x1 = 3.4095099, x2 = 2.07776, loss = -31.6271381, grad x1 = -1.1809802, grad x2 = 0.6220798
iteration 10, x1 = 3.6513214, x2 = 2.0060465, loss = -31.8782749, grad x1 = -0.6973572, grad x2 = 0.0483723
iteration 15, x1 = 3.7941089, x2 = 2.0004702, loss = -31.9576111, grad x1 = -0.4117823, grad x2 = 0.0037613
iteration 20, x1 = 3.8784232, x2 = 2.0000367, loss = -31.985218, grad x1 = -0.2431536, grad x2 = 0.0002937
iteration 25, x1 = 3.9282103, x2 = 2.0000029, loss = -31.9948463, grad x1 = -0.1435795, grad x2 = 2.29e-05
iteration 30, x1 = 3.9576092, x2 = 2.0000002, loss = -31.9982033, grad x1 = -0.0847816, grad x2 = 1.9e-06
iteration 35, x1 = 3.9749687, x2 = 2.0000002, loss = -31.9993706, grad x1 = -0.0500627, grad x2 = 1.9e-06
iteration 40, x1 = 3.9852192, x2 = 2.0000002, loss = -31.9997826, grad x1 = -0.0295615, grad x2 = 1.9e-06
iteration 45, x1 = 3.991272, x2 = 2.0000002, loss = -31.9999237, grad 

In [4]:
# Report the result of the loop above. y is the loss computed in the last
# loop iteration, i.e. before that iteration's update of x1 and x2.
print(
    f'f* = {round(y.item(), 7)}, '
    f'x1 = {round(x1.item(), 7)}, '
    f'x2 = {round(x2.item(), 7)}'
)

f* = -32.0, x1 = 3.9999759, x2 = 2.0000002


# Используем torch.optim

In [5]:
# Same starting point (3, 3) as in the manual version.
x1 = torch.tensor(3.0, dtype=torch.float32, requires_grad=True)
x2 = torch.tensor(3.0, dtype=torch.float32, requires_grad=True)

# Plain SGD over the two scalars with a step size of 0.05.
optimizer = torch.optim.SGD(params=[x1, x2], lr=0.05)

for i in range(101):

    # Drop the gradients left over from the previous iteration so they
    # do not accumulate.
    optimizer.zero_grad()

    # Forward pass at the current point, then backward pass to populate
    # x1.grad and x2.grad.
    y = f(x1, x2)
    y.backward()

    # Move x1 and x2 one step using the gradients just computed.
    optimizer.step()

    # NOTE: this log runs after optimizer.step(), so the printed x1/x2 are
    # the already-updated coordinates, while loss and gradients still
    # belong to the point before the step.
    if not i % 5:
        print(f'iteration {i}, x1 = {round(x1.item(), 7)}, x2 = {round(x2.item(), 7)}, '
              f'loss = {round(y.item(), 7)}, grad x1 = {round(x1.grad.item(), 7)}, grad x2 = {round(x2.grad.item(), 7)}')

iteration 0, x1 = 3.0999999, x2 = 2.5999999, loss = -27.0, grad x1 = -2.0, grad x2 = 8.0
iteration 5, x1 = 3.4685588, x2 = 2.0466559, loss = -31.6271381, grad x1 = -1.1809802, grad x2 = 0.6220798
iteration 10, x1 = 3.6861892, x2 = 2.003628, loss = -31.8782749, grad x1 = -0.6973572, grad x2 = 0.0483723
iteration 15, x1 = 3.814698, x2 = 2.000282, loss = -31.9576111, grad x1 = -0.4117823, grad x2 = 0.0037613
iteration 20, x1 = 3.8905809, x2 = 2.0000219, loss = -31.985218, grad x1 = -0.2431536, grad x2 = 0.0002937
iteration 25, x1 = 3.9353893, x2 = 2.0000017, loss = -31.9948463, grad x1 = -0.1435795, grad x2 = 2.29e-05
iteration 30, x1 = 3.9618483, x2 = 2.0000002, loss = -31.9982033, grad x1 = -0.0847816, grad x2 = 1.9e-06
iteration 35, x1 = 3.9774718, x2 = 2.0000002, loss = -31.9993706, grad x1 = -0.0500627, grad x2 = 1.9e-06
iteration 40, x1 = 3.9866972, x2 = 2.0000002, loss = -31.9997826, grad x1 = -0.0295615, grad x2 = 1.9e-06
iteration 45, x1 = 3.9921448, x2 = 2.0000002, loss = -31.99

In [6]:
# Report the result of the loop above. y is the loss computed in the last
# loop iteration, i.e. before that iteration's optimizer step.
print(
    f'f* = {round(y.item(), 7)}, '
    f'x1 = {round(x1.item(), 7)}, '
    f'x2 = {round(x2.item(), 7)}'
)

f* = -32.0, x1 = 3.9999759, x2 = 2.0000002
