In [1]:
import numpy as np
# Show arrays in plain fixed-point notation with 7 digits after the decimal point
np.set_printoptions(precision=7, suppress=True)

# Лабораторна робота №1

## Цільова функція:

$$11x^2 + 14y^2 + z^2 + 0,01xy - 0,001yz - y$$

In [2]:
def target_function(x):
    """Evaluate 11x^2 + 14y^2 + z^2 + 0.01xy - 0.001yz - y at the point x = (x, y, z).

    Only the first three components of ``x`` are read, by index.
    """
    x0, x1, x2 = x[0], x[1], x[2]
    return 11*x0**2 + 14*x1**2 + x2**2 + 0.01*x0*x1 - 0.001*x1*x2 - x1

## Умови зупинки:

$$||x^{k+1} - x^k||\leq\epsilon$$

In [3]:
def x_norm_stop(x_prev, x_cur, epselon):
    """Stop test on the iterates: True when ||x_prev - x_cur|| is below ``epselon``."""
    displacement = np.linalg.norm(x_prev - x_cur)
    return displacement < epselon

$$|f(x^{k+1}) - f(x^k)|\leq\epsilon$$

In [4]:
def func_abs_stop(func_prev, func_cur, epselon):
    """Stop test on the objective: True when |func_prev - func_cur| is below ``epselon``."""
    change = abs(func_prev - func_cur)
    return change < epselon

$$||f'(x^{k+1})||\leq\epsilon$$

In [5]:
def grad_abs_stop(grad, epselon):
    """Stop test on the gradient: True when ||grad|| is below ``epselon``."""
    grad_norm = np.linalg.norm(grad)
    return grad_norm < epselon

## Градієнтний спуск

In [6]:
class GradientDescent:
    """Gradient descent driven by a finite-difference gradient estimate.

    The gradient is approximated with central differences. The step size is
    either constant or, when ``adaptive_beta`` is non-zero, shrunk by
    backtracking until the step no longer increases the objective.
    """

    def __init__(self, target_func, initial_x, step_size, grad_step_size, adaptive_beta=0):
        """Store the problem and evaluate the objective at the start point.

        Args:
            target_func: callable taking a 1-D array and returning a scalar.
            initial_x: start point; stored as a float array.
            step_size: constant step alpha, or the initial trial step when
                backtracking is enabled.
            grad_step_size: spacing h used by the central differences.
            adaptive_beta: 0 keeps the step constant; a value in (0, 1) is the
                factor the trial step is multiplied by during backtracking.

        Raises:
            ValueError: if ``adaptive_beta`` is outside [0, 1). A factor >= 1
                never shrinks the step, so backtracking could loop forever.
        """
        if not 0 <= adaptive_beta < 1:
            raise ValueError('adaptive_beta must satisfy 0 <= adaptive_beta < 1')

        self.f = target_func
        # Float storage so that later x +/- h perturbations are never truncated
        self.x = np.asarray(initial_x, dtype=float)

        self.f_value = self.f(self.x)

        self.grad_step_size = grad_step_size
        self.step_size = step_size
        self.adaptive_beta = adaptive_beta
        # Every backtracking search restarts from this initial trial step
        self.adaptive_alpha = step_size

        self.grad = None

    @staticmethod
    def partial_deriv(f, x, h, var_num):
        """Central-difference estimate of df/dx[var_num] at ``x`` with spacing ``h``."""
        # Work on float copies: writing x +/- h into an integer array would
        # silently truncate the perturbation and give a wrong derivative.
        x_back = np.array(x, dtype=float)
        x_forward = x_back.copy()

        # Shift one coordinate backward: (x - h; y)
        x_back[var_num] -= h
        # Shift the same coordinate forward: (x + h; y)
        x_forward[var_num] += h

        return (f(x_forward) - f(x_back)) / (2 * h)

    @staticmethod
    def compose_grad_vec(f, x, h):
        """Return the vector of central-difference partial derivatives of ``f`` at ``x``."""
        x = np.asarray(x, dtype=float)
        return np.array([GradientDescent.partial_deriv(f, x, h, i) for i in range(x.shape[0])])

    def backward(self):
        """Estimate the gradient at the current point and store it in ``self.grad``."""
        self.grad = GradientDescent.compose_grad_vec(self.f, self.x, self.grad_step_size)

    def zero_grad(self):
        """Reset the stored gradient to a zero vector of the same length as ``x``."""
        self.grad = np.zeros(self.x.shape[0])

    def step(self):
        """Move to x_k+1 = x_k - alpha_k * f'(x_k) and refresh ``self.f_value``.

        Requires ``self.grad`` to have been set (see ``backward``).
        """
        # A non-zero beta selects the adaptive (backtracking) step size
        if self.adaptive_beta != 0:
            self.step_size = self.adaptive_alpha
            # f(x_k) does not change while backtracking, so evaluate it once
            current_value = self.f(self.x)

            # Shrink alpha_k while f(x_k) < f(x_k - alpha_k * f'(x_k)).
            # With beta in (0, 1) the step eventually underflows to 0, at which
            # point both sides are equal and the loop ends.
            while current_value < self.f(self.x - self.step_size * self.grad):
                # alpha_k = alpha_k * beta
                self.step_size = self.step_size * self.adaptive_beta

        # x_k+1 = x_k - alpha_k * f'(x_k)
        self.x = self.x - self.step_size * self.grad
        self.f_value = self.f(self.x)

    def info(self):
        """Print the current point, objective value, gradient and step sizes."""
        print('Current x: {}'.format(self.x))
        print('Current f(x): {}'.format(self.f_value))
        print('Current grad: {}'.format(self.grad))
        print('Step size: {}'.format(self.step_size))
        print('Gradient step size: {}'.format(self.grad_step_size))

Застосуємо метод з постійною величиною кроку

In [7]:
# Constant-step run: start at (10, 10, 10), alpha = 0.01, finite-difference spacing h = 1e-5
grad_descent = GradientDescent(
    target_func=target_function,
    initial_x=np.array([10., 10., 10.]),
    grad_step_size=1e-5,
    step_size=0.01,
)

In [8]:
eps = 0.00001
# Safety cap: if the iterates diverge to inf/NaN the norm test below is never
# satisfied, so without a cap this cell would never finish.
max_itter = 100000
num_itter = 0
# Seed previous_x more than eps away from x so the first stop check fails
previous_x = grad_descent.x + eps + 1

while num_itter < max_itter and not x_norm_stop(grad_descent.x, previous_x, eps):
    num_itter += 1
    print('\nItteration: {}'.format(num_itter))

    previous_x = grad_descent.x
    grad_descent.zero_grad()
    # Compute gradient
    grad_descent.backward()
    # x_k+1 = x_k + h_k
    grad_descent.step()
    grad_descent.info()

# Report convergence only if the stop criterion was actually met
if x_norm_stop(grad_descent.x, previous_x, eps):
    print('\nConverged in {} itterations'.format(num_itter))
else:
    print('\nNo convergence after {} itterations'.format(num_itter))


Itteration: 1
Current x: [7.799  7.2091 9.8001]
Current f(x): 1485.9885783270622
Current grad: [220.1       279.0899999  19.99     ]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 2
Current x: [6.0824991 5.1998701 9.6041701]
Current f(x): 872.8123886763333
Current grad: [171.650091  200.9229899  19.5929909]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 3
Current x: [4.7438293 3.7533943 9.4121387]
Current f(x): 529.7523277356087
Current grad: [133.8669787 144.6475837  19.2031403]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 4
Current x: [3.6998115 2.7120636 9.2239334]
Current f(x): 335.9929141694834
Current grad: [104.4017786 104.1330656  18.820524 ]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 5
Current x: [2.8855818 1.9624081 9.0394819]
Current f(x): 225.2957521482421
Current grad: [81.422974  74.9655552 18.4451548]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 6
Current x: [2.2505575 1.4227356 8.8587119]
Current f(x): 161.12703215125885


Step size: 0.01
Gradient step size: 1e-05

Itteration: 209
Current x: [-0.0000162  0.0357199  0.1466623]
Current f(x): 0.0036474565289776884
Current grad: [-0.         0.0000115  0.2992744]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 210
Current x: [-0.0000162  0.0357198  0.1437294]
Current f(x): 0.0027958742820742427
Current grad: [-0.         0.0000113  0.293289 ]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 211
Current x: [-0.0000162  0.0357197  0.1408552]
Current f(x): 0.0019780146915059915
Current grad: [-0.         0.0000111  0.2874232]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 212
Current x: [-0.0000162  0.0357196  0.1380385]
Current f(x): 0.0011925423401087931
Current grad: [-0.         0.0000108  0.2816747]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 213
Current x: [-0.0000162  0.0357195  0.1352781]
Current f(x): 0.00043817469323495506
Current grad: [-0.         0.0000106  0.2760412]
Step size: 0.01
Gradient step size: 1e-05

Ittera

Gradient step size: 1e-05

Itteration: 326
Current x: [-0.0000162  0.0357148  0.0138127]
Current f(x): -0.017666848535273078
Current grad: [-0.         0.0000011  0.0281527]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 327
Current x: [-0.0000162  0.0357148  0.0135368]
Current f(x): -0.01767438431769914
Current grad: [-0.         0.0000011  0.0275897]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 328
Current x: [-0.0000162  0.0357148  0.0132664]
Current f(x): -0.017681621683146773
Current grad: [-0.         0.000001   0.0270379]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 329
Current x: [-0.0000162  0.0357148  0.0130014]
Current f(x): -0.017688572448928158
Current grad: [-0.         0.000001   0.0264971]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 330
Current x: [-0.0000162  0.0357148  0.0127418]
Current f(x): -0.017695247964389885
Current grad: [-0.         0.000001   0.0259672]
Step size: 0.01
Gradient step size: 1e-05

Itteration: 331
Current 

Current grad: [-0.         0.         0.0009841]
Step size: 0.01
Gradient step size: 1e-05

Converged in 492 itterations


Застосуємо метод з адаптивною величиною кроку

In [9]:
# Adaptive-step run: start at (10, 10, 10), initial trial step 1,
# multiplied by beta = 0.5 each time the trial step is rejected
grad_descent = GradientDescent(
    target_func=target_function,
    initial_x=np.array([10., 10., 10.]),
    grad_step_size=1e-5,
    step_size=1,
    adaptive_beta=0.5,
)

In [10]:
eps = 0.00001
# Safety cap: if the iterates diverge to inf/NaN the norm test below is never
# satisfied, so without a cap this cell would never finish.
max_itter = 100000
num_itter = 0
# Seed previous_x more than eps away from x so the first stop check fails
previous_x = grad_descent.x + eps + 1

while num_itter < max_itter and not x_norm_stop(grad_descent.x, previous_x, eps):
    num_itter += 1
    print('\nItteration: {}'.format(num_itter))

    previous_x = grad_descent.x
    grad_descent.zero_grad()
    # Compute gradient
    grad_descent.backward()
    # x_k+1 = x_k + h_k
    grad_descent.step()
    grad_descent.info()

# Report convergence only if the stop criterion was actually met
if x_norm_stop(grad_descent.x, previous_x, eps):
    print('\nConverged in {} itterations'.format(num_itter))
else:
    print('\nNo convergence after {} itterations'.format(num_itter))


Itteration: 1
Current x: [-3.75625  -7.443125  8.750625]
Current f(x): 1015.1663677337114
Current grad: [220.1       279.0899999  19.99     ]
Step size: 0.0625
Gradient step size: 1e-05

Itteration: 2
Current x: [1.4132457 5.6477383 7.6563317]
Current f(x): 521.53542286233
Current grad: [ -82.7119312 -209.453813    17.5086931]
Step size: 0.0625
Gradient step size: 1e-05

Itteration: 3
Current x: [-0.533497  -4.1737085  6.6996432]
Current f(x): 296.11776221167185
Current grad: [ 31.1478828 157.143149   15.3070156]
Step size: 0.0625
Gradient step size: 1e-05

Itteration: 4
Current x: [0.2026699 3.1935335 5.8619269]
Current f(x): 174.389422278079
Current grad: [ -11.7786705 -117.8758725   13.4034601]
Step size: 0.0625
Gradient step size: 1e-05

Itteration: 5
Current x: [-0.0779972 -2.3324104  5.1293857]
Current f(x): 104.88564909652312
Current grad: [ 4.4906739 88.4151037 11.7206604]
Step size: 0.0625
Gradient step size: 1e-05

Itteration: 6
Current x: [0.0307067 1.8121772 4.4880667]
Cur

## Градієнтний спуск для квадратичної функції 

Цільова функція - квадратична. Покращимо алгоритм з урахуванням цього факту

### Загальний вигляд квадратичної функції

$$f(x) = \frac{1}{2}(Ax,x) + (b,x)$$

де $A$ — симетрична, додатно визначена матриця


В нашому випадку:

$$A = \begin{bmatrix}
       22   & 0,01   & 0      \\
       0,01 & 28     & -0,001 \\
       0    & -0,001 & 2
     \end{bmatrix}$$

$$b = \begin{bmatrix}
       0  \\
       -1 \\
       0   
     \end{bmatrix}$$

In [11]:
# Matrix of the quadratic form (symmetric), one row per line
A_matrix = np.array([
    [22,   0.01,   0],
    [0.01, 28,    -0.001],
    [0,   -0.001,  2],
])
# Coefficients of the linear term (b, x)
b_vector = np.array([0, -1, 0])

In [12]:
class QuadraticGradientDescent:
    """Gradient descent for f(x) = (Ax, x)/2 + (b, x) with a closed-form step size.

    The gradient is computed analytically as Ax + b, and each step uses
    alpha_k = (f'(x_k), f'(x_k)) / (A f'(x_k), f'(x_k)).
    A is expected to be symmetric positive definite; this is not checked.
    """

    def __init__(self, A, b, initial_x):
        """Store the quadratic's coefficients and evaluate it at the start point.

        Args:
            A: square matrix of the quadratic form.
            b: vector of the linear term.
            initial_x: start point.
        """
        self.A = np.asarray(A)
        self.b = np.asarray(b)

        # Function in matrix format; reads self.A / self.b so it always agrees
        # with the gradient and step-size computations
        self.f = lambda x: ((self.A @ x) @ x) / 2 + self.b @ x
        self.x = np.asarray(initial_x)

        self.f_value = self.f(self.x)

        self.step_size = None

        self.grad = None

    @staticmethod
    def compute_step_size(A, b, grad, x):
        """Return alpha_k = (Ax + b, f'(x_k)) / (A f'(x_k), f'(x_k)).

        Returns 0.0 when ``grad`` is entirely zero: the point is stationary,
        and the formula would otherwise evaluate 0/0 and poison x with NaN.
        """
        if not np.any(grad):
            return 0.0
        return ((A @ x + b) @ grad) / ((A @ grad) @ grad)

    @staticmethod
    def compose_grad_vec(A, b, x):
        """Return the analytic gradient f'(x) = Ax + b."""
        return A @ x + b

    def backward(self):
        """Compute the gradient at the current point and store it in ``self.grad``."""
        self.grad = QuadraticGradientDescent.compose_grad_vec(self.A, self.b, self.x)

    def zero_grad(self):
        """Reset the stored gradient to a zero vector of the same length as ``x``."""
        self.grad = np.zeros(self.x.shape[0])

    def step(self):
        """Pick the step size, move to x_k+1 = x_k - alpha_k * f'(x_k), refresh ``self.f_value``.

        Requires ``self.grad`` to have been set (see ``backward``).
        """
        self.step_size = QuadraticGradientDescent.compute_step_size(self.A, self.b, self.grad, self.x)
        # x_k+1 = x_k - alpha_k * f'(x_k)
        self.x = self.x - self.step_size * self.grad
        self.f_value = self.f(self.x)

    def info(self):
        """Print the current point, objective value, gradient and step size."""
        print('Current x: {}'.format(self.x))
        print('Current f(x): {}'.format(self.f_value))
        print('Current grad: {}'.format(self.grad))
        print('Step size: {}'.format(self.step_size))

In [13]:
# Quadratic-specific solver on the same problem, started from (10, 10, 10)
quad_grad_descent = QuadraticGradientDescent(
    A=A_matrix,
    b=b_vector,
    initial_x=np.array([10, 10, 10]),
)

In [14]:
eps = 0.00001
# Safety cap: if the iterates become inf/NaN the norm test below is never
# satisfied, so without a cap this cell would never finish.
max_itter = 100000
num_itter = 0
# Seed previous_x more than eps away from x so the first stop check fails
previous_x = quad_grad_descent.x + eps + 1

while num_itter < max_itter and not x_norm_stop(quad_grad_descent.x, previous_x, eps):
    num_itter += 1
    print('\nItteration: {}'.format(num_itter))

    previous_x = quad_grad_descent.x
    quad_grad_descent.zero_grad()
    # Compute gradient
    quad_grad_descent.backward()
    # x_k+1 = x_k + h_k
    quad_grad_descent.step()
    quad_grad_descent.info()

# Report convergence only if the stop criterion was actually met
if x_norm_stop(quad_grad_descent.x, previous_x, eps):
    print('\nConverged in {} itterations'.format(num_itter))
else:
    print('\nNo convergence after {} itterations'.format(num_itter))


Itteration: 1
Current x: [ 1.4137959 -0.8874316  9.2201807]
Current f(x): 118.90729619227575
Current grad: [220.1  279.09  19.99]
Step size: 0.03901046845393336

Itteration: 2
Current x: [-0.0973143  0.3684723  8.3239889]
Current f(x): 70.92186914359814
Current grad: [ 31.0946353 -25.8431682  18.4412489]
Step size: 0.0485971350164743

Itteration: 3
Current x: [ 0.1582914 -0.7447252  6.3329895]
Current f(x): 48.895255176638244
Current grad: [-2.1372298  9.307927  16.6476093]
Step size: 0.11959671234495632

Itteration: 4
Current x: [-0.0035475  0.2732197  5.7430644]
Current f(x): 33.753214456411236
Current grad: [  3.4749628 -21.8570551  12.6667238]
Step size: 0.04657283021378687

Itteration: 5
Current x: [ 0.0052934 -0.5067607  4.3947444]
Current f(x): 23.418336717596976
Current grad: [-0.0753125  6.6443741 11.4858555]
Step size: 0.11738959703968287

Itteration: 6
Current x: [0.000108  0.2005508 3.9855426]
Current f(x): 16.24628904899507
Current grad: [  0.1113876 -15.1936403   8.78999

Current x: [-0.0000161  0.0357911  0.0018751]
Current f(x): -0.01785361437807651
Current grad: [-0.0000118 -0.00708    0.004096 ]
Step size: 0.046553065472924614

Itteration: 49
Current x: [-0.0000165  0.0355389  0.0014391]
Current f(x): -0.01785469512374062
Current grad: [0.0000036 0.0021489 0.0037143]
Step size: 0.1173825457942197

Itteration: 50
Current x: [-0.0000161  0.0357676  0.0013067]
Current f(x): -0.017855445147022184
Current grad: [-0.0000082 -0.0049134  0.0028426]
Step size: 0.04655306547292568

Itteration: 51
Current x: [-0.0000164  0.0355925  0.0010042]
Current f(x): -0.01785596565331849
Current grad: [0.0000025 0.0014913 0.0025777]
Step size: 0.11738254579421921

Itteration: 52
Current x: [-0.0000162  0.0357513  0.0009123]
Current f(x): -0.017856326877844667
Current grad: [-0.0000057 -0.0034099  0.0019727]
Step size: 0.04655306547292562

Itteration: 53
Current x: [-0.0000164  0.0356298  0.0007023]
Current f(x): -0.017856577562916556
Current grad: [0.0000017 0.0010349 0.