# Ускоренные градиентные методы

In [7]:
import numpy as np
import numpy.linalg as lg

### Функция и ее градиент

In [8]:
def fx(x): 
    return np.exp(sum(np.square(x)))
def gradfx(x):
    return fx(x) * (2*x) 
def grad2fx(x):
    return fx(x) * 2 * (np.eye(len(x)) + 2 * x.reshape(len(x),1) * x)

def hx(x): 
    x1 = x
    x1[0] = x[0]/10
    return np.exp(sum(np.square(x1)))
def gradhx(x_):
    x = x_
    x[0] = x[0]/10
    return fx(x) * (2*x) 
def grad2hx(x_):
    x = x_
    x[0] = x[0]/10
    return fx(x) * 2 * (np.eye(len(x)) + 2 * x.reshape(len(x),1) * x)

#альтернативная ДВУМЕРНАЯ функция ( для проверки)
def gx(x):
    k = np.array([3, 1])
    return sum(np.square(k * x))
def gradgx(x):
    k = np.array([18, 2])
    return x * k
def grad2gx(x):
    return np.array([[18 ,0],[0, 2]])


## Вспомогательные функции

### "Умные" производные 

In [9]:
def func(f, x0, gradx0):
    def fun(a):
        A = x0 -  gradx0 * a
        return f(A)
    return fun

def der1_e(f_, x0, a, gradx0, e):
    f_(x0)
    f = func(f_, x0, gradx0)
    def der(a):
        return (f(a + e) - f(a)) / e
    return der

def der2_e(f_, x0, a, gradx0, e):
    df = der1_e(f_, x0, a, gradx0, e)
    def der2(a):
        return (df(a + e) - df(a))/e
    return der2

### геометрический метод поиска альфа основанный на ортогональности градиентов 

In [10]:
def mygeom(x0, e, grad):
    #print('    start mygeom')
    a = 0
    scalar = abs(sum(grad(x0)*grad(x0 - a * grad(x0))))
    while scalar > e: 
        a+=e
        scalar1 = scalar
        scalar = abs(sum(grad(x0)*grad(x0 - a * grad(x0))))
        #print('    a = ', a)
        #print('    scalar = ', scalar)
        if (scalar1 < scalar):       
            break
            
    #print('    end mygeom')
    return a

# Ускоренный градиентный метод p-го порядка

In [11]:
def grad_descent_a1(f, x0, x1, e):
    if(f(x0)-f(x1) > 0):
        vec = x0 - x1
    else:
        vec = x1 - x0
        
    a = 0
    df = der1_e(f, x0, a, vec, e)
    while e < abs(df(a)):
        d2f = der2_e(f, x0, a, vec, e)
        a = a - df(a)/d2f(a)
        df = der1_e(f, x0, a, vec, e)
    return a

In [12]:
def p_gradient(f, grad, x0, e):
    n  = 0
    n1 = 0;
    while abs(lg.norm(grad(x0))) > e * 5:
        x_st = x0
        for k in range(len(x0)):
            if(abs(lg.norm(grad(x_st))) <= e * 5):
                break
            n+=1
            print('')
            print('------[----', n, '----]------')
            a = mygeom(x0, e/100, grad)
            x0 = x0 - grad(x0) * a
            print('')
            print('a = ', a)
            print('  x  = ', x0)
            print('f(x) = ', f(x0))
        if(abs(lg.norm(grad(x0))) <= e * 5):
                break
        print('')
        print('скачок[----', n1, '----]скачок')
        #a = grad_descent_a1(f, x_st, x0, e / 100)
        #x0 = x0 - grad(x0) * a
        print('a1 = ', a)
        print('  x1  = ', x0)
        print('f(x1) = ', f(x0))
        n1+=1
        
    print('сделано ', n, ' шагов')
    print('сделано ', n1, ' скачков')
    return x0

## тесты

In [13]:
x0 = np.array([1.2, 1, 1])
e = 0.05
x_min = p_gradient(fx, gradfx, x0, e)
print(round(fx(x_min), 3), x_min)
''''''


------[---- 1 ----]------

a =  0.01650000000000001
  x  =  [-0.03500354 -0.02916962 -0.02916962]
f(x) =  1.0029312692576424

------[---- 2 ----]------

a =  0.47750000000000037
  x  =  [-0.00147717 -0.00123098 -0.00123098]
f(x) =  1.000005212656834

------[---- 3 ----]------

a =  0
  x  =  [-0.00147717 -0.00123098 -0.00123098]
f(x) =  1.000005212656834
сделано  3  шагов
сделано  0  скачков
1.0 [-0.00147717 -0.00123098 -0.00123098]


''

In [14]:
x0 = np.array([5,1])
e = 0.05
x_min = p_gradient(gx, gradgx, x0, e)
print(round(gx(x_min), 3), x_min)
''''''


------[---- 1 ----]------

a =  0.05600000000000004
  x  =  [-0.04   0.888]
f(x) =  0.8029440000000024

------[---- 2 ----]------

a =  0.23550000000000018
  x  =  [0.12956  0.469752]
f(x) =  0.371739083904027

скачок[---- 0 ----]скачок
a1 =  0.23550000000000018
  x1  =  [0.12956  0.469752]
f(x1) =  0.371739083904027

------[---- 3 ----]------

a =  0.06400000000000004
  x  =  [-0.01969312  0.40962374]
f(x) =  0.1712819824265875

------[---- 4 ----]------

a =  0.22100000000000017
  x  =  [0.05864611 0.22857005]
f(x) =  0.08319856476819916

скачок[---- 1 ----]скачок
a1 =  0.22100000000000017
  x1  =  [0.05864611 0.22857005]
f(x1) =  0.08319856476819916

------[---- 5 ----]------

a =  0.06500000000000004
  x  =  [-0.00996984  0.19885594]
f(x) =  0.04043826516668597

------[---- 6 ----]------

a =  0.21200000000000016
  x  =  [0.02807507 0.11454102]
f(x) =  0.020213530152572167

скачок[---- 2 ----]скачок
a1 =  0.21200000000000016
  x1  =  [0.02807507 0.11454102]
f(x1) =  0.020213530152

''

In [16]:
x0 = np.array([5,1,1])
e = 0.05
x_min = p_gradient(hx, gradhx, x0, e)
print(round(hx(x_min), 3), x_min)


------[---- 1 ----]------

a =  0.06800000000000005
  x  =  [ 0.         -0.00491163 -0.00491163]
f(x) =  1.0000482493717615

------[---- 2 ----]------

a =  0
  x  =  [ 0.         -0.00491163 -0.00491163]
f(x) =  1.0000482493717615

------[---- 3 ----]------

a =  0
  x  =  [ 0.         -0.00491163 -0.00491163]
f(x) =  1.0000482493717615
сделано  3  шагов
сделано  0  скачков
1.0 [ 0.         -0.00491163 -0.00491163]


# Овражный метод

## Вспомогательные функции

In [17]:
def grad_descent_a2(f, x0, x1, e):
    if(f(x0) - f(x1) < 0):
        vec = x0 - x1
    else:
        vec = x1 - x0
    a = 0
    df = der1_e(f, x0, a, vec, e)
    while e < abs(df(a)):
        d2f = der2_e(f, x0, a, vec, e)
        a = a - df(a)/d2f(a)
        
        df = der1_e(f, x0, a, vec, e)
    return a

In [18]:

def ravine_gradient(f, grad, x0, e):
    x1 = x0 - grad(x0)/4
    n  = 0
    n1 = 0;
    while abs(lg.norm(grad(x0))) > e * 5:
        for k in range(len(x0)):
            if(abs(lg.norm(grad(x0))) < e * 5):
                break
            n+=1
            print('')
            print('------[----', n, '----]------')
            
            a = mygeom(x0, e/100, grad)
            x0 = x0 - grad(x0) * a
            
            a = mygeom(x1, e/100, grad)
            x1 = x1 - grad(x1) * a
            
            print('  x0  = ', x0 ,    ' x1  = ', x1 )
            print('f(x0) = ', f(x0), 'f(x1) = ', f(x1) )
            
        if(abs(lg.norm(grad(x0))) < e * 5):
                break
                
        print('')
        print('скачок[----', n1, '----]скачок')
        a = grad_descent_a2(f, x0, x1, e / 100)
        x0 = x0 - grad(x0) * a
        x1 = x0 - grad(x0)/4
        print('a1 = ', a)
        print('  x1  = ', x0)
        print('f(x1) = ', f(x0))
        n1+=1
        
    print('сделано ', n, ' шагов')
    print('сделано ', n1, ' скачков')
    return x0


In [19]:
x0 = np.array([1.4, 1.3, 1])
e = 0.05
x_min = ravine_gradient(fx, gradfx, x0, e)

print(round(fx(x_min), 3), x_min)


------[---- 1 ----]------
  x0  =  [-0.21060878 -0.19556529 -0.15043484]  x1  =  [nan nan nan]
f(x0) =  1.1109688616370808 f(x1) =  nan

------[---- 2 ----]------
  x0  =  [-2.69630891e-05 -2.50371542e-05 -1.92593494e-05]  x1  =  [nan nan nan]
f(x0) =  1.0000000017247899 f(x1) =  nan
сделано  2  шагов
сделано  0  скачков
1.0 [-2.69630891e-05 -2.50371542e-05 -1.92593494e-05]


  return np.exp(sum(np.square(x)))
  scalar = abs(sum(grad(x0)*grad(x0 - a * grad(x0))))
  x1 = x1 - grad(x1) * a


In [20]:
x0 = np.array([4, 10])
e = 0.05
x_min = ravine_gradient(gx, gradgx, x0, e)
print(round(gx(x_min), 3), x_min)


------[---- 1 ----]------
  x0  =  [-0.32  8.8 ]  x1  =  [0.112 4.44 ]
f(x0) =  78.3616 f(x1) =  19.826496000000017

------[---- 2 ----]------
  x0  =  [1.3072 3.828 ]  x1  =  [-0.612752  1.24764 ]
f(x0) =  30.032530560000282 f(x1) =  4.935790691136647

скачок[---- 0 ----]скачок
a1 =  -0.815264994068489
  x1  =  [20.4900592  10.06966879]
f(x1) =  3879.980965443646

------[---- 3 ----]------
  x0  =  [-0.16392047  8.94186589]  x1  =  [0.57372166 4.47093294]
f(x0) =  80.19879488258462 f(x1) =  22.951650261749474

------[---- 4 ----]------
  x0  =  [1.05466433 1.55588466]  x1  =  [-0.35570743  3.66616501]
f(x0) =  12.431628680982248 f(x1) =  14.579515882915969

скачок[---- 1 ----]скачок
a1 =  0.45171091059332735
  x1  =  [-7.52059658  0.15026451]
f(x1) =  509.05693555797393

------[---- 5 ----]------
  x0  =  [0.06016477 0.13343488]  x1  =  [-0.2105767   0.06671744]
f(x0) =  0.05038306667213433 f(x1) =  0.4035341521803952

------[---- 6 ----]------
  x0  =  [-0.00373022  0.11768957]  x1 

In [22]:
x0 = np.array([5,1,1])
e = 0.05
x_min = ravine_gradient(hx, gradhx, x0, e)
print(round(hx(x_min), 3), x_min)


------[---- 1 ----]------
  x0  =  [ 0.         -0.00491163 -0.00491163]  x1  =  [   0.         5453.08400889 5453.08400889]
f(x0) =  1.0000482493717615 f(x1) =  inf
сделано  1  шагов
сделано  0  скачков
1.0 [ 0.         -0.00491163 -0.00491163]


  return np.exp(sum(np.square(x)))
  return fx(x) * (2*x)
  return np.exp(sum(np.square(x1)))


## Модифицированный метод Ньютона

## Вспомогательные функции

In [23]:
def grad_descent_a3(f, grad, grad2, x, e):
    a = 0
    df = der1_e(f, x, a, np.dot( lg.inv(grad2fx(x)),gradfx(x)), e)
    while e < abs(df(a)):
        d2f = der2_e(f, x, a, np.dot( lg.inv(grad2fx(x)),gradfx(x)), e)
        a = a - df(a)/d2f(a)
        df = der1_e(f, x, a, np.dot( lg.inv(grad2fx(x)),gradfx(x)), e)
    return a

In [24]:
def Newton_gradient(f, grad, grad2, x, e):
    n = 0
    while abs(lg.norm(grad(x))) > e * 5:
        print('')
        print('------[----', n, '----]------')
        n+=1
        p = np.dot(lg.inv(grad2fx(x)),gradfx(x))
        a = grad_descent_a3(f, grad, grad2, x, e/100)
        print('x', x)
        print('a', a)
        print('p', p)
        x -= a * p
        
    print('сделано ', n, ' шагов')
    
    return x0

In [26]:
x0 = np.array([2, 1.0, 1.0])
e = 0.05
x_min = Newton_gradient(fx, gradfx, grad2fx, x0, e)
print(round(fx(x_min), 3), x_min)


------[---- 0 ----]------
x [2. 1. 1.]
a 12.994191384948987
p [0.15384615 0.07692308 0.07692308]
сделано  1  шагов
1.0 [0.00089363 0.00044682 0.00044682]


In [27]:
x0 = np.array([4., 10.])
e = 0.05
x_min = Newton_gradient(gx, gradgx,grad2gx, x0, e)
print(round(gx(x_min), 3), x_min)


------[---- 0 ----]------
x [ 4. 10.]
a 232.99816191126627
p [0.01716738 0.04291845]
сделано  1  шагов
0.0 [3.15551715e-05 7.88879285e-05]


In [30]:
x0 = np.array([5.0,1.0,1.0])
e = 0.05
x_min = Newton_gradient(hx, gradhx, grad2hx, x0, e)
print(round(hx(x_min), 3), x_min)


------[---- 0 ----]------
x [5.e-14 1.e+00 1.e+00]
a 4.999746493614176
p [0.09090909 0.18181818 0.18181818]

------[---- 1 ----]------
x [-4.54522409e-07  9.09551830e-02  9.09551830e-02]
a 1.0328401085605683
p [-0.04382108  0.08769104  0.08769104]
сделано  2  шагов
1.0 [0.0004526  0.00038436 0.00038436]
