# Ускоренные градиентные методы

In [3]:
import numpy as np
import numpy.linalg as lg

### Функция и ее градиент

In [4]:
def fx(x): 
    return np.exp(sum(np.square(x)))
def gradfx(x):
    return fx(x) * (2*x) 
def grad2fx(x):
    return fx(x) * 2 * (np.eye(len(x)) + 2 * x.reshape(len(x),1) * x)

def hx(x): 
    x1 = x
    x1[0] = x[0]/10
    return np.exp(sum(np.square(x1)))
def gradhx(x_):
    x = x_
    x[0] = x[0]/10
    return fx(x) * (2*x) 
def grad2hx(x_):
    x = x_
    x[0] = x[0]/10
    return fx(x) * 2 * (np.eye(len(x)) + 2 * x.reshape(len(x),1) * x)

#альтернативная ДВУМЕРНАЯ функция ( для проверки)
def gx(x):
    k = np.array([3, 1])
    return sum(np.square(k * x))
def gradgx(x):
    k = np.array([18, 2])
    return x * k
def grad2gx(x):
    return np.array([[18 ,0],[0, 2]])


## Вспомогательные функции

### "Умные" производные 

In [5]:
def func(f, x0, gradx0):
    def fun(a):
        return f((x0 -  gradx0 * a))
    return fun

def der1_e(f_, x0, a, gradx0, e):
    e = e / 2
    f_(x0)
    f = func(f_, x0, gradx0)
    def der(a):
        return (f(a + e) - f(a - e)) / (e * 2)
    return der

def der2_e(f_, x0, a, gradx0, e):
    e = e / 2
    df = der1_e(f_, x0, a, gradx0, e / 5)
    def der2(a):
        return (df(a + e) - df(a - e)) / ( e * 2 )
    return der2

### геометрический метод поиска альфа основанный на ортогональности градиентов 

In [6]:
def mygeom(x0, e, grad):
    #print('    start mygeom')
    a = 0
    scalar = abs(sum(grad(x0)*grad(x0 - a * grad(x0))))
    while scalar > e: 
        a+=e
        scalar1 = scalar
        scalar = abs(sum(grad(x0)*grad(x0 - a * grad(x0))))
        #print('    a = ', a)
        #print('    scalar = ', scalar)
        if (scalar1 < scalar):       
            break
            
    #print('    end mygeom')
    return a

# Ускоренный градиентный метод p-го порядка

In [7]:
def grad_descent_a1(f, x0, x1, e):
    if(f(x0)-f(x1) > 0):
        vec = x0 - x1
    else:
        vec = x1 - x0
    a = 0
    df = der1_e(f, x0, a, x1-x0, e / 5)
    while e < abs(df(a)):
        d2f = der2_e(f, x0, a, x1-x0, e / 5)
        a = a - df(a)/d2f(a)
        df = der1_e(f, x0, a, x1-x0, e / 5)
    return a

def grad_descent_a2(f, x0, grad, e):
    a = 0
    df = der1_e(f, x0, a, grad, e / 5)
    while e < abs(df(a)):
        d2f = der2_e(f, x0, a, grad, e / 5)
        a = a - df(a)/d2f(a)
        df  = der1_e(f, x0, a, grad, e / 5)
    return a

In [8]:
def p_gradient(f, grad, x0, e):
    n  = 0
    n1 = 0;
    while abs(lg.norm(grad(x0))) > e:
        x1 = x0
        
        for k in range(len(x0)):
            if(abs(lg.norm(grad(x1))) <= e):
                x0 = x1
                break
            n+=1
            print('')
            print('------[----', n, '----]------')
            #a = mygeom(x1, e/1000, grad)
            a = grad_descent_a1(f, x0, grad(x0), e/100)
            x1 = x1 - grad(x1) * a
            print('')
            print('a = ', a)
            print('  x  = ', x1)
            print('f(x) = ', f(x1))
        if(abs(lg.norm(grad(x0))) <= e):
                break
        print('')
        print('скачок[----', n1, '----]скачок')
        a = grad_descent_a1(f, x0, x1, e / 100)
        x0 = x0 -  (x1 - x0) * a
        print('a1 = ', a)
        print('  x1  = ', x0)
        print('f(x1) = ', f(x0))
        n1+=1
        
    print('сделано ', n, ' шагов')
    print('сделано ', n1, ' скачков')
    return x0

## тесты

In [9]:
x0 = np.array([1.2, 1, 1])
e = 0.05
x_min = p_gradient(fx, gradfx, x0, e)
print(round(fx(x_min), 3), x_min)
''''''


------[---- 1 ----]------

a =  0.01629356663858428
  x  =  [-0.01955227 -0.01629356 -0.01629356]
f(x) =  1.0009136689390412

------[---- 2 ----]------

a =  0.01629356663858428
  x  =  [-0.01891454 -0.01576212 -0.01576212]
f(x) =  1.000855013790037

------[---- 3 ----]------

a =  0.01629356663858428
  x  =  [-0.01829764 -0.01524804 -0.01524804]
f(x) =  1.0008001288157413

скачок[---- 0 ----]скачок
a1 =  -0.9849809712926177
  x1  =  [4.83294138e-09 4.02745115e-09 4.02745115e-09]
f(x1) =  1.0
сделано  3  шагов
сделано  1  скачков
1.0 [4.83294138e-09 4.02745115e-09 4.02745115e-09]


  return np.exp(sum(np.square(x)))


''

In [10]:
x0 = np.array([5,1])
e = 0.05
x_min = p_gradient(gx, gradgx, x0, e)
print(round(gx(x_min), 3), x_min)
''''''


------[---- 1 ----]------

a =  0.05883800325890875
  x  =  [-0.29542029  0.88232399]
f(x) =  1.5639539767249726

------[---- 2 ----]------

a =  0.05883800325890875
  x  =  [0.01745463 0.77849563]
f(x) =  0.6087974220673952

скачок[---- 0 ----]скачок
a1 =  -1.0042739033087158
  x1  =  [-0.00384029  0.77754894]
f(x1) =  0.6047150862325942

------[---- 3 ----]------

a =  0.9438476806735299
  x  =  [ 0.06140334 -0.69022659]
f(x) =  0.5103460789868295

------[---- 4 ----]------

a =  0.9438476806735299
  x  =  [-0.98179392  0.61271094]
f(x) =  9.05068845573443

скачок[---- 1 ----]скачок
a1 =  -0.010929028543020093
  x1  =  [-0.01452837  0.77574742]
f(x1) =  0.6036837246580706

------[---- 5 ----]------

a =  0.5509956578416818
  x  =  [ 0.12956287 -0.0791195 ]
f(x) =  0.157338734825748

------[---- 6 ----]------

a =  0.5509956578416818
  x  =  [-1.15543156  0.0080695 ]
f(x) =  12.015263995169338

скачок[---- 2 ----]скачок
a1 =  -0.03627561684591283
  x1  =  [-0.05591534  0.74789943]
f(

''

In [11]:
x0 = np.array([5,1,1])
e = 0.05
x_min = p_gradient(hx, gradhx, x0, e)
print(round(hx(x_min), 3), x_min)


------[---- 1 ----]------

a =  0.07257886905127837
  x  =  [ 0.         -0.07257867 -0.07257867]
f(x) =  1.0105910186477602

------[---- 2 ----]------

a =  0.07257886905127837
  x  =  [ 0.         -0.06193173 -0.06193173]
f(x) =  1.0077005775382284

------[---- 3 ----]------

a =  0.07257886905127837
  x  =  [ 0.         -0.05287264 -0.05287264]
f(x) =  1.0056066903976044

скачок[---- 0 ----]скачок
a1 =  -0.9497823061429453
  x1  =  [0.00000000e+00 1.99091608e-07 1.99091608e-07]
f(x1) =  1.0000000000000793
сделано  3  шагов
сделано  1  скачков
1.0 [0.00000000e+00 1.99091608e-07 1.99091608e-07]


# Овражный метод

In [12]:
def ravine_gradient(f, grad, x0, e):
    x1 = x0 - e*10
    n  = 0
    n1 = 0
    while abs(lg.norm(grad(x0))) > e * 5:
        for k in range(len(x0)):
            if(abs(lg.norm(grad(x0))) < e * 5):
                break
            n+=1
            print('')
            print('------[----', n, '----]------')
            
            a = mygeom(x0, e/100, grad)
            
            x0 = x0 - grad(x0) * a
            
            a = mygeom(x1, e/100, grad)
            x1 = x1 - grad(x1) * a
            
            print('  x0    = ', x0 ,   ' x1    = ', x1 )
            print('  f(x0) = ', f(x0), ' f(x1) = ', f(x1) )
            
        if(abs(lg.norm(grad(x0))) < e * 5):
                break
                
        print('')
        print('скачок[----', n1, '----]скачок')
        a = grad_descent_a1(f, x0, x1, e / 100)
        x0 = x0 - a * (x1-x0) 
        x1 = x0 - e*10
        print('a1 = ', a)
        print('  x1  = ', x0)
        print('f(x1) = ', f(x0))
        n1+=1
        
    print('сделано ', n, ' шагов')
    print('сделано ', n1, ' скачков')
    return x0


In [13]:
x0 = np.array([1.4, 1.3, 1])
e = 0.05
x_min = ravine_gradient(fx, gradfx, x0, e)

print(round(fx(x_min), 3), x_min)


------[---- 1 ----]------
  x0    =  [-0.21060878 -0.19556529 -0.15043484]  x1    =  [-0.00648569 -0.00576506 -0.00360316]
  f(x0) =  1.1109688616370808  f(x1) =  1.0000882866804481

------[---- 2 ----]------
  x0    =  [-2.69630891e-05 -2.50371542e-05 -1.92593494e-05]  x1    =  [-0.00648569 -0.00576506 -0.00360316]
  f(x0) =  1.0000000017247899  f(x1) =  1.0000882866804481
сделано  2  шагов
сделано  0  скачков
1.0 [-2.69630891e-05 -2.50371542e-05 -1.92593494e-05]


In [14]:
x0 = np.array([5,1,1])
e = 0.05
x_min = ravine_gradient(hx, gradhx, x0, e)
print(round(hx(x_min), 3), x_min)



------[---- 1 ----]------
  x0    =  [ 0.         -0.00491163 -0.00491163]  x1    =  [ 0.         -0.00121127 -0.00121127]
  f(x0) =  1.0000482493717615  f(x1) =  1.0000029343363694
сделано  1  шагов
сделано  0  скачков
1.0 [ 0.         -0.00491163 -0.00491163]


In [15]:
x0 = np.array([4, 10])
e = 0.05
x_min = ravine_gradient(gx, gradgx, x0, e)
print(round(gx(x_min), 3), x_min)


------[---- 1 ----]------
  x0    =  [-0.32  8.8 ]  x1    =  [-0.3115  8.3505]
  f(x0) =  78.3616  f(x1) =  70.60414049999999

------[---- 2 ----]------
  x0    =  [1.3072 3.828 ]  x1    =  [1.2388355 3.7326735]
  f(x0) =  30.032530560000282  f(x1) =  27.745272022144746

скачок[---- 0 ----]скачок
a1 =  -22.858260994617122
  x1  =  [-0.25549358  1.64900198]
f(x1) =  3.3067002830289414

------[---- 3 ----]------
  x0    =  [0.11241718 1.38516167]  x1    =  [0.02644228 1.01686676]
  f(x0) =  2.0324114357435157  f(x1) =  1.0403107432367256

------[---- 4 ----]------
  x0    =  [-0.15569779  1.01809382]  x1    =  [-0.1420479   0.29692509]
  f(x0) =  1.2546912516095627  f(x1) =  0.2697629728750749

скачок[---- 1 ----]скачок
a1 =  -1.4438500249659227
  x1  =  [-0.1359894  -0.02316567]
f(x1) =  0.16697470336334935

------[---- 5 ----]------
  x0    =  [ 0.00108792 -0.02057111]  x1    =  [ 0.01081182 -0.46404795]
  f(x0) =  0.0004338227025942484  f(x1) =  0.2163925559355663
сделано  5  шагов
с

## Модифицированный метод Ньютона

## Вспомогательные функции

In [23]:
def grad_descent_a3(f, grad, grad2, x, e):
    a = 0
    df = der1_e(f, x, a, np.dot( lg.inv(grad2fx(x)),gradfx(x)), e)
    while e < abs(df(a)):
        d2f = der2_e(f, x, a, np.dot( lg.inv(grad2fx(x)),gradfx(x)), e)
        a = a - df(a)/d2f(a)
        df = der1_e(f, x, a, np.dot( lg.inv(grad2fx(x)),gradfx(x)), e)
    return a

In [24]:
def Newton_gradient(f, grad, grad2, x, e):
    n = 0
    while abs(lg.norm(grad(x))) > e * 5:
        print('')
        print('------[----', n, '----]------')
        n+=1
        p = np.dot(lg.inv(grad2fx(x)),gradfx(x))
        a = grad_descent_a3(f, grad, grad2, x, e/100)
        print('x', x)
        print('a', a)
        print('p', p)
        x -= a * p
    print('сделано ', n, ' шагов')
    return x0

In [26]:
x0 = np.array([2, 1.0, 1.0])
e = 0.05
x_min = Newton_gradient(fx, gradfx, grad2fx, x0, e)
print(round(fx(x_min), 3), x_min)


------[---- 0 ----]------
x [2. 1. 1.]
a 12.994191384948987
p [0.15384615 0.07692308 0.07692308]
сделано  1  шагов
1.0 [0.00089363 0.00044682 0.00044682]


In [27]:
x0 = np.array([4., 10.])
e = 0.05
x_min = Newton_gradient(gx, gradgx,grad2gx, x0, e)
print(round(gx(x_min), 3), x_min)


------[---- 0 ----]------
x [ 4. 10.]
a 232.99816191126627
p [0.01716738 0.04291845]
сделано  1  шагов
0.0 [3.15551715e-05 7.88879285e-05]


In [30]:
x0 = np.array([5.0,1.0,1.0])
e = 0.05
x_min = Newton_gradient(hx, gradhx, grad2hx, x0, e)
print(round(hx(x_min), 3), x_min)


------[---- 0 ----]------
x [5.e-14 1.e+00 1.e+00]
a 4.999746493614176
p [0.09090909 0.18181818 0.18181818]

------[---- 1 ----]------
x [-4.54522409e-07  9.09551830e-02  9.09551830e-02]
a 1.0328401085605683
p [-0.04382108  0.08769104  0.08769104]
сделано  2  шагов
1.0 [0.0004526  0.00038436 0.00038436]
