In [18]:
import math
import decimal
import time

import numpy as np

In [19]:
class Oracle_Chebyshev_Oscillator():
    def __init__(self, n):
        self.n = n

    def F(self, x):
        F = np.zeros(self.n)
        F[0] = x[0] - 1
        for i in range(self.n-1):
            F[i+1] = x[i+1] - 2 * x[i] ** 2 + 1
        return F

    def f1(self, x):
        return np.sqrt(np.sum(self.F(x) ** 2))

    def hat_f1(self, x):
        return self.f1(x) / np.sqrt(self.n)

    def jacobian_F(self, x):
        J = np.zeros((self.n, self.n))
        J[0,0] = 1
        for i in range(self.n-1):
            J[i+1,i+1] = 1
            J[i+1,i] = - 4 * x[i]
        return J

    def grad_hat_f1(self, x):
        g = np.zeros(self.n)
        g[0] = 2 * (x[0] - 1) - 8 * x[0] * (x[1] - 2 * x[0] ** 2 + 1)
        for i in range(self.n-2):
            g[i+1] = 2 * (x[i+1] - 2 * x[i] ** 2 + 1) - 8 * x[i+1] * (x[i+2] - 2 * x[i+1] ** 2 + 1)

        g[self.n-1] = 2 * (x[self.n-1] - 2 * x[self.n-2] ** 2 + 1)
        return g / (2 * self.n * self.hat_f1(x))


In [20]:
class Oracle_Trigonometric_Oscillator():
    def __init__(self, n):
        self.n = n

    def F(self, x):
        F = np.zeros(self.n)
        F[0] = x[0] - 1
        for i in range(self.n-1):
            F[i+1] = x[i+1] + np.cos(np.pi * x[i])
        return F

    def f1(self, x):
        return np.sqrt(np.sum(self.F(x) ** 2))

    def hat_f1(self, x):
        return self.f1(x) / np.sqrt(self.n)

    def jacobian_F(self, x):
        J = np.zeros((self.n, self.n))
        J[0,0] = 1
        for i in range(self.n-1):
            J[i+1,i+1] = 1
            J[i+1,i] = - np.pi * np.sin(np.pi * x[i-1])
        return J

    def grad_hat_f1(self, x):
        g = np.zeros(self.n)
        g[0] = 2 * (x[0] - 1) - 2 * np.pi * np.sin(np.pi * x[0]) * (x[1] + np.cos(np.pi * x[0]))
        for i in range(self.n-2):
            g[i+1] = 2 * (x[i+1] + np.cos(np.pi * x[i]))  - 2 * np.pi * np.sin(np.pi * x[i+1]) * (x[i+2] + np.cos(np.pi * x[i+1]))

        g[self.n - 1] = 2 * (x[self.n-1] + np.cos(np.pi * x[self.n-2]))
        return g / (2 * self.n * self.hat_f1(x))


In [21]:
n = 4
x_0 = np.ones(n)
x_1 = np.ones(n)
x_0[0] = -1
oracle = Oracle_Chebyshev_Oscillator(n)

In [22]:
b = oracle.f1(x_0)
c = oracle.grad_hat_f1(x_0)

In [23]:



c

array([-0.5,  0. ,  0. ,  0. ])

In [24]:
A = oracle.jacobian_F(x_0)

In [25]:
A

array([[ 1.,  0.,  0.,  0.],
       [ 4.,  1.,  0.,  0.],
       [ 0., -4.,  1.,  0.],
       [ 0.,  0., -4.,  1.]])

In [26]:
def  PureGradientMethod(x, oracle):
    start_time1 = time.time()
    iter_max = 1000000
    iter_counter = 0
    oracle_counter = 0
    func_res = []
    grad_mapping = []
    L0 = 1
    L = L0
    eps = 10 ** -5
    x_new = x
    x_curr = x
    func_res.append(oracle.hat_f1(x_new))

    while ((iter_counter <= iter_max) and (np.linalg.norm(x_new - x_curr, ord=2) > eps))  or (iter_counter == 0): 
        x_curr = x_new.copy()
        g_curr = oracle.grad_hat_f1(x_curr)
        f_curr = oracle.hat_f1(x_curr)
        while True:
            x_new = x_curr - g_curr / L 
            f_new = oracle.hat_f1(x_new)
            oracle_counter = oracle_counter + 1
       

            if (f_new <= f_curr + g_curr @ (x_new - x_curr) + L / 2 * np.sum((x_new - x_curr) ** 2)):
                func_res.append(f_new)
                grad_mapping.append(np.linalg.norm(x_new - x_curr, ord=2))    
                iter_counter = iter_counter + 1
#                print('iter = ', iter_counter)
                L = L / 2

                break
                
            else:
                L = 2 * L
        
    start_time2 = time.time()

        
    return iter_counter, oracle_counter, min(grad_mapping), min(func_res), (start_time2 - start_time1)

In [27]:
n = 3
x_0 = np.ones(n)
x_0[0] = -1
oracle = Oracle_Trigonometric_Oscillator(n)
repeats = 1
iter_counter = np.zeros(repeats)
oracle_counter = np.zeros(repeats)
times = np.zeros(repeats)
mapping = np.zeros(repeats)
func = np.zeros(repeats)
for i in range(repeats):
    iter_counter[i], oracle_counter[i], mapping[i], func[i], times[i] = PureGradientMethod(x_0, oracle)

    

print('iter_counter = ', np.mean(iter_counter))
print('oracle_counter = ', np.mean(oracle_counter))
print('grad_mapping = ', np.mean(mapping))
print('func = ', np.mean(func))
print('time = ', np.mean(times))
    

iter_counter =  313.0
oracle_counter =  641.0
grad_mapping =  8.809677550775502e-06
func =  1.0326411390306695e-06
time =  0.07163000106811523


In [28]:
def MomentumStepExtrapolation(oracle, x_prev, x_curr):
    tau_prev = 0
    tau_curr = 1
    y_prev = x_prev + tau_prev * (x_curr - x_prev)
    y_curr = x_prev + tau_curr * (x_curr - x_prev)
    while (oracle.grad_hat_f1(y_curr) @ (x_curr - x_prev) < 0 and oracle.hat_f1(y_curr) <= oracle.hat_f1(y_prev)):
        y_prev = y_curr
        tau_curr = tau_curr * 2
        y_curr = x_prev + tau_curr * (x_curr - x_prev)

    return y_curr


In [44]:
def MomentumStepArmijo(oracle, x_prev, x_curr):
    alpha = 1 / 3
    beta = 2 / 3
    tau = 1
    phi = oracle.hat_f1(x_prev)
    grad_phi = oracle.grad_hat_f1(x_prev) @ (x_curr - x_prev)
    y = x_prev + tau * (x_curr - x_prev)
    if (((phi + beta * grad_phi * tau) <= oracle.hat_f1(y)) and ((phi + alpha * grad_phi * tau) >= oracle.hat_f1(y))):
        return y
    if (phi + beta * grad_phi * tau > oracle.hat_f1(y)):
        tau_1 = tau
        tau_2 = 2 * tau_1
        while True:
            y1 = x_prev + tau_1 * (x_curr - x_prev)
            y2 = x_prev + tau_2 * (x_curr - x_prev)
            if (phi + beta * grad_phi * tau_1 > oracle.hat_f1(y1)) and (phi + alpha * grad_phi * tau_2 < oracle.hat_f1(y2)):
                break
            if (phi + beta * grad_phi * tau_1 <= oracle.hat_f1(y1)) and (phi + alpha * grad_phi * tau_1 >= oracle.hat_f1(y1)):
                return y1
            tau_1 = tau_2
            tau_2 = tau_1 * 2
    else:
        tau_2 = tau
        tau_1 = tau_2 / 2
        while True:
            y1 = x_prev + tau_1 * (x_curr - x_prev)
            y2 = x_prev + tau_2 * (x_curr - x_prev)
            if (phi + beta * grad_phi * tau_1 > oracle.hat_f1(y1)) and (phi + alpha * grad_phi * tau_2 < oracle.hat_f1(y2)):
                break
            if (phi + beta * grad_phi * tau_1 <= oracle.hat_f1(y1)) and (phi + alpha * grad_phi * tau_1 >= oracle.hat_f1(y1)):
                return y1
            tau_2 = tau_1
            tau_1 = tau_2 / 2
#    print('4')
    y1 = x_prev + tau_1 * (x_curr - x_prev)
    y2 = x_prev + tau_2 * (x_curr - x_prev)
    if (tau_2 < 10-3):
        return y2
    while True:
#        print('4')
        hat_tau = (tau_1 + tau_2) / 2
        y_hat = x_prev + hat_tau * (x_curr - x_prev)
        if (phi + alpha * grad_phi * hat_tau < oracle.hat_f1(y_hat)):
            tau_2 = hat_tau
        elif (phi + beta * grad_phi * hat_tau > oracle.hat_f1(y_hat)):
            tau_1 = hat_tau
        else: 
            return y_hat
#    print('7')
#    return x_curr + tau_1 * (x_curr - x_prev)


In [53]:

def PureGradientMethodDoubling(x, oracle):
    start_time1 = time.time()
    iter_max = 1000000
    iter_counter = 0
    oracle_counter = 0
    func_res = []
    grad_mapping = []
    L0 = 1
    L = L0
    eps = 10 ** -7
    x_curr = x
    func_res.append(oracle.hat_f1(x_curr))

    while True:

        x_new = x_curr - 1 / L * oracle.grad_hat_f1(x_curr)
        oracle_counter = oracle_counter  + 1
        if (oracle.hat_f1(x_new) <= oracle.hat_f1(x_curr)
            + oracle.grad_hat_f1(x_curr) @ (x_new - x_curr)
            + L / 2 * np.linalg.norm(x_new - x_curr, ord=2) ** 2):

            iter_counter = iter_counter + 1
            y_curr = x_new
            L = L / 2
            break
        else:
            L = 2 * L

    while True:
        x_prev = x_curr
        iter_counter = iter_counter + 1
        while True:

            x_new = y_curr - 1 / L * oracle.grad_hat_f1(y_curr)
            oracle_counter = oracle_counter  + 1
            if (oracle.hat_f1(x_new) <= oracle.hat_f1(y_curr)
                + oracle.grad_hat_f1(y_curr) @ (x_new - y_curr)
                + L / 2 * np.linalg.norm(x_new - y_curr, ord=2) ** 2):

                x_curr = x_new
                L = L / 2
                break
            else:
                L = 2 * L

        func_res.append(oracle.hat_f1(x_new))
        grad_mapping.append(np.linalg.norm(x_new - y_curr, ord=2))
        if ((np.linalg.norm(x_new - y_curr, ord=2) <= eps) or (iter_counter == iter_max)):
            break

        y_curr = MomentumStepArmijo(oracle, x_prev, x_curr)
        
    start_time2 = time.time()
#    print('eps = ', eps)
#    print('L0 = ', L0)
#    print('size = ', oracle.n)
#    print('iter_counter = ', iter_counter)
#    print('oracle_counter = ', oracle_counter)
#    print('time = ', start_time2 - start_time1)
#    print('grad_mapping = ', min(grad_mapping))
#    print('func_res = ', min(func_res))
          
    
#    print(x_new)
    return iter_counter, oracle_counter, min(grad_mapping), min(func_res), (start_time2 - start_time1)

In [68]:
n = 13
x_0 = np.ones(n)
x_0[0] = -1
oracle = Oracle_Trigonometric_Oscillator(n)
repeats = 1
iter_counter = np.zeros(repeats)
oracle_counter = np.zeros(repeats)
times = np.zeros(repeats)
mapping = np.zeros(repeats)
func = np.zeros(repeats)
for i in range(repeats):
    iter_counter[i], oracle_counter[i], mapping[i], func[i], times[i] = PureGradientMethodDoubling(x_0, oracle)

    

print('iter_counter = ', np.mean(iter_counter))
print('oracle_counter = ', np.mean(oracle_counter))
print('grad_mapping = ', np.mean(mapping))
print('func = ', np.mean(func))
print('time = ', np.mean(times))

iter_counter =  1000000.0
oracle_counter =  2000002.0
grad_mapping =  1.8977554985382925e-07
func =  0.1095306439511149
time =  1183.360366821289


In [185]:
def  StochasticSquares(x_, p_, oracle):
    start_time1 = time.time()
    iter_max = 1000000
    iter_counter = 0
    oracle_counter = 0
    func_res = []
    grad_mapping = []
    p = p_
    I_p = np.identity(p)
    I_n = np.identity(oracle.n)
    L = 1
    eps = 10 ** -6
    x_new = x_
    x_curr = x_
    func_res.append(oracle.hat_f1(x_new))

    while ((iter_counter <= iter_max) and (np.linalg.norm(x_new - x_curr, ord=2) > eps)) or (iter_counter == 0): 
#        print(x_curr)
        x_curr = x_new.copy()
        J_curr = oracle.jacobian_F(x_curr)
        a = np.sort(np.random.choice(range(J_curr.shape[0]), size=p, replace=False))
#        print(a)
        G_curr = J_curr[a, :].T
        g_curr = oracle.grad_hat_f1(x_curr)
        f_curr = oracle.hat_f1(x_curr)
        gamma_curr = 1 / (p * f_curr)
        while True:
            B_curr = (I_n - gamma_curr / L * G_curr @ np.linalg.inv(I_p + gamma_curr / L * G_curr.T @ G_curr) @ G_curr.T) / L
            x_new = x_curr - B_curr @ g_curr
            f_new = oracle.hat_f1(x_new)
            oracle_counter = oracle_counter  + 1

            if (f_new <= f_curr
                  + g_curr @ (x_new - x_curr)
                  + 1 / (2 * p * f_curr) * np.linalg.norm(G_curr.T @ (x_new - x_curr), ord=2) ** 2
                  + L / 2 * np.linalg.norm(x_new - x_curr, ord=2) ** 2):
                 
                func_res.append(f_new)
                grad_mapping.append(np.linalg.norm(x_new - x_curr, ord=2))    
                iter_counter = iter_counter + 1
#                print('iter = ', iter_counter)
                L = L / 2

                break
        
            else:
                L = 2 * L
        
    start_time2 = time.time()
#    print(iter_counter, '.......', oracle_counter, '.......', min(grad_mapping),
#         '.......', min(func_res))
#    print(x_new)
    return iter_counter, oracle_counter, min(grad_mapping), min(func_res), (start_time2 - start_time1)

In [198]:
n = 9
p = 2
x_0 = np.ones(n)
x_0[0] = -1
oracle = Oracle_Trigonometric_Oscillator(n)
repeats = 1
iter_counter = np.zeros(repeats)
oracle_counter = np.zeros(repeats)
times = np.zeros(repeats)
mapping = np.zeros(repeats)
func = np.zeros(repeats)
for i in range(repeats):
    iter_counter[i], oracle_counter[i], mapping[i], func[i], times[i] = StochasticSquares(x_0, p, oracle)

    

print('iter_counter = ', np.mean(iter_counter))
print('oracle_counter = ', np.mean(oracle_counter))
print('grad_mapping = ', np.mean(mapping))
print('func = ', np.mean(func))
print('time = ', np.mean(times))

iter_counter =  1000001.0
oracle_counter =  2000001.0
grad_mapping =  1.398979419278218e-05
func =  0.5114404671747018
time =  380.40443897247314


In [69]:
def  StochasticSquaresDoubling(x, p_, oracle):
    start_time1 = time.time()
    p = p_
    I_n = np.identity(oracle.n)
    iter_max = 1000000
    iter_counter = 0
    oracle_counter = 0
    func_res = []
    grad_mapping = []
    I_p = np.identity(p)
    L = 1
    k = 0
    eps = 10 ** -7
    x_curr = x
    func_res.append(oracle.hat_f1(x_curr))

    J_k = oracle.jacobian_F(x_curr)
    G_k = J_k[np.sort(np.random.choice(range(J_k.shape[0]), size=p, replace=False)), :].T
#    if ((G_k.T[0] == J_k[0]).all()):
#        print('yes')
    gamma_k = 1 / p / oracle.hat_f1(x_curr) / 2
    g_k = oracle.grad_hat_f1(x_curr)
    while True:
        B_k = (I_n - gamma_k / L * G_k @ np.linalg.inv(I_p + gamma_k / L * G_k.T @ G_k) @ G_k.T) / L
        x_new = x_curr - B_k @ g_k
        oracle_counter = oracle_counter  + 1

        if (oracle.hat_f1(x_new) <= oracle.hat_f1(x_curr)
            + oracle.grad_hat_f1(x_curr) @ (x_new - x_curr)
            + 1 / (2 * p * oracle.hat_f1(x_curr)) * np.linalg.norm(G_k.T @ (x_new - x_curr), ord=2) ** 2
            + L / 2 * np.linalg.norm(x_new - x_curr, ord=2) ** 2):
                  #print(iter_counter, '.......', np.linalg.norm(x_new - x_k, ord=2), '.......', Oracle.hat_f1(x_new))

            grad_mapping.append(np.linalg.norm(x_new - x_curr, ord=2))
            y_curr = x_new
            break
        else:
            L = L * 2

    while True:
        iter_counter = iter_counter + 1
        x_prev = x_curr
        L = L / 2
        J_k = oracle.jacobian_F(y_curr)
        G_k = J_k[np.sort(np.random.choice(range(J_k.shape[0]), size=p, replace=False)), :].T
        gamma_k = 1 / p / oracle.hat_f1(y_curr) / 2
        g_k = oracle.grad_hat_f1(y_curr)
        while True:
            B_k = (I_n - gamma_k / L * G_k @ np.linalg.inv(I_p + gamma_k / L * G_k.T @ G_k) @ G_k.T) / L
            x_new = y_curr - B_k @ g_k
            oracle_counter = oracle_counter  + 1

            if (oracle.hat_f1(x_new) <= oracle.hat_f1(y_curr)
                + oracle.grad_hat_f1(y_curr) @ (x_new - y_curr)
                + 1 / (2 * p * oracle.hat_f1(y_curr)) * np.linalg.norm(G_k.T @ (x_new - y_curr), ord=2) ** 2
                + L / 2 * np.linalg.norm(x_new - y_curr, ord=2) ** 2):
                x_curr = x_new
                L = L / 2
#                print('yes')
                break
            else:
#                print('no')
                L = 2 * L

#        print(iter_counter, '.......', np.linalg.norm(x_new - y_curr, ord=2), '.......', oracle.hat_f1(x_new))
        func_res.append(oracle.hat_f1(x_new))
        grad_mapping.append(np.linalg.norm(x_new - y_curr, ord=2))
        if ((np.linalg.norm(x_new - y_curr, ord=2) <= eps) or (iter_counter == iter_max)):
            break
#        print('mom')
#        print('x_curr', x_curr)
#        print('x_prev', x_prev)

        y_curr = MomentumStepArmijo(oracle, x_prev, x_curr)
#        print(iter_counter, '.......')
#        print('y_curr', y_curr)
#    print(iter_counter, '.......', oracle_counter, '.......', np.min(np.array(grad_mapping)), '.......', np.min(np.array(func_res)))
    start_time2 = time.time()
    return iter_counter, oracle_counter, min(grad_mapping), min(func_res), (start_time2 - start_time1) 

In [110]:
n = 3
p = 1

x_0 = np.ones(n)
x_0[0] = -1
oracle = Oracle_Chebyshev_Oscillator(n)
repeats = 1
iter_counter = np.zeros(repeats)
oracle_counter = np.zeros(repeats)
times = np.zeros(repeats)
mapping = np.zeros(repeats)
func = np.zeros(repeats)
for i in range(repeats):
    iter_counter[i], oracle_counter[i], mapping[i], func[i], times[i] = StochasticSquaresDoubling(x_0, p, oracle)

    

print('iter_counter = ', np.mean(iter_counter))
print('oracle_counter = ', np.mean(oracle_counter))
print('grad_mapping = ', np.mean(mapping))
print('func = ', np.mean(func))
print('time = ', np.mean(times))



iter_counter =  81.0
oracle_counter =  263.0
grad_mapping =  9.125771809225193e-08
func =  5.176054571854602e-06
time =  0.07690095901489258
