In [1]:
import numpy as np

In [88]:
def f(x):
    f = np.zeros(len(x))
    for i in range(len(x)):
        f[i] = np.power(x[i], 4)/4 + np.power(x[i], 2)/2 + x[i]
    return f     

def delta_f(x):
    return np.power(x, 3) + x + 1   

def delta2_f(x):
    return 3 * np.power(x, 2) + 1

In [148]:
def steep(x0, alpha, kmax, tollgrad, grad_method = 'exact'):
    x = np.zeros((len(x0), kmax))
    x[:,0] = x0
    
    p = np.zeros((len(x0), kmax))
    p[:, 0] = -delta_f(x0)
    
    delta_fk_norm = np.linalg.norm(delta_f(x0))
    
    
    k = 0
    while(k < kmax-1 and delta_fk_norm >= tollgrad):
        
        # Compute the new value for xk
        x[:, k+1] = x[:, k] + alpha * p[:, k]

        p[:, k+1] = -delta_f(x[:, k+1])
        
        # Compute the gradient of f in xk
        delta_fk_norm = np.linalg.norm(delta_f(x[:, k+1]))
        
        # Increase the step by one
        k += 1

    # Compute f(xk)
    fk = f(x[:, k-1]).sum()

    # "Cut" xseq to the correct size
    x = x[:, :k]
    
    return x, k-1, fk

In [149]:
n = 10**5
np.random.seed(288874)
x0 = np.random.random(n)*5
# x0 = [2, 5]
alpha = 0.03
kmax = 1000
tol = 10**(-6)

In [150]:
x0

array([0.87672636, 0.09457948, 0.52191602, ..., 2.09480059, 1.48428634,
       4.01296899])

In [95]:
x_seq, k, fk = steep(x0, alpha, kmax, tol)

In [96]:
fk

-39535.304490182265

In [97]:
x_seq[:,k-1]

array([-0.6823278, -0.6823278, -0.6823278, ..., -0.6823278, -0.6823278,
       -0.6823278])

In [99]:
x_seq

array([[ 0.87672636,  0.80020772,  0.73082952, ..., -0.6823278 ,
        -0.6823278 , -0.6823278 ],
       [ 0.09457948,  0.06171672,  0.02985816, ..., -0.6823278 ,
        -0.6823278 , -0.6823278 ],
       [ 0.52191602,  0.4719935 ,  0.42467921, ..., -0.6823278 ,
        -0.6823278 , -0.6823278 ],
       ...,
       [ 2.09480059,  1.72618511,  1.49009337, ..., -0.6823278 ,
        -0.6823278 , -0.6823278 ],
       [ 1.48428634,  1.31165655,  1.17460795, ..., -0.6823278 ,
        -0.6823278 , -0.6823278 ],
       [ 4.01296899,  1.92384396,  1.62251411, ..., -0.6823278 ,
        -0.6823278 , -0.6823278 ]])

In [151]:
def fr(x, kmax, tollgrad, grad_method = 'exact'):
    x = np.zeros((len(x0), kmax))
    x[:, 0] = x0
    
    gradf_k = delta_f(x0)
        
    p_k = -delta_f(x0)
    
    
    delta_fk_norm = np.linalg.norm(delta_f(x0))
        
    k = 0
    while(k < kmax-1 and delta_fk_norm >= tollgrad):
        
        # Compute dk
        d_k = -gradf_k
        
        # Compute alpha[k]
        alpha_k = (gradf_k.T @ gradf_k) / (d_k.T @ (delta2_f(x[:, k]) * d_k)) 
        
        # Our Q matrix is diagonal matrix with shape(10000, 10000). Such matrix requires a lot of storage. 
        # For this reason I use element-wise multiplication which is fine for our solution.

        # Compute the new value for xk
        x[:, k+1] = x[:, k] + alpha_k * p_k

        # Compute the new value for gradfk
        gradf_k1 = delta_f(x[:, k+1])
        
        # Compute the new value for betak
        beta_k1 = (gradf_k1.T @ gradf_k1) / (gradf_k.T @ gradf_k) 
        
        # Compute the new value for pk
        p_k = -gradf_k1 + beta_k1 * p_k
        
        # Compute the gradient of f in xk
        delta_fk_norm = np.linalg.norm(delta_f(x[:, k+1]))
        
        # Increase the step by one
        k += 1

    # Compute f(xk)
    fk = f(x[:, k-1]).sum()

    # "Cut" xseq to the correct size
    x = x[:, :k]
    print(alpha_k)
    return x, k-1, fk

In [116]:
x_seq_fr, k_fr, fk_fr = fr(x0, kmax, tol)

0.4172290753398313


In [117]:
x_seq_fr[:, k_fr]

array([-0.68234031, -0.68233874, -0.68234031, ..., -0.68234031,
       -0.68234031, -0.68234031])

In [118]:
fk_fr

-39535.304471643656

In [113]:
k_fr

7

In [124]:
def pr(x, kmax, tollgrad, grad_method = 'exact'):
    x = np.zeros((len(x0), kmax))
    x[:, 0] = x0
    
    gradf_k = delta_f(x0)
        
    p_k = -delta_f(x0)
    
    
    delta_fk_norm = np.linalg.norm(delta_f(x0))
        
    k = 0
    while(k < kmax-1 and delta_fk_norm >= tollgrad):
        
        # Compute dk
        d_k = -gradf_k
        
        # Compute alpha[k]
        alpha_k = (gradf_k.T @ gradf_k) / (d_k.T @ (delta2_f(x[:, k]) * d_k)) 
        
        # Our Q matrix is diagonal matrix with shape(10000, 10000). Such matrix requires a lot of storage. 
        # For this reason I use element-wise multiplication which is fine for our solution.

        # Compute the new value for xk
        x[:, k+1] = x[:, k] + alpha_k * p_k

        # Compute the new value for gradfk
        gradf_k1 = delta_f(x[:, k+1])
        
        # Compute the new value for betak
        beta_k1 = (gradf_k1.T @ (gradf_k1 - gradf_k)) / (gradf_k.T @ gradf_k) 
        
        # Compute the new value for pk
        p_k = -gradf_k1 + beta_k1 * p_k
        
        # Compute the gradient of f in xk
        delta_fk_norm = np.linalg.norm(delta_f(x[:, k+1]))
        
        # Increase the step by one
        k += 1

    # Compute f(xk)
    fk = f(x[:, k-1]).sum()

    # "Cut" xseq to the correct size
    x = x[:, :k]
    print(x[:, k-1])
    return x, k-1, fk

In [30]:
x_seq_pr, k_pr, fk_pr = pr(x0, kmax, tol)

[-0.6823278 -0.6823278 -0.6823278 ... -0.6823278 -0.6823278 -0.6823278]


In [27]:
x_seq_pr

array([[ 0.87672636,  0.83310573,  0.75480019, ..., -0.68834753,
        -0.68234025, -0.6823278 ],
       [ 0.09457948,  0.07584555,  0.04053627, ..., -0.68375106,
        -0.68229948, -0.6823278 ],
       [ 0.52191602,  0.49345692,  0.44070077, ..., -0.68572819,
        -0.68232016, -0.6823278 ],
       ...,
       [ 2.09480059,  1.88466564,  1.59153962, ..., -0.68959058,
        -0.6823499 , -0.6823278 ],
       [ 1.48428634,  1.38587607,  1.22541423, ..., -0.68943525,
        -0.68234814, -0.6823278 ],
       [ 4.01296899,  2.82203101,  2.31624996, ..., -0.68965958,
        -0.68235109, -0.6823278 ]])

In [28]:
k_pr

9

In [138]:
def delta_f(x, method = 'exact', k = 10):
    if method == 'exact':
        return np.power(x, 3) + x + 1
    elif method =='forward':
        h = np.zeros(len(x))
        for i in range(len(x)):
            e_i = np.zeros(len(x))
            e_i[i] = 1
            h[i] = 10**(-k) * np.linalg.norm(x) * e_i[i]
        return (f0(x+h) - f0(x)) / h   
    elif method =='backward':
        h = np.zeros(len(x))
        for i in range(len(x)):
            e_i = np.zeros(len(x))
            e_i[i] = 1
            h[i] = 10**(-k) * np.linalg.norm(x) * e_i[i]
        return (f0(x) - f0(x-h)) / h
    elif method =='centered':
        h = np.zeros(len(x))
        for i in range(len(x)):
            e_i = np.zeros(len(x))
            e_i[i] = 1
            h[i] = 10**(-k) * np.linalg.norm(x) * e_i[i]
        return (f0(x+h) - f0(x-h)) / (2*h)   

In [136]:
delta_f(v)

array([  11,  131, 1011])

In [147]:
for k in range(2,4,2):
    print(k, delta_f(v,method='centered', k=k))

2 [  11.0258  131.0645 1011.129 ]


In [49]:
a = np.zeros(5)

In [51]:
a[2] = 1

In [52]:
a

array([0., 0., 1., 0., 0.])

In [None]:
def pr_old(x, kmax, tollgrad):
    x = np.zeros((len(x0), kmax))
    x[:, 0] = x0
    
    gradf = np.zeros((len(x0), kmax))
    gradf[:, 0] = delta_f(x0)
        
    p = np.zeros((len(x0), kmax))
    p[:, 0] = -delta_f(x0)
    
    d = np.zeros((len(x0), kmax))
    alpha = np.zeros(kmax) 
    beta = np.zeros(kmax)

    delta_fk_norm = np.linalg.norm(delta_f(x0))
        
    k = 0
    while(k < kmax-1 and delta_fk_norm >= tollgrad):
        
        # Compute dk
        d[:, k] = -gradf[:, k]
        
        # Compute alpha[k]
#         alpha[k] = (gradf[:, k].T @ gradf[:, k]) / (d[:, k].T @ np.diag(delta2_f(x[:, k])) @ d[:, k]) 
        alpha[k] = (gradf[:, k].T @ gradf[:, k]) / (d[:, k].T @ (delta2_f(x[:, k]) * d[:, k])) 
        # Our Q matrix is diagonal matrix with shape(10000, 10000). Such matrix requires a lot of storage. 
        # For this reason I use element-wise multiplication which is fine for our solution.

        # Compute the new value for xk
        x[:, k+1] = x[:, k] + alpha[k] * p[:, k]

        # Compute the new value for gradfk
        gradf[:, k+1] = delta_f(x[:, k+1])
        
        # Compute the new value for betak
        beta[k+1] = (gradf[:, k+1].T @ (gradf[:, k+1] - gradf[:, k])) / (gradf[:, k].T @ gradf[:, k]) 
        
        # Compute the new value for pk
        p[:, k+1] = -gradf[:, k+1] + beta[k+1] * p[:, k]
        
        # Compute the gradient of f in xk
        delta_fk_norm = np.linalg.norm(delta_f(x[:, k+1]))
        
        # Increase the step by one
        k += 1

    # Compute f(xk)
    fk = f(x[:, k-1])

    # "Cut" xseq to the correct size
    x = x[:, :k]
    print(alpha[k-1])
    return x, k-1, fk

In [None]:
def fr_copy(x, kmax, tollgrad):
    x = np.zeros((len(x0), kmax))
    x[:, 0] = x0
    
    gradf = np.zeros((len(x0), kmax))
    gradf[:, 0] = delta_f(x0)
        
    p = np.zeros((len(x0), kmax))
    p[:, 0] = -delta_f(x0)
    
    d = np.zeros((len(x0), kmax))
    alpha = np.zeros(kmax) 
    beta = np.zeros(kmax)

    delta_fk_norm = np.linalg.norm(delta_f(x0))
        
    k = 0
    while(k < kmax-1 and delta_fk_norm >= tollgrad):
        
        # Compute dk
        d[:, k] = -gradf[:, k]
        
        # Compute alpha[k]
        alpha[k] = (gradf[:, k].T @ gradf[:, k]) / (d[:, k].T @ (delta2_f(x[:, k]) * d[:, k])) 

        # Compute the new value for xk
        x[:, k+1] = x[:, k] + alpha[k] * p[:, k]

        # Compute the new value for gradfk
        gradf[:, k+1] = delta_f(x[:, k+1])
        
        # Compute the new value for betak
        beta[k+1] = (gradf[:, k+1].T @ gradf[:, k+1]) / (gradf[:, k].T @ gradf[:, k]) 
        
        # Compute the new value for pk
        p[:, k+1] = -gradf[:, k+1] + beta[k+1] * p[:, k]
        
        # Compute the gradient of f in xk
        delta_fk_norm = np.linalg.norm(delta_f(x[:, k+1]))
        
        # Increase the step by one
        k += 1

    # Compute f(xk)
    fk = f(x[:, k-1])

    # "Cut" xseq to the correct size
    x = x[:, :k]
    print(alpha[k-1])
    return x, k-1, fk