In [None]:
import numpy as np

The Hessian matrix is given by \
\
$\begin{bmatrix} 4/(x_1^2+4)^{(3/2)} & 0 \\ 0 & 4/(x_2^2 +4)^{(3/2)}\end{bmatrix}$

In [None]:
def evalh(x):
  """Return the 2x2 Hessian of f(x) = sqrt(x_1^2 + 4) + sqrt(x_2^2 + 4) at x.

  The Hessian is diagonal; its i-th diagonal entry is 4 / (x_i^2 + 4)^(3/2).

  x must be a numpy array of length 2.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  h11 = 4/(x[0]**2 + 4)**1.5
  h22 = 4/(x[1]**2 + 4)**1.5
  return np.array([[h11, 0], [0, h22]])

In [None]:
def find_condition_number(A):
  """Return the ratio of the largest to the smallest eigenvalue of the square matrix A."""
  assert type(A) is np.ndarray
  assert A.shape[0] == A.shape[1]
  # Only the eigenvalues are needed; the eigenvectors are discarded.
  eigenvalues, _ = np.linalg.eig(A)
  largest = max(eigenvalues)
  smallest = min(eigenvalues)
  return largest/smallest

In [None]:
def evalf(x):
  """Evaluate the objective f(x) = sqrt(x_1^2 + 4) + sqrt(x_2^2 + 4).

  x must be a numpy array of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  first_term = (x[0]**2+4)**0.5
  second_term = (x[1]**2+4)**0.5
  return first_term + second_term

In [None]:
def evalg(x):
  """Return the gradient of f at x, with components x_i / sqrt(x_i^2 + 4).

  x must be a numpy array of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  x1, x2 = x[0], x[1]
  return np.array([x1/(x1**2+4)**0.5, x2/(x2**2+4)**0.5])

In [None]:
def compute_D_k(x):
  """Return the diagonal scaling matrix built from the Hessian at x.

  Each diagonal entry is the reciprocal of the matching diagonal entry of
  evalh(x); the off-diagonal entries are zero.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  hessian = evalh(x)
  inv_h11 = 1/hessian[0][0]
  inv_h22 = 1/hessian[1][1]
  return np.array([[inv_h11, 0], [0, inv_h22]])

In [None]:
def compute_D_k_1(x):
  """Return the inverse of the Hessian at x.

  Raises ValueError when the determinant of the Hessian evaluates to exactly 0.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  hessian = evalh(x)
  if np.linalg.det(hessian) == 0:
    raise ValueError('Determinant is 0 ')
  return np.linalg.inv(hessian)

In [None]:
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma):
  """Backtracking line search along the steepest-descent direction -gradf.

  Starting from alpha_start, the step length is multiplied by rho until
      f(x + alpha*p) <= f(x) + gamma*alpha*<gradf, p>,   with p = -gradf.

  Parameters:
    x           -- current point, numpy array of length 2
    gradf       -- gradient of f at x, numpy array of length 2
    alpha_start -- initial trial step length (float, >= 0)
    rho         -- shrink factor applied to the step (float, >= 0); it should
                   be below 1, otherwise the step never shrinks
    gamma       -- sufficient-decrease constant (float, >= 0)

  Returns the accepted step length.
  """
  # The original check tested len(gradf) here, leaving the length of x unvalidated.
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.

  alpha = alpha_start
  p_k = -gradf
  # These two quantities do not change inside the loop, so compute them once.
  f_x = evalf(x)
  directional_derivative = np.dot(gradf, p_k)
  while evalf(x + alpha*p_k) > (f_x + gamma*alpha*directional_derivative):
    alpha = rho*alpha
  return alpha


In [None]:
def compute_steplength_backtracking_scaled_direction(x, gradf, direction, alpha_start, rho, gamma):
  """Backtracking line search along a caller-supplied search direction.

  Starting from alpha_start, the step length is multiplied by rho until
      f(x + alpha*direction) <= f(x) + gamma*alpha*<gradf, direction>.

  Parameters:
    x           -- current point, numpy array of length 2
    gradf       -- gradient of f at x, numpy array of length 2
    direction   -- search direction, numpy array of length 2
    alpha_start -- initial trial step length (float, >= 0)
    rho         -- shrink factor applied to the step (float, >= 0); it should
                   be below 1, otherwise the step never shrinks
    gamma       -- sufficient-decrease constant (float, >= 0)

  Returns the accepted step length.
  """
  # The original check tested len(gradf) here, leaving the length of x unvalidated.
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2
  assert type(direction) is np.ndarray and len(direction) == 2
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.

  alpha = alpha_start
  # These two quantities do not change inside the loop, so compute them once.
  f_x = evalf(x)
  directional_derivative = np.dot(gradf, direction)
  while evalf(x + alpha*direction) > (f_x + (gamma*alpha*directional_derivative)):
    alpha = rho*alpha
  return alpha

In [None]:
# Identifiers for the step-length strategy passed as `line_search_type` to the minimizers.
BACKTRACKING_LINE_SEARCH = 1  # backtracking, accepted by find_minimizer_gd
BACKTRACKING_LINE_SEARCH_SCALING = 2  # backtracking, accepted by find_minimizer_gdscaling
BACKTRACKING_LINE_SEARCH_SCALED = 3  # backtracking, accepted by find_minimizer_gdscaling_non_diagonal
CONSTANT_STEPLENGTH = 4  # fixed step length of 1.0, accepted by all three minimizers

In [None]:
def find_minimizer_gd(start_x, tol, line_search_type,*args):
  """Minimize f by gradient descent (no scaling) starting from start_x.

  Parameters:
    start_x          -- starting point, numpy array of length 2
    tol              -- iterate while the gradient norm exceeds this float (>= 0)
    line_search_type -- BACKTRACKING_LINE_SEARCH or CONSTANT_STEPLENGTH
    *args            -- for BACKTRACKING_LINE_SEARCH: alpha_start, rho, gamma

  Returns (x, f(x), number of iterations, list of Hessian condition numbers
  recorded at each iterate before the step was taken).

  Raises ValueError for an unknown line_search_type (once the loop is entered).
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0

  use_backtracking = (line_search_type == BACKTRACKING_LINE_SEARCH)
  if use_backtracking:
    # Line-search parameters are only read when backtracking was requested.
    alpha_start, rho, gamma = args[0], args[1], args[2]

  iterate = start_x
  gradient = evalg(iterate)
  condition_number = []
  k = 0

  # Keep stepping until the gradient norm drops to tol or below.
  while np.linalg.norm(gradient) > tol:
    condition_number.append(find_condition_number(evalh(iterate)))

    if use_backtracking:
      step_length = compute_steplength_backtracking(iterate, gradient, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEPLENGTH:
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    # Steepest-descent update: x <- x - step_length * grad f(x)
    iterate = iterate - step_length*gradient
    k += 1
    gradient = evalg(iterate)

  return iterate, evalf(iterate), k, condition_number


In [None]:
#complete the code for gradient descent with scaling to find the minimizer

def find_minimizer_gdscaling(start_x, tol, line_search_type,*args):
  """Minimize f by gradient descent with diagonal scaling, starting from start_x.

  At every iterate the scaling matrix D^k = compute_D_k(x) is rebuilt and the
  step is taken along the scaled direction -D^k * grad f(x).

  Parameters:
    start_x          -- starting point, numpy array of length 2
    tol              -- iterate while the gradient norm exceeds this float (>= 0)
    line_search_type -- BACKTRACKING_LINE_SEARCH_SCALING or CONSTANT_STEPLENGTH
    *args            -- for BACKTRACKING_LINE_SEARCH_SCALING: alpha_start, rho, gamma

  Returns (x, f(x), number of iterations).

  Raises ValueError for an unknown line_search_type (once the loop is entered).
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0
  x = start_x
  g_x = evalg(x)

  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH_SCALING):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    # Rebuild the scaling matrix at the current iterate. It used to be computed
    # once before the loop, so every step reused the matrix from start_x.
    d_k = compute_D_k(x)
    direction = -np.matmul(d_k,g_x)

    if line_search_type == BACKTRACKING_LINE_SEARCH_SCALING:
      step_length = compute_steplength_backtracking_scaled_direction(x,g_x,direction,alpha_start,rho,gamma)
    elif line_search_type == CONSTANT_STEPLENGTH:
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    # Move along the same direction the line search was run on:
    # x <- x + step_length * direction = x - step_length * D^k * grad f(x)
    x = x + step_length*direction
    k += 1 #increment iteration
    g_x = evalg(x) #compute gradient at new point

  return x ,evalf(x),k


In [None]:
import scipy
from scipy.linalg import sqrtm

In [None]:
def find_minimizer_gdscaling_non_diagonal(start_x, tol, line_search_type, *args):
  """Minimize f with steps scaled by the inverse Hessian, starting from start_x.

  At each iterate D^k = compute_D_k_1(x) (the inverse Hessian) is computed and
  the update x <- x - step_length * D^k * grad f(x) is applied. One progress
  line is printed per iteration.

  Parameters:
    start_x          -- starting point, numpy array of length 2
    tol              -- iterate while the gradient norm exceeds this float (>= 0)
    line_search_type -- BACKTRACKING_LINE_SEARCH_SCALED or CONSTANT_STEPLENGTH
    *args            -- for BACKTRACKING_LINE_SEARCH_SCALED: alpha_start, rho, gamma

  Returns (x, f(x), number of iterations, list of condition numbers of
  sqrt(D^k) * H * sqrt(D^k)). The list is only filled in the backtracking case.

  Raises ValueError for an unknown line_search_type (once the loop is entered);
  compute_D_k_1 raises ValueError when the Hessian determinant is 0.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0

  backtracking = (line_search_type == BACKTRACKING_LINE_SEARCH_SCALED)
  if backtracking:
    # Line-search parameters are only read when backtracking was requested.
    alpha_start, rho, gamma = args[0], args[1], args[2]

  point = start_x
  grad = evalg(point)
  condition_number_scaling = []
  k = 0

  # Keep stepping until the gradient norm drops to tol or below.
  while np.linalg.norm(grad) > tol:
    d_k = compute_D_k_1(point)

    if backtracking:
      direction = np.matmul(-d_k, grad)
      # Condition number of the scaled Hessian sqrt(D) * H * sqrt(D).
      root_d_k = scipy.linalg.sqrtm(d_k)
      scaled_hessian = np.matmul(np.matmul(root_d_k, evalh(point)), root_d_k)
      condition_number_scaling.append(find_condition_number(scaled_hessian))
      step_length = compute_steplength_backtracking_scaled_direction(point, grad, direction, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEPLENGTH:
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    # x <- x - step_length * D^k * grad f(x)
    point = point - step_length*np.matmul(d_k, grad)
    k += 1
    grad = evalg(point)
    print('iter:',k, ' x:', point, ' f(x):', evalf(point), ' grad at x:', grad, ' gradient norm:', np.linalg.norm(grad))

  return point, evalf(point), k, condition_number_scaling

$ \huge{2.}$ \
\
The code is implemented below:

In [None]:
# Parameters shared by the experiments in this notebook.
tol = 1e-9  # the minimizers iterate while the gradient norm exceeds this value
alpha = 1.0  # initial trial step length handed to the backtracking line search
rho = 0.5  # factor by which backtracking shrinks the step length
gamma = 0.5  # sufficient-decrease constant in the backtracking condition
start = np.array([2.0,2.0])  # starting point

In [None]:
# Inverse-Hessian-scaled steps (Newton's method) with the constant step length 1.0.
# NOTE: in the recorded run the iterates alternated between (2, 2) and (-2, -2)
# and the cell had to be interrupted manually.
x,f_x,no_of_iterations,condition_no = find_minimizer_gdscaling_non_diagonal(start,tol,CONSTANT_STEPLENGTH)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
iter: 7032  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 7033  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 7034  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 7035  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 7036  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 7037  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 7038  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 7039  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  g

KeyboardInterrupt: ignored

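We see that the iteration does not stop: the iterates simply oscillate between the two points $(2, 2)$ and $(-2, -2)$. The cause is the fixed unit steplength. For this function the Newton update with steplength 1 is $x_i \leftarrow x_i - \frac{x_i (x_i^2+4)}{4} = -\frac{x_i^3}{4}$, which maps $2 \mapsto -2 \mapsto 2$, so starting from $(2, 2)$ the method cycles forever without decreasing the function value.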

In [None]:
# Same method and starting point, but with the step length chosen by backtracking line search.
x,f_x,no_of_iterations,condition_no = find_minimizer_gdscaling_non_diagonal(start,tol,BACKTRACKING_LINE_SEARCH_SCALED,alpha,rho,gamma)


iter: 1  x: [0. 0.]  f(x): 4.0  grad at x: [0. 0.]  gradient norm: 0.0


In [None]:
# Report the result of the most recent run (x, f_x, no_of_iterations).
print("Newton’s method (with backtracking line search)")
print("Minimizer : ",x)
print("Function values : ",f_x)
print("Number of iteration : ",no_of_iterations)

Newton’s method (with backtracking line search)
Minimizer :  [0. 0.]
Function values :  4.0
Number of iteration :  1


**Observations:** We can see that with backtracking line search, we reach the minimizer in a single iteration. However, in Newton's method with constant steplength, we keep oscillating between two values. \
We may conclude from this that Newton's method with backtracking is a better approach than the one which uses a constant steplength.

$ \huge{3.}$

In [None]:
# Gradient descent without scaling, step length chosen by backtracking line search.
x,f_x,no_of_iterations,condition_no = find_minimizer_gd(start,tol,BACKTRACKING_LINE_SEARCH,alpha,rho,gamma)

In [None]:
# Report the result of the preceding find_minimizer_gd run. That function is
# plain gradient descent, so the heading names it as such rather than as
# Newton's method.
print("Gradient descent without scaling, step length by backtracking")
print("Minimizer : ",x)
print("Function values : ",f_x)
print("Number of iteration : ",no_of_iterations)


Newtons method without scaling and step length by backtracking
Minimizer :  [7.62525638e-10 7.62525638e-10]
Function values :  4.0
Number of iteration :  32


**Newton's Method with backtracking line search:**

Optimizer:  (0, 0) \
Minimum Value:  4.0 \
Number of iterations:  1

**Gradient Descent Algorithm Backtracking Line Search (Without Scaling)**

Optimizer (7.62525638e-10, 7.62525638e-10)
Minimum value 4.0
number of iterations 32

**Observations:** \

With Newton's method with backtracking line search, we reach the minimizer in a single iteration, whereas gradient descent with backtracking line search without scaling took 32 iterations to reach the minimizer (approximately). Both methods give the same minimum value (4). \
However, with Newton's method with constant steplength, the iterates kept oscillating between two points, so we cannot make any further comments about this method. We can therefore say that Newton's method with backtracking line search is a better approach compared to the other two algorithms. \


$ \huge{4.}$

In [None]:
# Repeat the constant-step-length experiment from a new starting point.
start = np.array([8.0,8.0])
# alpha, rho and gamma are passed here, but the function only reads them for
# BACKTRACKING_LINE_SEARCH_SCALED, so they have no effect on this run.
x,f_x,no_of_iterations,condition_no = find_minimizer_gdscaling_non_diagonal(start,tol,CONSTANT_STEPLENGTH,alpha,rho,gamma)

iter: 1  x: [-128. -128.]  f(x): 256.03124809288414  grad at x: [-0.99987795 -0.99987795]  gradient norm: 1.414040960485301
iter: 2  x: [524288. 524288.]  f(x): 1048576.0000076294  grad at x: [1. 1.]  gradient norm: 1.4142135623628054
iter: 3  x: [-3.6028797e+16 -3.6028797e+16]  f(x): 7.205759403792794e+16  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951
iter: 4  x: [1.16920131e+49 1.16920131e+49]  f(x): 2.3384026197294447e+49  grad at x: [1. 1.]  gradient norm: 1.4142135623730951
iter: 5  x: [-3.99583814e+146 -3.99583814e+146]  f(x): 7.99167628880894e+146  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951


  return np.array([[4/(x[0]**2 + 4)**1.5,0] , [0,4/(x[1]**2 + 4)**1.5]])


ValueError: ignored

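$D^{k}$ cannot be computed at these points: with unit steplength each step maps $x_i$ to $-x_i^3/4$, so from $(8, 8)$ the iterates diverge. The Hessian entries $4/(x_i^2+4)^{3/2}$ then evaluate to zero in floating-point arithmetic, and the Hessian becomes numerically singular.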

In [None]:
# Same starting point, now with the step length chosen by backtracking line search.
tol = 1e-9
x,f_x,no_of_iterations,condition_no = find_minimizer_gdscaling_non_diagonal(start,tol,BACKTRACKING_LINE_SEARCH_SCALED,alpha,rho,gamma)


iter: 1  x: [-0.5 -0.5]  f(x): 4.123105625617661  grad at x: [-0.24253563 -0.24253563]  gradient norm: 0.3429971702850177
iter: 2  x: [-0.234375 -0.234375]  f(x): 4.027372165879384  grad at x: [-0.11639103 -0.11639103]  gradient norm: 0.16460177506779788
iter: 3  x: [-0.11557817 -0.11557817]  f(x): 4.006673590120265  grad at x: [-0.05769283 -0.05769283]  gradient norm: 0.08158998647858538
iter: 4  x: [-0.0575961 -0.0575961]  f(x): 4.001658311393147  grad at x: [-0.02878611 -0.02878611]  gradient norm: 0.04070971277400298
iter: 5  x: [-0.02877417 -0.02877417]  f(x): 4.000413954866833  grad at x: [-0.01438559 -0.01438559]  gradient norm: 0.020344301811841013
iter: 6  x: [-0.0143841 -0.0143841]  f(x): 4.000103449894273  grad at x: [-0.00719187 -0.00719187]  gradient norm: 0.010170834833290178
iter: 7  x: [-0.00719168 -0.00719168]  f(x): 4.000025860048939  grad at x: [-0.00359582 -0.00359582]  gradient norm: 0.005085253008744151
iter: 8  x: [-0.00359579 -0.00359579]  f(x): 4.00000646486072

In [None]:
# Report the result of the most recent run (x, f_x, no_of_iterations).
print("Newton’s method (with backtracking line search)")
print("Minimizer : ",x)
print("Function values : ",f_x)
print("Number of iteration : ",no_of_iterations)

Newton’s method (with backtracking line search)
Minimizer :  [2.83764947e-12 2.83764947e-12]
Function values :  4.0
Number of iteration :  13


**Newton's Method with backtracking line search:**

Optimizer: (2.83764947e-12, 2.83764947e-12)

Minimum Value: 4.0

Number of iterations: 13

**Observations:** With constant steplength (the earlier run from $(8, 8)$), $D^{k}$ could not be computed because the iterates diverged and the Hessian became numerically singular.

Newton's method with backtracking line search gives the minimizer (approximately) in 13 iterations. Further, the minimum value found is the same as the true minimum value (4). Therefore this method is effective in this case as well.

$ \huge{5.}$

In [None]:
# Gradient descent without scaling, step length chosen by backtracking line search.
x,f_x,no_of_iterations,condition_no = find_minimizer_gd(start,tol,BACKTRACKING_LINE_SEARCH,alpha,rho,gamma)

In [None]:
# Report the result of the preceding find_minimizer_gd run. That function is
# plain gradient descent, so the heading names it as such rather than as
# Newton's method.
print("Gradient descent without scaling, step length by backtracking")
print("Minimizer : ",x)
print("Function values : ",f_x)
print("Number of iteration : ",no_of_iterations)


Newtons method without scaling and step length by backtracking
Minimizer :  [8.3177047e-10 8.3177047e-10]
Function values :  4.0
Number of iteration :  39


**Observations:** \
\
**Newton's Method with Backtracking Line Search:**

Optimizer: (2.83764947e-12, 2.83764947e-12)

Function Value: 4.0

Number of iterations: 13 \

**Gradient Descent Algorithm Backtracking Line Search (Without scaling):**

Optimizer (8.3177047e-10, 8.3177047e-10)\
\
Function value 4.0\
\
Number of iterations 39\
\
**Observations:** With Newton's method with backtracking line search, we approach the actual minimizer in fewer iterations than gradient descent with backtracking line search without scaling, which took more iterations and still ended farther from the actual minimizer than the point obtained by Newton's method with backtracking line search. The minimum value is the same for both approaches.

Newton's method with constant step-length is unable to provide a solution from this starting point: the iterates diverge and the computed Hessian becomes numerically singular.

All in all, we can say from the results above that Newton's method with backtracking line search works well and gives better and faster results compared to the other approaches.