In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

$f(x) = 100x_1^2 + 0.001x_2^4$

We will use Newton's method to solve the problem $\min_x f(x)$.

# General Code for the given problem

In [None]:
def evalf(x):
  """Evaluate the objective f(x) = 100*x_1^2 + 0.001*x_2^4.

  x must be a numpy array of length 2; anything else trips the assertion.
  """
  assert type(x) is np.ndarray and len(x) == 2
  first, second = x[0], x[1]
  return 100*first**2 + 0.001*second**4
  
def evalg(x):
  """Return the gradient of f at x as a length-2 array: [200*x_1, 0.004*x_2^3].

  x must be a numpy array of length 2; anything else trips the assertion.
  """
  assert type(x) is np.ndarray and len(x) == 2
  d_first = 200*x[0]
  d_second = 0.004*x[1]**3
  return np.array([d_first, d_second])

def evalh(x):
  """Return the 2x2 Hessian of f at x, i.e. diag(200, 0.012*x_2^2).

  x must be a numpy array of length 2; anything else trips an assertion.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  curvature_second = 0.012*x[1]**2
  top_row = [200, 0]
  bottom_row = [0, curvature_second]
  return np.array([top_row, bottom_row])


# Compute the scaling matrix D_k used by the scaled descent iteration.
# With diagonal_only=False, D_k is the inverse of the Hessian (the pseudo-inverse when the
# determinant of the Hessian is zero). With diagonal_only=True, D_k is the diagonal matrix
# holding the reciprocals of the Hessian's diagonal entries.
def compute_D_k(x, diagonal_only = False):
  """Return the 2x2 scaling matrix D_k built from the Hessian at x.

  Parameters
  ----------
  x : np.ndarray of length 2
      Point at which the Hessian is evaluated (via evalh).
  diagonal_only : bool, default False
      False -> inverse of the full Hessian, or its Moore-Penrose pseudo-inverse
      (with a printed notice) when the determinant is exactly zero.
      True  -> diagonal matrix of reciprocals of the Hessian's diagonal entries;
      a zero diagonal entry maps to zero (the pseudo-inverse of a diagonal
      matrix) rather than dividing by zero.

  Returns
  -------
  np.ndarray of shape (2, 2)
  """
  assert type(x) is np.ndarray
  assert len(x) == 2

  # Evaluate the Hessian once and reuse it in every branch.
  hessian = evalh(x)

  if diagonal_only:
    diagonal = np.diag(hessian)
    reciprocals = np.zeros(diagonal.shape, dtype=float)
    nonzero = diagonal != 0
    # Only invert the non-zero entries so a singular Hessian does not produce inf/nan.
    reciprocals[nonzero] = 1.0 / diagonal[nonzero]
    return np.diag(reciprocals)

  if np.linalg.det(hessian) == 0:
    print('The determinant of the hessian is zero. Hence we find Pseudo Inverse')
    return np.linalg.pinv(hessian)

  return np.linalg.inv(hessian)


def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma):
  """Backtracking line search along the steepest-descent direction p = -gradf.

  Starting from alpha_start, the step length is multiplied by rho until
  f(x + alpha*p) <= f(x) + gamma * alpha * gradf^T p holds.

  Parameters
  ----------
  x : np.ndarray of length 2
      Current iterate.
  gradf : np.ndarray of length 2
      Gradient of f at x.
  alpha_start : float, >= 0
      Initial trial step length.
  rho : float, strictly between 0 and 1
      Shrink factor applied after each rejected trial step.
  gamma : float, >= 0
      Sufficient-decrease parameter.

  Returns
  -------
  float
      The first step length in the sequence alpha_start * rho**j that satisfies
      the sufficient-decrease condition.
  """
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2
  assert type(alpha_start) is float and alpha_start>=0.
  # rho >= 1 never shrinks the step (the loop below could not terminate) and
  # rho == 0 collapses the step to zero after one rejection, so require 0 < rho < 1.
  assert type(rho) is float and 0. < rho < 1.
  assert type(gamma) is float and gamma>=0.

  alpha = alpha_start
  p = -gradf

  # Both quantities are independent of alpha, so compute them once.
  f_x = evalf(x)
  slope = np.dot(gradf.T, p)

  while evalf(x + alpha*p) > f_x + gamma * alpha * slope:
    alpha = alpha*rho

  return alpha


def compute_steplength_backtracking_scaled_direction(x, gradf, direction, alpha_start, rho, gamma):
  """Backtracking line search along the scaled direction direction @ (-gradf).

  Starting from alpha_start, the step length is multiplied by rho until
  f(x + alpha*D*p) <= f(x) + gamma * alpha * gradf^T D p holds, where
  D = direction and p = -gradf.

  Parameters
  ----------
  x : np.ndarray of length 2
      Current iterate.
  gradf : np.ndarray of length 2
      Gradient of f at x.
  direction : np.ndarray with 2 rows
      Scaling matrix D_k applied to the negative gradient.
  alpha_start : float, >= 0
      Initial trial step length.
  rho : float, strictly between 0 and 1
      Shrink factor applied after each rejected trial step.
  gamma : float, >= 0
      Sufficient-decrease parameter.

  Returns
  -------
  float
      The first step length in the sequence alpha_start * rho**j that satisfies
      the sufficient-decrease condition.
  """
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2
  assert type(direction) is np.ndarray and len(direction) == 2
  assert type(alpha_start) is float and alpha_start>=0.
  # rho >= 1 never shrinks the step (the loop below could not terminate) and
  # rho == 0 collapses the step to zero after one rejection, so require 0 < rho < 1.
  assert type(rho) is float and 0. < rho < 1.
  assert type(gamma) is float and gamma>=0.

  alpha = alpha_start
  p = -gradf

  # These do not depend on alpha, so compute them once instead of on every trial.
  scaled_step = np.matmul(direction, p)
  f_x = evalf(x)
  slope = np.matmul(gradf.T, scaled_step)

  while evalf(x + alpha*scaled_step) > f_x + gamma * alpha * slope:
    alpha = alpha*rho

  return alpha
  


# Identifiers for the step-length strategies accepted as `line_search_type`
EXACT_LINE_SEARCH, BACKTRACKING_LINE_SEARCH, CONSTANT_STEP_LENGTH = 1, 2, 3
  
  
# Gradient descent (no scaling) to find the minimizer of f
def find_minimizer_gd(start_x, tol, line_search_type, *args, max_iter=None):
  """Minimize f by gradient descent, iterating until the gradient norm is <= tol.

  Parameters
  ----------
  start_x : np.ndarray of length 2
      Starting point.
  tol : float, >= 0
      Stopping tolerance on the Euclidean norm of the gradient.
  line_search_type : int
      BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH. EXACT_LINE_SEARCH is
      recognised but not implemented.
  *args
      BACKTRACKING_LINE_SEARCH: alpha_start, rho, gamma (in that order).
      CONSTANT_STEP_LENGTH: optionally the step length as args[0]; when it is
      omitted the user is prompted once, on the first iteration.
  max_iter : int or None, keyword-only, default None
      Optional cap on the number of updates. None means no cap.

  Returns
  -------
  (x, k) : tuple
      Final iterate and number of updates performed. If max_iter is given and
      k == max_iter, the tolerance may not have been reached.

  Raises
  ------
  NotImplementedError
      If EXACT_LINE_SEARCH is requested and an iteration is needed.
  ValueError
      If line_search_type is not one of the known identifiers and an iteration
      is needed.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0
  assert max_iter is None or max_iter >= 0

  x = start_x
  g_x = evalg(x)
  k = 0

  if (line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  # Resolve the constant step once: from args when supplied, otherwise lazily from a
  # single prompt, instead of asking the user again on every iteration.
  constant_step = None
  if line_search_type == CONSTANT_STEP_LENGTH and len(args) > 0:
    constant_step = float(args[0])

  while np.linalg.norm(g_x) > tol:

    if max_iter is not None and k >= max_iter:
      break

    if line_search_type == EXACT_LINE_SEARCH:
      # The previous code called a helper and a matrix A that are not defined in this
      # notebook; fail with an explicit message instead of a NameError.
      raise NotImplementedError('Exact line search is not implemented for this objective.')

    elif line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking(x,g_x, alpha_start,rho, gamma)

    elif line_search_type == CONSTANT_STEP_LENGTH:
      if constant_step is None:
        constant_step = float(input('Enter a valid value for the constant step length.'))
      step_length = constant_step

    else:
      raise ValueError('Line search type unknown. Please check!')

    # np.subtract returns a new array, so start_x itself is never modified.
    x = np.subtract(x, np.multiply(step_length,g_x))
    k += 1
    g_x = evalg(x)

  return x, k



def find_minimizer_gdscaling(start_x, tol, line_search_type,diagonal_Dk, *args):
  """Minimize f by scaled gradient descent: x <- x - eta * D_k * grad f(x).

  D_k comes from compute_D_k(x, diagonal_Dk). Iteration continues while the
  Euclidean norm of the gradient exceeds tol.

  Parameters
  ----------
  start_x : np.ndarray of length 2
      Starting point.
  tol : float, >= 0
      Stopping tolerance on the gradient norm.
  line_search_type : int
      BACKTRACKING_LINE_SEARCH (eta from the scaled backtracking search) or
      CONSTANT_STEP_LENGTH (eta fixed at 1.0). Any other value raises
      ValueError as soon as an iteration is attempted.
  diagonal_Dk : bool
      Forwarded to compute_D_k as its diagonal_only flag.
  *args
      For BACKTRACKING_LINE_SEARCH: alpha_start, rho, gamma (in that order).
      Ignored for CONSTANT_STEP_LENGTH.

  Returns
  -------
  (x, k) : tuple
      Final iterate and number of updates performed.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0

  iterate = start_x
  grad = evalg(iterate)
  n_updates = 0

  if line_search_type == BACKTRACKING_LINE_SEARCH:
    alpha_start, rho, gamma = args[0], args[1], args[2]

  while np.linalg.norm(grad) > tol:
    # Scaling matrix for the current iterate
    scaling = compute_D_k(iterate, diagonal_Dk)

    if line_search_type == BACKTRACKING_LINE_SEARCH:
      eta = compute_steplength_backtracking_scaled_direction(iterate, grad, scaling, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH:
      eta = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    # A new array is created on every update, so start_x is left untouched.
    iterate = iterate - eta * (scaling @ grad)
    n_updates += 1
    grad = evalg(iterate)

  return iterate, n_updates




# ***Part 3 :***  *Comparing Newton's method with a constant step length $\eta$ against Newton's method where $\eta$ is computed by backtracking line search*

$\eta^k =1.0 \ \ \  \forall \ k = 1,2,3 ... $

$x^0 = (1.0, 1.0)$

$\tau = 10^{-8}$ (the value of `tol` used in the code below and reflected in the recorded outputs)

In [None]:
# Shared experiment settings
start_x = np.array([1.0, 1.0])
tol = 1e-8    # same float as the literal 10e-9 (10 x 10^-9)
alpha = 1.0   # initial trial step for backtracking
rho = 0.5     # backtracking shrink factor
gamma = 0.5   # sufficient-decrease parameter

# Newton's method (full inverse Hessian as D_k) with a constant step and with backtracking
x_constant, k_constant = find_minimizer_gdscaling(start_x, tol, CONSTANT_STEP_LENGTH, False, alpha, rho, gamma)
x_bls_scaling, k_bls_scaling = find_minimizer_gdscaling(start_x, tol, BACKTRACKING_LINE_SEARCH, False, alpha, rho, gamma)


def _print_run_summary(title, x_opt, n_iter):
  """Print the final point, its objective value and the iteration count of one run."""
  print(title)
  print('Optimal X :', x_opt)
  print('Optimal value of f(x) :', evalf(x_opt))
  print('Number of Iterations :', n_iter)


_print_run_summary("Newton's method with Constant eta :-", x_constant, k_constant)

print('\n\n')

_print_run_summary("Newton's method with BLS :-", x_bls_scaling, k_bls_scaling)



Newton's method with Constant eta :-
Optimal X : [0.         0.01156102]
Optimal value of f(x) : 1.7864242338403204e-11
Number of Iterations : 11



Newton's method with BLS :-
Optimal X : [0.         0.01156102]
Optimal value of f(x) : 1.7864242338403204e-11
Number of Iterations : 11


***Remarks :*** *We observe that the final value of $x$, the function value and the number of iterations are the same in both cases, whether the step length $\eta$ is held constant or computed by backtracking line search. This is because, with the inverse Hessian as the scaling matrix, the initial trial step $\eta = 1.0$ already satisfies the sufficient-decrease condition at every iteration, so backtracking line search never shrinks it. Also, the Hessian matrix is already diagonal, so restricting $D^k$ to its diagonal does not change it.*

# ***Part 4 :***  *Comparing the above two methods with BLS(without scaling) and BLS(with scaling with diagonal $D^k$)*

$\eta^k =1.0 \ \ \  \forall \ k = 1,2,3 ... $

$x^0 = (1.0, 1.0)$

$\tau = 10^{-8}$ (the value of `tol` set in the code and reflected in the recorded outputs)

In [None]:
# Gradient descent with backtracking line search and no scaling.
# NOTE: this is very slow; the recorded run needed 48,818,419 iterations.
x_bls, k_bls = find_minimizer_gd(start_x,tol,BACKTRACKING_LINE_SEARCH,alpha, rho, gamma)

# Scaled descent with backtracking where D_k uses only the diagonal of the Hessian.
# The summary table reads x_bls_diag and k_bls_diag, so this call must run.
x_bls_diag, k_bls_diag = find_minimizer_gdscaling(start_x,tol,BACKTRACKING_LINE_SEARCH,True,alpha, rho, gamma)





In [None]:
# One record per method: (label, final iterate, number of iterations)
_runs = [
    ('Constant Step Lenth, eta =1.0 ', x_constant, k_constant),
    ('BLS with scaling', x_bls_scaling, k_bls_scaling),
    ('BLS without Scaling', x_bls, k_bls),
    ('BLS with scaling with Diagonal Dk', x_bls_diag, k_bls_diag),
]

# Build the comparison table in a single constructor call
df = pd.DataFrame({
    'Method': [label for label, _x, _k in _runs],
    'Optimal_X': [x_final for _label, x_final, _k in _runs],
    'Optimal_f(x)': [evalf(x_final) for _label, x_final, _k in _runs],
    'Iterations': [n_iter for _label, _x, n_iter in _runs],
})

df

Unnamed: 0,Method,Optimal_X,Optimal_f(x),Iterations
0,"Constant Step Lenth, eta =1.0","[0.0, 0.011561019943888407]",1.786424e-11,11
1,BLS with scaling,"[0.0, 0.011561019943888407]",1.786424e-11,11
2,BLS without Scaling,"[1.4119627465647396e-11, 0.013385327284182453]",3.210081e-11,48818419
3,BLS with scaling with Diagonal Dk,"[0.0, 0.011561019943888407]",1.786424e-11,11


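***Remarks :*** *We observe that the number of iterations and the final values are the same in all cases except BLS without scaling, which took more than 4.88 crore (about 48.8 million) iterations to satisfy the stopping criterion. The scaled methods stop at $x = (0, 0.011561)$, and BLS without scaling stops at a very close point, $x \approx (1.41 \times 10^{-11}, 0.013385)$. Note that the exact minimizer of $f$ is $(0, 0)$; every method stops short of it because the gradient norm falls below the tolerance $\tau$ first.*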