In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

$f(x) = \sqrt{x_1^2+1} + \sqrt{x_2^2 +1}$

We will use Newton's method to solve the problem $\min_x f(x)$.

# General Code for the given problem

In [4]:
def evalf(x):
  """Evaluate the objective f(x) = sqrt(x_1^2 + 1) + sqrt(x_2^2 + 1).

  Args:
    x: numpy array with exactly two entries (x_1, x_2).

  Returns:
    The value of f at x.

  Raises:
    AssertionError: if x is not a numpy array of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  # Each coordinate contributes the same kind of term, sqrt(x_i^2 + 1).
  return sum((x_i**2 + 1)**0.5 for x_i in x)
  
def evalg(x):
  """Evaluate the gradient of f(x) = sqrt(x_1^2 + 1) + sqrt(x_2^2 + 1).

  Args:
    x: numpy array with exactly two entries (x_1, x_2).

  Returns:
    A numpy array whose i-th entry is x_i / sqrt(x_i^2 + 1).

  Raises:
    AssertionError: if x is not a numpy array of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  # The objective is separable, so every partial derivative has the same form.
  return np.array([x_i / ((x_i**2 + 1)**0.5) for x_i in x])

def evalh(x):
  """Evaluate the Hessian of f(x) = sqrt(x_1^2 + 1) + sqrt(x_2^2 + 1).

  Args:
    x: numpy array with exactly two entries (x_1, x_2).

  Returns:
    A 2x2 numpy array with 1 / (x_i^2 + 1)^1.5 on the diagonal and zeros
    off the diagonal.

  Raises:
    AssertionError: if x is not a numpy array of length 2.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  # Second derivatives of the two separable terms; the mixed partials are zero.
  curvature_1 = 1 / (x[0]**2 + 1)**1.5
  curvature_2 = 1 / (x[1]**2 + 1)**1.5
  return np.array([[curvature_1, 0], [0, curvature_2]])


# Compute the scaling matrix D_k. If the second parameter is True, D_k is the diagonal matrix holding the reciprocals of the diagonal entries of the Hessian H; if it is False, D_k is the full inverse of H (the pseudo-inverse when the determinant of H is zero).
def compute_D_k(x, diagonal_only = False):
  """Return the 2x2 scaling matrix D_k built from the Hessian at x.

  Args:
    x: numpy array of length 2 at which the Hessian is evaluated.
    diagonal_only: when equal to False (the default), D_k is the inverse of
      the Hessian, or its pseudo-inverse when the determinant is exactly
      zero. Otherwise D_k is the diagonal matrix holding the reciprocals of
      the Hessian's diagonal entries.

  Returns:
    A 2x2 numpy array.

  Raises:
    AssertionError: if x is not a numpy array of length 2.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2

  hessian = evalh(x)

  # The flag is tested with `== False` rather than truthiness, so a value such
  # as None selects the diagonal branch.
  if not (diagonal_only == False):
    return np.array([
        [1/(hessian[0][0]), 0],
        [0, 1/(hessian[1][1])]
    ])

  if np.linalg.det(hessian) != 0:
    return np.linalg.inv(hessian)

  # Singular Hessian: fall back to the pseudo-inverse and tell the user.
  print('The determinant of the hessian is zero. Hence we find Pseudo Inverse')
  return np.linalg.pinv(hessian)


def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma): 
  """Backtracking line search along the steepest-descent direction.

  Starting from alpha_start, the step length is repeatedly multiplied by rho
  until the sufficient-decrease condition
      f(x + alpha*p) <= f(x) + gamma * alpha * gradf^T p,   p = -gradf,
  holds.

  Args:
    x: current iterate, numpy array of length 2.
    gradf: gradient of f at x, numpy array of length 2.
    alpha_start: initial trial step length (non-negative float).
    rho: factor applied to alpha after each rejected trial (non-negative float).
    gamma: sufficient-decrease parameter (non-negative float).

  Returns:
    The first trial step length that satisfies the condition.

  Raises:
    AssertionError: if an argument has the wrong type, length or sign.

  Note:
    The loop only ends once the condition holds; with rho >= 1 a rejected
    trial step is never shrunk.
  """
  # The length check must be on x itself (it used to re-check gradf).
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 

  p = -gradf

  # Neither f(x) nor the directional derivative depends on alpha, so they are
  # evaluated once instead of on every trial.
  f_x = evalf(x)
  slope = np.dot(gradf.T, p)

  alpha = alpha_start
  while evalf(x + alpha*p) > f_x + gamma * alpha * slope:
    alpha = alpha*rho

  return alpha


def compute_steplength_backtracking_scaled_direction(x, gradf, direction, alpha_start, rho, gamma):
  """Backtracking line search along the scaled direction -direction @ gradf.

  Starting from alpha_start, the step length is repeatedly multiplied by rho
  until the sufficient-decrease condition
      f(x + alpha*d) <= f(x) + gamma * alpha * gradf^T d,   d = direction @ (-gradf),
  holds.

  Args:
    x: current iterate, numpy array of length 2.
    gradf: gradient of f at x, numpy array of length 2.
    direction: scaling matrix applied to the negative gradient (numpy array
      with two rows).
    alpha_start: initial trial step length (non-negative float).
    rho: factor applied to alpha after each rejected trial (non-negative float).
    gamma: sufficient-decrease parameter (non-negative float).

  Returns:
    The first trial step length that satisfies the condition.

  Raises:
    AssertionError: if an argument has the wrong type, length or sign.

  Note:
    The loop only ends once the condition holds; with rho >= 1 a rejected
    trial step is never shrunk.
  """
  # The length check must be on x itself (it used to re-check gradf).
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(direction) is np.ndarray and len(direction) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 

  p = -gradf

  # The scaled step, f(x) and the directional derivative do not depend on
  # alpha, so they are computed once outside the loop.
  scaled_step = np.matmul(direction, p)
  f_x = evalf(x)
  slope = np.matmul(gradf.T, scaled_step)

  alpha = alpha_start
  while evalf(x + alpha*scaled_step) > f_x + gamma * alpha * slope:
    alpha = alpha*rho

  return alpha
  


# Identifiers for the step-length strategy, passed as `line_search_type`
# to the minimizer routines.
EXACT_LINE_SEARCH, BACKTRACKING_LINE_SEARCH, CONSTANT_STEP_LENGTH = 1, 2, 3
  
  
#complete the code for gradient descent to find the minimizer
def find_minimizer_gd(start_x, tol, line_search_type, *args):
  """Minimize f by gradient descent, iterating until ||grad f(x)|| <= tol.

  Args:
    start_x: starting point, numpy array of length 2.
    tol: non-negative float; the iteration stops once the gradient norm is
      no larger than tol.
    line_search_type: one of BACKTRACKING_LINE_SEARCH, CONSTANT_STEP_LENGTH
      or EXACT_LINE_SEARCH.
    *args: extra parameters for the chosen strategy.
      BACKTRACKING_LINE_SEARCH: alpha_start, rho, gamma.
      CONSTANT_STEP_LENGTH: optionally the step length as args[0]; when it
        is omitted the user is prompted for it once.

  Returns:
    A tuple (x, k): the final iterate and the number of iterations taken.

  Raises:
    AssertionError: if start_x or tol has the wrong type or value.
    NotImplementedError: if EXACT_LINE_SEARCH is requested and an iteration
      is needed; no exact step-length routine is defined in this notebook.
    ValueError: if line_search_type is not recognised and an iteration is
      needed.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  x = start_x
  g_x = evalg(x)
  k = 0

  if (line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2] 

  # Resolved on first use so that the step really is constant: it is taken
  # from args[0] when supplied, otherwise asked for once (not every iteration).
  constant_step = None

  while np.linalg.norm(g_x) > tol:

    if line_search_type == EXACT_LINE_SEARCH:
      # The earlier call to compute_steplength_exact(g_x, A) referred to two
      # names that are not defined here and failed with a NameError.
      raise NotImplementedError('Exact line search is not implemented for this objective.')

    elif line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking(x,g_x, alpha_start,rho, gamma)

    elif line_search_type == CONSTANT_STEP_LENGTH: 
      if constant_step is None:
        if args:
          constant_step = float(args[0])
        else:
          constant_step = float(input('Enter a valid value for the constant step length.'))
      step_length = constant_step

    else:  
      raise ValueError('Line search type unknown. Please check!')

    # Steepest-descent update followed by a fresh gradient evaluation.
    x = np.subtract(x, np.multiply(step_length,g_x)) 
    k += 1 
    g_x = evalg(x)

  return x, k



def find_minimizer_gdscaling(start_x, tol, line_search_type,diagonal_Dk, *args):
  """Minimize f by scaled gradient descent, x <- x - eta * D_k @ grad f(x).

  D_k is obtained from compute_D_k(x, diagonal_Dk) at every iteration, and the
  loop runs until ||grad f(x)|| <= tol.

  Args:
    start_x: starting point, numpy array of length 2.
    tol: non-negative float tolerance on the gradient norm.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    diagonal_Dk: forwarded to compute_D_k as its second argument.
    *args: for BACKTRACKING_LINE_SEARCH, the values alpha_start, rho, gamma.
      They are not read for CONSTANT_STEP_LENGTH, which always uses the
      fixed step length 0.9999.

  Returns:
    A tuple (x, k): the final iterate and the number of iterations taken.

  Raises:
    AssertionError: if start_x or tol has the wrong type or value.
    ValueError: if line_search_type is neither of the two supported values
      and an iteration is needed.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  iterate = start_x
  gradient = evalg(iterate)
  iterations = 0

  use_backtracking = line_search_type == BACKTRACKING_LINE_SEARCH
  if use_backtracking:
    alpha_start, rho, gamma = args[0], args[1], args[2]

  # Step length used for every iteration of the constant-step variant.
  fixed_step = 0.9999

  while np.linalg.norm(gradient) > tol:
    # The scaling matrix is rebuilt from the Hessian at the current iterate.
    scaling = compute_D_k(iterate, diagonal_Dk)

    if use_backtracking:
      eta = compute_steplength_backtracking_scaled_direction(
          iterate, gradient, scaling, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH:
      eta = fixed_step
    else:
      raise ValueError('Line search type unknown. Please check!')

    iterate = iterate - eta * np.matmul(scaling, gradient)
    iterations += 1
    gradient = evalg(iterate)

  return iterate, iterations




# ***Part 2 :***  *Comparing the newton's method for a constant value of $\eta $, with the one where $\eta$ is calculated using Backtracking Line search*

$\eta^k =1.0 \ \ \  \forall \ k = 1,2,3 ... $

$x^0 = (1.0, 1.0)$

$\tau = 10^{-9}$

In [5]:
# Part 2 setup: start at (1, 1) and iterate until ||grad f(x)|| <= tol.
start_x = np.array([1.0, 1.0])
tol = 1e-9  # tau = 10^{-9}; note that the literal 10e-9 would be 1e-8
alpha = 1.0
rho = 0.5
gamma = 0.5

# Newton's method (D_k = inverse Hessian) with a constant step and with
# backtracking line search. alpha, rho and gamma are only read by the
# backtracking variant.
x_constant, k_constant = find_minimizer_gdscaling(start_x,tol,CONSTANT_STEP_LENGTH,False,alpha, rho, gamma)
x_bls_scaling, k_bls_scaling = find_minimizer_gdscaling(start_x,tol, BACKTRACKING_LINE_SEARCH,False,alpha,rho, gamma)

print("Newton's method with Constant eta :-")
print('Optimal X :', x_constant)
print('Optimal value of f(x) :', evalf(x_constant))
print('Number of Iterations :', k_constant)

print('\n\n')

print("Newton's method with BLS :-")
print('Optimal X :', x_bls_scaling)
print('Optimal value of f(x) :', evalf(x_bls_scaling))
print('Number of Iterations :', k_bls_scaling)



Newton's method with Constant eta :-
Optimal X : [2.50433338e-11 2.50433338e-11]
Optimal value of f(x) : 2.0
Number of Iterations : 12



Newton's method with BLS :-
Optimal X : [0. 0.]
Optimal value of f(x) : 2.0
Number of Iterations : 1


***Remarks :*** *We observe that the optimal answer for the problem is found when the constant step length is 0.9999 instead of 1.0, because at 1.0 the solution does not converge: the algorithm overshoots the minimizer and oscillates (from $x^0 = (1, 1)$ the pure Newton update is $x \mapsto -x^3$, so the iterates alternate between $(1, 1)$ and $(-1, -1)$). Hence we report the optimal X, the optimal value of the function, and the number of iterations taken by the algorithm for a step length of 0.9999.*

# ***Part 3 :***  *Comparing the above two methods with BLS(without scaling)*

$\eta^k =1.0 \ \ \  \forall \ k = 1,2,3 ... $

$x^0 = (1.0, 1.0)$

$\tau = 10^{-9}$

In [6]:
# Gradient descent with backtracking line search and no Hessian scaling,
# using start_x, tol, alpha, rho and gamma defined in an earlier cell.
x_bls, k_bls = find_minimizer_gd(
    start_x, tol, BACKTRACKING_LINE_SEARCH, alpha, rho, gamma
)



In [7]:
# Summary table comparing the three runs from (1, 1).
df = pd.DataFrame(columns=['Method', 'Optimal_X', 'Optimal_f(x)', 'Iterations'])

# The constant-step run uses the step length 0.9999 that is fixed inside
# find_minimizer_gdscaling, so the label reports that value rather than 1.0.
df['Method'] = ['Constant Step Length, eta = 0.9999', 'BLS with scaling', 'BLS without Scaling']
df['Optimal_X'] = [x_constant, x_bls_scaling, x_bls]
df['Optimal_f(x)'] = [evalf(x_constant), evalf(x_bls_scaling), evalf(x_bls)]
df['Iterations'] = [k_constant, k_bls_scaling, k_bls]

df

Unnamed: 0,Method,Optimal_X,Optimal_f(x),Iterations
0,"Constant Step Lenth, eta =1.0","[2.50433337718496e-11, 2.50433337718496e-11]",2.0,12
1,BLS with scaling,"[0.0, 0.0]",2.0,1
2,BLS without Scaling,"[2.7899147700188517e-19, 2.7899147700188517e-19]",2.0,4


# ***Part 4 :***  *Comparing the newton's method for a constant value of $\eta $, with the one where $\eta$ is calculated using Backtracking Line search*

$\eta^k =1.0 \ \ \  \forall \ k = 1,2,3 ... $

$x^0 = (10.0, 10.0)$

$\tau = 10^{-9}$

In [8]:
# Part 4 setup: start at (10, 10) and iterate until ||grad f(x)|| <= tol.
start_x = np.array([10.0, 10.0])
tol = 1e-9  # tau = 10^{-9}; note that the literal 10e-9 would be 1e-8
alpha = 1.0
rho = 0.5
gamma = 0.5

# Newton's method (D_k = inverse Hessian) with a constant step and with
# backtracking line search. alpha, rho and gamma are only read by the
# backtracking variant.
x_constant, k_constant = find_minimizer_gdscaling(start_x,tol,CONSTANT_STEP_LENGTH,False,alpha, rho, gamma)
x_bls_scaling, k_bls_scaling = find_minimizer_gdscaling(start_x,tol, BACKTRACKING_LINE_SEARCH,False,alpha,rho, gamma)

print("Newton's method with Constant eta :-")
print('Optimal X :', x_constant)
print('Optimal value of f(x) :', evalf(x_constant))
print('Number of Iterations :', k_constant)

print('\n\n')

print("Newton's method with BLS :-")
print('Optimal X :', x_bls_scaling)
print('Optimal value of f(x) :', evalf(x_bls_scaling))
print('Number of Iterations :', k_bls_scaling)



The determinant of the hessian is zero. Hence we find Pseudo Inverse
Newton's method with Constant eta :-
Optimal X : [-9.8789228e+242 -9.8789228e+242]
Optimal value of f(x) : inf
Number of Iterations : 5



Newton's method with BLS :-
Optimal X : [-9.92761578e-15 -9.92761578e-15]
Optimal value of f(x) : 2.0
Number of Iterations : 17


  import sys
  This is separate from the ipykernel package so we can avoid doing imports until


# ***Part 5 :***  *Comparing the above two methods with BLS(without scaling)*

$\eta^k =1.0 \ \ \  \forall \ k = 1,2,3 ... $

$x^0 = (10.0, 10.0)$

$\tau = 10^{-9}$

In [9]:
# Gradient descent with backtracking line search and no Hessian scaling,
# using start_x, tol, alpha, rho and gamma defined in an earlier cell.
x_bls, k_bls = find_minimizer_gd(
    start_x, tol, BACKTRACKING_LINE_SEARCH, alpha, rho, gamma
)


In [10]:
# Summary table comparing the three runs from (10, 10).
df = pd.DataFrame(columns=['Method', 'Optimal_X', 'Optimal_f(x)', 'Iterations'])

# The constant-step run uses the step length 0.9999 that is fixed inside
# find_minimizer_gdscaling, so the label reports that value rather than 1.0.
df['Method'] = ['Constant Step Length, eta = 0.9999', 'BLS with scaling', 'BLS without Scaling']
df['Optimal_X'] = [x_constant, x_bls_scaling, x_bls]
df['Optimal_f(x)'] = [evalf(x_constant), evalf(x_bls_scaling), evalf(x_bls)]
df['Iterations'] = [k_constant, k_bls_scaling, k_bls]

df

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Method,Optimal_X,Optimal_f(x),Iterations
0,"Constant Step Lenth, eta =1.0","[-9.878922796403032e+242, -9.878922796403032e+...",inf,5
1,BLS with scaling,"[-9.927615776976137e-15, -9.927615776976137e-15]",2.0,17
2,BLS without Scaling,"[2.1245585314893373e-14, 2.1245585314893373e-14]",2.0,13


***Remarks :*** *In parts 4 and 5 we observe that, for the initial value x = (10, 10), Newton's method with the constant step length (0.9999) diverges: the iterates grow so large that the determinant of the Hessian evaluates to zero, which is why we see an absurd answer for which f(x) is inf. In the other two cases, BLS with scaling and BLS without scaling, the iterates converge. The optimal value of x, f(x) and the number of iterations for each method are shown in the table above.*