$\large\textbf{Exercise 2.}$\
The given function is: $q(x) = \sqrt{x_{1}^2 +4} + \sqrt{x_{2}^2 +4}$

In [71]:
import numpy as np 
import pandas as pd

In [72]:
def evalf(x):  
  assert type(x) is np.ndarray and len(x) == 2
  #after checking if the argument is valid, we can compute the objective function value
  #compute the function value and return it 
  return (np.sqrt((x[0]**2)+4)+np.sqrt((x[1]**2)+4))

In [73]:
def evalg(x):  
  assert type(x) is np.ndarray and len(x) == 2 
  #after checking if the argument is valid, we can compute the gradient value
  #compute the gradient value and return it 
  return np.array([(x[0])/(np.sqrt((x[0]**2)+4)),  (x[1])/(np.sqrt((x[1]**2)+4))]) 

In [74]:
def evalh(x):
  assert type(x) is np.ndarray and len(x) == 2
  return np.array([[ 4/((np.sqrt((x[0]**2)+4))*((x[0]**2)+4)), 0], [0, 4/((np.sqrt((x[1]**2)+4))*((x[1]**2)+4))]])

In [75]:
def compute_D_k(x):
  assert type(x) is np.ndarray
  assert len(x) == 2
  #compute and return D_k
  hess = evalh(x)

  return np.linalg.inv(hess)

 Since the eigen values of the hessian will always be equal(diagonal elements of the diagonal hessian matrix).

In [76]:
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma): #add appropriate arguments to the function 
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  #Complete the code 
  alpha = alpha_start
  p = -gradf
  #implement the backtracking line search
  while evalf(x + alpha*p) > (evalf(x)-(gamma*alpha*np.dot(p,p))):
    #while evalf(x + alpha*p) > evalf(x) + gamma * alpha* (np.matmul(np.matrix.transpose(gradf), p) ):  
    alpha = rho*alpha

  #print('final step length:',alpha)
  return alpha

In [77]:
def compute_steplength_backtracking_scaled_direction(x,gradf,alpha_start, rho, gamma): #add appropriate arguments to the function 
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  #assert type(direction) is np.ndarray and len(direction) == 2 
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  
  alpha = alpha_start
  p = -gradf
  D_k = compute_D_k(x)
  m = np.matmul(D_k,p)
  while evalf(x)<evalf(x+alpha*m)+(np.matmul(np.matrix.transpose(gradf), m))*alpha*gamma:
    alpha = alpha*rho
  return alpha


In [78]:
#line search type 
EXACT_LINE_SEARCH = 1
BACKTRACKING_LINE_SEARCH = 2
CONSTANT_STEP_LENGTH = 3

In [79]:
def find_minimizer_gdscaling(start_x, tol, line_search_type, *args):
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 
  x = start_x
  g_x = evalg(x)
  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]
  k = 0
  while (np.linalg.norm(g_x) > tol):
    D_k = compute_D_k(x)
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x,g_x, alpha_start, rho, gamma) #call the new function you wrote to compute the steplength
      #raise ValueError('BACKTRACKING LINE SEARCH NOT YET IMPLEMENTED')
    elif line_search_type == CONSTANT_STEP_LENGTH: #do a gradient descent with constant step length
      step_length = 1.0
    else:  
      raise ValueError('Line search type unknown. Please check!')
    
     #call the new function you wrote to compute the steplength
    x = np.subtract(x, np.multiply(step_length,np.matmul(D_k, g_x))) #update x = x - step_length*g_x
    k += 1 #increment iteration
    g_x = evalg(x) #compute gradient at new point
    print("Iter:",k,"X: ",x)
  return x , evalf(x), k



$\large\textbf{Exercise 2.2}$

I. **Constant Step Length**

In [80]:
my_start_x = np.array([2.0, 2.0])
my_tol= 1e-9

opt_x, opt_f, iter = find_minimizer_gdscaling(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

Iter: 1 X:  [-2. -2.]
Iter: 2 X:  [2. 2.]
Iter: 3 X:  [-2. -2.]
Iter: 4 X:  [2. 2.]
Iter: 5 X:  [-2. -2.]
Iter: 6 X:  [2. 2.]
Iter: 7 X:  [-2. -2.]
Iter: 8 X:  [2. 2.]
Iter: 9 X:  [-2. -2.]
Iter: 10 X:  [2. 2.]
Iter: 11 X:  [-2. -2.]
Iter: 12 X:  [2. 2.]
Iter: 13 X:  [-2. -2.]
Iter: 14 X:  [2. 2.]
Iter: 15 X:  [-2. -2.]
Iter: 16 X:  [2. 2.]
Iter: 17 X:  [-2. -2.]
Iter: 18 X:  [2. 2.]
Iter: 19 X:  [-2. -2.]
Iter: 20 X:  [2. 2.]
Iter: 21 X:  [-2. -2.]
Iter: 22 X:  [2. 2.]
Iter: 23 X:  [-2. -2.]
Iter: 24 X:  [2. 2.]
Iter: 25 X:  [-2. -2.]
Iter: 26 X:  [2. 2.]
Iter: 27 X:  [-2. -2.]
Iter: 28 X:  [2. 2.]
Iter: 29 X:  [-2. -2.]
Iter: 30 X:  [2. 2.]
Iter: 31 X:  [-2. -2.]
Iter: 32 X:  [2. 2.]
Iter: 33 X:  [-2. -2.]
Iter: 34 X:  [2. 2.]
Iter: 35 X:  [-2. -2.]
Iter: 36 X:  [2. 2.]
Iter: 37 X:  [-2. -2.]
Iter: 38 X:  [2. 2.]
Iter: 39 X:  [-2. -2.]
Iter: 40 X:  [2. 2.]
Iter: 41 X:  [-2. -2.]
Iter: 42 X:  [2. 2.]
Iter: 43 X:  [-2. -2.]
Iter: 44 X:  [2. 2.]
Iter: 45 X:  [-2. -2.]
Iter: 46 X:  [2. 2

KeyboardInterrupt: ignored

It keeps oscillating between (-2,-2) and (2,2). As the step size is not good for using the gradient descent method. Thus I stopped the code.


II. **Backtracking line search**

In [81]:
alpha_start = 1.0
rho = 0.5
gamma = 0.5
opt_x, opt_f, iter= find_minimizer_gdscaling(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start, rho, gamma )
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

Iter: 1 X:  [-2. -2.]
Iter: 2 X:  [2. 2.]
Iter: 3 X:  [-2. -2.]
Iter: 4 X:  [2. 2.]
Iter: 5 X:  [-2. -2.]
Iter: 6 X:  [2. 2.]
Iter: 7 X:  [-2. -2.]
Iter: 8 X:  [2. 2.]
Iter: 9 X:  [-2. -2.]
Iter: 10 X:  [2. 2.]
Iter: 11 X:  [-2. -2.]
Iter: 12 X:  [2. 2.]
Iter: 13 X:  [-2. -2.]
Iter: 14 X:  [2. 2.]
Iter: 15 X:  [-2. -2.]
Iter: 16 X:  [2. 2.]
Iter: 17 X:  [-2. -2.]
Iter: 18 X:  [2. 2.]
Iter: 19 X:  [-2. -2.]
Iter: 20 X:  [2. 2.]
Iter: 21 X:  [-2. -2.]
Iter: 22 X:  [2. 2.]
Iter: 23 X:  [-2. -2.]
Iter: 24 X:  [2. 2.]
Iter: 25 X:  [-2. -2.]
Iter: 26 X:  [2. 2.]
Iter: 27 X:  [-2. -2.]
Iter: 28 X:  [2. 2.]
Iter: 29 X:  [-2. -2.]
Iter: 30 X:  [2. 2.]
Iter: 31 X:  [-2. -2.]
Iter: 32 X:  [2. 2.]
Iter: 33 X:  [-2. -2.]
Iter: 34 X:  [2. 2.]
Iter: 35 X:  [-2. -2.]
Iter: 36 X:  [2. 2.]
Iter: 37 X:  [-2. -2.]
Iter: 38 X:  [2. 2.]
Iter: 39 X:  [-2. -2.]
Iter: 40 X:  [2. 2.]
Iter: 41 X:  [-2. -2.]
Iter: 42 X:  [2. 2.]
Iter: 43 X:  [-2. -2.]
Iter: 44 X:  [2. 2.]
Iter: 45 X:  [-2. -2.]
Iter: 46 X:  [2. 2

KeyboardInterrupt: ignored

##Observation

The minimizer keeps changing from (2,2) to (-2,-2) and vice versa . So, it goes upto infinity iterations. hence I stopped it.

$\large\textbf{Exercise 2.3}$

I. **Backtracking Line Search (without scaling)**

In [84]:
def find_minimizer(start_x, tol, line_search_type,*args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0. 
  x = start_x
  g_x = evalg(x)
  gradf = evalg(x)

  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking(x,gradf, alpha_start,rho, gamma) #call the new function you wrote to compute the steplength
    elif line_search_type == CONSTANT_STEP_LENGTH: #do a gradient descent with constant step length
      step_length = 1.0
    else:  
      raise ValueError('Line search type unknown. Please check!')
    
    #implement the gradient descent steps here   
    x = np.subtract(x, np.multiply(step_length,g_x)) #update x = x - step_length*g_x
    k += 1 #increment iteration
    g_x = evalg(x) #compute gradient at new point
    gradf = evalg(x)
    #print('iter:',k, ' x:', x, ' f(x):', evalf(x), ' grad at x:', g_x, ' gradient norm:', np.linalg.norm(g_x))
  return x ,evalf(x),k



In [85]:
alpha_start = 1.0
rho = 0.5
gamma = 0.5
opt_x, opt_f, iter = find_minimizer(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start, rho, gamma )
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

Minimizer: [7.62525638e-10 7.62525638e-10]
Minimum value: 4.0
Num of iterations: 32


For Constant Step Length:


In [86]:
alpha_start = 1.0
rho = 0.5
gamma = 0.5
opt_x, opt_f, iter = find_minimizer(my_start_x, my_tol, CONSTANT_STEP_LENGTH )
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

Minimizer: [7.62525638e-10 7.62525638e-10]
Minimum value: 4.0
Num of iterations: 32


We get the same solution with same number of iterations using both the methods.

$\large\textbf{Exercise 2.4}$

I. **Constant step-length**

In [87]:
my_start_x = np.array([8.0,8.0])

In [92]:
def find_minimizer_gdscaling(start_x, tol, line_search_type, *args):
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 
  x = start_x
  g_x = evalg(x)
  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]
  k = 0
  while (np.linalg.norm(g_x) > tol):
    D_k = compute_D_k(x) 
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x,g_x, alpha_start, rho, gamma) #call the new function you wrote to compute the steplength
      #raise ValueError('BACKTRACKING LINE SEARCH NOT YET IMPLEMENTED')
    elif line_search_type == CONSTANT_STEP_LENGTH: #do a gradient descent with constant step length
      step_length = 1
    else:  
      raise ValueError('Line search type unknown. Please check!')
    #print(np.matmul(D_k, g_x))
     #call the new function you wrote to compute the steplength
    x = np.subtract(x, np.multiply(step_length,np.matmul(D_k, g_x))) #update x = x - step_length*g_x
    k += 1 #increment iteration
    g_x = evalg(x) #compute gradient at new point
  return x , evalf(x), k



In [93]:
my_start_x = np.array([8.0, 8.0])
my_tol= 1e-9

opt_x, opt_f, iter = find_minimizer_gdscaling(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

  This is separate from the ipykernel package so we can avoid doing imports until


LinAlgError: ignored

In [94]:
def compute_D_k(x):
  assert type(x) is np.ndarray
  assert len(x) == 2
  #compute and return D_k
  hess = evalh(x)
  return np.linalg.inv(hess)

alpha_start = 1.0
rho = 0.5
gamma = 0.5
opt_x, opt_f, iter = find_minimizer_gdscaling(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start, rho, gamma )
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

  """
  This is separate from the ipykernel package so we can avoid doing imports until


LinAlgError: ignored

In both the above methods, we got an error of singular matrix. which indicates that these method are not applicable for this function for the given starting point. we awill now try this without scaling.

$\large\textbf{Exercise 2.5}$

**Backtracking line search(without scaling)** 

In [95]:
alpha_start = 1.0
rho = 0.5
gamma = 0.5
opt_x, opt_f, iter= find_minimizer(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start, rho, gamma )
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

Minimizer: [8.3177047e-10 8.3177047e-10]
Minimum value: 4.0
Num of iterations: 39


COnstant Step Length (without scaling):

In [96]:
opt_x, opt_f, iter= find_minimizer(my_start_x, my_tol, CONSTANT_STEP_LENGTH )
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

Minimizer: [1.10902729e-09 1.10902729e-09]
Minimum value: 4.0
Num of iterations: 38


Without scaling, we get the minimum of function in 38 and 39 iterations respectively. which indicates that this method are applicable to this starting point. The minimum values got are also almost same.