$\large\textbf{Exercise 1.}$\
The given function is: $f(x) = 400\,x_{1}^{2} + 0.004\,x_{2}^{4}$

In [82]:
import numpy as np 

In [83]:
def evalf(x):
  """Evaluate f(x) = 400*x1^2 + 0.004*x2^4 at the point x.

  x must be a numpy array holding exactly two entries (x1, x2).
  """
  # Reject anything that is not a plain length-2 numpy array.
  assert type(x) is np.ndarray and len(x) == 2
  x1, x2 = x[0], x[1]
  quadratic_term = 400*(x1**2)
  quartic_term = 0.004*(x2)**4
  return quadratic_term + quartic_term

In [84]:
def evalg(x):
  """Return the gradient of f at x as a numpy array [df/dx1, df/dx2].

  x must be a numpy array holding exactly two entries (x1, x2).
  """
  # Validate the argument before touching its entries.
  assert type(x) is np.ndarray and len(x) == 2
  df_dx1 = 800*x[0]                # derivative of 400*x1^2
  df_dx2 = 4*0.004* (x[1]**3)      # derivative of 0.004*x2^4
  return np.array([df_dx1, df_dx2])

In [85]:
def evalh(x): #Hessian matrix
  """Return the 2x2 Hessian of f at x.

  The Hessian is diagonal: 800 for the x1 direction and 12*0.004*x2^2 for
  the x2 direction.
  """
  assert type(x) is np.ndarray and len(x) == 2
  curvature_x1 = 800.0
  curvature_x2 = 12*0.004*(x[1]**2)
  # np.diag places the two curvatures on the diagonal and zeros elsewhere.
  return np.diag([curvature_x1, curvature_x2])

In [86]:
def compute_D_k(x):
  """Return the diagonal scaling matrix D_k used at the point x.

  D_k is a 2x2 diagonal matrix whose diagonal entries are the reciprocals of
  the diagonal entries of the Hessian returned by evalh(x).
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  # Pull out the Hessian's diagonal and invert it entry by entry.
  curvatures = np.diag(evalh(x))
  return np.diag(1/curvatures)

In [87]:
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma):
  """Backtracking (Armijo) line search along the steepest-descent direction.

  Starting from alpha_start, the trial step length is multiplied by rho until
  the sufficient-decrease condition

      f(x - alpha*gradf) <= f(x) - gamma*alpha*||gradf||^2

  holds.

  Args:
    x: current point, numpy array of length 2.
    gradf: gradient of f at x, numpy array of length 2.
    alpha_start: initial trial step length (float, >= 0).
    rho: factor applied to alpha after every rejected trial (float, >= 0).
      The trial step only shrinks when rho < 1.
    gamma: sufficient-decrease constant (float, >= 0).

  Returns:
    The first trial step length that satisfies the condition above.
  """
  # Both the point and the gradient must be length-2 numpy arrays.
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.

  alpha = alpha_start
  p = -gradf  # steepest-descent direction

  # These two quantities do not depend on alpha, so compute them once.
  f_x = evalf(x)
  p_dot_p = np.dot(p, p)  # equals -gradf.p, the negated directional derivative

  # Shrink alpha until the sufficient-decrease condition is met.
  while evalf(x + alpha*p) > (f_x-(gamma*alpha*p_dot_p)):
    alpha = rho*alpha

  return alpha

In [88]:
def compute_steplength_backtracking_scaled_direction(x,gradf,alpha_start, rho, gamma):
  """Backtracking (Armijo) line search along the scaled direction -D_k*gradf.

  With m = -D_k*gradf, the trial step length starts at alpha_start and is
  multiplied by rho until the sufficient-decrease condition

      f(x + alpha*m) <= f(x) + gamma*alpha*(gradf . m)

  holds. For a descent direction gradf . m is negative, so the right-hand
  side lies below f(x) and an accepted step must reduce f.

  Args:
    x: current point, numpy array of length 2.
    gradf: gradient of f at x, numpy array of length 2.
    alpha_start: initial trial step length (float, >= 0).
    rho: factor applied to alpha after every rejected trial (float, >= 0).
      The trial step only shrinks when rho < 1.
    gamma: sufficient-decrease constant (float, >= 0).

  Returns:
    The first trial step length that satisfies the condition above.
  """
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.

  alpha = alpha_start
  D_k = compute_D_k(x)
  m = np.matmul(D_k, -gradf)   # scaled search direction
  slope = np.dot(gradf, m)     # directional derivative of f along m

  # f(x) does not depend on alpha, so evaluate it once.
  f_x = evalf(x)

  # Reject the trial step while f has not decreased by at least
  # gamma*alpha*|slope|; note the '+' sign, slope itself carries the minus.
  while evalf(x + alpha*m) > f_x + gamma*alpha*slope:
    alpha = alpha*rho
  return alpha


In [89]:
# Integer codes selecting the line-search strategy used by the minimizers.
EXACT_LINE_SEARCH, BACKTRACKING_LINE_SEARCH, CONSTANT_STEP_LENGTH = 1, 2, 3

In [90]:
def find_minimizer_gdscaling(start_x, tol, line_search_type, *args):
  """Minimize f by gradient descent with diagonal scaling.

  Each iteration performs x <- x - step_length * D_k * grad f(x), where D_k is
  the matrix returned by compute_D_k(x).

  Args:
    start_x: starting point, numpy array of length 2.
    tol: non-negative float; iteration stops once ||grad f(x)|| <= tol.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma) in that
      order; unused otherwise.

  Returns:
    (x, f(x), k): the final iterate, the objective value there and the number
    of iterations performed.

  Raises:
    ValueError: if an iteration is attempted with any other line_search_type.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0
  x = start_x
  g_x = evalg(x)
  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]
  k = 0
  while (np.linalg.norm(g_x) > tol):
    D_k = compute_D_k(x)
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x,g_x, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH: #scaled step with constant step length
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    x = np.subtract(x, np.multiply(step_length,np.matmul(D_k, g_x))) #update x = x - step_length*D_k*g_x
    k += 1 #increment iteration
    g_x = evalg(x) #gradient at the new point, evaluated exactly once per iteration
  return x , evalf(x), k



Que-3: \\
I. **Constant Step Length**

In [91]:
# Starting point and gradient-norm tolerance for the run.
my_start_x, my_tol = np.array([2.0, 2.0]), 1e-9

# Scaled gradient descent with the constant step length.
opt_x, opt_f, iter = find_minimizer_gdscaling(
    my_start_x, my_tol, CONSTANT_STEP_LENGTH)
for label, value in (("Minimizer:", opt_x),
                     ("Minimum value:", opt_f),
                     ("Num of iterations:", iter)):
  print(label, value)

Minimizer: [0.         0.00304488]
Minimum value: 3.4382653805813626e-13
Num of iterations: 16


II. **Backtracking Line Search**

In [92]:
# Backtracking parameters: initial step, shrink factor, sufficient-decrease constant.
alpha_start, rho, gamma = 1.0, 0.5, 0.5

# Scaled gradient descent with backtracking line search.
opt_x, opt_f, iter = find_minimizer_gdscaling(
    my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, alpha_start, rho, gamma)
for label, value in (("Minimizer:", opt_x),
                     ("Minimum value:", opt_f),
                     ("Num of iterations:", iter)):
  print(label, value)

Minimizer: [0.         0.00304488]
Minimum value: 3.4382653805813626e-13
Num of iterations: 16


## Observation

Newton's method with a constant step length and with backtracking line search both converge in exactly 16 iterations. The minimum function value is the same in both cases and is sufficiently close to the optimal value 0. The true minimizer is [0, 0]: the first coordinate is exactly 0 in both cases, while the second coordinate is about 0.003, which can be brought closer to 0 by using a smaller tolerance value. \\
Both line-search variants behave the same in this case because, for this particular function, the backtracking line search returns the step length 1.0 at every iteration, which is the same as the constant step length 1.0.

Que-4: \\
**I. Backtracking line search (without scaling)**

In [93]:
def find_minimizer(start_x, tol, line_search_type,*args):
  """Minimize f by plain (unscaled) gradient descent.

  Each iteration performs x <- x - step_length * grad f(x).

  Args:
    start_x: starting point, numpy array of length 2.
    tol: non-negative float; iteration stops once ||grad f(x)|| <= tol.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma) in that
      order; unused otherwise.

  Returns:
    (x, k): the final iterate and the number of iterations performed. The
    objective value is not part of the result; use evalf(x) to obtain it.

  Raises:
    ValueError: if an iteration is attempted with any other line_search_type.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0.
  x = start_x
  g_x = evalg(x)

  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      #the line search needs the gradient at the current point, which is g_x
      step_length = compute_steplength_backtracking(x, g_x, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH: #do a gradient descent with constant step length
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    x = np.subtract(x, np.multiply(step_length,g_x)) #update x = x - step_length*g_x
    k += 1 #increment iteration
    g_x = evalg(x) #gradient at the new point, evaluated exactly once per iteration
  return x ,k

In [94]:
# find_minimizer returns only (minimizer, iteration count), so the objective
# value at the minimizer is evaluated separately.
opt_x, iter = find_minimizer(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start, rho, gamma )
opt_f = evalf(opt_x)
print("Minimizer:",opt_x)
print("Minimum value:",opt_f)
print("Num of iterations:",iter)

KeyboardInterrupt: ignored

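Here I had to interrupt the run because it was taking a very long time. Without scaling, gradient descent converges extremely slowly on this function: the curvature along $x_1$ is 800, while the curvature along $x_2$ is $0.048\,x_2^2$, which vanishes near the minimizer, so the problem is very badly conditioned.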

II. **Backtracking Line-search(with scaling, using diagonal matrix)**

In [95]:
def find_minimizer_gdscaling(start_x, tol, line_search_type, *args):
  """Minimize f by gradient descent with diagonal scaling.

  NOTE: a function with this name is also defined in an earlier cell of the
  notebook; once this cell runs, this definition is the one in effect.

  Each iteration performs x <- x - step * D_k * grad f(x), where D_k is the
  matrix returned by compute_D_k(x). Iteration stops once the gradient norm
  is at most tol.

  Args:
    start_x: starting point, numpy array of length 2.
    tol: non-negative float tolerance on the gradient norm.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma) in that
      order; unused otherwise.

  Returns:
    (x, f(x), k): final iterate, objective value there, iteration count.

  Raises:
    ValueError: if an iteration is attempted with any other line_search_type.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments
  assert type(tol) is float and tol>=0
  point = start_x
  grad = evalg(point)
  use_backtracking = (line_search_type == BACKTRACKING_LINE_SEARCH)
  if use_backtracking:
    # Line-search parameters are passed positionally after line_search_type.
    alpha_start, rho, gamma = args[0], args[1], args[2]
  n_iters = 0
  while np.linalg.norm(grad) > tol:
    scaling = compute_D_k(point)
    if use_backtracking:
      step = compute_steplength_backtracking_scaled_direction(point, grad, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH:
      step = 1
    else:
      raise ValueError('Line search type unknown. Please check!')

    # Move along the scaled negative gradient, then refresh the gradient.
    point = point - step*np.matmul(scaling, grad)
    n_iters += 1
    grad = evalg(point)
  return point, evalf(point), n_iters



In [96]:
# Starting point and gradient-norm tolerance for the run.
my_start_x, my_tol = np.array([2.0, 2.0]), 1e-9
# Backtracking parameters: initial step, shrink factor, sufficient-decrease constant.
alpha_start, rho, gamma = 1.0, 0.5, 0.5

# Scaled gradient descent with backtracking line search.
opt_x, opt_f, iter = find_minimizer_gdscaling(
    my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, alpha_start, rho, gamma)
for label, value in (("Minimizer:", opt_x),
                     ("Minimum value:", opt_f),
                     ("Num of iterations:", iter)):
  print(label, value)

Minimizer: [0.         0.00304488]
Minimum value: 3.4382653805813626e-13
Num of iterations: 16


## Observation
Newton's method is equivalent to scaling with a diagonal matrix for this function, since the Hessian is itself a diagonal matrix. Let $D = \mathrm{diag}(d_{1}, d_{2}, \dots, d_{k})$ be a diagonal matrix with nonzero diagonal entries. Then $D^{-1} = \mathrm{diag}(1/d_{1}, 1/d_{2}, \dots, 1/d_{k})$, which is the same as the matrix used for diagonal scaling. Thus the diagonal scaling method behaves exactly the same as the two variants of Newton's method.