# **EXERCISE-1**

In [None]:
import numpy as np
import math
from sklearn.datasets import load_digits
from tabulate import tabulate 

In [None]:
# Load the scikit-learn digits dataset and report the shape of its feature
# array and of its target array, one shape per output line.
digits = load_digits()
print(digits.data.shape, digits.target.shape, sep="\n")

(1797, 64)
(1797,)


In [None]:
# Feature matrix used by the objective, gradient and Hessian helpers.
A = digits.data
# Targets as a float column vector of shape (n_samples, 1). A single vectorized
# cast + reshape replaces the element-by-element Python copy loop and yields
# the same values, dtype and shape.
y = digits.target.astype(float).reshape(-1, 1)

# Que 1)

In [None]:
def evalf(x,lamda):
  """Evaluate the regularized least-squares objective at ``x``.

  Computes ``(lamda/2) * x^T x + 0.5 * ||A x - y||^2`` using the module-level
  ``A`` and ``y``.

  Args:
    x: point at which to evaluate; must be exactly an ``np.ndarray``.
    lamda: regularization weight.

  Returns:
    The objective value. For a column vector ``x`` of shape (n, 1) the
    ``x^T x`` product is a 1x1 array, so the result is a 1x1 array as well.

  Raises:
    AssertionError: if ``x`` is not an ``np.ndarray``.
  """
  assert type(x) is np.ndarray
  residual = A @ x - y
  penalty_term = (lamda / 2) * (x.T @ x)
  fit_term = 0.5 * np.linalg.norm(residual) ** 2
  return penalty_term + fit_term

In [None]:
def evalg(x, lamda):
  """Evaluate the gradient of the regularized least-squares objective.

  Computes ``A^T (A x - y) + lamda * x`` using the module-level ``A`` and ``y``.

  Args:
    x: point at which to evaluate; must be exactly an ``np.ndarray``.
    lamda: regularization weight.

  Returns:
    The gradient array, with the same shape as ``A^T (A x - y)``.

  Raises:
    AssertionError: if ``x`` is not an ``np.ndarray``.
  """
  assert type(x) is np.ndarray
  residual = A @ x - y
  data_part = A.T @ residual
  return data_part + lamda * x

In [None]:
def evalh(x, lamda):
  """Evaluate the Hessian of the regularized least-squares objective.

  Computes ``A^T A + lamda * I`` using the module-level ``A``. The point ``x``
  is only used for its length, which sets the size of the identity matrix.

  Args:
    x: current point; must be exactly an ``np.ndarray``.
    lamda: regularization weight.

  Returns:
    The Hessian matrix.

  Raises:
    AssertionError: if ``x`` is not an ``np.ndarray``.
  """
  assert type(x) is np.ndarray
  dim = len(x)
  gram = A.T @ A
  return gram + lamda * np.eye(dim)

In [None]:
# Identifier for the backtracking line-search strategy; it is passed as the
# `line_search_type` argument in the minimizer calls of this notebook.
BACKTRACKING_LINE_SEARCH = 1

In [None]:
def compute_steplength_backtracking_scaled_direction(x, gradf, direction, alpha_start, rho, gamma,lamda):
  """Backtracking line search along ``direction`` with a sufficient-decrease test.

  Starting from ``alpha_start``, the step length is multiplied by ``rho`` until

      evalf(x + alpha*direction, lamda) <= evalf(x, lamda) + gamma*alpha*gradf^T direction

  holds.

  Args:
    x: current point (``np.ndarray``).
    gradf: gradient at ``x`` (``np.ndarray``).
    direction: search direction (``np.ndarray``).
    alpha_start: initial trial step length, a float >= 0.
    rho: step shrink factor, a float strictly between 0 and 1.
    gamma: sufficient-decrease coefficient, a float >= 0.
    lamda: regularization weight forwarded to ``evalf``.

  Returns:
    The first step length in the sequence alpha_start, rho*alpha_start, ...
    that satisfies the sufficient-decrease condition.

  Raises:
    AssertionError: if an argument has the wrong type or is out of range.
  """
  assert type(x) is np.ndarray 
  assert type(gradf) is np.ndarray 
  assert type(direction) is np.ndarray 
  assert type(alpha_start) is float and alpha_start>=0. 
  # rho must strictly shrink the step: rho >= 1 never terminates once the first
  # trial step is rejected, and rho == 0 collapses the step to zero.
  assert type(rho) is float and 0. < rho < 1.
  assert type(gamma) is float and gamma>=0. 

  # Both quantities are independent of alpha, so evaluate them once instead of
  # on every pass through the loop condition.
  f_at_x = evalf(x, lamda)
  directional_derivative = np.matmul(gradf.T, direction)

  alpha = alpha_start
  while evalf(x+alpha*direction, lamda) > (f_at_x + gamma*alpha*directional_derivative):
    alpha=rho*alpha

  return alpha

In [None]:
def find_minimizer_Newton(start_x, tol, line_search_type,lamda, *args):
  """Minimize the objective with Newton's method and a backtracking line search.

  Iterates ``x <- x + alpha * p`` where ``p`` solves ``evalh(x) p = -evalg(x)``
  and ``alpha`` comes from the backtracking line search, until the gradient
  norm is at most ``tol``.

  Args:
    start_x: starting point (``np.ndarray``).
    tol: gradient-norm tolerance, a float >= 0.
    line_search_type: accepted for interface compatibility; it is not consulted,
      the backtracking line search is always used.
    lamda: regularization weight forwarded to ``evalf``/``evalg``/``evalh``.
    *args: ``alpha_start``, ``rho``, ``gamma`` for the line search, in that order.

  Returns:
    Tuple ``(x, k, f)``: final point, number of iterations, objective value at ``x``.

  Raises:
    AssertionError: if ``start_x`` or ``tol`` fail validation.
    numpy.linalg.LinAlgError: if the Hessian is singular.
  """
  assert type(start_x) is np.ndarray 
  assert type(tol) is float and tol>=0 
  
  x = start_x
  g_x = evalg(x,lamda)

  alpha_start = float(args[0])
  rho = float(args[1])
  gamma = float(args[2])

  k = 0
  while (np.linalg.norm(g_x) > tol):
    # Solve H p = -g directly instead of forming the explicit inverse: one
    # factorization per iteration, better conditioned, and still raises
    # LinAlgError when the Hessian is singular.
    p_k = -np.linalg.solve(evalh(x,lamda), g_x)
    step_length = compute_steplength_backtracking_scaled_direction(x, g_x,p_k, alpha_start, rho, gamma,lamda)

    # Reuse the Newton direction for the update rather than recomputing it.
    x = np.add(x, np.multiply(step_length, p_k))
    k += 1
    g_x = evalg(x,lamda) 

  return x,  k, evalf(x,lamda)

In [None]:
#For direct OLSLR take lamda = 0
my_start_x =np.zeros((64, 1))
my_tol= 1e-3
lamda = 0
# The recorded run of this cell failed with LinAlgError. That failure is the
# point of the demonstration, so report it instead of letting it abort a
# "Run All" of the notebook.
try:
  x, k, f_value = find_minimizer_Newton(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,lamda, 0.9 , 0.5, 0.5)
except np.linalg.LinAlgError as err:
  print("Newton's method failed for lamda = 0:", err)


LinAlgError: ignored

In [None]:
# Regularized OLSLR with Newton's method: lamda = 0.001
lamda = 0.001
my_tol = 1e-3
my_start_x = np.zeros((64, 1))
x, k, f_value = find_minimizer_Newton(
    my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, lamda, 0.9, 0.5, 0.5)

# Header row followed by the single result row.
table = [
    ["x_opt", "min value", "Number of iter"],
    [x, f_value, k],
]
print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

╒═════════════════════╤═════════════╤══════════════════╕
│ x_opt               │   min value │   Number of iter │
╞═════════════════════╪═════════════╪══════════════════╡
│ [[ 0.00000000e+00]  │     3064.45 │                9 │
│  [ 9.69076881e-02]  │             │                  │
│  [-4.32192761e-03]  │             │                  │
│  [-7.75916338e-03]  │             │                  │
│  [ 7.49591986e-02]  │             │                  │
│  [ 1.13946582e-02]  │             │                  │
│  [-2.71293921e-02]  │             │                  │
│  [-7.34410663e-03]  │             │                  │
│  [ 9.98267906e-01]  │             │                  │
│  [-2.88089376e-02]  │             │                  │
│  [ 1.18688356e-01]  │             │                  │
│  [ 6.60922719e-02]  │             │                  │
│  [-5.57075914e-02]  │             │                  │
│  [-6.97056149e-02]  │             │                  │
│  [ 9.65844013e-02]  │        

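Yes, we faced difficulty with direct OLSLR (lamda = 0): Newton's method needs the inverse of the Hessian $A^\top A$, and that inverse does not always exist. Here the matrix is singular, which is why the run above fails with a `LinAlgError`. Regularization solves this issue: with lamda > 0 the Hessian becomes $A^\top A + \lambda I$, which is positive definite and therefore invertible, so regularized OLSLR runs without error.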

**Que 2)**

In [None]:
def find_minimizer_BFGS(start_x, tol, line_search_type, lamda, *args):
  """Minimize the objective with BFGS and a backtracking line search.

  Maintains an approximation ``H_k`` of the inverse Hessian (initialized to the
  identity), steps along ``p_k = -H_k g_k`` with a backtracking step length, and
  applies the update

      H <- (I - u s y^T) H (I - u y s^T) + u s s^T,   u = 1 / (y^T s)

  with ``s = x_{k+1} - x_k`` and ``y = g_{k+1} - g_k``, until the gradient norm
  is at most ``tol``.

  Args:
    start_x: starting point (``np.ndarray``); the update formulas use outer
      products, so a column vector is expected.
    tol: gradient-norm tolerance, a float >= 0.
    line_search_type: accepted for interface compatibility; it is not consulted,
      the backtracking line search is always used.
    lamda: regularization weight forwarded to ``evalf``/``evalg``.
    *args: ``alpha_start``, ``rho``, ``gamma`` for the line search, in that order.

  Returns:
    Tuple ``(x, k, f)``: final point, number of completed iterations, objective
    value at ``x``. Returns early with the current point if the line search
    produces a zero step, since no further progress is possible.

  Raises:
    AssertionError: if ``start_x`` or ``tol`` fail validation.
  """
  assert type(start_x) is np.ndarray
  assert type(tol) is float and tol>=0 
  
  x = start_x
  g_x = evalg(x, lamda)
  I = np.identity(len(x))
  H_k = I  # inverse-Hessian approximation

  alpha_start = float(args[0])
  rho = float(args[1])
  gamma = float(args[2])

  k = 0
  while (np.linalg.norm(g_x) > tol): 
    p_k = -np.matmul(H_k, g_x)
    step_length = compute_steplength_backtracking_scaled_direction(x, g_x,p_k, alpha_start, rho, gamma,lamda)

    s_k = np.multiply(step_length,p_k)  #s_k = x_k+1 - x_k equivalently s_k = alpha*p_k
    if not np.any(s_k):
      # Zero step: the iterate cannot move, and y^T s would be 0, turning the
      # update below into inf/NaN. Stop with the current point instead.
      break
    x = np.add(x, s_k)

    # One gradient evaluation per iteration: the gradient at the previous point
    # is already held in g_x, and g_next becomes g_x for the next pass.
    g_next = evalg(x,lamda)
    y_k = g_next - g_x

    curvature = np.matmul(y_k.T,s_k).item()
    # Apply the update only when y^T s > 0; otherwise 1/(y^T s) is undefined or
    # the update would destroy positive definiteness of H_k.
    if np.isfinite(curvature) and curvature > 0:
      u_k = 1.0/curvature
      a_1 = np.subtract(I , u_k*np.matmul(s_k,y_k.T))
      a_2 = np.subtract(I , u_k*np.matmul(y_k, s_k.T))
      H_k = np.matmul(np.matmul(a_1,H_k),a_2) + u_k*np.matmul(s_k,s_k.T)

    k += 1 #increment iteration
    g_x = g_next #gradient at the new point

  return x , k, evalf(x,lamda)

In [None]:
# Unregularized OLSLR with BFGS: lamda = 0
lamda = 0
my_tol = 1e-3
my_start_x = np.zeros((64, 1))
x_bfgs, k_bfgs, fun_val_bfgs = find_minimizer_BFGS(
    my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, lamda, 0.9, 0.5, 0.5)

# Header row followed by the single result row.
table_bfgs = [
    ["x_opt", "min value", "Number of iter"],
    [x_bfgs, fun_val_bfgs, k_bfgs],
]
print(tabulate(table_bfgs, headers='firstrow', tablefmt='fancy_grid'))

╒═════════════════════╤═════════════╤══════════════════╕
│ x_opt               │   min value │   Number of iter │
╞═════════════════════╪═════════════╪══════════════════╡
│ [[ 0.00000000e+00]  │     3064.45 │               69 │
│  [ 9.69033631e-02]  │             │                  │
│  [-4.32277345e-03]  │             │                  │
│  [-7.76028323e-03]  │             │                  │
│  [ 7.49594382e-02]  │             │                  │
│  [ 1.13947197e-02]  │             │                  │
│  [-2.71328244e-02]  │             │                  │
│  [-7.33176357e-03]  │             │                  │
│  [ 9.98337971e-01]  │             │                  │
│  [-2.88095548e-02]  │             │                  │
│  [ 1.18688288e-01]  │             │                  │
│  [ 6.60916266e-02]  │             │                  │
│  [-5.57069865e-02]  │             │                  │
│  [-6.97063707e-02]  │             │                  │
│  [ 9.65876436e-02]  │        

In [None]:
# Regularized OLSLR with BFGS: lamda = 0.001
lamda = 0.001
my_tol = 1e-3
my_start_x = np.zeros((64, 1))
x_bfgs, k_bfgs, fun_val_bfgs = find_minimizer_BFGS(
    my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, lamda, 0.9, 0.5, 0.5)

# Header row followed by the single result row.
table_bfgs1 = [
    ["x_opt", "min value", "Number of iter"],
    [x_bfgs, fun_val_bfgs, k_bfgs],
]
print(tabulate(table_bfgs1, headers='firstrow', tablefmt='fancy_grid'))

╒═════════════════════╤═════════════╤══════════════════╕
│ x_opt               │   min value │   Number of iter │
╞═════════════════════╪═════════════╪══════════════════╡
│ [[ 0.00000000e+00]  │     3064.45 │               69 │
│  [ 9.69076943e-02]  │             │                  │
│  [-4.32192872e-03]  │             │                  │
│  [-7.75916341e-03]  │             │                  │
│  [ 7.49591989e-02]  │             │                  │
│  [ 1.13946581e-02]  │             │                  │
│  [-2.71293919e-02]  │             │                  │
│  [-7.34410690e-03]  │             │                  │
│  [ 9.98267911e-01]  │             │                  │
│  [-2.88089386e-02]  │             │                  │
│  [ 1.18688356e-01]  │             │                  │
│  [ 6.60922721e-02]  │             │                  │
│  [-5.57075918e-02]  │             │                  │
│  [-6.97056151e-02]  │             │                  │
│  [ 9.65844012e-02]  │        

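We did not face any difficulty with direct OLSLR when using the BFGS method, because BFGS never inverts the Hessian; it only updates an approximation of the inverse Hessian from gradient differences. We also obtained almost the same minimizers for direct OLSLR and for regularized OLSLR, and both runs converged in the same number of iterations (69).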