$\textbf{Direct OLSLR}$

In [142]:
import numpy as np
from sklearn.datasets import load_digits

In [143]:
digits = load_digits()
# Print the shape of the feature matrix (rows = samples, columns = features)
print(digits.data.shape)

(1797, 64)


In [144]:
# Print the shape of the target array holding the labels
print(digits.target.shape)

(1797,)


In [145]:
# Set up the least-squares problem data, reusing the linear regression formulation from the previous lab
N = digits.data.shape[0] # number of data points (rows of the data matrix)
n = digits.data.shape[1] # dimension of each data point (columns of the data matrix)
A = digits.data # data matrix read as a global by the objective, gradient and Hessian functions

In [146]:
# Build the N x 1 column vector of target labels as floats.
# Vectorized cast + reshape replaces the element-by-element Python loop.
y = digits.target.astype(float).reshape(-1, 1)

In [147]:
def evalf(x, n):
  """Return the least-squares objective 0.5 * ||A x - y||^2 evaluated at x.

  Reads the module-level data matrix ``A`` and target vector ``y``.

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x.

  Returns:
    Half of the squared norm of the residual A x - y.
  """
  # Reject arguments of the wrong type or size before evaluating
  assert type(x) is np.ndarray
  assert len(x) == n
  # NOTE(review): presumably x is an (n, 1) column so that A x lines up with y - confirm
  residual = np.matmul(A, x) - y
  residual_norm = np.linalg.norm(residual)
  return 0.5 * (residual_norm)**2



In [148]:
def evalg(x, n):
  """Return the gradient A^T (A x - y) of the least-squares objective at x.

  Reads the module-level data matrix ``A`` and target vector ``y``.

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x.

  Returns:
    The gradient as a numpy array.
  """
  assert type(x) is np.ndarray
  assert len(x) == n
  residual = np.matmul(A, x) - y
  return np.matmul(A.T, residual)


In [149]:
def evalh(x, n):
  """Return the Hessian A^T A of the least-squares objective.

  The value does not depend on x; x is only validated.

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x.

  Returns:
    The matrix product of A transposed with A (A is the module-level data matrix).
  """
  # Reject arguments of the wrong type or size
  assert type(x) is np.ndarray
  assert len(x) == n
  hessian = np.matmul(A.T, A)
  return hessian


In [150]:
def compute_B_k(s, y, B_k, n):
  """Apply one BFGS update to the inverse-Hessian approximation B_k.

  Computes (I - mu s y^T) B_k (I - mu y s^T) + mu s s^T with mu = 1 / (y^T s).

  Args:
    s: step between consecutive iterates, numpy array of length n.
    y: change in gradient between the same iterates, numpy array of length n.
    B_k: current approximation, numpy array (used as an n x n matrix).
    n: problem dimension.

  Returns:
    The updated n x n matrix. If the curvature y^T s is not safely positive
    (zero, negative or NaN), B_k is returned unchanged: dividing by it would
    produce inf/NaN entries or destroy positive definiteness.
  """
  assert type(s) is np.ndarray #do not allow arbitrary type arguments
  assert type(y) is np.ndarray #do not allow arbitrary type arguments
  assert type(B_k) is np.ndarray #do not allow arbitrary type arguments
  assert len(s) == n and len(y) == n #do not allow arbitrary size arguments

  s_vec = np.ravel(s)
  y_vec = np.ravel(y)
  curvature = np.dot(y_vec, s_vec)
  # Relative threshold so the guard is independent of the scale of s and y;
  # "not (a > b)" also catches NaN.
  threshold = np.finfo(float).eps * np.linalg.norm(s_vec) * np.linalg.norm(y_vec)
  if not curvature > threshold:
    return B_k

  mu = 1 / curvature
  I = np.identity(n)
  # np.outer flattens its inputs, so column vectors and 1-D vectors behave the same
  left = np.subtract(I, np.outer(np.multiply(mu, s), np.transpose(y)))
  right = np.subtract(I, np.outer(np.multiply(mu, y), np.transpose(s)))
  rank_one = np.outer(np.multiply(mu, s), np.transpose(s))
  return np.add(np.matmul(np.matmul(left, B_k), right), rank_one)

In [151]:
# Line search type identifiers passed as line_search_type to the find_minimizer_* routines
EXACT_LINE_SEARCH = 1 # NOTE(review): no minimizer visible in this notebook handles this value - confirm
BACKTRACKING_LINE_SEARCH = 2 # step length chosen by backtracking; needs alpha_start, rho, gamma
CONSTANT_STEP_LENGTH = 3 # fixed step length of 1

In [152]:
def compute_D_k(x, n):
  """Return the inverse of the Hessian evalh(x, n).

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x.

  Returns:
    np.linalg.inv applied to the Hessian.
  """
  assert type(x) is np.ndarray
  assert len(x) == n
  return np.linalg.inv(evalh(x, n))

In [153]:
def compute_steplength_backtracking_scaled_direction(x, gradf, alpha_start, rho, gamma, B_k):
  """Backtracking line search along the scaled direction -B_k @ gradf.

  Starting from alpha_start, alpha is multiplied by rho until
  f(x + alpha*d) <= f(x) + gamma*alpha*gradf^T d, with d = -B_k @ gradf.

  Args:
    x: current point (numpy array).
    gradf: gradient at x (numpy array).
    alpha_start: initial step length, non-negative float.
    rho: shrink factor, non-negative float.
      NOTE(review): presumably rho < 1 is expected, otherwise alpha never shrinks - confirm.
    gamma: sufficient-decrease parameter, non-negative float.
    B_k: scaling matrix applied to the negative gradient.

  Returns:
    The accepted step length alpha.
  """
  assert type(x) is np.ndarray
  assert type(gradf) is np.ndarray
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.
  n = len(x)

  # These quantities do not change while alpha shrinks, so compute them once
  step_direction = np.matmul(B_k, -gradf)
  f_x = evalf(x, n)
  slope = np.matmul(np.matrix.transpose(gradf), step_direction)

  alpha = alpha_start
  while evalf(x + alpha*step_direction, n) > f_x + gamma * alpha * slope:
    alpha = alpha * rho
  return alpha

In [154]:
def find_minimizer_Newtonmethod(start_x, n, tol, line_search_type, *args):
  """Minimize the objective with Newton's method.

  Iterates x <- x - step_length * inv(H(x)) @ g(x) until the gradient norm
  is at most tol.

  Args:
    start_x: starting point, numpy array of size n.
    n: problem dimension.
    tol: non-negative float tolerance on the gradient norm.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma).

  Returns:
    (x, k): the final iterate and the number of iterations performed.

  Raises:
    ValueError: if the backtracking parameters are missing or the line
      search type is unknown.
    numpy.linalg.LinAlgError: propagated from np.linalg.inv when the Hessian
      is singular.
  """
  assert type(start_x) is np.ndarray #do not allow arbitrary type arguments
  assert len(start_x) == n #do not allow arbitrary size arguments
  assert type(tol) is float and tol>=0

  x = start_x
  g_x = evalg(x,n)

  if line_search_type == BACKTRACKING_LINE_SEARCH:
    # *args is always a tuple (never None), so test for emptiness instead
    if len(args) == 0:
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive any args. Please check!'
      raise ValueError(err_msg)
    elif len(args)<3 :
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive three args. Please check!'
      raise ValueError(err_msg)
    else:
      alpha_start = float(args[0])
      rho = float(args[1])
      gamma = float(args[2])
  k = 0

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    # Newton scaling matrix: inverse of the Hessian at the current point
    D_k = np.linalg.inv(evalh(x, n))
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x, g_x, alpha_start, rho, gamma, D_k)
    elif line_search_type == CONSTANT_STEP_LENGTH: #pure Newton step
      step_length = 1.
    else:
      raise ValueError('Line search type unknown. Please check!')
    x = np.subtract(x, np.multiply(step_length,np.matmul(D_k, g_x))) #update x = x - step_length*D_k*g_x
    k += 1 #increment iteration
    g_x = evalg(x, n) #compute gradient at new point
  return x,  k

In [156]:
def find_minimizer_BFGS_scaling(start_x, n, tol, line_search_type, *args):
  """Minimize the objective with the BFGS quasi-Newton method.

  Starts from the identity as inverse-Hessian approximation and updates it
  with compute_B_k after every step, until the gradient norm is at most tol.

  Args:
    start_x: starting point, numpy array of size n.
    n: problem dimension.
    tol: non-negative float tolerance on the gradient norm.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma).

  Returns:
    (x, k): the final iterate and the number of iterations performed. If the
    starting point already satisfies the tolerance, start_x is returned with k = 0.

  Raises:
    ValueError: if the backtracking parameters are missing or the line
      search type is unknown.
  """
  assert type(start_x) is np.ndarray #do not allow arbitrary type arguments
  assert len(start_x) == n #do not allow arbitrary size arguments
  assert type(tol) is float and tol>=0

  x = start_x
  g_x = evalg(x,n)

  if line_search_type == BACKTRACKING_LINE_SEARCH:
    # *args is always a tuple (never None), so test for emptiness instead
    if len(args) == 0:
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive any args. Please check!'
      raise ValueError(err_msg)
    elif len(args)<3 :
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive three args. Please check!'
      raise ValueError(err_msg)
    else:
      alpha_start = float(args[0])
      rho = float(args[1])
      gamma = float(args[2])

  k = 0
  B_k = np.identity(n)
  x_old = x

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x_old, g_x, alpha_start, rho, gamma, B_k)
    elif line_search_type == CONSTANT_STEP_LENGTH: #fixed step length of 1
      step_length = 1.
    else:
      raise ValueError('Line search type unknown. Please check!')

    x_new = np.subtract(x_old, np.multiply(step_length, np.matmul(B_k,g_x)))
    # g_x still holds the gradient at x_old, so one new evaluation per iteration is enough
    g_new = evalg(x_new,n)
    s = np.subtract(x_new, x_old)
    y_k = np.subtract(g_new, g_x) # named y_k to avoid shadowing the global target vector y
    B_k = compute_B_k(s, y_k, B_k, n)
    x_old = x_new
    g_x = g_new
    k += 1
  # x_old equals the last x_new, and is also defined when the loop never ran
  return x_old, k

**Question.1 (Partial solution)- for direct OLSLR >>> using Newton method (BACKTRACKING_LINE_SEARCH).**

In [131]:
# Backtracking parameters: initial step length, shrink factor, sufficient-decrease constant
alpha = 0.9
rho = 0.5
gamma = 0.5
# Stop once the gradient norm is at most this tolerance
my_tol= 1e-5
# Start from the zero vector (n x 1 column)
my_start_x =np.zeros((n, 1))
# The recorded run of this cell ended with a LinAlgError (see the output below)
x, k = find_minimizer_Newtonmethod(my_start_x, n, my_tol, BACKTRACKING_LINE_SEARCH, alpha, rho, gamma)
print(x, k)

LinAlgError: ignored

**Question.2 (Partial solution)- for direct OLSLR >>> using BFGS method (BACKTRACKING_LINE_SEARCH).**

In [157]:
# Backtracking parameters: initial step length, shrink factor, sufficient-decrease constant
alpha = 0.9
rho = 0.5
gamma = 0.5
# Stop once the gradient norm is at most this tolerance
my_tol= 1e-5
# Start from the zero vector; size it from n instead of hard-coding the dimension
my_start_x =np.zeros((n, 1))
x, k = find_minimizer_BFGS_scaling(my_start_x, n, my_tol, BACKTRACKING_LINE_SEARCH, alpha, rho, gamma)
print(x)


[[ 0.00000000e+00]
 [ 9.69033568e-02]
 [-4.32277232e-03]
 [-7.76028319e-03]
 [ 7.49594380e-02]
 [ 1.13947198e-02]
 [-2.71328245e-02]
 [-7.33176333e-03]
 [ 9.98337968e-01]
 [-2.88095538e-02]
 [ 1.18688288e-01]
 [ 6.60916265e-02]
 [-5.57069862e-02]
 [-6.97063705e-02]
 [ 9.65876439e-02]
 [ 2.55182251e-01]
 [-7.29828608e-01]
 [ 2.42709916e-02]
 [ 7.73249597e-02]
 [-2.33000278e-02]
 [-5.64086144e-02]
 [ 5.72426822e-02]
 [-4.88717684e-02]
 [-2.62467763e-01]
 [-9.06562829e-01]
 [-1.49767791e-01]
 [ 5.64019538e-02]
 [ 8.96663590e-02]
 [ 8.39318159e-02]
 [ 9.85411936e-02]
 [ 1.69317614e-03]
 [-2.96805758e+00]
 [ 0.00000000e+00]
 [-1.54362338e-01]
 [-9.32361205e-03]
 [ 1.39497628e-01]
 [-3.69234835e-02]
 [ 5.46111776e-02]
 [-9.20505070e-03]
 [ 0.00000000e+00]
 [ 1.03279535e-01]
 [ 1.23983258e-01]
 [-1.37639605e-02]
 [ 5.40087816e-03]
 [ 1.31185107e-01]
 [ 5.49570758e-02]
 [ 2.24938237e-02]
 [ 7.47977909e-03]
 [ 6.17755030e-01]
 [ 2.44122357e-02]
 [ 1.42333037e-03]
 [-6.21110760e-02]
 [-2.0702503

$\textbf{Now, regularized OLSLR}$

In [158]:
def evalf(x, n, lam):
  """Return the regularized objective 0.5*||A x - y||^2 + 0.5*lam*x^T x at x.

  Reads the module-level data matrix ``A`` and target vector ``y``.

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x.
    lam: regularization weight multiplying 0.5 * x^T x.

  Returns:
    The sum of the data-fit term and the penalty term.
  """
  assert type(x) is np.ndarray
  assert len(x) == n
  residual_norm = np.linalg.norm(np.matmul(A, x) - y)
  data_term = 0.5*(residual_norm)**2
  penalty = 0.5*lam*np.matmul(x.T, x)
  return data_term + penalty

In [159]:
def evalg(x, n, lam):
  """Return the gradient lam*x + A^T (A x - y) of the regularized objective.

  Reads the module-level data matrix ``A`` and target vector ``y``.

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x.
    lam: regularization weight.

  Returns:
    The gradient as a numpy array.
  """
  assert type(x) is np.ndarray
  assert len(x) == n
  penalty_grad = lam*x
  data_grad = np.matmul(A.T, np.matmul(A, x) - y)
  return penalty_grad + data_grad

In [160]:
def evalh(x, n, lam):
  """Return the Hessian lam*I + A^T A of the regularized objective.

  The value does not depend on x; x is only validated.

  Args:
    x: numpy array with len(x) == n.
    n: expected length of x; also the size of the identity block.
    lam: regularization weight.

  Returns:
    The n x n Hessian (A is the module-level data matrix).
  """
  # Reject arguments of the wrong type or size
  assert type(x) is np.ndarray
  assert len(x) == n
  ridge = lam*np.eye(n)
  gram = np.matmul(A.T, A)
  return ridge + gram

In [161]:
def compute_steplength_backtracking_scaled(x,n,lam, gradf, direction, alpha_start, rho, gamma):
  """Backtracking line search along a given direction for the regularized objective.

  Starting from alpha_start, alpha is multiplied by rho until
  f(x + alpha*direction) <= f(x) + gamma*alpha*gradf^T direction.

  Args:
    x: current point, numpy array of length n.
    n: problem dimension.
    lam: regularization weight forwarded to evalf.
    gradf: gradient at x, numpy array of length n. The supplied value is used
      as given rather than being recomputed.
    direction: search direction.
    alpha_start: initial step length.
    rho: shrink factor, non-negative float.
      NOTE(review): presumably rho < 1 is expected, otherwise alpha never shrinks - confirm.
    gamma: sufficient-decrease parameter, non-negative float.

  Returns:
    The accepted step length alpha.
  """
  assert type(x) is np.ndarray and len(x) == n
  assert type(gradf) is np.ndarray and len(gradf) == n
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.

  # These quantities do not change while alpha shrinks, so compute them once
  f_x = evalf(x,n,lam)
  slope = np.matmul(np.matrix.transpose(gradf), direction)

  alpha = alpha_start
  while evalf(x+alpha*direction,n,lam) > f_x + gamma*alpha*slope:
    alpha = rho*alpha
  return alpha

In [162]:
import math

In [163]:
def find_minimizer_Newtonmethod(start_x, n,lam, tol, line_search_type, *args):
  """Minimize the regularized objective with Newton's method.

  Iterates x <- x - step_length * inv(H(x)) @ g(x) until the gradient norm
  is at most tol.

  Args:
    start_x: starting point, numpy array of size n.
    n: problem dimension.
    lam: regularization weight forwarded to evalf/evalg/evalh.
    tol: non-negative float tolerance on the gradient norm.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma).

  Returns:
    (x, f(x), k, x_newton, f_newton): final iterate, its objective value, the
    iteration count, and two history lists that are currently always empty
    (kept so the return shape stays the same for callers).

  Raises:
    ValueError: if the backtracking parameters are missing or the line
      search type is unknown.
  """
  assert type(start_x) is np.ndarray #do not allow arbitrary type arguments
  assert len(start_x) == n #do not allow arbitrary size arguments
  assert type(tol) is float and tol>=0

  x = start_x
  g_x = evalg(x,n,lam)

  if line_search_type == BACKTRACKING_LINE_SEARCH:
    # *args is always a tuple (never None), so test for emptiness instead
    if len(args) == 0:
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive any args. Please check!'
      raise ValueError(err_msg)
    elif len(args)<3 :
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive three args. Please check!'
      raise ValueError(err_msg)
    else:
      alpha_start = float(args[0])
      rho = float(args[1])
      gamma = float(args[2])
  k = 0

  # History lists are returned for interface compatibility; nothing is appended to them
  x_newton =  []
  f_newton = []
  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    # Newton scaling matrix: inverse of the Hessian at the current point
    D_k=np.linalg.inv(evalh(x,n,lam))
    direction = np.matmul(D_k,-g_x)
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled(x,n,lam,g_x, direction, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH: #pure Newton step
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')
    x = np.subtract(x, np.multiply(step_length,np.matmul(D_k, g_x)))
    k += 1 #increment iteration
    g_x = evalg(x, n,lam) #compute gradient at new point

  return x, evalf(x,n,lam), k,x_newton,f_newton

In [164]:
def find_minimizer_BFGS_method(start_x, n,lam, tol, line_search_type, *args):
  """Minimize the regularized objective with the BFGS quasi-Newton method.

  Starts from the identity as inverse-Hessian approximation B_k and, from the
  second iteration on, updates it with
  (I - mu s y^T) B_k (I - mu y s^T) + mu s s^T, mu = 1 / (y^T s),
  where s and y are the changes in x and in the gradient over the previous step.

  Args:
    start_x: starting point, numpy array of size n.
    n: problem dimension.
    lam: regularization weight forwarded to evalf/evalg.
    tol: non-negative float tolerance on the gradient norm.
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, (alpha_start, rho, gamma).

  Returns:
    (x, f(x), k, x_bfgs, f_bfgs): final iterate, its objective value, the
    iteration count, and two history lists that are currently always empty
    (kept so the return shape stays the same for callers).

  Raises:
    ValueError: if the backtracking parameters are missing or the line
      search type is unknown.
  """
  assert type(start_x) is np.ndarray #do not allow arbitrary type arguments
  assert len(start_x) == n #do not allow arbitrary size arguments
  assert type(tol) is float and tol>=0

  x = start_x
  g_x = evalg(x,n,lam)

  if line_search_type == BACKTRACKING_LINE_SEARCH:
    # *args is always a tuple (never None), so test for emptiness instead
    if len(args) == 0:
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive any args. Please check!'
      raise ValueError(err_msg)
    elif len(args)<3 :
      err_msg = 'Line search type: BACKTRACKING_LINE_SEARCH, but did not receive three args. Please check!'
      raise ValueError(err_msg)
    else:
      alpha_start = float(args[0])
      rho = float(args[1])
      gamma = float(args[2])
  k = 0

  # History lists are returned for interface compatibility; nothing is appended to them
  x_bfgs = []
  f_bfgs = []
  I = np.identity(n)
  B_k = np.identity(n)
  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    # BFGS update of the inverse-Hessian approximation (skipped on the first iteration,
    # when no previous step exists yet)
    if k > 0:
      mu_k = 1/np.matmul(np.transpose(y_k),s_k)
      left = np.subtract(I, mu_k*np.outer(s_k, y_k))
      right = np.subtract(I, mu_k*np.outer(y_k, s_k))
      B_k = np.add(np.matmul(np.matmul(left, B_k), right), mu_k*np.outer(s_k, s_k))

    direction = np.matmul(B_k,-g_x)
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled(x,n,lam,g_x, direction, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH: #fixed step length of 1
      step_length = 1.0
    else:
      raise ValueError('Line search type unknown. Please check!')

    # g_x already holds the gradient at the current x, so it is not re-evaluated here
    x_prev = x
    g_prev = g_x
    x = np.subtract(x, np.multiply(step_length,np.matmul(B_k, g_x)))
    g_x = evalg(x, n,lam)
    s_k = x-x_prev
    y_k = g_x-g_prev
    k += 1 #increment iteration

  return x, evalf(x,n,lam), k,x_bfgs,f_bfgs

**EX.1 solution- for The $\textbf{regularized OLSLR}$ with
lambda = 0.1   >>>> NEWTON METHOD (BACKTRACKING_LINE_SEARCH)**

In [171]:
# Backtracking parameters: initial step length, shrink factor, sufficient-decrease constant
alpha = 0.9
rho = 0.5
gamma = 0.5
# Stop once the gradient norm is at most this tolerance
my_tol= 1e-4
# Regularization weight
lam=0.1
# Start from the zero vector (n x 1 column)
my_start_x = np.zeros((n, 1))
x, opt_fval, num_iters, x_newton,f_newton = find_minimizer_Newtonmethod(my_start_x, n,lam, my_tol, BACKTRACKING_LINE_SEARCH, alpha, rho, gamma)
print("optimum_x_value is:\n",x)
print("Number of Iterations:",num_iters)

optimum_x_value is:
 [[ 0.00000000e+00]
 [ 9.72176393e-02]
 [-4.25221013e-03]
 [-7.65725749e-03]
 [ 7.49359297e-02]
 [ 1.13924666e-02]
 [-2.68134810e-02]
 [-8.48370171e-03]
 [ 9.91208545e-01]
 [-2.87397984e-02]
 [ 1.18690196e-01]
 [ 6.61518400e-02]
 [-5.57615717e-02]
 [-6.96340237e-02]
 [ 9.62813519e-02]
 [ 2.56470858e-01]
 [-7.28979627e-01]
 [ 2.42825856e-02]
 [ 7.72526071e-02]
 [-2.33770172e-02]
 [-5.63320407e-02]
 [ 5.71246069e-02]
 [-4.84767009e-02]
 [-2.70744170e-01]
 [-8.60889237e-01]
 [-1.49941949e-01]
 [ 5.64334649e-02]
 [ 8.96806467e-02]
 [ 8.39114973e-02]
 [ 9.85243348e-02]
 [ 1.64759992e-03]
 [-2.82145749e+00]
 [ 0.00000000e+00]
 [-1.54275472e-01]
 [-9.36618641e-03]
 [ 1.39528972e-01]
 [-3.69438111e-02]
 [ 5.46098301e-02]
 [-9.13188784e-03]
 [ 0.00000000e+00]
 [ 1.07369006e-01]
 [ 1.23996365e-01]
 [-1.37231270e-02]
 [ 5.34871565e-03]
 [ 1.31237767e-01]
 [ 5.50202749e-02]
 [ 2.24738205e-02]
 [ 7.53480641e-03]
 [ 5.95009063e-01]
 [ 2.42332551e-02]
 [ 1.44538782e-03]
 [-6.21495

**Here, we were not able to find the optimum value for tolerance = 10^(-5) because the method was not converging at that tolerance. That is why I have taken the tolerance equal to 10^(-4), for which the optimum value of x is obtained in just 15 iterations.**

**EX.2 solution- for The $\textbf{regularized OLSLR}$ with
lambda = 0.1   >>>> BFGS Method (BACKTRACKING_LINE_SEARCH)** 

In [166]:
# Backtracking parameters: initial step length, shrink factor, sufficient-decrease constant
alpha = 0.9
rho = 0.5
gamma = 0.5
# Stop once the gradient norm is at most this tolerance
my_tol= 1e-5
# Regularization weight
lam=0.1
# Define the start point here so the cell does not depend on another cell having run
my_start_x = np.zeros((n, 1))
# Pass the parameters defined above instead of repeating the literals
x, opt_fval, num_iters, x_bfgs, f_bfgs= find_minimizer_BFGS_method(my_start_x,n,lam,my_tol,BACKTRACKING_LINE_SEARCH,alpha, rho,gamma)
print("Optimum_X is:")
print(x)

Optimum_X is:
[[ 0.00000000e+00]
 [ 9.72176393e-02]
 [-4.25221012e-03]
 [-7.65725749e-03]
 [ 7.49359298e-02]
 [ 1.13924666e-02]
 [-2.68134811e-02]
 [-8.48370171e-03]
 [ 9.91208545e-01]
 [-2.87397984e-02]
 [ 1.18690196e-01]
 [ 6.61518400e-02]
 [-5.57615717e-02]
 [-6.96340237e-02]
 [ 9.62813519e-02]
 [ 2.56470858e-01]
 [-7.28979628e-01]
 [ 2.42825856e-02]
 [ 7.72526071e-02]
 [-2.33770172e-02]
 [-5.63320407e-02]
 [ 5.71246069e-02]
 [-4.84767009e-02]
 [-2.70744170e-01]
 [-8.60889236e-01]
 [-1.49941949e-01]
 [ 5.64334649e-02]
 [ 8.96806467e-02]
 [ 8.39114973e-02]
 [ 9.85243348e-02]
 [ 1.64759992e-03]
 [-2.82145749e+00]
 [ 0.00000000e+00]
 [-1.54275472e-01]
 [-9.36618641e-03]
 [ 1.39528972e-01]
 [-3.69438111e-02]
 [ 5.46098301e-02]
 [-9.13188785e-03]
 [ 0.00000000e+00]
 [ 1.07369006e-01]
 [ 1.23996365e-01]
 [-1.37231270e-02]
 [ 5.34871565e-03]
 [ 1.31237767e-01]
 [ 5.50202750e-02]
 [ 2.24738205e-02]
 [ 7.53480639e-03]
 [ 5.95009063e-01]
 [ 2.42332551e-02]
 [ 1.44538782e-03]
 [-6.21495531e-02

**Here, we are not able to find the optimum value using the direct OLSLR Newton method because the Hessian $A^\top A$ is non-invertible (a singular matrix). With the direct OLSLR BFGS method, however, we are able to obtain the optimum values.**

**Also, from the above outputs it is clear that, for the regularized OLSLR Newton method, we were not able to find the optimum value for tolerance = 10^(-5), but for tolerance = 10^(-4) it was easily obtained in just 15 iterations.**