$\textbf{Linear regression with gradient descent}$

In [313]:
import numpy as np

In [314]:
# Simple gradient descent 
# Input: Df=gradient, x0=initial guess, N=maximal number of steps, eps=tolerance
# Output: x=global minimum, steps=number of steps, norm of the gradient
def GD(Df, x0, eta, N, eps):
    x=x0
    Gradf_norm =  np.dot(Df(x0),Df(x0))**0.5 
    steps = 0
    while abs(Gradf_norm) > eps and steps < N:
        #print(x)
        Df_value=Df(x)
        Gradf_norm =  np.dot(Df_value,Df_value)**0.5
        x = x-eta*Df_value
        #print('Gradf_norm =',Gradf_norm)
        steps = steps + 1
    # Either a solution is found, or too many iterations
    if abs(Gradf_norm) > eps:
        steps = -1
    return x, steps, Gradf_norm

In [315]:
def linear_model(t, x):
    return x[0]+t*x[1]

In [316]:
#Least square function 
def MLSE(x, t, y):
    m = y.size
    return (1/m)*np.dot(linear_model(t, x)-y,linear_model(t, x)-y)

In [317]:
# Gradient of least square function 
def dMLSE(x, t, y):
    m = y.size
    return (1/m)*np.array([2*np.dot((linear_model(t, x)-y),np.ones(m)),
                    2*np.dot((linear_model(t, x)-y),t)])

In [318]:
def f(x):
    return MLSE(x,td,yd)
def df(x):
    return dMLSE(x,td,yd)

In [319]:
GD(df, x0=np.array([10,10]), eta=.06, N=1000, eps=0.001)

(array([-0.74409975,  3.15082899]), 367, 0.0009835324664540243)

In [320]:
# Backtracking function
#Input: Df=gradient, x0=initial guess,
#        hyperparameters eta=intial stepsize,  factors alpha, beta, 
#        e.g., eta=1, alpha=0.5, beta=0.8
#        cout=maximal number of steps 
# Output: eta=adjusted step size
def backtrack(f,Df,x0, eta, alpha, beta, count):
    while ((f(x0) - (f(x0 - eta*df(x0)) + alpha * eta * np.dot(df(x0),df(x0))))< 0):
        eta = eta*beta
        #print("Iteration", cout,"Inequality: ",  f(x0) \
        #      -(f(x0 - eta*df(x0))+ alpha * eta * np.dot(df(x0),df(x0))))
        count=count+ 1      
    return eta

In [321]:
# Gradient descent with backtracking
# Input: Df=gradient, x0=initial guess, 
#        hyperparameters eta=intial stepsize,  factors alpha, beta, 
#        e.g., eta=1, alpha=0.5, beta=0.8
#        N=maximal number of steps, eps=tolerance
# Output: x=global minimum, steps=number of steps, norm of the gradient
def GD_bt(f,Df, x0, eta, alpha, beta, N, eps):
    x=x0
    Gradf_norm =  (np.dot(Df(x0),Df(x0)))**0.5
    steps = 0
    while abs(Gradf_norm) > eps and steps < N:      
        #Condition to adjust the step-size comment out to omit backtracking
        eta=backtrack(f,Df, x0, eta, alpha, beta, N)
        x = x-eta*Df(x)
        Gradf_norm =  (np.dot(Df(x),Df(x)))**0.5
        steps = steps + 1 
    print('Final eta=',eta)
    # Either a solution is found, or too many iterations
    if abs(Gradf_norm) > eps:
        steps = -1
    return x, steps, Gradf_norm

In [322]:
GD_bt(f,df, x0=np.array([10,10]), eta=1.0, alpha=0.1, beta=0.8, N=1000, eps=1e-3)

Final eta= 0.054975581388800036


(array([-0.74405168,  3.15081776]), 400, 0.0009800105037305299)