# Line Search
This notebook explores adding backtracking line search to our gradient descent algorithm.
See [here](http://users.ece.utexas.edu/~cmcaram/EE381V_2012F/Lecture_4_Scribe_Notes.final.pdf)

In [101]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gradient_descent as gd
import math

In [2]:
# Toy regression data: the targets are exactly (x + 1)**2, i.e. 4, 9, 16, 25.
x = np.arange(1, 5)
y = (x + 1) ** 2
# presumably builds the degree-2 design matrix and starting weights -- confirm in gradient_descent.py
features, weights = gd.gen_predicted(x, 2)

In [3]:
# Constants for the hand-worked sufficient-decrease check in the next cells.
n = 1      # trial step size
b = 0.5    # shrink factor applied to the step while backtracking
a = 0.25   # sufficient-decrease constant

In [20]:
# Left-hand side of the sufficient-decrease test: RSS evaluated at the
# candidate weights obtained by moving n * partial away from `weights`.
left = gd.get_RSS(
    y,
    features,
    (weights - n * gd.get_RSS_partial(y, features, weights)),
)
left

array([  6.59344000e+05,   7.50212100e+06,   3.40588960e+07,
         1.02070609e+08])

In [21]:
# Right-hand side of the sufficient-decrease test: current RSS minus
# a * n * ||partial||^2.
right = (
    gd.get_RSS(y, features, weights)
    - a * n * (np.linalg.norm(gd.get_RSS_partial(y, features, weights)) ** 2)
)
right

array([-93202., -93137., -92962., -92593.])

In [22]:
# Element-wise check of the condition: is each `left` entry at most `right`?
np.less_equal(left, right)

array([False, False, False, False], dtype=bool)

$$f\left(x^{k} - \eta^{k} \nabla f(x^{k})\right) \leq f(x^{k}) - \alpha \eta^{k} \Vert \nabla f(x^{k}) \Vert^2$$

In [51]:
def is_step_valid(y, features, weights, partial, step, a=0.4):
    """Check the sufficient-decrease (Armijo) condition for a candidate step.

    The candidate point is ``weights + step * partial`` -- the same update
    that ``gradient_descent`` applies -- so ``partial`` must be a descent
    direction. The recorded runs in this notebook (gradient descent converging
    with the ``+`` update) indicate that ``gd.get_RSS_partial`` returns one;
    confirm against the ``gradient_descent`` module if it changes.

    Parameters
    ----------
    y : target values, forwarded to ``gd.get_RSS``.
    features : feature matrix, forwarded to ``gd.get_RSS``.
    weights : current weight vector.
    partial : value of ``gd.get_RSS_partial`` at ``weights``.
    step : candidate step size.
    a : sufficient-decrease constant (default 0.4).

    Returns
    -------
    bool
        True when the total RSS at the candidate point is at most
        ``total RSS(weights) - a * step * ||partial||**2``.
    """
    # gd.get_RSS can return one value per observation (the outputs recorded in
    # this notebook are 4-element arrays). The condition is defined on the
    # total objective, so reduce to a scalar before comparing; np.sum is a
    # no-op if get_RSS already returns a scalar.
    current_rss = np.sum(gd.get_RSS(y, features, weights))
    candidate_rss = np.sum(gd.get_RSS(y, features, weights + step * partial))
    required_decrease = a * step * np.linalg.norm(partial) ** 2
    return bool(candidate_rss <= current_rss - required_decrease)

In [61]:
%%timeit
n=1
b=.8
partial = gd.get_RSS_partial(y, features, weights)
while not is_step_valid(y, features, weights, partial, n):
    n = b*n
    partial = gd.get_RSS_partial(y, features, weights)

100 loops, best of 3: 4.81 ms per loop


## Gradient descent with backtracking line search

In [86]:
def gradient_descent(y, features, weights, step_size_initial, tolerance, step_function,
                     params=None, max_iterations=None):
    """Iterate ``weights += step * gd.get_RSS_partial(...)`` until convergence.

    Parameters
    ----------
    y : target values; copied, the caller's array is not modified.
    features : feature matrix; copied, the caller's array is not modified.
    weights : starting weights; copied, the caller's array is not modified.
    step_size_initial : step size handed to ``step_function`` on the first
        iteration.
    tolerance : iteration stops once the norm of the partial computed at the
        start of an iteration is below this value.
    step_function : callable ``(n, i, y, features, weights, params) -> step``
        where ``n`` is the previously used step and ``i`` the 1-based
        iteration number.
    params : optional dict forwarded to ``step_function``; ``None`` is
        treated as an empty dict.
    max_iterations : optional cap on the number of weight updates. ``None``
        (the default) means no cap, which can loop forever if the step
        function never lets the partial shrink below ``tolerance``.

    Returns
    -------
    The final weight vector.

    Notes
    -----
    The update *adds* ``step * partial``, so ``gd.get_RSS_partial`` must
    return a descent direction; the converging runs recorded in this notebook
    indicate that it does.
    """
    import warnings

    # A fresh dict per call avoids sharing one mutable default across calls.
    if params is None:
        params = {}

    y_copy = y.copy()
    features_copy = features.copy()
    weights_copy = weights.copy()
    n = step_size_initial
    i = 1
    while True:
        partial = gd.get_RSS_partial(y_copy, features_copy, weights_copy)
        n = step_function(n, i, y_copy, features_copy, weights_copy, params)
        weights_copy = weights_copy + n*partial
        # Convergence is judged on the partial from before this update.
        if np.linalg.norm(partial) < tolerance:
            break
        if max_iterations is not None and i >= max_iterations:
            warnings.warn(
                "gradient_descent stopped after %d iterations without reaching "
                "the requested tolerance" % i,
                RuntimeWarning,
            )
            break
        i = i+1
        if i % 50000 == 0:
            print("loop:", i, ' | n:', n, ' | weights:', weights_copy)
    return weights_copy

In [87]:
def backtracking_line_search(n, i, y, features, weights, params):
    """Shrink the step size until ``is_step_valid`` accepts it.

    Parameters
    ----------
    n : starting step size (``gradient_descent`` passes the step used on the
        previous iteration, so steps never grow back).
    i : iteration counter; unused, kept for the step-function signature.
    y, features, weights : forwarded to ``gd.get_RSS_partial`` and
        ``is_step_valid``.
    params : dict of options (``None`` or empty for defaults):
        ``'b'`` -- shrink factor, strictly between 0 and 1 (default 0.5);
        ``'min_step'`` -- smallest step worth trying (default 1e-12).

    Returns
    -------
    The first step in ``n, b*n, b**2*n, ...`` accepted by ``is_step_valid``.

    Raises
    ------
    ValueError
        If the shrink factor is not strictly between 0 and 1.
    RuntimeError
        If the step falls below ``min_step`` without being accepted. Without
        this guard the step decays towards zero and the surrounding descent
        loop never makes progress.
    """
    options = params or {}
    # Read the shrink factor from params rather than a notebook-level global,
    # so the function does not depend on which scratch cells were run.
    shrink = options.get('b', 0.5)
    min_step = options.get('min_step', 1e-12)
    if not 0 < shrink < 1:
        raise ValueError("shrink factor 'b' must be strictly between 0 and 1")

    partial = gd.get_RSS_partial(y, features, weights)
    while not is_step_valid(y, features, weights, partial, n):
        n = shrink * n
        if n < min_step:
            raise RuntimeError(
                "backtracking line search found no acceptable step above "
                "min_step=%g" % min_step
            )
    return n

In [None]:
# Fit using backtracking line search: initial step 1, stop once the norm of
# the partial falls below 0.01.
# NOTE: this cell has no execution count in the saved notebook, i.e. the
# recorded run never completed.
model = gradient_descent(
    y,
    features,
    weights,
    step_size_initial=1,
    tolerance=0.01,
    step_function=backtracking_line_search,
)
model

OK, well, backtracking line search has failed us...
Let's try a search with a step schedule instead.

In [102]:
def step_schedule(n, i, y, features, weights, params):
    """Decaying step size: ``params['a']`` divided by the square root of ``i``.

    Only ``i`` (the 1-based iteration number) and ``params['a']`` are used;
    the remaining arguments exist to match the step-function signature
    expected by ``gradient_descent``.
    """
    scale = params['a']
    decay = math.sqrt(i)
    return scale / decay

In [90]:
def fixed(n, i, y, features, weights, params):
    """Constant step size: hand back the incoming step ``n`` unchanged.

    All other arguments are ignored; they exist to match the step-function
    signature expected by ``gradient_descent``.
    """
    constant_step = n
    return constant_step

In [92]:
# Fit with a constant step of 0.0009, stopping once the norm of the partial
# falls below 0.001.
model = gradient_descent(
    y,
    features,
    weights,
    step_size_initial=0.0009,
    tolerance=0.001,
    step_function=fixed,
)
model

loop: 50000  | n: 0.0009  | weights: [ 1.01434572  1.98694881  1.00247153]


array([ 1.01034982,  1.99058413,  1.0017831 ])

In [110]:
# Fit with the 0.03 / sqrt(i) step schedule, stopping once the norm of the
# partial falls below 0.01. The initial step of 1 is ignored by step_schedule.
model = gradient_descent(
    y,
    features,
    weights,
    step_size_initial=1,
    tolerance=0.01,
    step_function=step_schedule,
    params={'a': .03},
)
model

loop: 50000  | n: 0.00013416542031089882  | weights: [ 1.13503851  1.87714718  1.0232649 ]


array([ 1.10350447,  1.90583563,  1.01783211])