## Gradient Descent

![image](1.JPG)

### Parameters

In [1]:
cur_x = 3 # The algorithm starts at x=3
rate = 0.01 # Learning rate
precision = 0.000001 #This tells us when to stop the algorithm
previous_step_size = 1 #
max_iters = 10000 # maximum number of iterations
iters = 0 #iteration counter
df = lambda x: 2*(x+5) #Gradient of our function (x+5)**2 

### Loop for calculation

In [35]:
while previous_step_size > precision and iters < max_iters:
    prev_x = cur_x #Store current x value in prev_x
    cur_x = cur_x - rate * df(prev_x) #Grad descent
    previous_step_size = abs(cur_x - prev_x) #Change in x
    iters = iters+1 #iteration count
    print("Iteration",iters,"\nX value is",cur_x) #Print iterations
print("The local minimum occurs at", cur_x)

The local minimum occurs at -4.9999518490318176


![image](2.JPG)

## Stochastic Gradient Descent

In [54]:
import numpy as np

def stochastic_gradient_descent(model, eta, batch_size=1, max_iterations=1e4,
                                epsilon=1e-5, beta_start=None, seed=None):
    """
    Stochastic gradient with linearly decaying learning rate
    Parameters
    ----------
    model: optimization model object
    eta: learning rate
    batch_size: mini-batch size
    max_iterations: maximum number of gradient iterations
    epsilon: tolerance for stopping condition
    beta_start: where to start (otherwise random)
    Output
    ------
    solution: final beta value
    beta_history: beta values from each iteration
    """
    if seed:
        np.random.seed(seed)

    # data from model
    n = model.n  # number of data points
    d = model.d  # number of varaibles

    # gradient of single likelihood
    grad_f = model.grad_f
    F = model.F

    # initialization
    if beta_start:
        beta_current = beta_start
    else:
        beta_current = np.random.normal(loc=0, scale=1, size=d)

    # history
    beta_history = []

    for k in range(int(max_iterations)):

        beta_history.append(beta_current)

        # compute gradient estimate
        index = np.random.choice(n, batch_size)
        batch_grad = np.mean([grad_f(beta_current, i) for i in index], axis=0)

        # gradient update
        beta_next = beta_current - eta/(k + 1.0)*batch_grad

        # relative error stoping condition
        if np.linalg.norm(beta_next - beta_current) <= epsilon*np.linalg.norm(beta_current):
            break

        beta_current = beta_next

    print ('SGD finished after ' + str(k) + ' iterations')

    return {'solution': beta_current,
            'beta_history': beta_history}

![image](3.jpg)

## Model

In [55]:
import numpy as np
from sklearn import linear_model  # ridge regression
class LeastSquares(object):
    """
    Least squares
    min_beta ||X beta - y||_2^2
    X in R^(n x d)
    y in R^n
    beta in R^d
    """
    def __init__(self, X, y):
    
        self.name = "least squares"

        # data
        self.X=X
        self.y=y

        self.d = X.shape[1]
        self.n = X.shape[0]

        # lipshitz constant
        self.L_F = np.linalg.norm(X)**2
        self.mu_F = 0

    def F(self, beta):
        return .5*sum((np.dot(self.X, beta) - self.y)**2)

    def grad_F(self, beta):
        return np.dot(self.X.T, np.dot(self.X, beta) - self.y)

    def f(self, beta, i):
        return self.n*.5*(np.dot(self.X[i, :], beta) - self.y[i])**2

    def grad_f(self, beta, i):
        return self.n*(np.dot(self.X[i, :], beta) - self.y[i]) * self.X[i, :]

    def get_solution(self):
        """returns the analytic solution to the LS problem"""
        return np.dot(np.dot(np.linalg.inv(np.dot(self.X.T, self.X)),
                      self.X.T), self.y)

![image](4.jpg)

### Make a Model

In [56]:
import numpy as np 
x = [[1,2,3,3.5],[3,4,5,3],[2.3,4,3,3]]
y=[3,4,5,3]
a = np.asarray(x)
b= np.asarray(y)
m=LeastSquares(a,b)

In [45]:
stochastic_gradient_descent(m, 0.5, batch_size=1, max_iterations=1e4,
                                epsilon=1e-5, beta_start=None, seed=None)

SGD finished after 88 iterations


{'solution': array([ 7.23094278e+13, -3.75995284e+13,  1.33505167e+14, -1.38814378e+14]),
 'beta_history': [array([ 0.91283473,  1.13587253, -0.81249375,  0.8848713 ]),
  array([-0.35338739, -1.39657171, -4.61116012, -3.54690613]),
  array([23.9422496 , 47.19470227, 68.27575085, 81.48782333]),
  array([-1239.72942359, -1637.70086199, -2037.84370447, -1182.18384986]),
  array([ 4298.76022721,  9439.27843961, 14577.62524793, 18202.52992794]),
  array([ -96427.25196075, -165736.39493074, -116804.12977984,
         -113179.22509983]),
  array([197183.42779204, 421484.96457482, 764027.90947851, 914458.15403491]),
  array([-5486316.66939042, -7156515.16500179, -8708472.25249225,
         -4769041.94314755]),
  array([52414340.57065588, 70044361.15505995, 87792623.14758493,
         53131615.29689876]),
  array([-2.63261862e+08, -4.78957731e+08, -3.23958946e+08, -3.58619954e+08]),
  array([2.53971738e+08, 5.55509470e+08, 1.22774186e+09, 1.45169765e+09]),
  array([-1.12727708e+09, -2.20698817e

![image](5.jpg)

In [69]:
import numpy as np 
x = [[1,0,0],[0,1,0],[0,0,1]]
y=[3,1,7]
a = np.asarray(x)
b= np.asarray(y)
m=LeastSquares(a,b)

In [70]:
stochastic_gradient_descent(m, 0.5, batch_size=1, max_iterations=1e4,
                                epsilon=1e-5, beta_start=None, seed=None)

SGD finished after 1075 iterations


{'solution': array([2.9447369 , 0.9195365 , 7.07433088]),
 'beta_history': [array([ 0.29644281, -1.09315679, -0.29511579]),
  array([ 0.29644281, -1.09315679, 10.6475579 ]),
  array([ 2.3241107 , -1.09315679, 10.6475579 ]),
  array([ 2.3241107 , -1.09315679,  8.82377895]),
  array([ 2.3241107 , -1.09315679,  8.13986184]),
  array([ 2.3241107 , -0.46520975,  8.13986184]),
  array([ 2.3241107 , -0.09890731,  8.13986184]),
  array([2.3241107 , 0.13657283, 8.13986184]),
  array([2.3241107 , 0.13657283, 7.92613775]),
  array([2.3241107 , 0.13657283, 7.77178146]),
  array([2.3241107 , 0.13657283, 7.65601424]),
  array([2.3241107 , 0.13657283, 7.56655775]),
  array([2.3241107 , 0.24450122, 7.56655775]),
  array([2.40209793, 0.24450122, 7.56655775]),
  array([2.40209793, 0.32544752, 7.56655775]),
  array([2.40209793, 0.39290277, 7.56655775]),
  array([2.40209793, 0.39290277, 7.51344296]),
  array([2.40209793, 0.39290277, 7.46813917]),
  array([2.4519231 , 0.39290277, 7.46813917]),
  array([2.4

![image](6.jpg)