# Gradient Descent and Family
Gradient Descent is an optimization algorithm to get the minimum error for a given function, It has several variants, some are as follows:

In [1]:
# This is the function, we have to find minima for
def fn(x):
    return (x - 5)**4 - 2 * (x**2) + 21 * x

# This is its derivate, will give us the slope at a given point
def fn_deriv(x):
    return 4 * (x - 5)**3 - 4 * (x - 5) + 1

### As you can see in this image, there is a local maxima at 5.27, local minima at 5.838 and global minima at 3.893

![oops](http://i.imgur.com/2JuheEF.png)


## Lets create classes for some Gradient Descent implementations/variants:

In [2]:
class StochasticGradientDescent:
    def __init__(self, func, func_deriv, learning_rate = 0.01):
        self.fn = func
        self.fn_deriv = func_deriv
        self.learning_rate = learning_rate
    
    def minimize(self, time_steps, x):
        for n in range(time_steps):
            x = x - self.learning_rate * fn_deriv(x)
        return x.real
    
    def maximize(self, time_steps, x):
        for n in range(time_steps):
            x = x + self.learning_rate * fn_deriv(x)
        return x.real

In [3]:
SGD = StochasticGradientDescent(fn, fn_deriv)
print(SGD.minimize(1000, 5.))
print(SGD.minimize(1000, 6.5))
print(SGD.maximize(1000, 4))

3.892840128311234
5.837565435283333
5.269594436405425


In [4]:
class MomentumGradientDescent:
    def __init__(self, func, func_deriv, alpha = 0.01, learning_rate = 0.01):
        self.fn = func
        self.fn_deriv = func_deriv
        self.learning_rate = learning_rate
        self.alpha = alpha
    
    def minimize(self, time_steps, x):
        delta = 0
        for n in range(time_steps):
            delta = - self.learning_rate * fn_deriv(x) + delta * self.alpha
            x = x + delta
        return x.real
    
    def maximize(self, time_steps, x):
        delta = 0
        for n in range(time_steps):
            delta = + self.learning_rate * fn_deriv(x) + delta * self.alpha
            x = x + delta
        return x.real

In [5]:
MGD = MomentumGradientDescent(fn, fn_deriv)
print(MGD.minimize(1000, 5.))
print(MGD.minimize(1000, 6.5))
print(MGD.maximize(1000, 4))

3.892840128311234
5.837565435283333
5.269594436405431


In [6]:
# But what if I drop from a little higher? :P
print(MGD.minimize(1000, 10.5))

# It will jump that little bump

3.892840128311234


In [7]:
class AdamGD:
    def __init__(self, func, func_deriv, beta1, beta2, learning_rate, epsilon):
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.momentum = 0.
        self.velocity = 0.
        self.fn = func
        self.fn_deriv = func_deriv
        self.learning_rate = learning_rate
    
    def minimize(self, time_steps, x):
        for n in range(time_steps):
            self.momentum = self.beta1 * self.momentum + (1 - self.beta1) * self.fn_deriv(x)
            self.velocity = self.beta2 * self.velocity + (1 - self.beta2) * self.fn_deriv(x)**2
            moment = self.momentum / (1 - self.beta1)
            vel_moment = self.momentum / (1 - self.beta2)
            
            x = x - self.learning_rate * (moment / (vel_moment ** (.5) + self.epsilon ))
        return x.real
    
    def maximize(self, time_steps, x):
        for n in range(time_steps):
            self.momentum = self.beta1 * self.momentum + (1 - self.beta1) * self.fn_deriv(x)
            self.velocity = self.beta2 * self.velocity + (1 - self.beta2) * self.fn_deriv(x)**2
            moment = self.momentum / (1 - self.beta1)
            vel_moment = self.momentum / (1 - self.beta2)
            
            x = x + self.learning_rate * (moment / (vel_moment ** (.5) + self.epsilon ))
        return x.real

In [8]:
Adam = AdamGD(fn, fn_deriv, beta1 = 0.001, beta2=0.001, learning_rate = 0.01, epsilon=0.0001)
print(Adam.minimize(1000, 5))
print(Adam.minimize(1000, 6.5))
print(Adam.maximize(1000, 4))

3.551452976387318
5.755233999413476
5.331964771693469


In [9]:
# But what if.... :P
print(Adam.minimize(1000, 10.5))

5.758129799520977


## So Adam fixed the issue!