In [1]:
import autograd.numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

np.random.seed(0)
n_samples = 50
X = np.linspace(1, 50, n_samples)
Y = 10*X + 6 + 2*np.random.randn(n_samples)
print(Y.shape,X.shape)

plt.plot(X, Y, 'k.')
plt.xlabel("X")
plt.ylabel("Y");


ModuleNotFoundError: No module named 'autograd'

In [3]:
def cost(param_list):
    w, b = param_list
    pred = w*X+b
    return np.sqrt(((pred - Y) ** 2).mean(axis=None))/(2*len(Y))


In [9]:
from copy import deepcopy
import math
from autograd import grad


# Cost of w=0, b=0
w, b = 0., 0.
print("Cost at w={}, b={} is: {}".format(w, b, cost([w, b])))

# Cost of w=10, b=4. Should be lower than w=0, b=0
w, b = 10., 4.
print("Cost at w={}, b={} is: {}".format(w, b, cost([w, b])))

# Computing the gradient at w=0, b=0

grad_cost =grad(cost)
w, b = 0., 0.
print("Gradient at w={}, b={} is: {}".format(w, b, grad_cost([w, b])))

# Computing the gradient at w=10, b=4. We would expect it to be smaller than at 0, 0
w, b = 10., 4.
print("Gradient at w={}, b={} is: {}".format(w, b, grad_cost([w, b])))



def adagrad_gd(param_init, cost, niter=5, lr=1e-2, eps=1e-8, random_seed=0):
    """
    param_init: List of initial values of parameters
    cost: cost function
    niter: Number of iterations to run
    lr: Learning rate
    eps: Fudge factor, to avoid division by zero
    """
    # Function to compute the gradient of the cost function
    grad_cost = grad(cost)
    params = deepcopy(param_init)
    param_array, grad_array, lr_array, cost_array = [params], [], [[lr for _ in params]], [cost(params)]
    # Initialising sum of squares of gradients for each param as 0
    sum_squares_gradients = [np.zeros_like(param) for param in params]
    for i in range(4):
        out_params = []
        gradients = grad_cost(params)
        print("gradients:",gradients," type:",type(gradients[0]),type(gradients[1]),gradients[0].shape,gradients[1].shape)
        # At each iteration, we add the square of the gradients to `sum_squares_gradients`
        sum_squares_gradients= [eps + sum_prev + np.square(g) for sum_prev, g in zip(sum_squares_gradients, gradients)]
        print("sum_squares_gradients:",sum_squares_gradients)
        # Adapted learning rate for parameter list
        lrs = [np.divide(lr, np.sqrt(sg)) for sg in sum_squares_gradients]
        print("lrs:",lrs)
        # Paramter update
        params = [param-(adapted_lr*grad_param) for param, adapted_lr, grad_param in zip(params, lrs, gradients)]
        print("params:",params)
        param_array.append(params)
        lr_array.append(lrs)
        grad_array.append(gradients)
        cost_array.append(cost(params))
        
    return params, param_array, grad_array, lr_array, cost_array

Cost at w=0.0, b=0.0 is: 2.9809044649485408
Cost at w=10.0, b=4.0 is: 0.03204794719387623
Gradient at w=0.0, b=0.0 is: [array(-0.29297047), array(-0.00876516)]
Gradient at w=10.0, b=4.0 is: [array(-0.14406455), array(-0.00711783)]


In [11]:
# Fixing the random seed for reproducible init params for `W` and `b`
first_param = np.random.randn()
second_param = np.random.randn()
print("first_param:",first_param, " second_param:",second_param)
param_init = [first_param, second_param]
lr = 0.01
eps=1e-8
niter=1000
ada_params, ada_param_array, ada_grad_array, ada_lr_array, ada_cost_array = adagrad_gd(param_init, cost, niter=niter, lr=lr, eps=eps)

first_param: 0.06651722238316789  second_param: 0.3024718977397814
gradients: [array(-0.29297186), array(-0.0087632)]  type: <class 'numpy.ndarray'> <class 'numpy.ndarray'> () ()
sum_squares_gradients: [0.08583252313062349, 7.680360467210906e-05]
lrs: [0.03413296853458938, 1.1410618834373025]
params: [0.0765172218006381, 0.31247124670747833]
gradients: [array(-0.29297186), array(-0.00876318)]  type: <class 'numpy.ndarray'> <class 'numpy.ndarray'> () ()
sum_squares_gradients: [0.1716650460212332, 0.0001536068633469537]
lrs: [0.024135653529707857, 0.8068535042434689]
params: [0.08358828919564959, 0.319541846204688]
gradients: [array(-0.29297186), array(-0.00876316)]  type: <class 'numpy.ndarray'> <class 'numpy.ndarray'> () ()
sum_squares_gradients: [0.25749756873472707, 0.00023040987693335538]
lrs: [0.019706678596832297, 0.6587937252260828]
params: [0.08936179154456016, 0.3253149625444586]
gradients: [array(-0.29297186), array(-0.00876315)]  type: <class 'numpy.ndarray'> <class 'numpy.nd

In [None]:
pd.Series(ada_cost_array, name='Cost').plot(title='Adagrad: Cost v/s # Iterations')
plt.ylabel("Cost")
plt.xlabel("# Iterations");