# Chapter 8 - Stochastic Gradient Descent

In [1]:
import sys
sys.path.append("../")
from utils import *

## Gradient Descent Algorithm and Animation Implementation

In [2]:
def VanillaGradientDescent(objective, gradient, init=np.random.uniform(-1, 1, 2), eta=.1, delta=1e-5):
    steps, gradients, deltas = [init], [], [delta]

    while deltas[-1] >= delta:
        gradients.append(gradient(steps[-1]))
        steps.append(steps[-1] - eta * gradients[-1])
        deltas.append(np.sum((steps[-1] - steps[-2])**2)**.5)
        
    return np.array(steps), np.array(gradients), np.array(deltas[1:])


def Animate_GradientDescent(f, f_grad, init, eta, delta, axis_range):
    def as_array(x):
        return np.array([x]) if np.isscalar(x) else x

    def function_surface(fun, vals):
        xx, yy = np.meshgrid(vals, vals)
        z = fun(np.c_[xx.ravel(), yy.ravel()]).reshape(len(vals), len(vals))
        return go.Surface(x = vals, y=vals, z=z, opacity=.4, colorscale="Viridis")

    steps, gradients, deltas = VanillaGradientDescent(f, f_grad, init, eta, delta)
    surface = function_surface(f, axis_range)

    frames, markers = [], []
    for i in range(1, len(steps)+1):
        z = as_array(f(steps[:i]))       
        frames.append(go.Frame(
            data=[
               go.Scatter3d(x=steps[:i-1,0], y=steps[:i-1,1], z=z[:i-1],
                            marker=dict(size=3, color="black"), showlegend=False),
               go.Scatter3d(x=[steps[i-1,0]], y=[steps[i-1,1]], z=[z[i-1]], 
                            marker=dict(size=5, color="orange"), showlegend=False), 
                surface],
            traces=[0,1],
            layout=go.Layout(title=rf"$\text{{Iteration }} {i}/{steps.shape[0]}$" )))


    return go.Figure(data=frames[0]["data"], frames=frames,
                     layout = go.Layout(height=600, title=frames[0]["layout"]["title"],
                                        updatemenus=[dict(type="buttons",
                                                          buttons=[AnimationButtons.play(100,0), 
                                                                   AnimationButtons.pause()])]))

## Gradient Descent Over Gaussian Function

In [1]:
from numpy.linalg import solve, det

def negative_gaussian(mu=np.zeros(2), cov=np.eye(2)):
    from scipy.stats import multivariate_normal
    
    def _evaluate(x: np.ndarray):
        return - multivariate_normal(mu, cov).pdf(x)

    def _gradient(x: np.ndarray):
        z = solve(cov,x-mu)
        return np.exp(-z @ (x-mu) /2) * z / (2*np.sqrt((2*np.pi)**mu.shape[0] * det(cov)))
    
    return _evaluate, _gradient


Animate_GradientDescent(*negative_gaussian(cov=[5,10]*np.eye(2)),
                        init=np.array([-5,-5]), 
                        eta=250, 
                        delta=1e-2, 
                        axis_range=np.linspace(-5, 5, 50))


NameError: name 'np' is not defined

## Gradient Descent For RSS

In [4]:
from scipy.stats import ortho_group

def residual_sum_of_squares(X: np.ndarray, y: np.ndarray):
    def _evaluate(w: np.ndarray):
        return np.sum( (X @ w - y)**2, axis=0)
    
    def _gradient(w: np.ndarray):
        return 2 * X.T @ (X @ w -y)
    
    return _evaluate, _gradient


# Create a random subspace and sample a design matrix and response vector from it
m, d = 20, 2
V, a = ortho_group.rvs(dim=d), np.random.uniform(size=(m, d))

dataset = np.sum(a[:,None]*V, axis=2)

go.Figure(go.Scatter(x=dataset[:,0], y=dataset[:,1], mode="markers")).show()

# Show both the descent but also the linear fit!!!!!!!!!!

In [5]:
def non_convex_function():
    def _evaluate(x: np.ndarray):
        print("Eval", x.shape)
        z = 4 *x[:,0] * x[:, 1] / np.exp(np.sum(x*x, axis=1))
        print("Eval", z.shape)
        return np.array([[z]]) if np.isscalar(z) else z
    
    def _gradient(x: np.ndarray):
        print("Grad", x.shape)
        z, e = x*x, np.exp(np.sum(x*x))
        a= np.c_[(4 - 8 * z[:,0]) * z[:,1] / e, (4 - 8 * z[:,1]) * z[:,0] / e]
        return a
    
    return _evaluate, _gradient

Animate_GradientDescent(*non_convex_function(),
                        init=np.array([[-5,-5]]), 
                        eta=250, 
                        delta=.5*1e-4, 
                        axis_range=np.linspace(-5, 5, 50))


Grad (1, 2)
Eval (2500, 2)
Eval (2500,)
Eval (1, 1, 2)


IndexError: index 1 is out of bounds for axis 1 with size 1