In [1]:
from autograd import grad
import autograd.numpy as np

# Code from last lecture

In [5]:
x_lin = np.array([1., 2., 2., 3.])
y_lin = np.array([1., 1., 2., 2.])


x_quad = np.array([1., 2., 1.5, 3.])
y_quad = np.array([1., 1., 0., 2.])

def feature_map_linear(x):
    # phi_i = x, 1
    # so append a column of 1s, and we'll have 2 weights which will be m and b
    return np.vstack([x, np.ones(len(x))]).T

def feature_map_quadratic(x):
    # phi_i = x**2, x, 1
    # 3 weights -- a, b, c
    return np.vstack([x**2, x, np.ones(len(x))]).T

def gradient_descent(X, y, learning_rate=0.1, iters=1000):
    # X is our feaure map, y is our target labels
    m, n = X.shape
    # n weights
    w = np.zeros(n)

    for _ in range(iters):
        y_pred = X.dot(w)
        # find gradient
        gradient = X.T.dot(y_pred - y) / m
        # update weights
        w -= learning_rate * gradient
    return w


# Closed-form solution

In [3]:
def closed_form_solution(X, y):
    
    phi = X
    inverse_term = np.linalg.inv(phi.T.dot(phi))

    w = inverse_term.dot(phi.T).dot(y)

    return w
    # Here's a more numerically stable way of doing it
    # this will work on more general cases where the normal equation fails invertibility
    # phi_pseudo_inverse = np.linalg.pinv(phi)
    # w = phi_pseudo_inverse.dot(y)

In [13]:
X_quad = feature_map_quadratic(x_quad)
w_quad_gd = gradient_descent(X_quad, y_quad, learning_rate=0.03, iters=100000)
w_quad_cf = closed_form_solution(X_quad, y_quad)

print(f"gradient descent weights: {w_quad_gd}")
print(f"closed-form solution weights: {w_quad_cf}")

gradient descent weights: [ 0.81818181 -2.64545452  2.63636361]
closed-form solution weights: [ 0.81818182 -2.64545455  2.63636364]


# Multi-variate Regression
- the main point here is that our feature map formulation will be different, but our GD and CF algorithms will not change

In [2]:
X_stock = np.genfromtxt("./Data/stock_prediction_data.csv", delimiter=",")
y_stock = np.genfromtxt("./Data/stock_price.csv", delimiter=",")

print(X_stock.shape)
print(y_stock.shape)

(300, 10)
(300,)


In [15]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(2)
phi_stock = poly.fit_transform(X_stock)

In [29]:
# Gradient descent is the same!
w_quad_gd = gradient_descent(phi_stock, y_stock, learning_rate=0.01, iters=10000)
w_quad_gd

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan])

## We got a bunch of nans, this is normal when we do things ourselves
- usually means we divided by 0, exploded our gradients into infinity, or just generally did math thats too large to handle
- easiest "solution" is to throw different standardization and boundaries until we find the problem and neutralize it

In [26]:
def zmuv_norm(X):
    # zero mean, unit variance standardization
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    std[std == 0] = 1
    
    return (X - mean) / std

def clip_gradients(gradients, threshold=1.0):
    # Clip gradients to help with the exploding gradient problem.
    
    gradient_norm = np.linalg.norm(gradients)
    if gradient_norm > threshold:
        gradients = gradients * threshold / gradient_norm
    return gradients

def more_stable_gradient_descent(X, y, learning_rate=0.01, iters=1000, gradient_clipping_threshold=1.0):
    X = zmuv_norm(X)
    
    m, n = X.shape
    w = np.zeros(n)

    for _ in range(iters):
        y_pred = X.dot(w)
        gradients = X.T.dot(y_pred - y) / m

        # Clip gradients to prevent large gradients
        gradients = clip_gradients(gradients, threshold=gradient_clipping_threshold)

        w -= learning_rate * gradients

    return w


In [27]:
w_quad_gd = more_stable_gradient_descent(phi_stock, y_stock, learning_rate=0.01, iters=10000)


In [28]:
w_quad_gd

array([ 0.        , -0.33130815,  3.10814204, -0.41436695, -0.14860673,
        0.6767484 ,  1.96620149,  3.00633895,  1.6425693 ,  0.32395045,
        0.96801387,  0.07091873,  0.06556882,  0.05192631, -0.00715662,
        0.0762644 , -0.00840449,  0.05147481,  0.12233287,  0.08920206,
        0.00794382,  0.25956858,  0.11256542,  0.13383122,  0.08147415,
        0.11328582,  0.15645011,  0.18643335,  0.12266274, -0.03618363,
       -0.0031777 ,  0.01250698, -0.05627823,  0.10286842,  0.1239688 ,
        0.22816204,  0.05841332,  0.0049547 , -0.05677831, -0.00545771,
        0.09131472,  0.02852551,  0.12119848,  0.02153226, -0.09568235,
        0.00755069,  0.2765269 ,  0.04779805,  0.06517629, -0.01574148,
       -0.04123438,  0.35957758,  0.11676249,  0.14640058,  0.17755373,
        0.0521094 ,  0.35999005,  0.22096603,  0.14679412,  0.1012213 ,
        0.53018099,  0.0704341 ,  0.06523659,  0.18189582,  0.02407394,
       -0.01506365])