In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from matplotlib import cm

# Global plot styling: enlarge the fonts first, then switch to a white
# background whose grid lines are dashed.
sns.set(font_scale=1.5)
sns.set_style(style="whitegrid", rc={"grid.linestyle": "--"})

In [None]:
# Read the auto-mpg table from the previous lecture's folder and preview
# its first five rows as the cell output.
auto = pd.read_csv(filepath_or_buffer="../lecture_3/auto_mpg.csv")
auto.head(n=5)

In [None]:
def auto_loss(
    X: np.ndarray,
    y: np.ndarray,
    betas: np.ndarray,
) -> float:
    """Return the sum of squared residuals of a linear model.

    Args:
        X: Design matrix; it is matrix-multiplied with ``betas`` to form the
            predictions.
        y: Observed targets the predictions are compared against.
        betas: Model coefficients. Anything that is not already an ndarray
            (e.g. a plain list) is converted to one first.

    Returns:
        The sum over all observations of ``(y - X @ betas) ** 2``.
    """
    coefficients = betas if isinstance(betas, np.ndarray) else np.array(betas)

    predictions = X @ coefficients
    residuals = y - predictions
    return np.square(residuals).sum()

In [None]:
# Design matrix: a column of ones (for the intercept) next to the weights.
# The two 1-D rows are stacked and then transposed into columns.
n_obs = len(auto)
X_auto = np.vstack((np.ones(shape=n_obs), auto["weight"].values)).T
# Response vector: miles per gallon.
y_auto = auto["mpg"].values

# test run
betas = [70, -0.01]
auto_loss(betas=betas, y=y_auto, X=X_auto)

In [None]:
# fix beta_1 at -0.01 and sweep the intercept (beta_0) over a grid
losses = []
beta_0s = np.linspace(start=35, stop=75, num=100)
for b in beta_0s:
    betas = [b, -0.01]
    loss = auto_loss(X=X_auto, y=y_auto, betas=betas)
    losses.append(loss)

# plot the loss function on its own figure: pyplot-level calls draw on
# whatever figure is currently active, which with an interactive backend can
# be a figure that belongs to a different cell (or to a previous run of this
# one), so the axes are created and targeted explicitly.
fig, ax = plt.subplots()
sns.lineplot(x=beta_0s, y=losses, ax=ax)
ax.set_xlabel("intercept")
ax.set_ylabel("Loss")
fig.tight_layout()

In [None]:
def auto_loss_gradient(
    X: np.ndarray,
    y: np.ndarray,
    betas: np.ndarray
) -> np.ndarray:
    """Calculate the gradient of the sum-of-squares loss of a linear regression.

    For the loss ``sum((y - X @ betas) ** 2)`` the gradient with respect to
    ``betas`` is ``-2 * X.T @ (y - X @ betas)``. Computing it in this matrix
    form works for any number of coefficients and does not rely on the first
    column of ``X`` being all ones; for a two-column ``X`` whose first column
    is ones it yields the same two components as summing the residuals and
    the weight-scaled residuals separately.

    Args:
        X: Design matrix with one row per observation and one column per
            coefficient.
        y: Observed targets, one per row of ``X``.
        betas: Coefficients at which the gradient is evaluated; array-likes
            such as lists are accepted.

    Returns:
        A 1-D array with one partial derivative per coefficient.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    betas = np.asarray(betas)

    residuals = y - X @ betas
    # d/d(beta_k) of sum(residuals ** 2) is -2 * sum(residuals * X[:, k]).
    return -2 * (X.T @ residuals)

In [None]:
# gradient descent with just the intercept
lr = 0.0001  # learning rate
slope = -0.01  # the fixed slope, i.e., beta_1
threshold = 1e-1  # stop once the loss changes by no more than this per step
max_iter = 100000  # safety cap so a poorly chosen learning rate cannot loop forever

# initial guesses
b0_current = 70
loss_current = auto_loss(X=X_auto, y=y_auto, betas=[b0_current, slope])
difference = float("inf")

# start iterative update
# beta_0s_iter ends up one element longer than losses_iter: it also holds the
# final intercept, whose loss is never appended.
beta_0s_iter = [b0_current]
losses_iter = []
while abs(difference) > threshold and len(losses_iter) < max_iter:
    losses_iter.append(loss_current)

    # only the first gradient component (the intercept's) is used
    grad_b0 = auto_loss_gradient(X=X_auto,
                                 y=y_auto,
                                 betas=[b0_current, slope])[0]
    b0_current = b0_current - lr * grad_b0
    beta_0s_iter.append(b0_current)

    loss_next = auto_loss(X=X_auto, y=y_auto, betas=[b0_current, slope])
    difference = loss_next - loss_current
    # reuse the freshly computed loss instead of re-evaluating it next pass
    loss_current = loss_next

# Draw on a new figure: pyplot-level calls would otherwise land on whatever
# figure is currently active, which with an interactive backend can be the
# figure of an earlier cell.
fig, ax = plt.subplots()
# plot the loss function
sns.lineplot(x=beta_0s, y=losses, ax=ax)
# plot the iterative updates (drop the last intercept, it has no stored loss)
sns.scatterplot(x=beta_0s_iter[:-1], y=losses_iter, color="red", alpha=0.3, ax=ax)
ax.set_xlabel("intercept")
ax.set_ylabel("Loss")
fig.tight_layout()

In [None]:
# loss surface over both the intercept (beta_0) and the slope (beta_1)
beta_1s = np.linspace(start=-0.01, stop=-0.006, num=100)
X, Y = np.meshgrid(beta_0s, beta_1s)  # X holds intercepts, Y holds slopes
losses_2d = np.zeros(shape=X.shape)  # initialize the losses
for idx in np.ndindex(*X.shape):
    losses_2d[idx] = auto_loss(X=X_auto, y=y_auto, betas=[X[idx], Y[idx]])

# make the 3d plot
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
# plot the actual minimum: solve the least-squares problem on the data rather
# than hard-coding rounded coefficients, which can sit visibly off the true
# minimum on a slope grid this fine.
betas_min, *_ = np.linalg.lstsq(X_auto, y_auto, rcond=None)
ax.scatter(
    xs=[betas_min[0]],
    ys=[betas_min[1]],
    zs=[auto_loss(X=X_auto, y=y_auto, betas=betas_min)],
    s=100,
    color="black",
)
# plot the loss function
surf = ax.plot_surface(X, Y, losses_2d, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False, alpha=0.2)
ax.set_xlabel("intercept")
ax.set_ylabel("slope")
ax.set_zlabel("Loss")

fig.tight_layout()

In [None]:
def auto_loss_gradient(
    X: np.ndarray,
    y: np.ndarray,
    betas: np.ndarray
) -> np.ndarray:
    """Calculate the gradient of the sum-of-squares loss of a linear regression.

    For the loss ``sum((y - X @ betas) ** 2)`` the gradient with respect to
    ``betas`` is ``-2 * X.T @ (y - X @ betas)``. Computing it in this matrix
    form works for any number of coefficients and does not rely on the first
    column of ``X`` being all ones; for a two-column ``X`` whose first column
    is ones it yields the same two components as summing the residuals and
    the weight-scaled residuals separately.

    Args:
        X: Design matrix with one row per observation and one column per
            coefficient.
        y: Observed targets, one per row of ``X``.
        betas: Coefficients at which the gradient is evaluated; array-likes
            such as lists are accepted.

    Returns:
        A 1-D array with one partial derivative per coefficient.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    betas = np.asarray(betas)

    residuals = y - X @ betas
    # d/d(beta_k) of sum(residuals ** 2) is -2 * sum(residuals * X[:, k]).
    return -2 * (X.T @ residuals)

# gradient descent on the full coefficient vector [beta_0, beta_1], but with
# the slope reset to a fixed value after every step, so only the intercept
# actually moves.
# NOTE(review): presumably the slope is pinned because the unscaled weight
# column makes its gradient far larger than the intercept's, so one shared
# learning rate would not suit both -- confirm, and consider rescaling the
# feature before freeing the slope.
lr = 1e-5  # learning rate
fixed_slope = -0.007  # value the slope is held at throughout
threshold = 1e-1  # stop once the loss changes by no more than this per step
max_iter = 100000  # safety cap so a poorly chosen learning rate cannot loop forever
report_every = 100  # print progress only every this many steps

# initial guesses
b_current = np.array([70, fixed_slope])
loss_current = auto_loss(X=X_auto, y=y_auto, betas=b_current)
difference = float("inf")

# start iterative update
# betas_iter ends up one element longer than losses_iter: it also holds the
# final coefficients, whose loss is never appended.
betas_iter = [b_current]
losses_iter = []
while abs(difference) > threshold and len(losses_iter) < max_iter:
    if len(losses_iter) % report_every == 0:
        print(b_current, loss_current)
    losses_iter.append(loss_current)

    b_next = b_current - lr * auto_loss_gradient(X=X_auto,
                                                 y=y_auto,
                                                 betas=b_current)
    b_next[1] = fixed_slope  # discard the slope update; see the note above
    loss_next = auto_loss(X=X_auto, y=y_auto, betas=b_next)
    b_current = b_next
    betas_iter.append(b_current)

    difference = loss_next - loss_current
    # reuse the freshly computed loss instead of re-evaluating it next pass
    loss_current = loss_next