# Optimization

Things to try:

- change the number of samples
- with and without bias
- with and without regularization
- changing the number of layers
- changing the amount of noise
- change number of degrees
- look at parameter values (high) in OLS
- train network for many epochs

In [None]:
from fastprogress.fastprogress import progress_bar

import torch

import matplotlib.pyplot as plt
from jupyterthemes import jtplot

# Apply the jupyterthemes plot style (context "talk") to the figures drawn below.
jtplot.style(context="talk")

In [None]:
def plot_regression_data(model=None, MSE=None, poly_deg=0):
    """Plot the training samples, the true function and, optionally, a fit.

    Reads the notebook-level globals ``x_train``, ``y_train``, ``x_true``,
    ``y_true``, ``min_x`` and ``max_x``.

    Args:
        model: Optional callable. It is evaluated (with gradients disabled)
            on ``x_true`` reshaped to a column, or on the polynomial
            expansion of that column when ``poly_deg`` is non-zero.
            ``None`` skips the learned-function curve.
        MSE: Optional error value shown in the title, as a Python number or
            a single-element tensor. ``None`` leaves the plot untitled.
        poly_deg: When non-zero, the model input is expanded to the powers
            ``0..poly_deg`` of ``x_true``.
    """
    # Plot the noisy scatter points and the "true" function
    plt.scatter(x_train, y_train, label="Noisy Samples")
    plt.plot(x_true, y_true, "--", label="True Function")

    # Plot the model's learned regression function.
    # Compare against None explicitly: truthiness of a model object can
    # depend on __len__/__bool__ rather than on whether one was supplied.
    if model is not None:
        x = x_true.unsqueeze(-1)
        if poly_deg:
            x = x.pow(torch.arange(poly_deg + 1))

        with torch.no_grad():
            yhat = model(x)

        plt.plot(x_true, yhat, label="Learned Function")

    plt.xlim([min_x, max_x])
    plt.ylim([-5, 5])
    plt.legend()

    # `is not None` keeps the title when the error is exactly zero.
    if MSE is not None:
        # Show a plain number rather than "tensor(...)" for tensor inputs.
        if isinstance(MSE, torch.Tensor):
            MSE = MSE.item()
        plt.title(f"MSE = ${MSE}$")

# Create Fake Training Data

In [None]:
def fake_y(x, add_noise=False, noise_std=0.5):
    """Evaluate the ground-truth cubic ``10 * x**3 - 5 * x``.

    Args:
        x: Tensor of input locations.
        add_noise: When true, add zero-mean Gaussian noise to the output.
        noise_std: Scale applied to the standard-normal noise. Defaults to
            0.5, the value that used to be hard-coded.

    Returns:
        Tensor with the same shape as ``x``.
    """
    y = 10 * x ** 3 - 5 * x
    if not add_noise:
        return y
    return y + torch.randn_like(y) * noise_std


# Number of noisy training samples and the x-range they are drawn from.
# These names (and the tensors below) are read as globals by
# plot_regression_data and by the training cells.
N = 20
min_x, max_x = -1, 1

# Dense, noise-free grid of 100 points used to draw the true function.
x_true = torch.linspace(min_x, max_x, 100)
y_true = fake_y(x_true)

# Training inputs: torch.rand samples rescaled from [0, 1) to [min_x, max_x),
# paired with noisy targets.
x_train = torch.rand(N) * (max_x - min_x) + min_x
y_train = fake_y(x_train, add_noise=True)

# No model yet: show only the samples and the true function.
plot_regression_data()

# Train A Simple Linear Model Using Batch GD

In [None]:
# Optimization settings
learning_rate = 0.1
num_epochs = 100

# Learnable slope (random start) and intercept (zero start)
m = torch.randn(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

params = (b, m)

# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params, lr=learning_rate)


def line_model(x):
    """Straight-line prediction ``m * x + b`` using the current parameters."""
    return m * x + b


# Batch gradient descent: every epoch uses all of x_train at once
for epoch in range(num_epochs):
    # Clear gradients left over from the previous epoch
    optimizer.zero_grad()

    # Forward pass and loss
    yhat = line_model(x_train)
    loss = criterion(yhat, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# The reported MSE is the loss computed just before the final update
plot_regression_data(line_model, MSE=loss.item())

# Train Linear Regression Model Using Batch GD

In [None]:
# Optimization settings
learning_rate = 0.1
num_epochs = 1000

# Learnable coefficients: quadratic and linear terms start random,
# the intercept starts at zero
w2 = torch.randn(1, requires_grad=True)
w1 = torch.randn(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

params = (b, w1, w2)

# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params, lr=learning_rate)


def quadratic_model(x):
    """Prediction ``b + w1 * x + w2 * x**2`` using the current parameters."""
    return b + w1 * x + w2 * x ** 2


# Batch gradient descent: every epoch uses all of x_train at once
for epoch in range(num_epochs):
    # Clear gradients left over from the previous epoch
    optimizer.zero_grad()

    # Forward pass and loss
    yhat = quadratic_model(x_train)
    loss = criterion(yhat, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# The reported MSE is the loss computed just before the final update
plot_regression_data(quadratic_model, MSE=loss.item())

# Train Complex Linear Regression Model Using Batch GD

In [None]:
# Optimization settings
learning_rate = 0.1
num_epochs = 1000

# Polynomial features: column j of x_poly holds x_train ** j
degrees = 50  # 3, 4, 16, 32, 64, 128
powers = torch.arange(degrees + 1)
x_poly = torch.pow(x_train.unsqueeze(-1), powers)

# One randomly initialized coefficient per power (0..degrees)
params = torch.randn(degrees + 1, requires_grad=True)

# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD([params], lr=learning_rate)


def polynomial_model(features):
    """Weighted sum of polynomial features using the current ``params``."""
    return features @ params


# Batch gradient descent: every epoch uses all rows of x_poly at once
for epoch in range(num_epochs):
    # Clear gradients left over from the previous epoch
    optimizer.zero_grad()

    # Forward pass and loss
    yhat = polynomial_model(x_poly)
    loss = criterion(yhat, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# poly_deg tells the plot helper to expand x_true into the same powers.
# The reported MSE is the loss computed just before the final update.
plot_regression_data(polynomial_model, poly_deg=degrees, MSE=loss.item())

# Compute Linear Regression Model Using Ordinary Least Squares

In [None]:
# Closed-form least squares via the normal equations: (X^T X)^-1 X^T y,
# reusing x_poly and degrees from the polynomial cell.
# NOTE: x_poly has one row per sample and degrees + 1 columns. With more
# columns than samples, X^T X is rank-deficient, so the explicit inverse is
# numerically unreliable and the fitted coefficients can become very large.
params = ((x_poly.T @ x_poly).inverse() @ x_poly.T) @ y_train
mse = torch.nn.functional.mse_loss(x_poly @ params, y_train)

# Pass a plain float, as the gradient-descent cells do, so the plot title
# shows a number rather than "tensor(...)".
plot_regression_data(lambda x: x @ params, poly_deg=degrees, MSE=mse.item())

# Uncomment to inspect the fitted coefficients:
# params

# Train Neural Network Model Using Batch GD

In [None]:
# Optimization settings
learning_rate = 0.01
num_epochs = 100000
regularization = 0  # 1e-2

# Fully connected network: 1 input -> three hidden layers of 100 ReLU
# units -> 1 output
layers = [
    torch.nn.Linear(1, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, 1),
]
model = torch.nn.Sequential(*layers)

# Loss function and optimizer; `regularization` is passed as SGD weight decay
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    weight_decay=regularization,
)

# The first layer takes one feature per sample, so feed x_train as a column
train_inputs = x_train.unsqueeze(-1)

# Batch gradient descent: every epoch uses all training samples at once
for epoch in progress_bar(range(num_epochs)):
    # Clear gradients left over from the previous epoch
    optimizer.zero_grad()

    # Forward pass; squeeze the output so it lines up with y_train
    yhat = model(train_inputs)
    loss = criterion(yhat.squeeze(), y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# The reported MSE is the loss computed just before the final update
plot_regression_data(model, loss.item())