In [5]:
import numpy as np, torch, torch.nn as nn, torch.nn.functional as F
import matplotlib, matplotlib.pyplot as plt, os
import imageio
from gradient_descent import Gradient_Descent
# matplotlib.use("Agg")

In [6]:
# Fixed seed so the synthetic data set (and the initial weights drawn later) are reproducible.
np.random.seed(1)
n = 1_000

# Design matrix: a column of ones (intercept) next to one standard-normal feature.
X = np.hstack((np.ones((n, 1)), np.random.randn(n, 1)))

# Targets follow y = 10 * x + 35 plus Gaussian noise with standard deviation 3.
y = 10 * X[:, [1]] + 35 + 3 * np.random.randn(n, 1)

def loss_function(X, y, weights):
    """Return the mean squared residual of the linear model ``X.dot(weights)``.

    Args:
        X: Design matrix; must support ``X.dot(weights)``.
        y: Observed targets, broadcast-compatible with ``X.dot(weights)``.
        weights: Model coefficients.

    Returns:
        The mean over all elements of ``(y - X.dot(weights)) ** 2``.
    """
    residuals = y - X.dot(weights)
    return np.mean(residuals ** 2)

def b_grad(X, y, weights):
    """Return the gradient of the summed squared residuals with respect to ``weights``.

    Evaluates ``2 * X.T.dot(X).dot(weights) - 2 * X.T.dot(y)``, i.e. the gradient
    of ``((y - X.dot(weights)) ** 2).sum()``. Note that the result is not divided
    by the number of rows, so it is scaled by the sample count relative to the
    gradient of a mean squared error.

    Args:
        X: Design matrix.
        y: Observed targets.
        weights: Current model coefficients.

    Returns:
        Array with the same shape as ``X.T.dot(y)`` holding the gradient.
    """
    gram = X.T.dot(X)
    moment = X.T.dot(y)
    return 2 * gram.dot(weights) - 2 * moment

# Closed-form least-squares solution, kept as a reference for the iterative result.
# Solving the normal equations directly avoids forming an explicit matrix inverse.
optimal_betas = np.linalg.solve(X.T.dot(X), X.T.dot(y))

betas = np.random.randn(2).reshape(-1, 1)
print(f"Initial betas: {betas}")

w = Gradient_Descent(
    # Hand over a copy so `betas` keeps the initial point for later reuse,
    # regardless of whether the optimizer updates its weights in place.
    weights=betas.copy(),
    gradient_function=b_grad,
    epsilon=1e-5,
    # loss_function is deliberately not passed in this run; per the original
    # note, omitting it turns off line search (slower convergence).
)

w.fit(X=X, y=y, tau=1 / 2)

# (intercept, slope) pair recorded for every entry of the optimizer's history.
bs = [(step[0][0], step[1][0]) for step in w.weights_by_step]

# Scratch directory for the individual frames.
os.makedirs("tempfigs", exist_ok=True)

# Fixed axis limits so that every frame shows the same view of the data.
xmin = X[:, 1].min() - 3
xmax = X[:, 1].max() + 3
ymin = y.min() - 10
ymax = y.max() + 10


def lf(beta0, beta1):
    """Return the loss on (X, y) for the line with intercept beta0 and slope beta1."""
    return loss_function(
        X=X, y=y, weights=np.array([beta0, beta1]).reshape(-1, 1)
    )


# Record each frame path as it is written instead of listing the directory
# afterwards: leftovers from an aborted run or unrelated files in `tempfigs`
# can then neither end up in the GIF nor break the ordering.
figs = []
for i, (b0, b1) in enumerate(bs):
    fig, ax = plt.subplots(figsize=(15, 7))
    current_loss = np.round(lf(b0, b1), 2)
    ax.set_title(
        f"Gradient Descent method iteration #{i}; loss (MSE) = {current_loss}"
    )
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.scatter(X[:, 1], y)
    ax.plot([xmin, xmax], [b0 + b1 * xmin, b0 + b1 * xmax], c="red")
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)

    frame_path = f"tempfigs/tempfig_{i}.png"
    fig.savefig(fname=frame_path)
    # Close this specific figure so the loop does not accumulate open figures.
    plt.close(fig)
    figs.append(frame_path)

# Top-level imageio.imread is deprecated in recent imageio releases; the same
# legacy behaviour lives under imageio.v2. Fall back to the top-level module
# when the installed imageio has no `v2` namespace.
iio = getattr(imageio, "v2", imageio)
frames = [iio.imread(file_) for file_ in figs]
os.makedirs("gifs", exist_ok=True)
iio.mimsave("gifs/gradient_descent.gif", frames, "GIF", duration=10)

### remove generated images
for file_ in figs:
    os.remove(file_)

# Drop the scratch directory only if nothing else is left inside it.
if not os.listdir("tempfigs"):
    os.rmdir("tempfigs")

Initial betas: [[0.48951662]
 [0.23879586]]
Reached convergence after 1135 steps.


  frames = [imageio.imread(file_) for file_ in figs]


In [10]:
w = Gradient_Descent(
    # Hand over a copy so `betas` keeps the initial point, regardless of
    # whether the optimizer updates its weights in place.
    weights=betas.copy(),
    gradient_function=b_grad,
    epsilon=1e-6,
    loss_function=loss_function,
    # Per the original note: drop the loss_function parameter to run without
    # line search (slower convergence).
)

w.fit(X=X, y=y, tau=3 / 4)

# (intercept, slope) pair recorded for every entry of the optimizer's history.
bs = [(step[0][0], step[1][0]) for step in w.weights_by_step]

# Scratch directory for the individual frames.
os.makedirs("tempfigs", exist_ok=True)

# Fixed axis limits so that every frame shows the same view of the data.
xmin = X[:, 1].min() - 3
xmax = X[:, 1].max() + 3
ymin = y.min() - 10
ymax = y.max() + 10


def lf(beta0, beta1):
    """Return the loss on (X, y) for the line with intercept beta0 and slope beta1."""
    return loss_function(
        X=X, y=y, weights=np.array([beta0, beta1]).reshape(-1, 1)
    )


# Record each frame path as it is written instead of listing the directory
# afterwards: leftovers from an aborted run or unrelated files in `tempfigs`
# can then neither end up in the GIF nor break the ordering.
figs = []
for i, (b0, b1) in enumerate(bs):
    fig, ax = plt.subplots(figsize=(15, 7))
    current_loss = np.round(lf(b0, b1), 2)
    ax.set_title(
        f"Gradient Descent method iteration #{i}; loss (MSE) = {current_loss}"
    )
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.scatter(X[:, 1], y)
    ax.plot([xmin, xmax], [b0 + b1 * xmin, b0 + b1 * xmax], c="red")
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)

    frame_path = f"tempfigs/tempfig_{i}.png"
    fig.savefig(fname=frame_path)
    # Close this specific figure so the loop does not accumulate open figures.
    plt.close(fig)
    figs.append(frame_path)

# Top-level imageio.imread is deprecated in recent imageio releases; the same
# legacy behaviour lives under imageio.v2. Fall back to the top-level module
# when the installed imageio has no `v2` namespace.
iio = getattr(imageio, "v2", imageio)
frames = [iio.imread(file_) for file_ in figs]

# Create the output directory here as well so this cell does not depend on an
# earlier cell having created it.
os.makedirs("gifs", exist_ok=True)
iio.mimsave("gifs/gradient_descent_with_line_search.gif", frames, "GIF", duration=10)

### remove generated images
for file_ in figs:
    os.remove(file_)

# Drop the scratch directory only if nothing else is left inside it.
if not os.listdir("tempfigs"):
    os.rmdir("tempfigs")

Reached convergence after 35 steps.


  frames = [imageio.imread(file_) for file_ in figs]
