# Importing some packages

In [6]:
import numpy as np
from sklearn.linear_model import LinearRegression

import torch
import torch.optim as optim
import torch.nn as nn
from torchviz import make_dot

# Creating data

In [7]:
# Settings for the synthetic data set: sample count, noise scale, and the
# true intercept / slope of the line the data is generated from.
N, ERROR_SCALE = 100, 0.1
TRUE_B, TRUE_W = 1, 2

# Seed the global NumPy RNG so every draw below is reproducible.
np.random.seed(42)

# Features are drawn before the noise; the draw order determines which
# random numbers each array receives.
X = np.random.rand(N, 1)
error = np.random.randn(N, 1) * ERROR_SCALE

# Targets follow the true line plus the scaled Gaussian noise.
y = TRUE_B + TRUE_W * X + error

Splitting into train and validation sets

In [8]:
# Shuffle the row indices in place (global NumPy RNG) so the split is random.
idx = np.arange(N)
np.random.shuffle(idx)

# Cut the shuffled indices at the 80% mark: the leading part is used for
# training, the remainder for validation.
train_idx, val_idx = np.split(idx, [int(N*.8)])

# Index features and targets with the same index arrays so rows stay paired.
X_train, X_val = X[train_idx], X[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

# Gradient descent

Random parameter initialization

In [9]:
# Re-seed the global NumPy RNG so the starting parameters are reproducible.
np.random.seed(42)

# Two consecutive standard-normal draws, unpacked row by row: the first
# becomes the intercept B, the second the slope W (each an array of shape (1,)).
B, W = np.random.randn(2, 1)

# Predictions of the linear model on the training features.
y_train_hat = W * X_train + B

Loss computation

In [10]:
# Mean squared error between the training targets and the current predictions.
loss = np.square(y_train - y_train_hat).mean()
loss

2.7421577700550976

Gradient computation

In [11]:
# Partial derivatives of the MSE loss: b_grad with respect to the intercept,
# w_grad with respect to the slope (residuals weighted by the feature values).
b_grad = (y_train_hat - y_train).mean() * 2
w_grad = ((y_train_hat - y_train) * X_train).mean() * 2

print(f"slope gradient = {w_grad}")
print(f"intercept gradient = {b_grad}")

slope gradient = -1.8337537171510832
intercept gradient = -3.044811379650508


Updating the parameters

In [13]:
# Step size applied to each gradient in the update below.
LEARNING_RATE = 0.1

# Move each parameter against its gradient; B and W themselves are left
# untouched so the initial guess can still be printed for comparison.
B_NEW, W_NEW = B - b_grad * LEARNING_RATE, W - w_grad * LEARNING_RATE

print(f"Initial guess: {B[0], W[0]}")
print(f"Updated parameters: {B_NEW[0], W_NEW[0]}")

Initial guess: (0.4967141530112327, -0.13826430117118466)
Updated parameters: (0.8011952909762835, 0.04511107054392366)
