In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

## Synthetic Data Generation

In [2]:
# Ground-truth intercept and slope of the line the model should recover
true_b, true_w = 1, 2
# Number of synthetic observations
N = 100

# Data Generation
# Seed the global NumPy generator so the dataset is identical on every run
np.random.seed(42)
# Features: N uniform draws from [0, 1), stored as a column vector
x = np.random.rand(N, 1)
# Noise: standard-normal draws scaled down to a standard deviation of 0.1
noise_scale = .1
epsilon = noise_scale * np.random.randn(N, 1)
# Labels: the true line evaluated at x, perturbed by the noise
y = true_b + true_w * x + epsilon

## Train-Validation Split

In [3]:
# Fraction of the samples that goes to the training set; the rest is validation
train_fraction = 0.8
# Single cut point derived from N, shared by both slices below so the two sets
# always partition the data (no overlap, no dropped samples) even if N changes
n_train = int(N * train_fraction)

# Shuffle the indices (uses the global NumPy random state, so the result
# depends on the seed and on the draws made since it was set)
idx = np.arange(N)
np.random.shuffle(idx)

# Use the first n_train shuffled indices for training
train_idx = idx[:n_train]
# Use the remaining indices for validation
val_idx = idx[n_train:]

# Build the training and validation sets by fancy-indexing features and labels
x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

## Step 0: Random Initialization

In [4]:
# Reseed so the starting parameters are reproducible no matter how many
# random draws were consumed before this cell
np.random.seed(42)
# One standard-normal draw per parameter: the intercept first, then the slope
b, w = np.random.randn(1), np.random.randn(1)

print(b, w)

[0.49671415] [-0.1382643]


## Step 1: Compute Model's Predictions

In [5]:
# Step 1: Forward pass - evaluate the current linear model on the training
# features; b and w broadcast across every row of x_train
y_hat = w * x_train + b

In [6]:
# Step 2: Compute the loss
# Every training point is used at once, which makes this BATCH gradient descent
error = y_hat - y_train
# Regression loss: mean squared error (MSE) over all training points
squared_error = error ** 2
loss = squared_error.mean()
print(loss)

2.7421577700550976


In [7]:
## Loss Surface
## Build a grid of candidate (b, w) values on which the error will be evaluated
# Each parameter is swept over 101 evenly spaced values within +/- 3 of its true value
half_width, n_points = 3, 101
b_range = np.linspace(true_b - half_width, true_b + half_width, n_points)
w_range = np.linspace(true_w - half_width, true_w + half_width, n_points)
# meshgrid expands the two 1-D ranges into 2-D arrays holding every (b, w) pair
bs, ws = np.meshgrid(b_range, w_range)
bs.shape, ws.shape

((101, 101), (101, 101))

In [8]:
# Inspect the grid of candidate intercepts; every row repeats the values of b_range
bs

array([[-2.  , -1.94, -1.88, ...,  3.88,  3.94,  4.  ],
       [-2.  , -1.94, -1.88, ...,  3.88,  3.94,  4.  ],
       [-2.  , -1.94, -1.88, ...,  3.88,  3.94,  4.  ],
       ...,
       [-2.  , -1.94, -1.88, ...,  3.88,  3.94,  4.  ],
       [-2.  , -1.94, -1.88, ...,  3.88,  3.94,  4.  ],
       [-2.  , -1.94, -1.88, ...,  3.88,  3.94,  4.  ]])

In [9]:
# Pick the first training sample and evaluate every (b, w) pair of the grid on it
dummy_x = x_train[0]
# dummy_x broadcasts against the 2-D grids, giving one prediction per grid cell
dummy_yhat = ws * dummy_x + bs
dummy_yhat.shape

(101, 101)

In [10]:
# Predictions of every (b, w) pair on the grid for every training point.
# Reshaping x_train to (n_samples, 1, 1) lets NumPy broadcast it against the
# 2-D grids bs and ws, producing one grid of predictions per training sample
# in a single vectorized expression (no per-row Python callback, and no
# lambda parameter shadowing the notebook-level `x`).
all_predictions = bs + ws * x_train.reshape(-1, 1, 1)
all_predictions.shape

(80, 101, 101)

In [11]:
# Give the labels two trailing singleton axes so that each label can be
# broadcast against a whole 2-D grid of predictions
label_shape = (-1, 1, 1)
all_labels = y_train.reshape(label_shape)
all_labels.shape

(80, 1, 1)

In [12]:
# Error of every grid cell's prediction for every training point; the
# (n, 1, 1) labels broadcast across each (101, 101) prediction grid
all_errors = all_predictions - all_labels
all_errors.shape

(80, 101, 101)

In [13]:
# MSE per grid cell: square the errors, then average over the sample axis
# (axis 0), leaving one loss value for each (b, w) pair
all_squared_errors = all_errors ** 2
all_losses = all_squared_errors.mean(axis=0)
all_losses.shape

(101, 101)

## Step 3: Compute Gradients

In [14]:
# Gradients of the MSE loss with respect to each parameter:
#   d(loss)/db = 2 * mean(error)
#   d(loss)/dw = 2 * mean(x * error)
mean_error = error.mean()
mean_weighted_error = (x_train * error).mean()
b_grad, w_grad = 2 * mean_error, 2 * mean_weighted_error
print(b_grad, w_grad)

-3.044811379650508 -1.8337537171510832


## Step 4: Update Parameters

In [15]:
# Learning rate: the step-size multiplier applied to the gradients
lr = 0.1
# Show the parameters before the update
print(b, w)
# Gradient-descent step: move each parameter against its gradient.
# New arrays are created (no in-place modification of the old b and w).
b, w = b - lr * b_grad, w - lr * w_grad


[0.49671415] [-0.1382643]


In [16]:
# Show the current values of the intercept b and the slope w
print(b, w)

[0.80119529] [0.04511107]
