In [1]:
import numpy as np

In [2]:
# Fix NumPy's global seed so the synthetic dataset is identical on every run
np.random.seed(45)
num_samples = 40

# Synthetic regression data.
# Draw order matters for reproducibility: inputs first, then the noise.
x1 = np.random.uniform(low=-1, high=1, size=num_samples)  # inputs sampled uniformly between -1 and 1
eps = np.random.standard_normal(size=num_samples)         # standard-normal noise, one value per sample
f_x = 4 + 3 * x1                                          # noise-free line: slope 3, intercept 4
y = eps + f_x                                             # observed targets

In [10]:
import torch

# Seed PyTorch's global RNG (no random torch op is used in this cell; the seed
# only keeps things deterministic if one is added)
torch.manual_seed(45)

# Float32 tensor copies of the NumPy data. They are inputs, not parameters,
# so gradient tracking stays off (the default).
x1_tensor = torch.tensor(x1, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# Both parameters start at 0 and are tracked by autograd
theta_0 = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
theta_1 = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def linear_model(x):
    """Evaluate the line ``theta_0 + theta_1 * x``.

    Reads the notebook-level ``theta_0`` (intercept) and ``theta_1`` (slope)
    at call time rather than taking them as arguments.
    """
    return theta_0 + theta_1 * x


def mse_loss(y_pred, y_true):
    """Return the mean of the squared differences between ``y_pred`` and ``y_true``."""
    residual = y_pred - y_true
    return (residual ** 2).mean()


# Forward pass over the whole dataset, then the scalar loss
y_pred = linear_model(x1_tensor)
loss = mse_loss(y_pred, y_tensor)

# Backpropagate: fills theta_0.grad and theta_1.grad with d(loss)/d(theta)
loss.backward()

# Report the full-batch gradients and the loss value
print(f"Gradient of theta_0: {theta_0.grad.item():.4f}")
print(f"Gradient of theta_1: {theta_1.grad.item():.4f}")
print(f"Loss: {loss.item():.4f}")

Gradient of theta_0: -7.4471
Gradient of theta_1: -1.0253
Loss: 16.6815


In [11]:
# Fresh parameters for the per-sample pass, starting from the same point (0, 0)
# as the full-batch computation so the two gradients are directly comparable
theta_0_sgd = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
theta_1_sgd = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# Per-sample gradients, one entry per data point
theta_0_grads = []
theta_1_grads = []

# Visit the data one point at a time (each point is a single stochastic sample)
for x_i, y_i in zip(x1_tensor, y_tensor):
    # backward() accumulates into .grad, so clear what the previous sample left.
    # Each parameter is checked on its own: .grad is None until that parameter
    # has been through a backward pass, and one parameter's state says nothing
    # about the other's.
    for param in (theta_0_sgd, theta_1_sgd):
        if param.grad is not None:
            param.grad.zero_()

    # Forward pass and squared error for this single point
    y_pred_i = theta_1_sgd * x_i + theta_0_sgd
    loss_i = (y_pred_i - y_i) ** 2

    # Backpropagate to get this point's gradient
    loss_i.backward()

    # Record the gradients as plain Python floats
    theta_0_grads.append(theta_0_sgd.grad.item())
    theta_1_grads.append(theta_1_sgd.grad.item())

# Average of the per-sample gradients over the dataset
theta_0_avg_grad = sum(theta_0_grads) / len(theta_0_grads)
theta_1_avg_grad = sum(theta_1_grads) / len(theta_1_grads)

# Printing the average stochastic gradients
print(f"Average stochastic gradient of theta_0: {theta_0_avg_grad:.4f}")
print(f"Average stochastic gradient of theta_1: {theta_1_avg_grad:.4f}")

# Compare with the full-batch gradient computed earlier.
# NOTE: this needs theta_0 / theta_1 to still be the autograd tensors from the
# full-batch cell. A later cell rebinds those names to plain floats, so
# re-running this cell after it raises AttributeError; re-run the full-batch
# cell first.
print(f"True gradient of theta_0: {theta_0.grad.item():.4f}")
print(f"True gradient of theta_1: {theta_1.grad.item():.4f}")

Average stochastic gradient of theta_0: -7.4471
Average stochastic gradient of theta_1: -1.0253
True gradient of theta_0: -7.4471
True gradient of theta_1: -1.0253


In [12]:
# Dataset size
n = len(x1)

# Evaluate at the same point (0, 0) as before.
# These are plain Python floats, not autograd tensors.
theta_0 = 0.0
theta_1 = 0.0

# Model predictions at the current parameters
y_pred = theta_1 * x1 + theta_0

# Analytic gradients of the mean squared error:
#   dL/dtheta_0 = -(2/n) * sum(y - y_pred)
#   dL/dtheta_1 = -(2/n) * sum((y - y_pred) * x1)
residual = y - y_pred
grad_theta_0 = (-2 / n) * residual.sum()
grad_theta_1 = (-2 / n) * (residual * x1).sum()

# Report the hand-derived gradients
print(f"Gradient of theta_0 (manual calculation): {grad_theta_0:.4f}")
print(f"Gradient of theta_1 (manual calculation): {grad_theta_1:.4f}")

Gradient of theta_0 (manual calculation): -7.4471
Gradient of theta_1 (manual calculation): -1.0253


In [14]:
# Evaluate every per-sample gradient at the same point (0, 0)
theta_0_sgd = 0.0
theta_1_sgd = 0.0

# Per-sample gradients, one entry per data point
theta_0_grads = []
theta_1_grads = []

# Walk the dataset one (x_i, y_i) pair at a time
for x_i, y_i in zip(x1, y):
    # Prediction for this single point
    y_pred_i = theta_1_sgd * x_i + theta_0_sgd

    # Gradient of the squared error (y_i - y_pred_i)^2 for this point.
    # The theta_1 gradient is the theta_0 gradient times x_i.
    grad_theta_0_i = -2 * (y_i - y_pred_i)
    grad_theta_1_i = grad_theta_0_i * x_i

    theta_0_grads.append(grad_theta_0_i)
    theta_1_grads.append(grad_theta_1_i)

# Mean of the per-sample gradients
grad_theta_0_sgd_avg = np.mean(theta_0_grads)
grad_theta_1_sgd_avg = np.mean(theta_1_grads)

# Report the averaged stochastic gradients
print(f"Average stochastic gradient of theta_0: {grad_theta_0_sgd_avg:.4f}")
print(f"Average stochastic gradient of theta_1: {grad_theta_1_sgd_avg:.4f}")

Average stochastic gradient of theta_0: -7.4471
Average stochastic gradient of theta_1: -1.0253
