# Implementing gradient descent

In [89]:
%pip install numpy

Note: you may need to restart the kernel to use updated packages.


In [90]:
import numpy as np

np.random.seed(1)  # for reproducible results

In [125]:
N = 100  # Number of samples
X = 5 * np.random.rand(N, 1)  # Feature vectors
y = 2 * X + 1 + np.random.randn(N, 1)  # Labels (slope = 2, intercept = 1)

X_b = np.c_[X, np.ones((N, 1))] # Append 1 to each feature vector

In [92]:
# Gradient descent
eta = 0.1  # learning rate
n_iters = 1000  # Number of iterations

theta = np.random.randn(2, 1)  # Random initialization of weights

for i in range(n_iters):
    grad_loss = 2/N * X_b.T @ (X_b @ theta - y)
    theta -=  eta * grad_loss

In [93]:
print(f"theta: {theta}")

theta: [[1.93698502]
 [1.23695725]]


In [94]:
theta


array([[1.93698502],
       [1.23695725]])

We can do better by checking the difference between iterations

In [152]:
# Gradient descent
eta = 0.1  # learning rate
n_iters = 1000  # Number of iterations

theta = np.random.randn(2, 1)  # Random initialization of weights
    
# Gradient descent loop
for iteration in range(n_iters):
    # Calculate gradient of the loss function
    gradient = (2 / N) * X_b.T @ (X_b @ theta - y)
    
    # Update theta
    new_theta = theta - eta * gradient

    # Check for convergence (change in theta is below tolerance)
    if np.linalg.norm(new_theta - theta, ord=2) < 0.001:
        print(f"Converged after {iteration + 1} iterations.")
        break
    
    # Update theta
    theta = new_theta
    
print("theta: ", theta)

Converged after 115 iterations.
theta:  [[1.94579929]
 [1.11810325]]


array([[1.93758384],
       [1.23502891]])

In [63]:
np.zeros_like(theta)

array([[0.],
       [0.]])