### Designing a synthetic Linear Regression Dataset and implementing Linear Regression

In [51]:
import numpy as np

def generate_regression_data(m=200, n=5, noise_std=0.1, seed=42):
    """Generate a synthetic linear-regression dataset with known parameters.

    The targets follow ``y = X @ theta_true + noise`` where the features and
    the noise are drawn from a standard normal distribution.

    Parameters
    ----------
    m : int
        Number of samples (rows of ``X``).
    n : int
        Number of features, excluding the bias column.
    noise_std : float
        Standard deviation of the Gaussian noise added to the targets.
    seed : int
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        global legacy random state.

    Returns
    -------
    X : ndarray of shape (m, n + 1)
        Feature matrix whose first column is all ones (the bias term).
    y : ndarray of shape (m,)
        Noisy targets.
    theta_true : ndarray of shape (n + 1,)
        Parameters used to generate ``y``. The first ``min(n + 1, 6)``
        entries come from ``[2.0, 3.0, -2.0, 0.5, 0.0, 1.0]``; any further
        entries are ``0.0``, i.e. features beyond the fifth do not influence
        the target.
    theta : ndarray of shape (n + 1,)
        All-zero vector, usable as the starting point for optimisation.
    """
    np.random.seed(seed)

    # Standard-normal features with a leading column of ones (x0 = 1) so the
    # intercept is carried by theta[0].
    X = np.hstack([np.ones((m, 1)), np.random.randn(m, n)])

    theta = np.zeros(X.shape[1])

    # Known coefficients (bias first). Zero-padding keeps theta_true the same
    # length as X's column count when n > 5; previously the slice came back
    # too short and the matrix product below failed with a shape error.
    known_coefficients = np.array([2.0, 3.0, -2.0, 0.5, 0.0, 1.0])
    theta_true = np.zeros(n + 1)
    n_known = min(n + 1, known_coefficients.size)
    theta_true[:n_known] = known_coefficients[:n_known]

    # The noise is drawn after the features so the random stream is consumed
    # in the same order as before.
    noise = np.random.randn(m) * noise_std

    y = X @ theta_true + noise

    return X, y, theta_true, theta

In [52]:
# Build the dataset with the generator's default arguments; `theta` is the
# all-zero starting vector it returns alongside the data.
X, y, theta_true, theta = generate_regression_data()

In [53]:
# def compute_cost(X, y, theta):

#     m = len(y)

#     y_hat = X @ theta

#     error_squared = (y_hat - y) ** 2

#     total_error = np.sum(error_squared)

#     J = (1/(2 * m))* total_error

#     return J

In [None]:
def compute_gradient_descent(X, y, theta, learning_rate):
    """Perform a single batch gradient-descent step for linear regression.

    The gradient is that of the mean-squared-error cost
    ``J = (1 / (2 * m)) * sum((X @ theta - y) ** 2)``.

    Parameters
    ----------
    X : array
        Design matrix, one row per sample.
    y : array
        Target values, one per sample.
    theta : array
        Current parameter vector. It is not modified in place.
    learning_rate : float
        Step size applied to the gradient.

    Returns
    -------
    array
        The updated parameter vector.
    """
    n_samples = len(y)

    # Signed prediction error for every sample.
    prediction_error = X @ theta - y

    # Average gradient of the cost with respect to each parameter.
    gradient = (1 / n_samples) * (X.T @ prediction_error)

    return theta - (learning_rate * gradient)

In [59]:
def compute_gradient_descent_loop(X, y, theta, learning_rate, n_iterations):
    """Run a fixed number of gradient-descent steps and return the result.

    Parameters
    ----------
    X, y : array
        Training data, passed unchanged to ``compute_gradient_descent``.
    theta : array
        Starting parameter vector.
    learning_rate : float
        Step size used for every update.
    n_iterations : int
        Number of update steps to perform.

    Returns
    -------
    array
        The parameter vector after the final step (the starting ``theta``
        when ``n_iterations`` is zero).
    """
    current_theta = theta
    for _ in range(n_iterations):
        current_theta = compute_gradient_descent(
            X, y, current_theta, learning_rate
        )
    return current_theta

In [81]:
# Fit the parameters: 1000 gradient-descent steps at a learning rate of 0.01,
# starting from `theta`.
theta_final = compute_gradient_descent_loop(
    X=X,
    y=y,
    theta=theta,
    learning_rate=0.01,
    n_iterations=1000,
)

In [82]:
theta_true

array([ 2. ,  3. , -2. ,  0.5,  0. ,  1. ])

In [83]:
theta_final

array([ 2.01269579,  2.99448527, -1.99340134,  0.50147041,  0.00538474,
        1.00406424])