In [1]:
import numpy as np

# 1. Data
# Toy regression problem: one input feature whose targets lie on y = 3x.
X = np.arange(1, 6).reshape(-1, 1)  # Feature column [[1], [2], [3], [4], [5]]
y = 3 * X  # Target column [[3], [6], [9], [12], [15]]

# Prepend a column of ones so that theta_0 plays the role of the intercept.
X = np.column_stack((np.ones(len(X)), X))

# 2. Initialize parameters
# m = number of samples, n = number of columns (bias column included)
m, n = X.shape
theta = np.zeros((n, 1))  # Every weight starts at zero

# 3. Cost function
def compute_cost(X, y, theta):
    """Return the half mean squared error of the linear model ``X @ theta``.

    The cost is ``(1 / (2 * m)) * sum((X @ theta - y) ** 2)`` where ``m`` is
    the number of rows in ``X``.

    Args:
        X: Design matrix of shape (m, n), one row per sample.
        y: Targets with the same shape as ``X @ theta`` (an (m, 1) column
            in this file).
        theta: Parameter column of shape (n, 1).

    Returns:
        The scalar cost.
    """
    # Take the sample count from the data actually passed in, so the result
    # is correct for any dataset and does not rely on a module-level variable.
    num_samples = X.shape[0]
    residuals = X @ theta - y
    return (1 / (2 * num_samples)) * np.sum(residuals ** 2)

# 4. Gradient descent
def gradient_descent(X, y, theta, alpha, iterations, log_every=100):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration moves the parameters against the gradient of the half
    mean squared error, ``(1 / m) * X.T @ (X @ theta - y)``, where ``m`` is
    the number of rows in ``X``.

    Args:
        X: Design matrix of shape (m, n), one row per sample.
        y: Targets with the same shape as ``X @ theta`` (an (m, 1) column
            in this file).
        theta: Initial parameter column of shape (n, 1). It is not modified.
        alpha: Learning rate applied to each gradient step.
        iterations: Number of update steps to perform.
        log_every: Print the cost on every ``log_every``-th iteration,
            starting with iteration 0. Pass 0 or None to disable printing.

    Returns:
        A new float array with the parameters after the last update.
    """
    # Take the sample count from the data actually passed in, so the function
    # works for any dataset and does not rely on a module-level variable.
    num_samples = X.shape[0]

    # Work on a float copy: the in-place update below would otherwise
    # overwrite the caller's array and fail for integer-typed initial values.
    theta = np.array(theta, dtype=float)

    for i in range(iterations):
        residuals = X @ theta - y
        gradients = (1 / num_samples) * (X.T @ residuals)  # Compute gradients
        theta -= alpha * gradients  # Update parameters

        # Optional progress report; the cost is evaluated after the update.
        if log_every and i % log_every == 0:
            print(f"Iteration {i}: Cost = {compute_cost(X, y, theta):.4f}")
    return theta

# 5. Training the model
alpha = 0.01  # Step size used for every gradient update
iterations = 1000  # Number of gradient-descent steps
theta = gradient_descent(X, y, theta, alpha=alpha, iterations=iterations)

# Final parameters
print("Final parameters (theta):", theta)

# 6. Predictions
# Evaluate the fitted line on the training inputs and show it as a flat vector.
y_pred = X @ theta
print("Predictions:", y_pred.ravel())


Iteration 0: Cost = 38.4921
Iteration 100: Cost = 0.0385
Iteration 200: Cost = 0.0275
Iteration 300: Cost = 0.0196
Iteration 400: Cost = 0.0140
Iteration 500: Cost = 0.0099
Iteration 600: Cost = 0.0071
Iteration 700: Cost = 0.0051
Iteration 800: Cost = 0.0036
Iteration 900: Cost = 0.0026
Final parameters (theta): [[0.14212982]
 [2.96063232]]
Predictions: [ 3.10276214  6.06339446  9.02402678 11.9846591  14.94529141]
