# In [None]:
import numpy as np

def load_data(x_file, y_file):
    """Read the predictor and response arrays from two comma-separated sources.

    Each argument is handed unchanged to ``np.loadtxt`` with ``delimiter=','``.

    Returns:
        A ``(X, y)`` tuple: the array parsed from ``x_file`` followed by the
        array parsed from ``y_file``.
    """
    # x_file is parsed first, then y_file, matching the order of the returned pair.
    features, targets = (
        np.loadtxt(source, delimiter=',') for source in (x_file, y_file)
    )
    return features, targets

def compute_rss(X, y, beta):
    """Return the residual sum of squares for the linear prediction ``X . beta``.

    The residuals are ``y - np.dot(X, beta)``; the result is the sum of their
    squares.
    """
    fitted = np.dot(X, beta)
    errors = y - fitted
    return np.sum(np.square(errors))

def gradient_descent(X, y, alpha, max_iter=10000, tolerance=1e-8):
    """Fit least-squares coefficients by batch gradient descent.

    Minimises ``RSS(beta) = sum((y - X @ beta) ** 2)`` starting from
    ``beta = 0`` and repeatedly stepping against the gradient
    ``-2 * X.T @ (y - X @ beta)``.

    Args:
        X: Design matrix of shape ``(n_samples, n_features)``. A 1-D input is
            treated as a single feature column.
        y: Response values, one per row of ``X``.
        alpha: Step size applied to the (unnormalised) gradient.
        max_iter: Upper bound on the number of update steps.
        tolerance: Stop once the RSS changes by less than this amount between
            two consecutive steps.

    Returns:
        The coefficient vector of shape ``(n_features,)`` after the last step.

    Raises:
        ValueError: If ``X`` is not 1-D or 2-D, or if ``X`` and ``y`` disagree
            on the number of samples.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        # np.loadtxt returns a 1-D array for a single-column file; treat it
        # as one feature observed n_samples times.
        X = X.reshape(-1, 1)
    if X.ndim != 2:
        raise ValueError("X must be a 1-D or 2-D array")
    y = np.asarray(y, dtype=float).reshape(-1)

    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but y has {y.shape[0]} values"
        )

    # One coefficient per feature (column of X), not one per entry of X.
    beta = np.zeros(n_features)

    # Residuals for the current beta; refreshed after every update so each
    # step needs only one X @ beta product for both the RSS and the gradient.
    residuals = y - X @ beta
    prev_rss = float('inf')

    for iteration in range(max_iter):
        gradient = -2 * (X.T @ residuals)
        beta = beta - alpha * gradient

        residuals = y - X @ beta
        current_rss = np.sum(residuals ** 2)

        # A non-finite RSS means the iterates blew up (step size too large);
        # further steps cannot recover, so stop instead of burning max_iter.
        if not np.isfinite(current_rss):
            print(f"Diverged after {iteration + 1} iterations; try a smaller alpha.")
            break

        if abs(prev_rss - current_rss) < tolerance:
            print(f"Converged after {iteration + 1} iterations.")
            break

        prev_rss = current_rss

    return beta

# Read the predictors and responses from the CSV files
X, y = load_data('X.csv', 'Y.csv')

# Step size for the gradient-descent updates
# NOTE(review): the gradient is summed over all samples rather than averaged,
# so this value may need to shrink for larger datasets — confirm on real data.
alpha = 0.01

# Fit the coefficients by gradient descent
beta_optimal = gradient_descent(X, y, alpha=alpha)

# Report the fitted coefficients (header line, then the array)
print("Optimal beta coefficients:", beta_optimal, sep="\n")
