In [None]:
# Colab-only setup: mount Google Drive at /content/drive so files stored
# there can be opened through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Cost Function
def cost_function(X, Y, W):
    """
    Evaluate the least-squares cost of the linear model ``np.dot(X, W)``.

    The returned value is the sum of squared residuals divided by
    ``2 * len(Y)``, i.e. half of the mean squared error.

    Args:
        X: Feature matrix whose rows line up with the entries of Y.
        Y: Target values.
        W: Weight vector applied to the columns of X.

    Returns:
        The scalar cost for the given weights.
    """
    n_samples = len(Y)
    residuals = np.dot(X, W) - Y
    squared_error_total = np.sum(np.square(residuals))
    scale = 1 / (2 * n_samples)
    return scale * squared_error_total

# Gradient Descent
def gradient_descent(X, Y, W, alpha, iterations):
    """
    Fit linear-model weights with batch gradient descent.

    Each iteration computes the residuals of the current weights over the
    whole data set, moves the weights against the averaged gradient scaled
    by ``alpha``, and records the cost of the updated weights.

    Args:
        X: Feature matrix (must support ``.T``).
        Y: Target values; ``len(Y)`` is used as the sample count.
        W: Starting weight vector. It is not modified in place.
        alpha: Learning rate (step size).
        iterations: Number of update steps to perform.

    Returns:
        A tuple ``(weights, cost_history)`` where ``cost_history`` holds one
        ``cost_function`` value per iteration, measured after that
        iteration's update.
    """
    n_samples = len(Y)
    weights = W
    cost_history = []

    for _step in range(iterations):
        residuals = np.dot(X, weights) - Y
        # Gradient of the cost with respect to the weights, averaged over samples.
        slope = (1 / n_samples) * np.dot(X.T, residuals)
        weights = weights - alpha * slope
        cost_history.append(cost_function(X, Y, weights))

    return weights, cost_history

# RMSE Calculation
def rmse(Y, Y_pred):
    """
    Return the root mean squared error between targets and predictions.

    Args:
        Y: Observed target values.
        Y_pred: Predicted values, element-wise comparable with Y.

    Returns:
        The square root of the mean of the squared differences.
    """
    errors = Y - Y_pred
    mean_squared_error = np.mean(np.square(errors))
    return np.sqrt(mean_squared_error)

# R-Squared Calculation
def r2(Y, Y_pred):
    """
    Compute the coefficient of determination (R-squared).

    R-squared is ``1 - ss_res / ss_tot`` where ``ss_res`` is the sum of
    squared prediction errors and ``ss_tot`` is the sum of squared
    deviations of Y from its mean.

    When ``ss_tot`` is zero (for example, every target is the same value)
    the ratio is undefined. Instead of returning NaN or -inf, this function
    returns 1.0 if the predictions are exact and 0.0 otherwise, the same
    convention scikit-learn's ``r2_score`` uses by default.

    Args:
        Y: Observed target values.
        Y_pred: Predicted values, element-wise comparable with Y.

    Returns:
        The R-squared score.
    """
    ss_tot = np.sum((Y - np.mean(Y)) ** 2)
    ss_res = np.sum((Y - Y_pred) ** 2)

    # Zero total variance: avoid dividing by zero and return a finite score.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)

# Main Function
def main(csv_path='/content/drive/MyDrive/student.csv', alpha=0.0001,
         iterations=1000, test_size=0.2, random_state=42):
    """
    Train and evaluate a linear regression predicting 'Writing' scores.

    Loads the CSV, uses the 'Math' and 'Reading' columns as features and
    'Writing' as the target, fits weights by gradient descent on a training
    split, and prints the weights, the first ten cost values, and the RMSE
    and R-squared on the held-out test split.

    Args:
        csv_path: Location of the CSV file to load.
        alpha: Learning rate passed to ``gradient_descent``.
        iterations: Number of gradient-descent steps.
        test_size: Fraction of rows held out for testing.
        random_state: Seed for the train/test split.

    All parameters default to the values that were previously hard-coded,
    so calling ``main()`` with no arguments behaves as before.
    """
    data = pd.read_csv(csv_path)
    features = data[['Math', 'Reading']].values
    target = data['Writing'].values

    # Prepend a column of ones so the first weight acts as the intercept.
    bias_column = np.ones((features.shape[0], 1))
    design = np.hstack((bias_column, features))

    X_train, X_test, Y_train, Y_test = train_test_split(
        design, target, test_size=test_size, random_state=random_state
    )

    initial_weights = np.zeros(X_train.shape[1])
    W_optimal, cost_history = gradient_descent(
        X_train, Y_train, initial_weights, alpha, iterations
    )

    Y_pred = np.dot(X_test, W_optimal)

    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    print("Final Weights:", W_optimal)
    print("Cost History (First 10 iterations):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)

# Run the pipeline only when this code is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Final Weights: [0.00221962 0.08948349 0.89502744]
Cost History (First 10 iterations): [17.80549271459724, 16.983188130762354, 16.925187801661103, 16.867921576632344, 16.81114744297991, 16.75486109935263, 16.699058354723682, 16.643735054097604, 16.588887078177933, 16.53451034306046]
RMSE on Test Set: 4.79263247324407
R-Squared on Test Set: 0.908239378711656
