In [1]:
import pandas as pd
import numpy as np
from linear_regression_gradient import *

In [2]:
# Load the full GSS extract; low_memory=False makes pandas read the file in one
# pass instead of chunk-by-chunk dtype inference.
gss = pd.read_csv('../Datasets/gss.csv', low_memory=False)

# Restrict to rows whose interview year is 2010, then drop any respondent that is
# missing one of the three predictors or the response.
gss_2010 = gss[gss["yrint"].eq(2010)].dropna(
    subset=['paeduc', 'maeduc', 'age', 'educ']
)

# Design matrix (father's education, mother's education, age) and response (educ).
X = gss_2010[['paeduc', 'maeduc', 'age']]
y = gss_2010['educ']

In [3]:
# Fit the project's LinearRegressionGradient model on the three predictors.
# X.to_numpy() is already 2-D with one column per feature, so no reshape is needed.
gradient_model = LinearRegressionGradient(learning_rate=0.0005, n_iters=1000000).fit(
    X.to_numpy(), np.array(y)
)

# Map ASCII digits to Unicode subscripts so each coefficient gets its own index
# (β̂₁, β̂₂, β̂₃, ...) instead of every line being labelled β̂₁.
_SUBSCRIPT_DIGITS = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")

print(f"Learned bias (β̂₀): {gradient_model.intercept:.4f}")
for position, coefficient in enumerate(gradient_model.coefficients, start=1):
    label = f"β̂{str(position).translate(_SUBSCRIPT_DIGITS)}"
    print(f"Learned coefficient ({label}): {coefficient:.4f}")

Learned bias (β̂₀): 7.3128
Learned coefficient (β̂₁): 0.2146
Learned coefficient (β̂₁): 0.2573
Learned coefficient (β̂₁): 0.0242


In [4]:
def predict(X_test=None, model=None):
    """Print the model's prediction for each row of ``X_test``.

    Parameters
    ----------
    X_test : array-like of shape (n_rows, n_features), optional
        Feature rows to predict for. A single 1-D row is accepted and treated
        as one observation. Defaults to one all-zero row with three features.
    model : object with a ``predict(X)`` method, optional
        Fitted model to query. Defaults to the notebook-level ``gradient_model``.

    Returns
    -------
    None
        Results are printed, one line per input row.
    """
    if model is None:
        model = gradient_model  # fitted model defined in an earlier cell
    if X_test is None:
        X_test = np.array([[0, 0, 0]])
    X_test = np.atleast_2d(np.asarray(X_test))

    y_pred = model.predict(X_test)

    print("Predictions:")
    # Iterate row-wise: pairing the *flattened* features with the predictions
    # would match individual feature values (not observations) to each ŷ.
    for row, pred in zip(X_test, np.ravel(y_pred)):
        features = ", ".join(f"{float(value):4.1f}" for value in row)
        print(f"X = [{features}] → ŷ = {pred:.3f}")

# Print the fitted model's prediction for predict()'s built-in all-zero feature row.
predict()

Predictions:
X =  0.0 → ŷ = 7.313


In [5]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else float("nan")


def gradient_stats(model=None, features=None, target=None):
    """Print goodness-of-fit diagnostics for a fitted linear model.

    Reports the residual, total and explained sums of squares, the MSE/RMSE
    normalised by the number of observations and by the residual degrees of
    freedom, and R² computed two ways (``1 - SSR/TSS`` and ``ESS/TSS``).

    Parameters
    ----------
    model : object with ``predict(X)`` and ``coefficients``, optional
        Fitted model. Defaults to the notebook-level ``gradient_model``.
    features : array-like of shape (n_rows, n_coefficients), optional
        Predictor values. Defaults to the notebook-level ``X``.
    target : array-like of shape (n_rows,), optional
        Observed responses. Defaults to the notebook-level ``y``.

    Returns
    -------
    None
        All statistics are printed. Ratios whose denominator is not positive
        (no observations, no residual degrees of freedom, or zero total
        variance) are reported as ``nan``.
    """
    if model is None:
        model = gradient_model  # fitted model defined in an earlier cell
    if features is None:
        features = X
    if target is None:
        target = y

    n_coefficients = len(model.coefficients)
    design = np.asarray(features).reshape(-1, n_coefficients)

    # Work on flat float arrays so the arithmetic is purely positional and a
    # column-vector prediction cannot broadcast against the target.
    observed = np.asarray(target, dtype=float).ravel()
    predictions = np.asarray(model.predict(design), dtype=float).ravel()
    residuals = observed - predictions

    n_obs = residuals.size
    # One extra parameter for the intercept.
    degrees_freedom = n_obs - (n_coefficients + 1)
    observed_mean = observed.mean() if n_obs else float("nan")

    SSR = np.sum(residuals ** 2)
    TSS = np.sum((observed - observed_mean) ** 2)
    ESS = np.sum((predictions - observed_mean) ** 2)

    # The two R² values coincide only when SSR + ESS == TSS, which this code
    # does not enforce; printing both shows any gap between them.
    r_squared_method1 = 1 - _safe_ratio(SSR, TSS)
    r_squared_method2 = _safe_ratio(ESS, TSS)

    MSE = _safe_ratio(SSR, n_obs)
    RMSE = np.sqrt(MSE)

    # Printed under the "2 df" labels: normalised by residual degrees of freedom.
    MSE_DF = _safe_ratio(SSR, degrees_freedom)
    RMSE_DF = np.sqrt(MSE_DF)

    print(f"Sum of Squared Residuals: {SSR:.4f}")
    print(f"Total Sum of Squares: {TSS:.4f}")
    print(f"Explained Sum of Squares: {ESS:.4f}\n")

    print(f"Mean Squared Error: {MSE:.4f}")
    print(f"Root Mean Squared Error: {RMSE:.4f}")

    print(f"Mean Squared Error 2 df: {MSE_DF:.4f}")
    print(f"Root Mean Squared Error 2 df: {RMSE_DF:.4f}\n")

    print(f"R² (method 1): {r_squared_method1:.4f}")
    print(f"R² (method 2): {r_squared_method2:.4f}")

# Print the fit diagnostics (sums of squares, MSE/RMSE, R²) for gradient_model on X and y.
gradient_stats()

Sum of Squared Residuals: 9893.7355
Total Sum of Squares: 13663.2696
Explained Sum of Squares: 3778.4646

Mean Squared Error: 7.0569
Root Mean Squared Error: 2.6565
Mean Squared Error 2 df: 7.0771
Root Mean Squared Error 2 df: 2.6603

R² (method 1): 0.2759
R² (method 2): 0.2765
