In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error

# Load the cleaned Guardian table and name the predictor / response columns.
df = pd.read_csv("../data/clean_guardian.csv")
features = ['Teaching','Satisfied with feedback','Continuation',
            'Student to staff ratio','Career after 15 months','Spend per student/10']
target = 'Overall'

y = df[target]

# Split BEFORE scaling. Fitting the scaler on the full table would let the
# held-out rows influence the mean/variance used to transform the training
# data (preprocessing leakage), so the test metrics would no longer describe
# truly unseen data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df[features], y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both partitions.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix, kept under its original name for any later code that
# refers to `X`; it is scaled with the training-set statistics.
X = scaler.transform(df[features])

# Candidate regressors, all seeded where the estimator is stochastic so the
# printed scores are reproducible.
ols = LinearRegression()
rf = RandomForestRegressor(n_estimators=200, random_state=42)
gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, random_state=42)

models = {"OLS": ols, "RF": rf, "GBR": gbr}

# Fit each model on the training partition and report R² and MAE on the
# held-out partition.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name} R²: {r2_score(y_test, y_pred):.3f}")
    print(f"{name} MAE: {mean_absolute_error(y_test, y_pred):.3f}")
