In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.ensemble import GradientBoostingRegressor

# -----------------------------------
# Example: df is loaded from clean.csv below
# target: burnout_score
# -----------------------------------

# Load the dataset from disk; the 'burnout_score' column is the regression target.
df = pd.read_csv("clean.csv")

# Separate the target from the predictors.
y = df['burnout_score']
X = df.drop('burnout_score', axis=1)

# One-hot encode any categorical predictors, dropping the first level of each.
X = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Hyperparameters for the gradient boosting regressor, gathered in one place.
gbm_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 3,
    "min_samples_leaf": 5,
    "subsample": 0.8,
    "max_features": None,
    "loss": 'squared_error',
    "random_state": 42,
}
gbm = GradientBoostingRegressor(**gbm_params)
gbm.fit(X_train, y_train)

# Predict on both partitions so train and test error can be compared.
y_train_pred = gbm.predict(X_train)
y_test_pred = gbm.predict(X_test)

# RMSE is the square root of the mean squared error.
train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
test_r2 = r2_score(y_test, y_test_pred)

print("Train RMSE:", train_rmse)
print("Test  RMSE:", test_rmse)
print("Test R2:", test_r2)

Train RMSE: 5.097119977958092
Test  RMSE: 6.02591775223503
Test R2: 0.9356334888334641
