In [1]:
import os
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the raw dataset and keep only its float64/int64 columns.
df = pd.read_csv('../dataset/data.csv')
numeric_dtypes = ['float64', 'int64']
data = df.select_dtypes(include=numeric_dtypes)

# Names of the columns treated as prediction targets.
targets = ["Turbidity", "DO", "Chl-a"]

# Display the columns that survived the dtype filter.
data.columns

Index(['Turbidity', 'DO', 'Chl-a', 'Discharge', 'Height', 'Temperature', 'B1',
       'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12',
       'WVP', 'MNDWI', 'GNDVI', 'SDDI', 'NDTI', 'BR', 'NDWI', 'NDPI', 'NDCI',
       '2BDA_Chl', 'RR'],
      dtype='object')

In [3]:
def hybrid_model(data, target, model_a_name, model_b_name,
                 save_dir="../models/hybrid", n_splits=5, random_state=42):
    """Train, cross-validate, and persist a two-model stacking regressor.

    Two base learners are chosen by name and stacked under a ``RidgeCV``
    final estimator. The stack is scored on out-of-fold predictions from a
    shuffled K-fold split, then refit on all rows and pickled to ``save_dir``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the ``target`` column; every other column is used as a
        feature.
    target : str
        Name of the column to predict.
    model_a_name, model_b_name : str
        Base-learner keys: ``"rf"``, ``"xgb"``, ``"svr"`` or ``"mlp"``.
    save_dir : str, optional
        Directory the pickled model is written to (created if missing).
    n_splits : int, optional
        Number of folds used for the out-of-fold evaluation.
    random_state : int, optional
        Seed for the fold shuffle and for the seeded base learners.

    Returns
    -------
    dict
        ``target``, ``model_a``, ``model_b``, ``mae``, ``rmse``, ``r2_pct``
        (R² multiplied by 100), ``mbe`` (mean of prediction minus truth) and
        ``save_path``. Metrics are rounded to two decimals.

    Raises
    ------
    KeyError
        If either model name is not a supported key, or if ``target`` is not
        a column of ``data``.
    """
    # Factories instead of instances: only the two requested learners are
    # ever constructed, and name validation can happen before any work.
    model_factories = {
        "rf": lambda: RandomForestRegressor(random_state=random_state, n_jobs=-1),
        "xgb": lambda: XGBRegressor(objective='reg:squarederror',
                                    random_state=random_state, n_jobs=-1),
        "svr": lambda: Pipeline([('scaler', StandardScaler()), ('svr', SVR())]),
        "mlp": lambda: Pipeline([
            ('scaler', StandardScaler()),
            ('mlp', MLPRegressor(random_state=random_state, max_iter=2000)),
        ]),
    }

    # Fail fast with a readable message (still a KeyError, as before).
    for name in (model_a_name, model_b_name):
        if name not in model_factories:
            raise KeyError(
                f"Unknown base model {name!r}; expected one of {sorted(model_factories)}"
            )

    X = data.drop(target, axis=1)
    y = data[target]

    # === Create stacking regressor ===
    stack = StackingRegressor(
        estimators=[
            (model_a_name, model_factories[model_a_name]()),
            (model_b_name, model_factories[model_b_name]()),
        ],
        final_estimator=RidgeCV(),
        n_jobs=-1
    )

    # === Cross-validation (out-of-fold predictions for every row) ===
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    y_pred = cross_val_predict(stack, X, y, cv=kf, n_jobs=-1)

    # === Fit on full data (so we can save the trained hybrid) ===
    stack.fit(X, y)

    # === Save model ===
    os.makedirs(save_dir, exist_ok=True)
    save_path = f"{save_dir}/{model_a_name}{model_b_name}_{target}.pkl"
    with open(save_path, "wb") as file:
        pickle.dump(stack, file)
    print(f"✅ Model saved to {save_path}")

    # === Metrics (computed on the out-of-fold predictions) ===
    mae = float(np.around(mean_absolute_error(y, y_pred), 2))
    rmse = float(np.around(np.sqrt(mean_squared_error(y, y_pred)), 2))
    r2 = float(np.around(r2_score(y, y_pred) * 100, 2))
    mbe = float(np.around(np.mean(y_pred - y), 2))

    print(f"\n📊 Hybrid {model_a_name}–{model_b_name} Performance ({n_splits}-Fold CV):")
    print(f"MAE  = {mae}")
    print(f"RMSE = {rmse}")
    print(f"R²   = {r2} %")
    print(f"MBE  = {mbe}")

    return {
        "target": target,
        "model_a": model_a_name,
        "model_b": model_b_name,
        "mae": mae,
        "rmse": rmse,
        "r2_pct": r2,
        "mbe": mbe,
        "save_path": save_path,
    }

In [4]:
targets = ["Turbidity", "DO", "Chl-a"]

# Every unordered pairing of the four base learners, generated in the order
# rf-xgb, rf-svr, rf-mlp, xgb-svr, xgb-mlp, svr-mlp.
model_names = ["rf", "xgb", "svr", "mlp"]
pairs = [
    (model_names[i], model_names[j])
    for i in range(len(model_names))
    for j in range(i + 1, len(model_names))
]

for target in targets:
    print(f"========== {target} ==========")
    # Drop the other target columns so hybrid_model does not use them as features.
    other_targets = [col for col in targets if col != target]
    for a, b in pairs:
        hybrid_model(data.drop(columns=other_targets), target, a, b)
    print("=" * 40)

âœ… Model saved to ../models/hybrid/rfxgb_Turbidity.pkl

ðŸ“Š Hybrid rfâ€“xgb Performance (5-Fold CV):
MAE  = 8.65
RMSE = 11.78
RÂ²   = 88.8 %
MBE  = -1.04
âœ… Model saved to ../models/hybrid/rfsvr_Turbidity.pkl

ðŸ“Š Hybrid rfâ€“svr Performance (5-Fold CV):
MAE  = 9.15
RMSE = 12.82
RÂ²   = 86.73 %
MBE  = -2.14
âœ… Model saved to ../models/hybrid/rfmlp_Turbidity.pkl

ðŸ“Š Hybrid rfâ€“mlp Performance (5-Fold CV):
MAE  = 5.9
RMSE = 8.11
RÂ²   = 94.7 %
MBE  = -0.87
âœ… Model saved to ../models/hybrid/xgbsvr_Turbidity.pkl

ðŸ“Š Hybrid xgbâ€“svr Performance (5-Fold CV):
MAE  = 8.24
RMSE = 11.43
RÂ²   = 89.45 %
MBE  = 0.77
âœ… Model saved to ../models/hybrid/xgbmlp_Turbidity.pkl

ðŸ“Š Hybrid xgbâ€“mlp Performance (5-Fold CV):
MAE  = 5.7
RMSE = 7.97
RÂ²   = 94.87 %
MBE  = -0.55
âœ… Model saved to ../models/hybrid/svrmlp_Turbidity.pkl

ðŸ“Š Hybrid svrâ€“mlp Performance (5-Fold CV):
MAE  = 6.19
RMSE = 8.49
RÂ²   = 94.18 %
MBE  = -0.82
âœ… Model saved to ../models/hybrid/rfxgb_DO.pkl

ðŸ“Š Hybri