In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error
from sklearn.model_selection import cross_val_predict, KFold

In [2]:
# Names of the columns that are treated as prediction targets.
targets = ["Turbidity", "DO", "fChl"]

# Read the raw table, then keep only its float64 / int64 columns.
df = pd.read_csv('../dataset/data.csv')
data = df.select_dtypes(include=['float64', 'int64'])

# Display which columns survived the dtype filter.
data.columns

Index(['Turbidity', 'DO', 'fChl', 'Discharge', 'Height', 'WT', 'B1', 'B2',
       'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'WVP',
       'MNDWI', 'GNDVI', 'SDDI', 'NDTI', 'BR', 'NDWI', 'NDPI', 'NDCI',
       '2BDA_Chl', 'RR'],
      dtype='object')

In [3]:
# Folder that holds the pickled estimators of each model family.
# Each family is listed exactly once: a dict literal silently keeps only the
# last occurrence of a repeated key, so a duplicated entry is dead code.
model_dirs = {
    "rf": "../models/rf/cross",
    "mlp": "../models/mlp/cross",
    "rfmlp": "../models/rfmlp",
}

In [4]:
# Load every pickled model found in the configured folders into `models`,
# keyed as "<bo><model_name>_<target>" (e.g. "rf_DO", "borfmlp_DO").
models = {}
for model_name, folder in model_dirs.items():
    if not os.path.exists(folder):
        print(f"Folder not found: {folder}")
        continue

    # os.listdir() returns entries in arbitrary order; sort them
    # (case-insensitively, with the raw name as tie-breaker) so the order of
    # `models` -- and of everything derived from it -- is reproducible.
    for file in sorted(os.listdir(folder), key=lambda entry: (entry.lower(), entry)):
        if not file.endswith(".pkl"):
            continue

        # A file whose name contains "bo" gets a "bo" prefix on its key; what
        # is left of the name after removing the extension and "bo" is the target.
        # NOTE(review): both steps are plain substring operations, so a target
        # name that itself contains "bo" would be mangled -- confirm the naming
        # scheme of the .pkl files.
        bo = "bo" if "bo" in file else ""
        target = file.replace(".pkl", "").replace("bo", "").strip("_")

        path = os.path.join(folder, file)
        # SECURITY: unpickling can execute arbitrary code; only load files
        # that come from a trusted source.
        with open(path, 'rb') as f:
            models[f"{bo}{model_name}_{target}"] = pickle.load(f)

print(f"Loaded {len(models)} models.")
print("Available models:", list(models.keys()))

Loaded 12 models.
Available models: ['rf_DO', 'rf_fChl', 'rf_Turbidity', 'mlp_DO', 'mlp_fChl', 'mlp_Turbidity', 'borfmlp_DO', 'borfmlp_fChl', 'borfmlp_Turbidity', 'rfmlp_DO', 'rfmlp_fChl', 'rfmlp_Turbidity']


In [5]:
# Directory that receives the prediction CSV files; created if it is missing.
save_dir = "../dataset/predictions"
os.makedirs(save_dir, exist_ok=True)

# Shared 5-fold splitter; shuffling with a fixed seed gives the same folds on every run.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-target prediction tables, filled by the cross-validation loop.
predictions = {}
# NOTE(review): `results` is not populated in the cells visible here --
# presumably used further down; confirm or remove.
results = []

In [6]:
# The feature matrix is identical for every model, so build it once instead of
# re-dropping the target columns on each iteration.
X = data.drop(targets, axis=1)

for name, model in models.items():
    # Recover the target from the model key: the first entry of `targets`
    # that occurs in the name.
    target = next((t for t in targets if t in name), None)
    if target is None:
        # Report the model instead of dropping it silently.
        print(f"Skipping {name}: no known target in its name.")
        continue

    y = data[target]

    # Out-of-fold predictions: every row is predicted by an estimator that was
    # fitted on the other folds of `kf`.
    y_pred = cross_val_predict(model, X, y, cv=kf, n_jobs=-1)

    # One table per target: the observed values first, then one column per model.
    if target not in predictions:
        predictions[target] = pd.DataFrame({target: y})
    predictions[target][name] = y_pred

In [7]:
# Write each target's prediction table to its own CSV inside `save_dir`.
for target in predictions:
    df_pred = predictions[target]
    csv_path = os.path.join(save_dir, f"{target}.csv")
    # The row index is not written to the file.
    df_pred.to_csv(csv_path, index=False)
    print(f"Saved predictions for {target}: {csv_path}")

Saved predictions for DO: ../dataset/predictions\DO.csv
Saved predictions for fChl: ../dataset/predictions\fChl.csv
Saved predictions for Turbidity: ../dataset/predictions\Turbidity.csv
