In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score
import pickle
import os
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the raw table (path is relative to the notebook's working directory).
df = pd.read_csv('../dataset/data.csv')

# Keep only the numeric columns; everything else is excluded from modelling.
data = df.select_dtypes(include=['float64', 'int64'])

# Columns that are predicted, one model per column.
targets = ["Turbidity", "DO", "Chl-a"]

# Fail fast: select_dtypes silently drops a target that was parsed as a
# non-numeric dtype, which would otherwise only surface later as an opaque
# KeyError inside the training functions.
missing_targets = [col for col in targets if col not in data.columns]
if missing_targets:
    raise KeyError(
        f"Target column(s) missing from the numeric data: {missing_targets}"
    )

data.columns

Index(['Turbidity', 'DO', 'Chl-a', 'Discharge', 'Height', 'Temperature', 'B1',
       'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12',
       'WVP', 'MNDWI', 'GNDVI', 'SDDI', 'NDTI', 'BR', 'NDWI', 'NDPI', 'NDCI',
       '2BDA_Chl', 'RR'],
      dtype='object')

In [3]:
def mlp(data: pd.DataFrame, target: str, test_size: float = 0.2,
        max_iter: int = 2000, random_state: int = 42,
        save_dir: str = "../models/mlp/split") -> dict:
    """Train, save and evaluate an MLP regressor on a single hold-out split.

    A ``StandardScaler`` + ``MLPRegressor`` pipeline is fitted on the training
    part of a random split, pickled to ``{save_dir}/{target}.pkl`` and scored
    on the held-out part. The metrics are printed and also returned so callers
    can collect them instead of scraping stdout.

    Parameters
    ----------
    data : pd.DataFrame
        Feature columns plus the ``target`` column. Every column other than
        ``target`` is used as a feature.
    target : str
        Name of the column to predict.
    test_size : float, default 0.2
        Fraction of rows held out for evaluation.
    max_iter : int, default 2000
        Iteration cap passed to ``MLPRegressor``.
    random_state : int, default 42
        Seed used for both the split and the network initialisation.
    save_dir : str, default "../models/mlp/split"
        Directory the fitted pipeline is pickled into (created if missing).

    Returns
    -------
    dict
        ``target``, ``MAE``, ``RMSE``, ``R2_percent`` and ``MBE``, each metric
        rounded to two decimals.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``data``.
    """
    X = data.drop(target, axis=1)
    y = data[target]

    # Scaling lives inside the pipeline so it is fitted on training rows only
    # and is stored together with the network in the pickle.
    model = Pipeline([
        ('scaler', StandardScaler()),
        ('mlp', MLPRegressor(hidden_layer_sizes=(64, 32), activation='relu',
                             solver='adam', max_iter=max_iter,
                             random_state=random_state))
    ])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model.fit(X_train, y_train)

    os.makedirs(save_dir, exist_ok=True)
    save_path = f"{save_dir}/{target}.pkl"
    with open(save_path, 'wb') as file:
        pickle.dump(model, file)
    print(f"Model saved to {save_path}")

    y_pred = model.predict(X_test)
    mae = np.around(mean_absolute_error(y_test, y_pred), 2)
    rmse = np.around(root_mean_squared_error(y_test, y_pred), 2)
    r2 = np.around(r2_score(y_test, y_pred) * 100, 2)  # reported in percent
    # Mean bias error: positive means the model over-predicts on average.
    mbe = np.around(np.mean(y_pred - y_test), 2)

    print(f"Performance for {target} (Split):")
    print(f"MAE  = {mae}")
    print(f"RMSE = {rmse}")
    print(f"R²   = {r2} %")
    print(f"MBE  = {mbe}")

    # The notebook silences all warnings, so scikit-learn's ConvergenceWarning
    # is never shown; report explicitly when the iteration cap was reached.
    if model.named_steps['mlp'].n_iter_ >= max_iter:
        print(f"Note: optimizer reached max_iter={max_iter}; "
              "training may not have converged")

    return {
        "target": target,
        "MAE": mae,
        "RMSE": rmse,
        "R2_percent": r2,
        "MBE": mbe,
    }

In [4]:
# Fit one hold-out model per target. The remaining targets are removed from
# the frame first so they are never used as input features.
for target in targets:
    other_targets = [name for name in targets if name != target]
    mlp(data.drop(columns=other_targets), target)
    print("=" * 40)

Model saved to ../models/mlp/split/Turbidity.pkl
Performance for Turbidity (Split):
MAE  = 4.92
RMSE = 7.72
R²   = 94.06 %
MBE  = -0.98
Model saved to ../models/mlp/split/DO.pkl
Performance for DO (Split):
MAE  = 1.0
RMSE = 1.4
R²   = 61.29 %
MBE  = -0.19
Model saved to ../models/mlp/split/Chl-a.pkl
Performance for Chl-a (Split):
MAE  = 1.32
RMSE = 1.6
R²   = 89.01 %
MBE  = -0.4


In [5]:
def cvmlp(data: pd.DataFrame, target: str, n_splits: int = 5,
          max_iter: int = 500, random_state: int = 42,
          save_dir: str = "../models/mlp/cross") -> dict:
    """Evaluate an MLP regressor with shuffled K-fold CV, then refit and save it.

    The printed and returned metrics are computed from out-of-fold predictions
    (``cross_val_predict``). The pickled model is a separate fit on *all* rows,
    so the reported scores estimate its performance rather than measure it.

    Parameters
    ----------
    data : pd.DataFrame
        Feature columns plus the ``target`` column. Every column other than
        ``target`` is used as a feature.
    target : str
        Name of the column to predict.
    n_splits : int, default 5
        Number of cross-validation folds.
    max_iter : int, default 500
        Iteration cap passed to ``MLPRegressor``. Note that the hold-out
        function ``mlp`` in this notebook uses 2000; pass ``max_iter=2000``
        for a like-for-like comparison.
    random_state : int, default 42
        Seed used for both the fold shuffling and the network initialisation.
    save_dir : str, default "../models/mlp/cross"
        Directory the refitted pipeline is pickled into (created if missing).

    Returns
    -------
    dict
        ``target``, ``MAE``, ``RMSE``, ``R2_percent`` and ``MBE``, each metric
        rounded to two decimals.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``data``.
    """
    X = data.drop(target, axis=1)
    y = data[target]

    # Scaling lives inside the pipeline so each fold fits its own scaler on
    # that fold's training rows only.
    model = Pipeline([
        ('scaler', StandardScaler()),
        ('mlp', MLPRegressor(hidden_layer_sizes=(64, 32), activation='relu',
                             solver='adam', max_iter=max_iter,
                             random_state=random_state))
    ])

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # cross_val_predict fits clones of the pipeline; `model` itself is still
    # unfitted afterwards, hence the explicit full-data fit below.
    y_pred = cross_val_predict(model, X, y, cv=kf, n_jobs=-1)
    model.fit(X, y)

    os.makedirs(save_dir, exist_ok=True)
    save_path = f"{save_dir}/{target}.pkl"
    with open(save_path, 'wb') as file:
        pickle.dump(model, file)
    print(f"Model saved to {save_path}")

    mae = np.around(mean_absolute_error(y, y_pred), 2)
    rmse = np.around(root_mean_squared_error(y, y_pred), 2)
    r2 = np.around(r2_score(y, y_pred) * 100, 2)  # reported in percent
    # Mean bias error: positive means the model over-predicts on average.
    mbe = np.around(np.mean(y_pred - y), 2)

    print(f"Performance for {target} ({n_splits}-Fold CV):")
    print(f"MAE  = {mae}")
    print(f"RMSE = {rmse}")
    print(f"R²   = {r2} %")
    print(f"MBE  = {mbe}")

    # The notebook silences all warnings, so scikit-learn's ConvergenceWarning
    # is never shown. Only the final full-data fit can be inspected here; the
    # per-fold clones are discarded by cross_val_predict.
    if model.named_steps['mlp'].n_iter_ >= max_iter:
        print(f"Note: optimizer reached max_iter={max_iter} on the final fit; "
              "training may not have converged")

    return {
        "target": target,
        "MAE": mae,
        "RMSE": rmse,
        "R2_percent": r2,
        "MBE": mbe,
    }

In [6]:
# Cross-validate one model per target. The remaining targets are removed from
# the frame first so they are never used as input features.
for target in targets:
    other_targets = [name for name in targets if name != target]
    cvmlp(data.drop(columns=other_targets), target)
    print("=" * 40)

Model saved to ../models/mlp/cross/Turbidity.pkl
Performance for Turbidity (5-Fold CV):
MAE  = 6.61
RMSE = 9.25
R²   = 93.09 %
MBE  = 0.29
Model saved to ../models/mlp/cross/DO.pkl
Performance for DO (5-Fold CV):
MAE  = 1.2
RMSE = 1.67
R²   = 33.38 %
MBE  = 0.06
Model saved to ../models/mlp/cross/Chl-a.pkl
Performance for Chl-a (5-Fold CV):
MAE  = 1.7
RMSE = 2.41
R²   = 73.05 %
MBE  = 0.11
