In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Conv1D, GlobalAveragePooling1D, 
                                     ReLU, Multiply, LSTM, Dense, BatchNormalization, Dropout)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from tabulate import tabulate
import matplotlib.pyplot as plt

In [None]:
# Read the dataset and shuffle its rows once with a fixed seed so the row
# order is the same on every run.
data_path = 'plant_health_datawithnomalization.csv'
data = (
    pd.read_csv(data_path)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Input columns, in the order they are fed to the network, and the target column.
feature_columns = [
    'Soil_Moisture', 'Ambient_Temperature', 'Soil_Temperature', 'Humidity',
    'Light_Intensity', 'Soil_pH', 'Nitrogen_Level', 'Phosphorus_Level',
    'Potassium_Level', 'Chlorophyll_Content', 'Electrochemical_Signal',
]
target_column = 'Plant_Health_Status'

y = data[target_column].values

# Conv1D expects 3-D input, so append a trailing channel axis of size 1:
# (samples, features) -> (samples, features, 1).
X = data[feature_columns].values[..., np.newaxis]


In [None]:
def create_model(learning_rate):
    """Build and compile the Conv1D + channel-gate + stacked-LSTM regressor.

    Architecture, in order: two Conv1D/BatchNorm/ReLU/Dropout blocks, a
    channel gate (global average pool -> Dense -> Dense -> sigmoid Dense)
    multiplied onto the second block's output, four LSTM layers, and a
    Dense head ending in a single linear unit.

    The input length is read from the module-level array ``X``
    (``X.shape[1]``), so ``X`` must be defined before this is called.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A Keras ``Model`` compiled with Adam and mean-squared-error loss.
    """
    input_seq = Input(shape=(X.shape[1], 1))

    # Convolutional block 1
    conv_1 = Conv1D(filters=64, kernel_size=3, padding='same')(input_seq)
    conv_1 = BatchNormalization()(conv_1)
    relu_1 = ReLU()(conv_1)
    dropout_1 = Dropout(0.3)(relu_1)

    # Convolutional block 2
    conv_2 = Conv1D(filters=64, kernel_size=3, padding='same')(dropout_1)
    conv_2 = BatchNormalization()(conv_2)
    relu_2 = ReLU()(conv_2)
    dropout_2 = Dropout(0.3)(relu_2)

    # Channel gate. The pooled summary is taken from dropout_2, the same
    # tensor the gate rescales below. Previously this pooled conv_1 (the
    # first block's batch-norm output), so the second conv block never
    # contributed to the gate.
    global_pool = GlobalAveragePooling1D()(dropout_2)
    fc_2 = Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.01))(global_pool)
    dropout_3 = Dropout(0.3)(fc_2)

    fc_3 = Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.01))(dropout_3)
    # 64 units so there is one gate value per Conv1D filter.
    sigmoid = Dense(64, activation='sigmoid')(fc_3)

    # The gate is rank 2 and the feature map rank 3; this relies on the
    # Multiply layer broadcasting the gate along the sequence axis.
    multiply = Multiply()([dropout_2, sigmoid])

    # Four LSTM layers in total: the first three return full sequences so the
    # next LSTM has a sequence to consume; the last returns only its final state.
    lstm = LSTM(128, return_sequences=True)(multiply)
    for _ in range(2):
        lstm = LSTM(128, return_sequences=True)(lstm)

    lstm = LSTM(128, return_sequences=False)(lstm)
    fc = Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.01))(lstm)
    dropout_4 = Dropout(0.3)(fc)
    # Single unit with no activation: unbounded regression output.
    output = Dense(1)(dropout_4)

    model = Model(inputs=input_seq, outputs=output)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')

    return model

In [None]:
def compute_average_loss(history_all_folds):
    """Average the per-fold training and validation loss curves.

    Folds can stop after different numbers of epochs. Every curve is
    therefore linearly resampled (first and last values preserved) onto as
    many points as the shortest *training* curve has, and the resampled
    curves are averaged point by point. Longer curves are compressed, not
    truncated, so a position on the result is a relative position within
    each run rather than an exact epoch number.

    Args:
        history_all_folds: Mapping with keys 'loss' and 'val_loss', each a
            list containing one per-epoch loss sequence per fold.

    Returns:
        Tuple ``(avg_loss, avg_val_loss)`` of 1-D NumPy arrays whose length
        equals the shortest training curve.
    """
    train_curves = history_all_folds['loss']
    val_curves = history_all_folds['val_loss']
    n_points = min(len(curve) for curve in train_curves)

    def _resample(curve):
        # Evaluate the curve at n_points evenly spaced positions spanning
        # its first to its last epoch index.
        positions = np.linspace(0, len(curve) - 1, n_points)
        return np.interp(positions, np.arange(len(curve)), curve)

    # Pair fold i's training curve with fold i's validation curve.
    fold_pairs = list(zip(train_curves, val_curves))
    total_loss = sum((_resample(train) for train, _ in fold_pairs), np.zeros(n_points))
    total_val_loss = sum((_resample(val) for _, val in fold_pairs), np.zeros(n_points))

    return total_loss / len(train_curves), total_val_loss / len(val_curves)

In [None]:
def train_kfold(learning_rate, k=5):
    """Train and evaluate a fresh model on each of ``k`` cross-validation folds.

    Reads the module-level arrays ``X`` and ``y`` and builds every fold's
    model with ``create_model``.

    Args:
        learning_rate: Passed to ``create_model`` and stored as the first
            entry of every result row.
        k: Number of KFold splits.

    Returns:
        Tuple ``(results, history_all_folds)``:
            results: one list per fold, laid out as
                [learning_rate,
                 val R2, val MAE, val MBE, val RMSE,
                 train R2, train MAE, train MBE, train RMSE].
                MBE is computed as mean(y_true - y_pred).
            history_all_folds: dict with keys "loss" and "val_loss", each a
                list holding one per-epoch loss list per fold.
    """
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    results = []
    history_all_folds = {"loss": [], "val_loss": []}

    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = create_model(learning_rate)
        # NOTE: the validation fold drives both callbacks below and is also the
        # fold the reported validation metrics are computed on.
        early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=1e-6)

        history = model.fit(
            X_train, y_train,
            epochs=200,
            batch_size=8,
            validation_data=(X_val, y_val),
            verbose=1,
            callbacks=[early_stopping, reduce_lr]
        )

        history_all_folds['loss'].append(history.history['loss'])
        history_all_folds['val_loss'].append(history.history['val_loss'])

        # The model ends in Dense(1), so predict() yields a column of shape
        # (n, 1) while the targets are 1-D (n,). Flatten once so that
        # `y - y_pred` below is element-wise; without this NumPy broadcasts
        # the subtraction to an (n, n) matrix, costing O(n^2) memory and time.
        y_pred_train = model.predict(X_train).ravel()
        y_pred_val = model.predict(X_val).ravel()

        # Mean bias error: mean(y_true - y_pred)
        mbe_train = np.mean(y_train - y_pred_train)
        mbe_val = np.mean(y_val - y_pred_val)

        metrics = [
            learning_rate,
            r2_score(y_val, y_pred_val), mean_absolute_error(y_val, y_pred_val),
            mbe_val, np.sqrt(mean_squared_error(y_val, y_pred_val)),
            r2_score(y_train, y_pred_train), mean_absolute_error(y_train, y_pred_train),
            mbe_train, np.sqrt(mean_squared_error(y_train, y_pred_train))
        ]
        results.append(metrics)

    return results, history_all_folds

In [None]:
# Learning rates to evaluate; each one gets a full k-fold run.
learning_rates = [0.001]
k = 5
all_results = []

for lr in learning_rates:
    results, history_all_folds = train_kfold(lr, k)
    all_results.extend(results)

    # Plot inside the loop so every learning rate gets its own figure, titled
    # with the rate that actually produced the curves. Previously only the
    # last rate's history survived the loop while the title always showed
    # learning_rates[0], which mislabels the plot once more rates are added.
    history_avg_loss, history_avg_val_loss = compute_average_loss(history_all_folds)

    plt.figure(figsize=(8, 5))
    plt.plot(history_avg_loss, label='Avg Training Loss')
    plt.plot(history_avg_val_loss, label='Avg Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')
    plt.title(f'Average Learning Curve Across {k} Folds (LR={lr})')
    plt.legend()
    plt.grid()
    plt.show()

# One row per (learning rate, fold). The "Test" columns hold the metrics
# train_kfold computed on each fold's validation split.
headers = ["Learning Rate", "Test R²", "Test MAE", "Test MBE", "Test RMSE",
           "Train R²", "Train MAE", "Train MBE", "Train RMSE"]
print(tabulate(all_results, headers=headers, tablefmt="grid"))
df_results = pd.DataFrame(all_results, columns=headers)