# Import Libraries

In [None]:
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import os
import IPython
import IPython.display
import tensorflow as tf
from keras.models import Model
import keras.layers as kl
import keras.activations as ka
import gc
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Global configuration shared by the loading, windowing and model cells.
random_seed = 42  # value passed to the Python, NumPy and TensorFlow seeders
SKIP_TIMESTEPS = 20  # default row stride used by load_correct_data when subsampling
FORECAST_WINDOW = 20  # label width (and base input width) of the forecasting windows
FORECAST_SHIFT = 10  # passed to WindowGenerator as `shift`
CONV_WIDTH = 5  # kernel size of the first Conv1D layer in create_conv_model
# Columns the models predict. NOTE(review): "disolved_oxg" presumably mirrors
# the CSV column spelling — confirm against the data before "fixing" it.
TARGET_LABELS = ["ph", "temperature", "disolved_oxg"]

In [None]:
# Seed the Python, NumPy and TensorFlow random generators with the same value.
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

In [None]:
# Directory whose entries are later read one by one with pd.read_csv.
f_path = "/kaggle/input/cleaned-aquaponics-pond-dataset/"
# os.listdir returns entries in arbitrary order; sort them so that list
# positions (e.g. the index passed to visualize_df) are stable across runs.
ponds = sorted(os.listdir(f_path))
print(ponds)

# Loading and correcting the feature data

In [None]:
def load_correct_data(ponds=ponds, skip_timesteps=SKIP_TIMESTEPS):
    """Load every pond file, clean it and clamp out-of-range readings.

    For each entry of ``ponds`` the file ``f_path + pond`` is read as CSV and
    processed as follows:

    * keep every ``skip_timesteps``-th row;
    * parse ``created_at`` into timestamps and remove it from the frame;
    * drop ``population``, ``entry_id`` and any column whose name contains
      ``"Unnamed"``;
    * replace missing values with the mean of their column.

    A file that fails any of these steps is skipped (best effort) and its
    name is recorded in ``unused_ponds``; the reason is printed at the end.

    The loaded frames are then clamped: ``temperature`` to at least 20,
    ``ph`` to the range [5, 12], ``ammonia`` to at most 10 and ``nitrate``
    to at most 2000.

    Args:
        ponds: file names, relative to ``f_path``.
        skip_timesteps: row stride used for subsampling.

    Returns:
        Tuple ``(data, date_times, used_ponds, unused_ponds)`` where ``data``
        and ``date_times`` hold one frame / timestamp Series per loaded pond,
        in the same order as ``used_ponds``.

    Raises:
        KeyError: if a loaded frame lacks one of the clamped columns.
    """
    data = []
    date_times = []
    used_ponds = []
    unused_ponds = []
    skip_reasons = []

    # loading data
    for pond in ponds:
        try:
            # .copy() makes the subsample an independent frame, so the column
            # assignments below never write through a slice of the full file.
            df = pd.read_csv(f_path + pond)[::skip_timesteps].copy()

            # Parse element-wise first so each value is interpreted on its
            # own, then normalise the result into a datetime Series.
            timestamps = pd.to_datetime(
                df.pop("created_at").apply(pd.to_datetime),
                format="%Y-%m-%d %H:%M:%S",
            )

            df = df.drop(columns=["population", "entry_id"])
            # Drop leftover index columns before imputing, so they can never
            # make the mean computation fail.
            unnamed_cols = [col for col in df.columns if "Unnamed" in col]
            df = df.drop(columns=unnamed_cols)

            for col in df.columns:
                df[col] = df[col].fillna(df[col].mean())
        except Exception as exc:
            # Best effort: remember the failing file and keep going.
            unused_ponds.append(pond)
            skip_reasons.append((pond, exc))
            continue

        data.append(df)
        date_times.append(timestamps)
        used_ponds.append(pond)

    # correcting data
    # Whole-column assignment instead of `df[col].loc[mask] = value`: the
    # chained form writes to a temporary under pandas Copy-on-Write and would
    # leave the frame unchanged.
    for df in data:
        df["temperature"] = df["temperature"].clip(lower=20)
        df["ph"] = df["ph"].clip(lower=5, upper=12)
        df["ammonia"] = df["ammonia"].clip(upper=10)
        df["nitrate"] = df["nitrate"].clip(upper=2000)

    # Clear any noise emitted while loading, then report what was skipped.
    IPython.display.clear_output()
    for pond, reason in skip_reasons:
        print(f"Skipped {pond}: {type(reason).__name__}: {reason}")

    return (data, date_times, used_ponds, unused_ponds)

# Standardizing and Normalizing Data

In [None]:
def standarize_normalize(df):
    """Min-max scale ``df`` and then standardise the scaled values.

    Both scalers are fitted on the data passed in. The result is rebuilt as
    a DataFrame carrying the original column labels.

    Returns:
        Tuple ``(scaled_df, normalizer, standarizer)`` with the fitted
        ``MinMaxScaler`` and ``StandardScaler`` in that order.
    """
    column_names = df.columns
    standarizer = StandardScaler()
    normalizer = MinMaxScaler()

    # MinMaxScaler first, StandardScaler on its output.
    scaled = standarizer.fit_transform(normalizer.fit_transform(df))

    return pd.DataFrame(scaled, columns=column_names), normalizer, standarizer

In [None]:
def compute_metrics(df):
    """Return the mean, standard deviation, maximum and minimum of ``df``.

    The four results are returned as a tuple in exactly that order; each is
    whatever the corresponding ``df`` method produces.
    """
    return df.mean(), df.std(), df.max(), df.min()

In [None]:
def destandarize_denormalize(df, transformations):
    """Undo the scaling applied by the two fitted scalers.

    Args:
        df: scaled DataFrame.
        transformations: pair ``(normalizer, standarizer)``; each object must
            provide ``inverse_transform``.

    Returns:
        A new DataFrame with the same column labels as ``df``, obtained by
        inverting the standariser first and the normaliser second.
    """
    normalizer, standarizer = transformations
    restored = normalizer.inverse_transform(standarizer.inverse_transform(df))
    return pd.DataFrame(restored, columns=df.columns)

# Visualization tools

In [None]:
def visualize_feature(feature_idx, data, date_times, used_ponds):
    """Plot one feature for every pond, with percentile guide lines.

    Each pond gets its own panel in a two-column grid. The 20th, 50th, 75th
    and 90th percentiles of the feature are drawn as black, green, yellow and
    red horizontal lines; the panel title shows the pond name followed by the
    20th and 90th percentile values.

    Args:
        feature_idx: positional index of the column to plot.
        data: list of per-pond DataFrames.
        date_times: list of x-axis values, aligned with ``data``.
        used_ponds: pond names, aligned with ``data``.
    """
    # Grow the grid with the number of ponds (never below 2x2) so that more
    # than four ponds no longer index past the axes array.
    n_rows = max(2, (len(data) + 1) // 2)
    fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(16, 5 * n_rows))

    line_colors = ("black", "green", "yellow", "red")
    test_feature = None
    for i, df in enumerate(data):
        ax = axs[i // 2, i % 2]

        test_feature = df.columns[feature_idx]
        thresholds = np.percentile(df[test_feature], [20, 50, 75, 90])

        ax.plot(date_times[i], df[test_feature])
        for level, color in zip(thresholds, line_colors):
            ax.axhline(level, color=color)
        ax.set_title(used_ponds[i] + f"[{thresholds[0]:.2f} {thresholds[-1]:.2f}]")

    # The figure title is the feature name taken from the last pond.
    if test_feature is not None:
        fig.suptitle(test_feature)
    plt.show()

In [None]:
def visualize_df(df_idx, data, date_times):
    """Plot every column of one pond against its timestamps.

    Args:
        df_idx: position of the pond in ``data`` / ``date_times``.
        data: list of per-pond DataFrames.
        date_times: list of timestamp sequences, aligned with ``data``.

    Each column is drawn in its own panel of a two-column grid, with a legend
    naming the column.
    """
    df = data[df_idx]
    date_time = date_times[df_idx]
    plot_cols = df.columns

    colors = ['blue', 'red', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta']

    plot_features = df[plot_cols]
    plot_features.index = date_time

    # Size the grid from the column count (never below 4x2) instead of
    # assuming exactly eight features.
    n_rows = max(4, (len(plot_cols) + 1) // 2)
    fig, axs = plt.subplots(ncols=2, nrows=n_rows, figsize=(15, 2.5 * n_rows))
    fig.subplots_adjust(wspace=0.1, hspace=0.2)
    for i, col in enumerate(plot_features):
        ax = axs[i // 2, i % 2]
        # Cycle through the palette so extra columns reuse colours rather
        # than indexing past the end of the list.
        ax.plot(plot_features[col], color=colors[i % len(colors)], label=col)
        ax.tick_params(axis='x', labelsize=7)
        ax.legend()

    plt.show()

In [None]:
def plot_history(history):
    """Draw the training and validation loss curves on one figure.

    ``history`` is a mapping that provides the ``'loss'`` and ``'val_loss'``
    sequences.
    """
    for curve_key in ('loss', 'val_loss'):
        plt.plot(history[curve_key])
    plt.legend(['Training', 'Evaluation'])
    plt.title("Loss")
    plt.show()

# Splitting the data

In [None]:
def split_data(data, train_ratio=0.8, val_ratio=0.9):
    """Cut every sequence in ``data`` into train / validation / test parts.

    The split is positional: the first ``train_ratio`` share of the rows goes
    to training, the rows up to the ``val_ratio`` mark go to validation and
    the remainder goes to test. Cut points are truncated to integers.

    Returns:
        Three lists ``(train_data, val_data, test_data)``, each with one
        slice per input sequence.
    """
    train_data, val_data, test_data = [], [], []
    for frame in data:
        length = len(frame)
        train_end = int(length * train_ratio)
        val_end = int(length * val_ratio)

        train_data.append(frame[:train_end])
        val_data.append(frame[train_end:val_end])
        test_data.append(frame[val_end:])

    return train_data, val_data, test_data

# Window Maker Class

In [None]:
#from TensorFlow
class WindowGenerator():
  """Turn lists of per-pond frames into windowed ``tf.data`` datasets.

  A window spans ``input_width + shift`` consecutive rows. The inputs are its
  first ``input_width`` rows; the labels are its last ``label_width`` rows,
  optionally restricted to ``label_columns``.
  """

  def __init__(self, input_width, label_width, shift,
               train_data, val_data, test_data,
               label_columns=None):
    """Store the splits and pre-compute the window slices.

    Args:
      input_width: number of rows fed to the model.
      label_width: number of rows the model must predict.
      shift: number of rows added after the inputs to form the full window.
      train_data, val_data, test_data: lists of DataFrames (one per pond).
      label_columns: names of the columns to predict; ``None`` keeps all.
    """
    # Store the raw data.
    self.train_data = train_data
    self.val_data = val_data
    self.test_data = test_data

    # Work out the label column indices. The mapping is always defined
    # (empty when no label columns were given) so the attribute exists
    # regardless of how the generator was configured.
    self.label_columns = label_columns
    self.label_columns_indices = {name: i for i, name in
                                  enumerate(label_columns or [])}
    self.column_indices = {name: i for i, name in
                           enumerate(train_data[0].columns)}

    # Work out the window parameters.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift

    self.total_window_size = input_width + shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    return '\n'.join([
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}'])

  def split_window(self, features):
    """Split a batch of full windows into ``(inputs, labels)``.

    ``features`` is indexed as ``[batch, time, feature]``.
    """
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
      labels = tf.stack(
          [labels[:, :, self.column_indices[name]] for name in self.label_columns],
          axis=-1)

    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])

    return inputs, labels

  def plot(self, model=None, plot_col=TARGET_LABELS[0], max_subplots=3):
    """Plot inputs, labels and optional predictions for the cached example.

    Args:
      model: callable applied to the example inputs; ``None`` skips the
        prediction markers.
      plot_col: name of the column to draw.
      max_subplots: maximum number of example windows to show.
    """
    inputs, labels = self.example
    plt.figure(figsize=(12, 8))
    plot_col_index = self.column_indices[plot_col]
    max_n = min(max_subplots, len(inputs))

    # Position of `plot_col` among the label channels; None means the column
    # is not a label, in which case only the inputs are drawn.
    if self.label_columns:
      label_col_index = self.label_columns_indices.get(plot_col, None)
    else:
      label_col_index = plot_col_index

    # Run the model once for the whole batch rather than once per subplot.
    predictions = None
    if model is not None and label_col_index is not None and max_n > 0:
      predictions = model(inputs)

    for n in range(max_n):
      plt.subplot(max_n, 1, n+1)
      plt.ylabel(f'{plot_col} [normed]')
      plt.plot(self.input_indices, inputs[n, :, plot_col_index],
               label='Inputs', marker='.', zorder=-10)

      if label_col_index is None:
        continue

      if predictions is not None:
        plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                    marker='X', edgecolors='k', label='Predictions',
                    c='#ff7f0e', s=64)
      plt.scatter(self.label_indices, labels[n, :, label_col_index],
                  edgecolors='k', label='Labels', c='#2ca02c', s=64)

      if n == 0:
        plt.legend()

    plt.xlabel(plot_col)

  def make_dataset(self, data):
    """Build a shuffled dataset of ``(inputs, labels)`` batches from one frame.

    Windows of ``total_window_size`` rows are taken with stride 1 and grouped
    into batches of 32.
    """
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32,)

    ds = ds.map(self.split_window)

    return ds

  def _concat_split(self, frames):
    """Chain the windowed datasets of all ``frames``; ``None`` if empty."""
    full_dataset = None
    for frame in frames:
      ds = self.make_dataset(frame)
      full_dataset = ds if full_dataset is None else full_dataset.concatenate(ds)
    return full_dataset

  # NOTE: each property rebuilds its dataset on every access.
  @property
  def train(self):
    """Windowed dataset over all training frames."""
    return self._concat_split(self.train_data)

  @property
  def val(self):
    """Windowed dataset over all validation frames."""
    return self._concat_split(self.val_data)

  @property
  def test(self):
    """Windowed dataset over all test frames."""
    return self._concat_split(self.test_data)

  @property
  def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
      # No example batch was found, so get one from the `.train` dataset
      result = next(iter(self.train))
      # And cache it for next time
      self._example = result
    return result


# Pipeline

In [None]:
# Load, clean and clamp every pond file. Also returns the per-pond timestamps
# and the names of the ponds that were loaded or skipped.
data, date_times, used_ponds, unused_ponds = load_correct_data()

In [None]:
# Ponds that were loaded successfully.
print(used_ponds)

In [None]:
# Preview the first rows of the first loaded pond.
data[0].head()

In [None]:
# Stack all ponds into one frame; it is used next to fit the scalers on the
# whole dataset. The summary table shows the statistics before scaling.
full_data = pd.concat(data)
full_data.describe().transpose()


In [None]:
# Fit the min-max and standard scalers on the combined data of all ponds.
# NOTE(review): this runs before split_data, so validation and test rows
# contribute to the scaler statistics — confirm this leakage is acceptable.
full_data, normalizer, standarizer = standarize_normalize(full_data)
full_data.describe().transpose()

## Normalizing and standardizing each dataset

In [None]:
# Apply the globally fitted scalers to every pond and rebuild each result as
# a DataFrame with the pond's original column labels.
for i, pond_frame in enumerate(data):
    df_cols = pond_frame.columns
    scaled = standarizer.transform(normalizer.transform(pond_frame))
    data[i] = pd.DataFrame(scaled, columns=df_cols)

In [None]:
# Preview the first pond again, now with scaled values.
data[0].head()

In [None]:
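# Optional round-trip check: undo the scaling to recover the original values.
# destandarize_denormalize expects the pair (normalizer, standarizer).
# for i, df in enumerate(data):
#     data[i] = destandarize_denormalize(df, (normalizer, standarizer))

# data[0].head()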

In [None]:
# Plot every feature of the pond at list position 3 against its timestamps.
visualize_df(3, data, date_times)

In [None]:
# Positional split of every pond with split_data's default ratios:
# first 80 % of the rows for training, next 10 % for validation, rest for test.
train_data, val_data, test_data = split_data(data)

In [None]:
# Map each column name to its position, taken from the first pond's frame.
column_indices = {name: i for i, name in enumerate(data[0].columns)}
column_indices

In [None]:
# Only the splits are used from here on: drop the names that are no longer
# needed and trigger a garbage-collection pass.
del data
del date_times
gc.collect()

# Creating Conv Model

In [None]:
# Windows for the CNN: FORECAST_WINDOW + 2 input rows, FORECAST_WINDOW label rows.
# The two extra input rows are consumed by the model's MaxPool1D(pool_size=3,
# strides=1), which is left at its default (unpadded) setting, so the
# prediction length matches the label length.
window_cnn = WindowGenerator(input_width=FORECAST_WINDOW+2, label_width=FORECAST_WINDOW, shift=FORECAST_SHIFT,
                             train_data=train_data, val_data=val_data, test_data=test_data, label_columns=TARGET_LABELS)
window_cnn

In [None]:
# Pull a single training batch to check the input and label tensor shapes.
for example_inputs, example_labels in window_cnn.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')

In [None]:
def create_conv_model(input_shape=(FORECAST_WINDOW+2, 8)):
    """Build and compile the 1-D convolutional forecasting model.

    Architecture, in order: Conv1D(64, kernel CONV_WIDTH, selu, same padding)
    -> MaxPool1D(3, stride 1) -> Conv1D(128, kernel 3, selu, same padding)
    -> two Dense/PReLU/Dropout(0.3) stages with 64 and 16 units
    -> Dense with one unit per target label.

    The model is compiled with Adam, mean-squared-error loss and
    mean-absolute-error as metric.

    Args:
        input_shape: ``(time steps, features)`` of one input window.

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = kl.Input(shape=input_shape)

    # Convolutional feature extractor.
    x = kl.Conv1D(filters=64, kernel_size=CONV_WIDTH, strides=1,
                  activation="selu", padding="same")(inputs)
    x = kl.MaxPool1D(pool_size=3, strides=1)(x)
    x = kl.Conv1D(filters=128, kernel_size=3, strides=1,
                  activation="selu", padding="same")(x)

    # Dense head; layers are created in the same order as they are applied.
    for hidden_units in (64, 16):
        x = kl.Dense(units=hidden_units)(x)
        x = kl.PReLU()(x)
        x = kl.Dropout(rate=0.3)(x)

    outputs = kl.Dense(units=len(TARGET_LABELS))(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanAbsoluteError()],
    )

    return model

In [None]:
# Instantiate the CNN with its default input shape and print the layer summary.
conv_model = create_conv_model()
conv_model.summary()

In [None]:
# Train the CNN for 10 epochs. ReduceLROnPlateau (patience=2, other settings
# left at their defaults) lowers the learning rate when its monitored value
# stops improving.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(patience=2)
history = conv_model.fit(window_cnn.train, epochs=10, validation_data=window_cnn.val, callbacks=[reduce_lr])

In [None]:
# Training vs. validation loss per epoch for the CNN.
plot_history(history.history)

In [None]:
# Example windows for the third target label, with the CNN's predictions.
window_cnn.plot(conv_model, plot_col=TARGET_LABELS[2])

In [None]:
# Element 0 of evaluate()'s result is the compiled loss, i.e. the MSE.
val_MSE_cnn = conv_model.evaluate(window_cnn.val)[0]
test_MSE_cnn = conv_model.evaluate(window_cnn.test)[0]

# Fully Connected NN

In [None]:
# Windows for the dense model: input and label widths are both FORECAST_WINDOW.
window_nn = WindowGenerator(input_width=FORECAST_WINDOW, label_width=FORECAST_WINDOW, shift=FORECAST_SHIFT,
                             train_data=train_data, val_data=val_data, test_data=test_data, label_columns=TARGET_LABELS)
window_nn

In [None]:
# Pull a single training batch to check the input and label tensor shapes.
for example_inputs, example_labels in window_nn.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')

In [None]:
def create_nn_model(input_shape=(FORECAST_WINDOW, 8)):
    """Build and compile the dense forecasting model.

    Three Dense/PReLU/Dropout stages (256 units with dropout 0.3, then 64 and
    16 units with dropout 0.2) are followed by a Dense output layer with one
    unit per target label. No flattening layer is used between the input and
    the Dense layers.

    The model is compiled with Adam, mean-squared-error loss and
    mean-absolute-error as metric.

    Args:
        input_shape: ``(time steps, features)`` of one input window.

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = kl.Input(shape=input_shape)

    # (units, dropout rate) of each hidden stage, in application order.
    hidden_stages = ((256, 0.3), (64, 0.2), (16, 0.2))

    x = inputs
    for hidden_units, drop_rate in hidden_stages:
        x = kl.Dense(units=hidden_units)(x)
        x = kl.PReLU()(x)
        x = kl.Dropout(rate=drop_rate)(x)

    outputs = kl.Dense(units=len(TARGET_LABELS))(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanAbsoluteError()],
    )

    return model

In [None]:
# Instantiate the dense model with its default input shape and print the summary.
nn_model = create_nn_model()
nn_model.summary()

In [None]:
# Train the dense model for 10 epochs with the same learning-rate callback
# configuration as the CNN (a fresh ReduceLROnPlateau with patience=2).
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(patience=2)
history3 = nn_model.fit(window_nn.train, epochs=10, validation_data=window_nn.val, callbacks=[reduce_lr])

In [None]:
# Training vs. validation loss per epoch for the dense model.
plot_history(history3.history)

In [None]:
# Example windows for the third target label, with the dense model's predictions.
window_nn.plot(nn_model, plot_col=TARGET_LABELS[2])

In [None]:
# Element 0 of evaluate()'s result is the compiled loss, i.e. the MSE.
val_MSE_nn = nn_model.evaluate(window_nn.val)[0]
test_MSE_nn = nn_model.evaluate(window_nn.test)[0]

In [None]:
# Display the dense model's validation MSE.
val_MSE_nn

# Comparison Between All Models

In [None]:

# Grouped bar chart comparing validation and test MSE of both models.
labels = ['CNN', 'Neural Network']
performances_val = [val_MSE_cnn, val_MSE_nn]
performaces_test = [test_MSE_cnn, test_MSE_nn]

x = np.arange(len(performances_val))
width = 0.3

# One bar series per split, shifted left/right of the tick position.
for bar_offset, bar_values, split_name, bar_color in (
        (-0.17, performances_val, 'Validation', "purple"),
        (0.17, performaces_test, 'Test', "red")):
    plt.bar(x + bar_offset, bar_values, width, label=split_name, color=bar_color)

plt.ylabel('Mean Square Error')
plt.xticks(ticks=x, labels=labels, rotation=20)
_ = plt.legend()
plt.show()

# Save Models

In [None]:
# Create the output directory if it does not exist yet
import os
os.makedirs("models", exist_ok=True)

# Save the CNN model
cnn_path = "models/CNN_model.h5"
conv_model.save(cnn_path)
print("Saved CNN model to:", cnn_path)

# Save the Neural Network model
nn_path = "models/NN_model.h5"
nn_model.save(nn_path)
print("Saved NN model to:", nn_path)
