# Libs

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay
from utils.futurai_ppd import drop_transitorio_desligado
from utils.futurai_utils import select_training_period

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed

# Silenciar logs menos importantes do TensorFlow
tf.get_logger().setLevel('ERROR')

import warnings
warnings.filterwarnings('ignore')

# Import Data

In [None]:
# Name of the column that holds the sample timestamps.
timestamp = 'Timestamp'

# Columns removed right after loading; names absent from the CSV are ignored.
columns_to_drop = [
    "762P0013.OP",
    "762F0040.OP",
    "762F0014.SP",
    "762H0336.PV",
    "762H0342.PV",
    "762N0015.SP",
    "762P0013.SP",
    "762-34-073.CR",
    "762N0015.OP",
]

# Load the CSV, discard the unwanted columns, then discard rows with missing values.
df_dataset = (
    pd.read_csv(
        'data/Depurador 762-28-006 - Cozimento.csv',
        sep=';',
        decimal='.',
        encoding='utf-8-sig',
    )
    .drop(columns=columns_to_drop, errors='ignore')
    .dropna()
)

# Parse the timestamp strings into datetimes using the fixed file format.
df_dataset[timestamp] = pd.to_datetime(df_dataset[timestamp], format='%Y-%m-%d %H:%M:%S')
df_dataset.head()

# Remove shutdown (off) periods

In [None]:
# Parameters of the shutdown filter, kept as named values so they are easy to tune.
pp_var_ref_desligado = "762-28-006.CR"
pp_valor_ref_desligado = 5
pp_tempo_ref_desligado = 0
pp_pre_corte_transitorio = 0
pp_pos_corte_transitorio = 0

# One dictionary per filtering step; every step is applied to the dataset in order.
pre_process = [
    {
        "after_cut": pp_pos_corte_transitorio,
        "interval_off": pp_tempo_ref_desligado,
        "limit_off": pp_valor_ref_desligado,
        "pre_cut": pp_pre_corte_transitorio,
        "variable_off": pp_var_ref_desligado,
    },
]

# NOTE(review): drop_transitorio_desligado is a project helper; presumably it removes
# the samples where the reference variable indicates the equipment is off — confirm
# in utils.futurai_ppd. Only the first of its three return values is kept.
for pro in pre_process:
    df_dataset, _, _ = drop_transitorio_desligado(
        df_dataset,
        pro["variable_off"],
        pro["limit_off"],
        pro["interval_off"],
        timestamp,
        pre_corte=pro["pre_cut"],
        pos_corte=pro["after_cut"],
    )

print(f"Dataset shape: {df_dataset.shape}")
df_dataset.head()

# Select training periods

In [None]:
# Build the figure used to pick the training interval and display it.
# NOTE(review): select_training_period is a project helper; presumably it plots the
# dataset against the timestamp column — confirm in utils.futurai_utils.
fig_training_period = select_training_period(df_dataset, timestamp)
fig_training_period.show()

# Split train data

In [None]:
# Boundaries of the training interval; both ends are included in the selection.
start_date_train = pd.to_datetime('2024-02-25 00:00:00')
end_date_train = pd.to_datetime('2024-03-12 00:00:00')

# Boolean mask of the rows whose timestamp falls inside the training interval.
mask = df_dataset[timestamp].between(start_date_train, end_date_train)

# Keep the timestamps as a separate axis and the remaining columns as training data.
eixo_timestamp_train = df_dataset.loc[mask, timestamp]
df_train = df_dataset.loc[mask].drop(columns=[timestamp])
print(f"Training set shape: {df_train.shape}")

# Set test data

In [None]:
# The test set is a copy of the whole dataset, so the original frame is left untouched.
df_teste = df_dataset.copy()

# Detach the timestamp column from the copy and keep it as the test time axis.
eixo_timestamp_teste = df_teste.pop(timestamp)
print(f"Test set shape: {df_teste.shape}")

# Create Data Windows

In [None]:
class WindowGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves sliding windows of a dataset in batches.

    Window ``i`` is ``data[i : i + window_size]``; consecutive windows are
    shifted by one sample. Each batch is returned as ``(inputs, targets)``
    with both elements being the same array, as required by an autoencoder.

    Args:
        data: Array-like sliced along its first axis (the notebook passes the
            scaled 2-D NumPy array).
        window_size: Number of consecutive samples in each window.
        batch_size: Maximum number of windows per batch.
        shuffle: When true, the window order is shuffled at construction and
            again at the end of every epoch. Defaults to ``False`` so windows
            are served in their original order.

    Raises:
        ValueError: If ``window_size`` or ``batch_size`` is not positive.
    """

    def __init__(self, data, window_size, batch_size, shuffle=False):
        # Newer Keras versions expect the base-class initializer to run.
        super().__init__()
        if window_size <= 0:
            raise ValueError("window_size must be a positive integer")
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        self.data = data
        self.window_size = window_size
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Start positions of every complete window; empty when the data is
        # shorter than one window (the count is clamped so it never goes negative).
        n_windows = max(0, len(data) - window_size + 1)
        self.indices = np.arange(n_windows)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches per epoch, including a final partial batch."""
        # Rounding up keeps the trailing windows that do not fill a whole batch;
        # __getitem__ already returns a shorter batch for them.
        return int(np.ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(windows, windows)``."""
        first = idx * self.batch_size
        window_starts = self.indices[first:first + self.batch_size]

        batch_windows = np.array(
            [self.data[start:start + self.window_size] for start in window_starts]
        )
        # For autoencoders the input and the target are the same.
        return batch_windows, batch_windows

    def on_epoch_end(self):
        """Reshuffle the window order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

WINDOW_SIZE = 1440
BATCH_SIZE = 64  # Adjust according to the memory of your GPU/CPU

# Fit the scaler on the training data only, then apply it to both sets.
scaler = StandardScaler().fit(df_train)

train_data_scaled = scaler.transform(df_train)
test_data_scaled = scaler.transform(df_teste)

# Batch generators of sliding windows over the scaled data.
train_generator = WindowGenerator(
    data=train_data_scaled,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
)
test_generator = WindowGenerator(
    data=test_data_scaled,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
)

# Build autoencoder

In [None]:
def create_lstm_autoencoder(input_shape):
    """Build and compile an LSTM autoencoder, printing its summary.

    Args:
        input_shape: ``(timesteps, features)`` of a single input window.

    Returns:
        The compiled Keras ``Model`` (Adam optimizer, MAE loss).
    """
    timesteps = input_shape[0]
    n_features = input_shape[1]

    window_in = Input(shape=input_shape)

    # Encoder: compress the whole window into a single 128-unit vector.
    latent = LSTM(128, activation='relu', return_sequences=False)(window_in)

    # Decoder: repeat the vector once per timestep and unroll it into a sequence.
    repeated = RepeatVector(timesteps)(latent)
    unrolled = LSTM(128, activation='relu', return_sequences=True)(repeated)

    # Project every timestep back to the original number of features.
    reconstruction = TimeDistributed(Dense(n_features))(unrolled)

    model = Model(window_in, reconstruction)
    model.compile(optimizer='adam', loss='mae')
    model.summary()
    return model

# The model input is one window: (timesteps, features).
num_features = train_data_scaled.shape[1]
input_shape = (WINDOW_SIZE, num_features)
autoencoder = create_lstm_autoencoder(input_shape)

# Train to reconstruct the training windows; stop once the validation loss has
# not improved for 5 consecutive epochs.
history = autoencoder.fit(
    train_generator,
    epochs=50,
    validation_data=test_generator,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min')]
)

# Dedicated generators for inference. Only the three required arguments are
# passed: the generator serves windows in their original order by default, which
# keeps each prediction aligned with the window it reconstructs, and the extra
# `shuffle` keyword previously passed here is not part of the required signature.
predict_train_generator = WindowGenerator(train_data_scaled, WINDOW_SIZE, BATCH_SIZE)
predict_test_generator = WindowGenerator(test_data_scaled, WINDOW_SIZE, BATCH_SIZE)

train_pred = autoencoder.predict(predict_train_generator)
test_pred = autoencoder.predict(predict_test_generator)

def get_original_windows(generator):
    """Collect the input windows of every batch of *generator* into one array.

    Args:
        generator: Batch source supporting ``len()`` and integer indexing,
            whose items are ``(inputs, targets)`` pairs.

    Returns:
        The inputs of all batches concatenated along the first axis.
    """
    batches = (generator[batch_idx] for batch_idx in range(len(generator)))
    batch_inputs = [inputs for inputs, _targets in batches]
    return np.concatenate(batch_inputs)

# Rebuild the windows that were fed to the model so they can be compared
# against the reconstructions.
train_originals = get_original_windows(predict_train_generator)
test_originals = get_original_windows(predict_test_generator)

# Compare only the windows present in both arrays, in case their lengths differ.
min_len_train = min(len(train_pred), len(train_originals))
min_len_test = min(len(test_pred), len(test_originals))

# MAE of each window: absolute error averaged over the timestep and feature axes.
train_mae_loss = np.abs(
    train_pred[:min_len_train] - train_originals[:min_len_train]
).mean(axis=(1, 2))
test_mae_loss = np.abs(
    test_pred[:min_len_test] - test_originals[:min_len_test]
).mean(axis=(1, 2))

print(f"\nForma do array de erro de treino: {train_mae_loss.shape}")
print(f"Forma do array de erro de teste: {test_mae_loss.shape}")

# Plot and evaluate

In [None]:
def plot_reconstruction_error(error_df, threshold):
    """Plot the reconstruction error together with the anomaly threshold.

    Args:
        error_df: DataFrame whose index is used as the x axis and whose
            ``'error'`` column holds the reconstruction error.
        threshold: Error level drawn as a dashed red horizontal line.
    """
    _, ax = plt.subplots(figsize=(15, 6))

    ax.plot(error_df.index, error_df['error'], label='Erro de Reconstrução')
    ax.axhline(y=threshold, color='r', linestyle='--', label='Limiar de Anomalia')

    ax.set_title('Erro de Reconstrução ao Longo do Tempo')
    ax.set_xlabel('Data')
    ax.set_ylabel('Erro (MAE)')
    ax.legend()
    ax.grid(True)

    plt.show()