In [None]:
import sys
import json

from data.loader import DataLoader
from data import preprocessor
import transformer_full
from libs import *

In [None]:
# Display the TensorFlow version in use. `tf` is not imported explicitly in this
# notebook, so it presumably comes from `from libs import *` — TODO confirm.
tf.__version__

In [None]:
# Sets all random seeds for the program (Python, NumPy, and TensorFlow) to 117,
# then asks TensorFlow to run its ops deterministically.
keras.utils.set_random_seed(117)
tf.config.experimental.enable_op_determinism()

In [None]:
# Ask for the turbine identifier. Surrounding whitespace is stripped because the value
# is used verbatim as a JSON key and as the prefix of every feature name below.
# NOTE(review): input() blocks "Restart & Run All"; consider a config constant instead.
wind_turbine = input("Enter the wind turbine name: ").strip()

In [None]:
# Run configuration.
model_name = f"FT_{wind_turbine}"  # prefix of every saved model / history / plot file
project_folder = ".."  # root folder the output paths below are built from
samples_per_day = 144  # presumably 10-minute samples (24 h * 6) — TODO confirm
MA_window = samples_per_day * 1  # one day of samples; not referenced in the visible cells
MA_window_FPI = samples_per_day * 7  # seven days of samples; not referenced in the visible cells

In [None]:
# Read the turbine lookup table and resolve the name used later for plot titles (WT_name).
# The encoding is given explicitly so the file is read the same way on every platform.
with open('../wind_turbines.json', 'r', encoding='utf-8') as f:
    wind_turbines_names = json.load(f)

# Fail with an actionable message instead of a bare KeyError on a mistyped name.
if wind_turbine not in wind_turbines_names:
    raise KeyError(
        f"Unknown wind turbine {wind_turbine!r}; "
        f"expected one of: {sorted(wind_turbines_names)}"
    )

WT_name = wind_turbines_names[wind_turbine]

In [None]:
# Raw signal names to load, each built as "<turbine>_<signal>".
# "Rotor RPM Max." is currently disabled.
features_list = [
    f"{wind_turbine}_{signal}"
    for signal in (
        "Gear Bearing Temp. Avg.",
        "Gear Bearing Temp.B Avg.",
        "Gear Bearing Temp.C Avg.",
        "Gear Oil Temp. Avg.",
        "Gear Oil Temp.Inlet Avg.",
        "Rotor RPM Avg.",
    )
]

In [None]:
# Short column names handed to DataLoader as column_names; presumably one per entry of
# features_list, in the same order — TODO confirm against DataLoader.
columns = [
    'gear_bearing_temp',
    'gear_bearing_temp_b',
    'gear_bearing_temp_c',
    'gear_oil_temp',
    'gear_oil_temp_inlet',
    #'rotor_rpm_max',
    'rotor_rpm_avg'
]

In [None]:
# Load the data
# Configure a loader for this turbine and the years 2019 and 2020, passing the raw
# signal names (features_list) and the short column names (columns).
# NOTE(review): load_from_pickle presumably unpickles a file under project_folder —
# only point it at trusted files, since unpickling can execute arbitrary code.
data_loader = DataLoader(
    wind_turbine=wind_turbine,
    years=[2019, 2020],
    features_list=features_list,
    column_names=columns
)
data = data_loader.load_from_pickle(path=project_folder)

# Data pre-processing

In [None]:
# Pre-processing pipeline: each step consumes the previous step's output and `data` is
# rebound every time, so re-running this cell re-applies every step to already processed
# data. The helpers live in the project's `data.preprocessor` module (not shown here).
data = preprocessor.clean_data(data)
data = preprocessor.smooth_data(data, rolling_window=samples_per_day*3)  # window of three days of samples
data = preprocessor.remove_outliers(data, threshold=5)
data = preprocessor.feature_selection(data)
# Also returns a `scaler` object (presumably the fitted normaliser — TODO confirm).
data, scaler = preprocessor.normalize_data(data)

In [None]:
# Peek at the first rows of the pre-processed data.
data.head()

## Train-test Split

In [None]:
def split_sequences(sequences, n_steps, target_col=1):
    """Cut a time-ordered frame into overlapping windows and next-step targets.

    The first column of ``sequences`` (the date column) is dropped; the remaining
    columns are the features. Sample ``i`` holds feature rows
    ``i .. i + n_steps - 1`` and its target is the value of feature column
    ``target_col`` in row ``i + n_steps``.

    Args:
        sequences: pandas DataFrame whose first column is the date and whose
            remaining columns are numeric features.
        n_steps: number of consecutive rows in each input window.
        target_col: position of the target among the feature columns, counted
            after the date column has been removed. Defaults to 1, the column
            that used to be hard-coded.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(n_samples, n_steps, n_features)``
        and ``y`` has shape ``(n_samples,)``, both float64, with
        ``n_samples = len(sequences) - n_steps``.

    Raises:
        ValueError: if ``n_steps`` is negative or larger than the number of rows.
    """
    # Convert DataFrame to NumPy array excluding the date column
    values = sequences.iloc[:, 1:].to_numpy()

    n_samples = len(sequences) - n_steps
    if n_steps < 0 or n_samples < 0:
        raise ValueError(
            f"n_steps must be between 0 and the number of rows "
            f"({len(sequences)}), got {n_steps}"
        )

    # Fill the windows one time-offset at a time: position `step` of every window
    # is the block of rows step .. step + n_samples - 1. This loops n_steps times
    # instead of n_samples times and needs no temporary index array.
    X = np.empty((n_samples, n_steps, values.shape[1]))
    for step in range(n_steps):
        X[:, step, :] = values[step:step + n_samples]

    # The target of window i is the row right after it (row i + n_steps).
    y = np.array(values[n_steps:, target_col], dtype=np.float64)

    return X, y


In [None]:
# One input window spans one day of samples. The whole series is windowed here; the
# validation part is split off later by model.fit(validation_split=...).
n_steps = samples_per_day
x_train, y_train = split_sequences(data, n_steps)
print(x_train.shape, y_train.shape)

# Model definition and training

In [None]:
def get_compiled_model(x_train, y_train):
    """Build the transformer model for the given training inputs and compile it.

    Args:
        x_train: training inputs; everything after its first axis is used as
            the model's input shape.
        y_train: not used by this function.

    Returns:
        The model returned by ``transformer_full.build_model``, compiled with a
        mean-squared-error loss and an Adam optimizer (learning rate 0.0003).
    """
    architecture = dict(
        head_size=256,
        num_heads=4,
        ff_dim=4,
        num_transformer_blocks=4,
        mlp_units=[100],
        mlp_dropout=0.3,
        dropout=0.2,
    )
    network = transformer_full.build_model(x_train.shape[1:], **architecture)

    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0003),
        loss="mean_squared_error",
    )
    return network

In [None]:
# Build the model and train it. A fraction 0.2 of the samples is held out for val_loss,
# and shuffling is disabled during training.
model = get_compiled_model(x_train, y_train)

# Keep the best-val_loss model on disk and stop when val_loss stops improving.
# NOTE(review): patience=30 against epochs=50 leaves early stopping little room to act.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    f"{project_folder}/Models/{model_name}_ckpt.keras",
    save_best_only=True,
    monitor="val_loss",
)
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=30,
    verbose=1,
)
callbacks = [checkpoint_cb, early_stop_cb]

history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    shuffle=False,
    epochs=50,
    batch_size=64,
    callbacks=callbacks,
)

In [None]:
# Save the in-memory model (the best-val_loss checkpoint is written separately by the
# ModelCheckpoint callback) and the per-epoch metrics.
model.save(f'{project_folder}/Models/{model_name}.keras')
history_df = pd.DataFrame(history.history)
# NOTE(review): model_name is "FT_<wind_turbine>", so the turbine name appears twice
# in this file name.
history_df.to_csv(f'{project_folder}/Training_history/{model_name}_{wind_turbine}_history.csv', index=False)

# Loss curves without their first entry (the [1:] slice drops element 0), so index 0 of
# these lists is the second recorded epoch.
loss = history.history['loss'][1:]
val_loss = history.history['val_loss'][1:]

In [None]:
# Training and validation loss on one axis.
sns.set_theme(palette="ch:s=.25,rot=-.25")
fig, ax = plt.subplots(figsize=(8, 8))
for curve, colour, legend_label in (
    (loss, "b", 'Training Loss'),
    (val_loss, "r", 'Validation Loss'),
):
    sns.lineplot(data=curve, ax=ax, color=colour, label=legend_label)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")

# Write the figure to disk.
plt.savefig(f'{project_folder}/Training_plots/pre_training/{model_name}_{wind_turbine}_loss.png')

In [None]:
# Write the training history again, this time as <model_name>_history.csv. An earlier
# cell already saved the same frame as <model_name>_<wind_turbine>_history.csv.
history_df.to_csv(f'{project_folder}/Training_history/{model_name}_history.csv', index=False)

# Model evaluation

In [None]:
# Reload the model saved after training. The path now matches the model.save() call
# (.../Models/); the previous .../Models_trans/ folder is not written by any cell shown
# in this notebook, so it could hold a stale model or none at all.
model = keras.models.load_model(f'{project_folder}/Models/{model_name}.keras')

In [None]:
# Predict on the same windows that were passed to model.fit.
y_train_pred = model.predict(x_train)

In [None]:
# Dates of the targets: sample i from split_sequences has its target in row i + n_steps,
# so the first n_steps dates have no target. The offset uses n_steps (the window length
# actually passed to split_sequences) rather than samples_per_day, so the two cannot
# drift apart, and .iloc makes the slice explicitly positional.
date_train = data.date.iloc[n_steps:]

In [None]:
# Flatten targets and predictions to 1-D so they can be used as DataFrame columns.
y_train = y_train.reshape(-1)
y_train_pred = y_train_pred.reshape(-1)

In [None]:
# Check how many dates remain after dropping the first window.
date_train.shape

In [None]:
# Collect dates, targets and predictions in one frame.
# NOTE(review): 'Real' drops its LAST two values while 'date' and 'Predicted' drop their
# FIRST two, so Real[i] is paired with the prediction (and the date) two steps later.
# Confirm this 2-step shift is intentional and not a misalignment.
train_results = pd.DataFrame({'date':date_train[2:],
                              'Real':y_train[:-2],
                              'Predicted': y_train_pred[2:]})
train_results

In [None]:
# Mean signed error (Real - Predicted) over the whole frame.
biased_mean = np.mean(train_results['Real'] - train_results['Predicted'])
print("Biased mean: ", biased_mean)

# Shift the predictions by that mean. The column is modified in place, so re-running
# this cell applies a further shift to the already shifted values.
train_results['Predicted'] += biased_mean

In [None]:
# Absolute prediction error per sample.
train_results['Residual'] = (train_results.Real - train_results.Predicted).abs()

In [None]:
# Real and predicted series over time on one axis.
sns.set_theme(palette="ch:s=.25,rot=-.25")
fig, ax = plt.subplots(figsize=(16, 8))
for column, colour in (('Real', "b"), ('Predicted', "r")):
    sns.lineplot(data=train_results, x='date', y=column, ax=ax, color=colour, label=column)

In [None]:
# Smooth the absolute residual with an exponentially weighted mean whose span is one
# week of samples (the same value as MA_window_FPI defined in the configuration cell).
window = samples_per_day * 7   # one week of samples
# Previous rolling-mean variant, kept for reference:
#averaged_error = train_results.Residual.rolling(window = window).mean()[window-1:]
averaged_error = train_results.Residual.ewm(span=window).mean()

In [None]:
# Mean and standard deviation of the smoothed error; the alarm thresholds are built from them.
mean = np.mean(averaged_error)
std = np.std(averaged_error)

In [None]:
# Alarm thresholds at 3, 6 and 7 standard deviations above the mean smoothed error.
threshold_k3, threshold_k6, threshold_k7 = (
    mean + kappa * std for kappa in (3., 6., 7.)
)

In [None]:
# Plot the smoothed error together with the three thresholds.
sns.set_theme(palette="ch:s=.25,rot=-.25")
fig,ax = plt.subplots(figsize=(16,8))
# NOTE(review): x skips the first window-1 dates (apparently left over from the
# rolling-mean variant), while averaged_error is not sliced here. The plot presumably
# relies on index alignment between the two Series — confirm the intended range.
sns.lineplot(x=train_results.date[window-1:], y=averaged_error, ax = ax, color="b", label='Error')

# Horizontal lines at mean + 3, 6 and 7 standard deviations.
plt.axhline(y = threshold_k3, color = 'green', linestyle = '--', linewidth=2, label='mu + 3 * sigma')
plt.axhline(y = threshold_k6, color = 'fuchsia', linestyle = '--', linewidth=2, label='mu + 6 * sigma')
plt.axhline(y = threshold_k7, color = 'red', linestyle = '--', linewidth=2, label='mu + 7 * sigma')

plt.legend(frameon=False)
# Saving this figure is currently disabled:
#plt.savefig(f'{project_folder}/Training_plots/pre_training/{model_name}_{wind_turbine}_validation.png')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.ticker as mticker

# ====== Tune these values for your case ======
training_split = 0.8          # fraction of train_results drawn as "train"; the rest is "val"
alarm_mode = "dynamic"        # "dynamic" | "constant" | "none"
fontsize = 22                 # tick labels / title
legend_fontsize = 18          # legend text only
figsize = (12, 8)

colors = dict(
    train='gray',
    validation='orange',
    test='blue',
    threshold='green',
    alarm='red',
)

# These names must already exist in the notebook:
# train_results, test_result, alarm_df, threshold, WT_name, selected_kappa
# NOTE(review): test_result, alarm_df, threshold and selected_kappa are not defined in
# the cells visible above this one — confirm they are created before this cell runs.

# ====== Work on copies and make sure `date` is datetime ======
train_results = train_results.copy()
test_result = test_result.copy()
alarm_df = alarm_df.copy()
for frame in (train_results, test_result, alarm_df):
    frame["date"] = pd.to_datetime(frame["date"])

# ====== Train / validation split point ======
train_size = int(train_results.shape[0] * training_split)

# ====== Figure and left axis: residual curves ======
fig, ax1 = plt.subplots(figsize=figsize)

residual_curves = (
    (train_results.iloc[:train_size], "train", 1.6, "Fine-tuning (train) residuals"),
    (train_results.iloc[train_size:], "validation", 1.8, "Fine-tuning (val) residuals"),
    (test_result, "test", 1.6, "Test residuals"),
)
for frame, color_key, width, curve_label in residual_curves:
    sns.lineplot(
        data=frame, x="date", y="Residual",
        ax=ax1, color=colors[color_key], linestyle="solid", linewidth=width,
        label=curve_label,
    )

# Horizontal line at the selected threshold.
ax1.axhline(
    y=threshold,
    color=colors["threshold"],
    linestyle=":",
    linewidth=2.2,
    label=f"Threshold κ={selected_kappa}",
)

# ====== Labelled vertical event lines (WT8 only) ======
if WT_name == "WT8":
    # The "\n" puts each description on a second line in the legend.
    for day, line_color, event_label in (
        ("2021-10-18", "pink", "October 18, 2021:\nFPI activation"),
        ("2022-02-23", "brown", "February 23, 2022:\nConfirmed fault"),
    ):
        ax1.axvline(
            x=pd.to_datetime(day),
            color=line_color, linestyle="--", linewidth=2,
            label=event_label,
        )

# ====== Right axis: alarm indicator ======
ax2 = ax1.twinx()

# alarm_mode "none" (or any other value) draws nothing on the right axis.
if alarm_mode == "dynamic":
    sns.lineplot(
        data=alarm_df, x="date", y="Alarm",
        ax=ax2, color=colors["alarm"], linestyle="dashdot", linewidth=2.2,
        label="FPI",
    )
elif alarm_mode == "constant":
    ax2.axhline(y=0, color=colors["alarm"], linewidth=2.2, label="FPI")

# ====== One combined legend for both axes ======
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

legend_style = dict(
    ncol=2,
    frameon=True,
    framealpha=0.9,
    fontsize=legend_fontsize,
    loc="upper left",
    bbox_to_anchor=(0.01, 0.99),  # anchor point of the legend's upper-left corner
    borderaxespad=0.1,
    columnspacing=1.0,
    handlelength=2.5,
    handletextpad=0.6,
)
ax1.legend(lines1 + lines2, labels1 + labels2, **legend_style)

# Drop the separate legend on the right axis, if one was created.
right_legend = ax2.get_legend()
if right_legend:
    right_legend.remove()

# ====== Cosmetics ======
ax1.set_xlabel("")
ax1.set_ylabel("")
ax2.set_ylabel("")
for axes_obj, which_axis in ((ax1, "x"), (ax1, "y"), (ax2, "y")):
    axes_obj.tick_params(axis=which_axis, labelsize=fontsize)
ax1.set_title(WT_name, fontsize=fontsize)

# ====== Left Y axis ======
ax1_y_range = np.arange(0.0, 0.0401, 0.008)
ax1.set_ylim(0, 0.04)
ax1.set_yticks(ax1_y_range)

# ====== Right Y axis (adjust if needed) ======
ax2.set_ylim(-5, 6)
ax2.set_yticks(np.arange(-5, 7, 3))

# ====== X ticks: first day of Jan, May, Sep ======
ax1.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1, 5, 9], bymonthday=1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b-%Y'))
ax1.xaxis.set_minor_locator(mticker.NullLocator())
ax1.margins(x=0)

# ====== Layout (apply only once) ======
fig.subplots_adjust(top=0.78)   # lowers the top edge of the axes area
plt.show()