In [None]:
import torch

def available_gpus():
    """Return the names of all CUDA devices PyTorch can see.

    Returns:
        list[str]: One entry per device index reported by
        ``torch.cuda.device_count()``; empty when that count is zero.
    """
    names = []
    for index in range(torch.cuda.device_count()):
        names.append(torch.cuda.get_device_name(index))
    return names

# Run on CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The printed label is Spanish for "Available GPUs".
print("GPUs disponibles:", available_gpus())

## Libraries

In [2]:
import os
from datetime import datetime
from libraries.utils import read_csv
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from dateutil.relativedelta import relativedelta
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
import matplotlib.pyplot as plt

from esa_libraries.ESAScores import ESAScores
from metrics_libraries.basic_methods import precision_corrected_score, recall_score, f05_score

from models_architectures.AutoEnconderFullWindow import AutoEnconderFullWindow
from models_architectures.AutoEnconderLastEvent import AutoEnconderLastEvent
from models_architectures.VariationalAutoencoderFullWindow import VariationalAutoencoderFullWindow
from models_architectures.VariationalAutoencoderLastEvent import VariationalAutoencoderLastEvent
from libraries.sequence_generators import sequence_generator, sequence_generator_last_event

## Parameters

In [None]:
# ESA mission number; it selects the data paths and END_DATE below.
MISSION = 2

# Sliding-window length handed to the sequence generators and the model constructors.
WINDOW_SIZE = 50
# Percentile forwarded to calculate_channels_thresholds when deriving the per-channel thresholds.
PERCENTILE = 99
BATCH_SIZE = 256
EPOCHS = 25
LEARNING_RATE = 0.0001

# Channel selection mode; index 2 picks "target".
CHANNELS = ["allchannels", "subset", "target"][2]

# Overall date range of the experiment; END_DATE depends on the mission.
START_DATE = pd.to_datetime("2000-01-01")
END_DATE = pd.to_datetime("2014-01-01") if MISSION == 1 else pd.to_datetime("2003-07-01")

# Length, in months, of each rolling training window and of the test window that follows it.
MONTHS_TO_TRAIN = 6
MONTHS_TO_TEST = 1
# Architecture to train; index 3 picks "VariationalAutoencoderLastEvent".
MODEL_TYPE = ["AutoEncoderFullWindow",
              "AutoEncoderLastEvent",
              "VariationalAutoencoderFullWindow",
              "VariationalAutoencoderLastEvent"][3]

CHANNELS_INFO_PATH = f"../data/Mission{MISSION}-ESA/channels.csv"
ESA_ANOMALIES_PATH = f"../esa-anomalies/anomalies_mission{MISSION}.csv"

# Current time with the space replaced by "_" and colons by "-", appended to the experiment name.
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S").replace(" ", "_").replace(":", "-")
# The experiment name encodes every parameter above; it names the results CSV, the plots and the wandb run.
EXPERIMENT_NAME = f"Mission{MISSION}_{CHANNELS}_{MODEL_TYPE}_{START_DATE.strftime('%Y-%m-%d')}_{END_DATE.strftime('%Y-%m-%d')}_T{MONTHS_TO_TRAIN}_V{MONTHS_TO_TEST}_window{WINDOW_SIZE}_percentile{PERCENTILE}_epochs{EPOCHS}_lr{LEARNING_RATE}__{timestamp}"
RESULT_PATH = f"../metrics/multiphases_training/{EXPERIMENT_NAME}.csv"
EXPERIMENT_NAME

In [4]:
# Channel-number range; it only affects the file name and channel list when CHANNELS == "subset".
first_channel_number = 41 if MISSION == 1 else 18  # Only if CHANNELS == "subset"
last_channel_number = 46 if MISSION == 1 else 28  # Only if CHANNELS == "subset"

# Pick the preprocessed CSV that matches the channel selection mode and the mission's year range.
if CHANNELS == "subset":
    input_data_path = f'../data/Mission{MISSION}-Preprocessed/data_preprocessed_channels{first_channel_number}_{last_channel_number}_frequency-previous_2000_{2013 if MISSION == 1 else 2003}.csv'
else:
    input_data_path = f'../data/Mission{MISSION}-Preprocessed/data_preprocessed_{CHANNELS}_frequency-previous_2000_{2013 if MISSION == 1 else 2003}.csv'

In [5]:
# Build the list of columns to keep after loading:
#   "target"      -> channels whose 'Target' column equals "YES" in the channels info file
#   "allchannels" -> None, meaning no column filtering
#   "subset"      -> channel_<first> .. channel_<last>, both inclusive
if CHANNELS == "target":
    channels_info = pd.read_csv(CHANNELS_INFO_PATH)
    channels_list = list(channels_info[channels_info['Target']=="YES"]['Channel'])
else:
    channels_list = None if CHANNELS == "allchannels" else [f"channel_{i}" for i in range(first_channel_number, last_channel_number+1)]

## Load data

In [None]:
# Load the ";"-separated preprocessed file and keep only the selected channels, if any.
# NOTE(review): the date filter below assumes read_csv returns a datetime-indexed frame — confirm in libraries.utils.
data = read_csv(input_data_path, sep=";")
if channels_list is not None:
    data = data[channels_list]

# Keep only the rows from START_DATE (inclusive) up to END_DATE (exclusive).
data = data.loc[(data.index >= START_DATE) & (data.index < END_DATE)]
data

In [None]:
# Load the ESA anomaly annotations. Unparseable timestamps are coerced to NaT and any
# timezone information is dropped so the columns can be compared with naive datetimes.
esa_anomalies = pd.read_csv(ESA_ANOMALIES_PATH)
esa_anomalies['StartTime'] = pd.to_datetime(esa_anomalies['StartTime'], errors='coerce').dt.tz_localize(None)
esa_anomalies['EndTime'] = pd.to_datetime(esa_anomalies['EndTime'], errors='coerce').dt.tz_localize(None)
esa_anomalies

## Training methods

In [8]:
def sum_months_to_date(date, months):
    """Return ``date`` shifted by ``months`` calendar months.

    Args:
        date: Any value that supports ``+ relativedelta``.
        months: Number of months to add.

    Returns:
        The result of ``date + relativedelta(months=months)``.
    """
    month_offset = relativedelta(months=months)
    return date + month_offset

def calculate_period_months(start_date, end_date):
    """Return the number of whole months between two dates.

    The span is taken from ``relativedelta(end_date, start_date)`` and its
    ``years`` and ``months`` components are combined into a month count.

    Args:
        start_date: Beginning of the span.
        end_date: End of the span.

    Returns:
        ``years * 12 + months`` of the computed difference.
    """
    span = relativedelta(end_date, start_date)
    return 12 * span.years + span.months

In [9]:
def load_data(data_dict, input_path, channels, start_date_val, end_date_val, sep=';'):
    """Assemble the rows of a date range from per-year CSV files, reusing a cache.

    Args:
        data_dict: Mapping ``year -> DataFrame`` with frames that were already loaded.
        input_path: Path prefix; the file read for a year is ``f"{input_path}{year}.csv"``.
        channels: Columns to keep from a newly read file, or ``None`` to keep all.
        start_date_val: Start of the range (inclusive in the row filter).
        end_date_val: End of the range (inclusive in the row filter).
        sep: Field separator forwarded to ``read_csv``.

    Returns:
        tuple: ``(result_df, result_data_dict)`` where ``result_df`` is the
        concatenation of the filtered yearly frames and ``result_data_dict``
        is a new cache holding only the years needed for this range.
    """
    # The end date is moved back one day when deciding which years are needed,
    # so a range ending exactly on January 1st does not pull in that year's file.
    last_needed_day = end_date_val - relativedelta(days=1)
    years = list(range(start_date_val.year, last_needed_day.year + 1))

    # Keep the cached frames that are still relevant and read the missing years.
    result_data_dict = {year: frame for year, frame in data_dict.items() if year in years}
    for year in years:
        if year in result_data_dict:
            continue
        frame = read_csv(f"{input_path}{year}.csv", sep=sep)
        result_data_dict[year] = frame if channels is None else frame[channels]

    # Slice every yearly frame to the requested range (both ends inclusive).
    pieces = []
    for year in years:
        frame = result_data_dict[year]
        in_range = (start_date_val <= frame.index) & (frame.index <= end_date_val)
        pieces.append(frame[in_range].copy())

    return pd.concat(pieces, ignore_index=False), result_data_dict

In [10]:
import pandas as pd

def format_anomalies(anomalies: pd.DataFrame) -> pd.DataFrame:
    """Collapse per-timestamp anomaly flags into one row per anomalous interval.

    Every column of ``anomalies`` is scanned in index order. A run of
    consecutive ``1`` values becomes a single interval whose ``StartTime`` is
    the index label of the first ``1`` and whose ``EndTime`` is the index label
    of the first ``0`` that follows the run. A run that is still open at the
    end of the frame is closed with the last index label.

    Args:
        anomalies: Frame with one column per channel containing 0/1 flags.

    Returns:
        DataFrame with columns ``Channel``, ``StartTime`` and ``EndTime``,
        sorted by ``StartTime`` with a fresh integer index. When no anomaly is
        flagged the result is an empty frame that still has those three
        columns (previously this case raised ``KeyError`` while sorting).
    """
    columns = ["Channel", "StartTime", "EndTime"]
    intervals = []

    for channel in anomalies.columns:
        channel_flags = anomalies[channel]
        is_active = False  # True while we are inside a run of 1s
        start_time = None  # Index label at which the current run started

        for time, value in channel_flags.items():
            if value == 1 and not is_active:
                # A new anomaly starts here.
                is_active = True
                start_time = time
            elif value == 0 and is_active:
                # The anomaly ends at the first 0 after the run.
                is_active = False
                intervals.append({"Channel": channel, "StartTime": start_time, "EndTime": time})

        # An anomaly that lasts until the end of the frame is closed at the last label.
        if is_active:
            intervals.append({"Channel": channel, "StartTime": start_time, "EndTime": channel_flags.index[-1]})

    # Passing the column names keeps the schema stable even when nothing was detected,
    # so the sort below (and any caller reading the columns) works on an empty result.
    anomalies_formatted = pd.DataFrame(intervals, columns=columns)
    return anomalies_formatted.sort_values(by="StartTime").reset_index(drop=True)


In [11]:
def filter_esa_anomalies(esa_anomalies, start_date, end_date):
    """Return the anomalies that overlap a date range, clipped to that range.

    A row is kept when ``EndTime >= start_date`` and ``StartTime <= end_date``.
    In the kept rows, a ``StartTime`` earlier than ``start_date`` is replaced by
    ``start_date`` and an ``EndTime`` later than ``end_date`` by ``end_date``.

    Args:
        esa_anomalies: Frame with at least ``StartTime`` and ``EndTime`` columns.
        start_date: Start of the range.
        end_date: End of the range.

    Returns:
        A new DataFrame with a fresh integer index. The input frame is never
        modified.
    """
    overlaps = (esa_anomalies["EndTime"] >= start_date) & (esa_anomalies["StartTime"] <= end_date)
    # Work on an explicit copy: assigning through .loc on a filtered frame is the
    # pattern that triggers pandas' SettingWithCopyWarning.
    filtered = esa_anomalies.loc[overlaps].copy()
    filtered.loc[filtered["StartTime"] < start_date, "StartTime"] = start_date
    filtered.loc[filtered["EndTime"] > end_date, "EndTime"] = end_date
    return filtered.reset_index(drop=True)

In [12]:
def get_anomalies_list(anomalies_df):
    """Reduce a per-channel flag frame to one flag per timestamp.

    Args:
        anomalies_df: Frame whose rows are timestamps and whose columns are
            per-channel anomaly flags.

    Returns:
        list: ``[index_label, flag]`` pairs in row order, where ``flag`` is 1
        when any value in the row is truthy and 0 otherwise.
    """
    return [
        [timestamp, 1 if channel_flags.any() else 0]
        for timestamp, channel_flags in anomalies_df.iterrows()
    ]

## Models training methods

In [13]:
def train_autoencoder(train_generator, val_generator, model, criterion, optimizer, epochs, steps_per_epoch_train, steps_per_epoch_val, device):
    """Train a reconstruction model and record the average loss of every epoch.

    Args:
        train_generator: Iterator yielding ``(inputs, targets)`` tensor pairs.
        val_generator: Iterator yielding validation pairs, or ``None`` to skip validation.
        model: Module called as ``model(inputs)`` that returns the reconstruction.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss tensor.
        optimizer: Optimizer bound to the model parameters.
        epochs: Number of epochs to run.
        steps_per_epoch_train: Batches drawn from ``train_generator`` per epoch.
        steps_per_epoch_val: Batches drawn from ``val_generator`` per epoch
            (only used when ``val_generator`` is given).
        device: Device the batches are moved to.

    Returns:
        tuple[list[float], list[float]]: Per-epoch average training losses and
        per-epoch average validation losses (empty when validation is skipped).
    """
    train_loss_history = []
    val_loss_history = []

    model.train()
    for epoch in tqdm(range(epochs), desc="Training model"):
        # Validation switches the model to eval mode, so training mode has to be
        # restored at the start of every epoch, not only before the first one.
        model.train()
        total_train_loss = 0

        for _ in range(steps_per_epoch_train):
            inputs, targets = next(train_generator)
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        train_loss_history.append(total_train_loss / steps_per_epoch_train)

        if val_generator is not None:
            model.eval()
            total_val_loss = 0

            with torch.no_grad():
                for _ in range(steps_per_epoch_val):
                    val_inputs, val_targets = next(val_generator)
                    # Move the *validation* batch to the device; the previous code
                    # re-used the last training batch here.
                    val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)

                    val_outputs = model(val_inputs)
                    val_loss = criterion(val_outputs, val_targets)

                    total_val_loss += val_loss.item()

            val_loss_history.append(total_val_loss / steps_per_epoch_val)

    return train_loss_history, val_loss_history

In [14]:
def train_vae(train_generator, val_generator, model, criterion, optimizer, epochs, steps_per_epoch_train, steps_per_epoch_val, device):
    """Train a variational autoencoder and record the average loss of every epoch.

    Args:
        train_generator: Iterator yielding ``(inputs, targets)`` tensor pairs.
        val_generator: Iterator yielding validation pairs, or ``None`` to skip validation.
        model: Module called as ``model(inputs)`` that returns
            ``(reconstructed, mu, logvar)``.
        criterion: Callable ``criterion(reconstructed, targets, mu, logvar)``
            returning a scalar loss tensor.
        optimizer: Optimizer bound to the model parameters.
        epochs: Number of epochs to run.
        steps_per_epoch_train: Batches drawn from ``train_generator`` per epoch.
        steps_per_epoch_val: Batches drawn from ``val_generator`` per epoch
            (only used when ``val_generator`` is given).
        device: Device the batches are moved to.

    Returns:
        tuple[list[float], list[float]]: Per-epoch average training losses and
        per-epoch average validation losses (empty when validation is skipped).
    """
    train_loss_history = []
    val_loss_history = []

    model.train()
    for epoch in tqdm(range(epochs), desc="Training model"):
        # Validation switches the model to eval mode, so training mode has to be
        # restored at the start of every epoch, not only before the first one.
        model.train()
        total_train_loss = 0

        for _ in range(steps_per_epoch_train):
            inputs, targets = next(train_generator)
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            reconstructed, mu, logvar = model(inputs)
            loss = criterion(reconstructed, targets, mu, logvar)

            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        train_loss_history.append(total_train_loss / steps_per_epoch_train)

        if val_generator is not None:
            model.eval()
            total_val_loss = 0

            with torch.no_grad():
                for _ in range(steps_per_epoch_val):
                    val_inputs, val_targets = next(val_generator)
                    # Move the *validation* batch to the device; the previous code
                    # re-used the last training batch here.
                    val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)

                    val_reconstructed, val_mu, val_logvar = model(val_inputs)
                    val_loss = criterion(val_reconstructed, val_targets, val_mu, val_logvar)

                    total_val_loss += val_loss.item()

            val_loss_history.append(total_val_loss / steps_per_epoch_val)

    return train_loss_history, val_loss_history

In [15]:
def vae_loss_function(reconstructed, target, mu, logvar):
    """Return the VAE loss: reconstruction error plus KL divergence.

    Args:
        reconstructed: Model reconstruction.
        target: Tensor the reconstruction is compared against.
        mu: Latent mean produced by the encoder.
        logvar: Latent log-variance produced by the encoder.

    Returns:
        Scalar tensor ``MSE(reconstructed, target) + KL``, with
        ``KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.
    """
    # NOTE(review): the MSE term is averaged over elements while the KL term is
    # summed over every element, so their relative weight changes with batch and
    # latent size — confirm this is intended.
    mse_criterion = nn.MSELoss()
    reconstruction_term = mse_criterion(reconstructed, target)

    kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_term = -0.5 * torch.sum(kl_elements)

    return reconstruction_term + kl_term

In [16]:
def train_model(model_type, train_df_normalized, epochs, window_size, batch_size, learning_rate, device):
    """Build the model named by ``model_type``, train it and return it.

    Args:
        model_type: ``"AutoEncoderFullWindow"`` or ``"AutoEncoderLastEvent"``
            select the plain autoencoders; ``"VariationalAutoencoderFullWindow"``
            selects the full-window VAE; any other value selects the
            last-event VAE.
        train_df_normalized: Normalized training frame (rows are time steps,
            columns are channels).
        epochs: Number of training epochs.
        window_size: Sliding-window length.
        batch_size: Batch size used by the sequence generator.
        learning_rate: Learning rate for the Adam optimizer.
        device: Device the model is moved to and trained on.

    Returns:
        The trained model instance.
    """
    n_channels = train_df_normalized.shape[1]
    steps_per_epoch_train = (len(train_df_normalized) - window_size) // batch_size

    # Resolve the model class, loss and training loop for the requested family.
    if model_type in ("AutoEncoderFullWindow", "AutoEncoderLastEvent"):
        uses_full_window = model_type == "AutoEncoderFullWindow"
        model_cls = AutoEnconderFullWindow if uses_full_window else AutoEnconderLastEvent
        criterion = nn.MSELoss()
        run_training = train_autoencoder
    else:
        uses_full_window = model_type == "VariationalAutoencoderFullWindow"
        model_cls = VariationalAutoencoderFullWindow if uses_full_window else VariationalAutoencoderLastEvent
        criterion = vae_loss_function
        run_training = train_vae

    model = model_cls(window_size, n_channels, latent_dim=8).to(device)

    # Full-window models use the full-window generator, last-event models the last-event one.
    make_generator = sequence_generator if uses_full_window else sequence_generator_last_event
    train_gen = make_generator(train_df_normalized.values, window_size, batch_size)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # No validation generator is used here, so the returned loss histories are discarded.
    run_training(train_gen, None, model, criterion, optimizer, epochs=epochs,
                 steps_per_epoch_train=steps_per_epoch_train, steps_per_epoch_val=None, device=device)
    return model

## Threshold methods

In [17]:
def anomaly_limits(errors, percentile, axis) -> list:
    """Compute upper anomaly limits from an inter-percentile range.

    With ``P1`` the ``100 - percentile`` percentile and ``P2`` the
    ``percentile`` percentile of ``errors`` along ``axis``, the limit is
    ``P2 + 1.5 * (P2 - P1)``.

    Args:
        errors: Array-like of error values (for example a list of equally
            shaped arrays).
        percentile: Upper percentile, in the 0-100 range.
        axis: Axis or tuple of axes reduced by the percentile; ``None`` reduces
            over all values.

    Returns:
        list: One limit per entry along the first remaining axis. When every
        axis is reduced, a one-element list is returned.
    """
    # Convert once; np.percentile would otherwise rebuild the array on every call.
    error_array = np.asarray(errors)
    lower, upper = np.percentile(error_array, [100 - percentile, percentile], axis=axis)
    limits = upper + 1.5 * (upper - lower)
    # atleast_1d keeps the scalar case (all axes reduced) iterable.
    return list(np.atleast_1d(limits))

In [18]:
def calculate_channels_thresholds_autoencoder_full(percentile, autoencoder, df_normalized, window_size, batch_size, device):
    """Derive anomaly thresholds from full-window autoencoder reconstruction errors.

    The squared reconstruction error of every element of every batch is
    collected and passed to ``anomaly_limits`` with ``axis=(0, 1)``.

    Args:
        percentile: Percentile forwarded to ``anomaly_limits``.
        autoencoder: Model called as ``autoencoder(batch)`` that returns the reconstruction.
        df_normalized: Normalized frame the batches are generated from.
        window_size: Sliding-window length.
        batch_size: Batch size used by the generator.
        device: Device the input batches are moved to.

    Returns:
        The list returned by ``anomaly_limits``.
    """
    # NOTE(review): assumes batches are (batch, window, channels), so that
    # axis=(0, 1) leaves one threshold per channel — confirm against sequence_generator.
    # NOTE(review): the model mode (train/eval) is left untouched here — confirm the caller sets it.
    steps_per_epoch = (len(df_normalized) - window_size) // batch_size

    train_gen = sequence_generator(df_normalized.values, window_size, batch_size)
    reconstruction_errors = []

    # Inference only: no_grad avoids building an autograd graph for every batch.
    with torch.no_grad():
        for _ in tqdm(range(steps_per_epoch), desc="Calculando errores de reconstrucción"):
            batch, _targets = next(train_gen)  # targets are not needed, only the inputs
            reconstructed_batch = autoencoder(batch.to(device)).detach().cpu().numpy()
            batch = batch.cpu().numpy()
            # One squared-error array per sample of the batch.
            reconstruction_errors.extend(np.square(batch - reconstructed_batch))

    return anomaly_limits(reconstruction_errors, percentile, axis=(0, 1))

In [19]:
def calculate_channels_thresholds_autoencoder_last(percentile, autoencoder, df_normalized, window_size, batch_size, device):
    """Derive anomaly thresholds from last-event autoencoder reconstruction errors.

    The squared error between the generator targets and the model output is
    collected for every batch and passed to ``anomaly_limits`` with ``axis=0``.

    Args:
        percentile: Percentile forwarded to ``anomaly_limits``.
        autoencoder: Model called as ``autoencoder(inputs)`` that returns the reconstruction.
        df_normalized: Normalized frame the batches are generated from.
        window_size: Sliding-window length.
        batch_size: Batch size used by the generator.
        device: Device the input batches are moved to.

    Returns:
        The list returned by ``anomaly_limits``.
    """
    # NOTE(review): assumes targets and outputs are (batch, channels), so that
    # axis=0 leaves one threshold per channel — confirm against sequence_generator_last_event.
    # NOTE(review): the model mode (train/eval) is left untouched here — confirm the caller sets it.
    steps_per_epoch = (len(df_normalized) - window_size) // batch_size

    train_gen = sequence_generator_last_event(df_normalized.values, window_size, batch_size)
    reconstruction_errors = []

    # Inference only: no_grad avoids building an autograd graph for every batch.
    with torch.no_grad():
        for _ in tqdm(range(steps_per_epoch), desc="Calculando errores de reconstrucción"):
            batch_inputs, batch_targets = next(train_gen)
            batch_inputs = batch_inputs.to(device)

            reconstructed_batch = autoencoder(batch_inputs).detach().cpu().numpy()
            batch_targets = batch_targets.cpu().numpy()

            # One squared-error array per sample of the batch.
            reconstruction_errors.extend(np.square(batch_targets - reconstructed_batch))

    return anomaly_limits(reconstruction_errors, percentile, axis=(0))

In [20]:
def calculate_channels_thresholds_vae_full(percentile, autoencoder, df_normalized, window_size, batch_size, device):
    """Derive anomaly thresholds from full-window VAE reconstruction errors.

    The squared reconstruction error of every element of every batch is
    collected and passed to ``anomaly_limits`` with ``axis=(0, 1)``.

    Args:
        percentile: Percentile forwarded to ``anomaly_limits``.
        autoencoder: Model called as ``autoencoder(batch)`` that returns a
            3-tuple whose first element is the reconstruction.
        df_normalized: Normalized frame the batches are generated from.
        window_size: Sliding-window length.
        batch_size: Batch size used by the generator.
        device: Device the input batches are moved to.

    Returns:
        The list returned by ``anomaly_limits``.
    """
    # NOTE(review): assumes batches are (batch, window, channels), so that
    # axis=(0, 1) leaves one threshold per channel — confirm against sequence_generator.
    # NOTE(review): the model mode (train/eval) is left untouched here — confirm the caller sets it.
    steps_per_epoch = (len(df_normalized) - window_size) // batch_size

    train_gen = sequence_generator(df_normalized.values, window_size, batch_size)
    reconstruction_errors = []

    # Inference only: no_grad avoids building an autograd graph for every batch.
    with torch.no_grad():
        for _ in tqdm(range(steps_per_epoch), desc="Calculando errores de reconstrucción"):
            batch, _targets = next(train_gen)  # targets are not needed, only the inputs
            reconstructed_batch, _mu, _logvar = autoencoder(batch.to(device))
            reconstructed_batch = reconstructed_batch.detach().cpu().numpy()
            batch = batch.cpu().numpy()
            # One squared-error array per sample of the batch.
            reconstruction_errors.extend(np.square(batch - reconstructed_batch))

    return anomaly_limits(reconstruction_errors, percentile, axis=(0, 1))

In [21]:
def calculate_channels_thresholds_vae_last(percentile, autoencoder, df_normalized, window_size, batch_size, device):
    """Derive anomaly thresholds from last-event VAE reconstruction errors.

    The squared error between the generator targets and the model
    reconstruction is collected for every batch and passed to
    ``anomaly_limits`` with ``axis=0``.

    Args:
        percentile: Percentile forwarded to ``anomaly_limits``.
        autoencoder: Model called as ``autoencoder(inputs)`` that returns a
            3-tuple whose first element is the reconstruction.
        df_normalized: Normalized frame the batches are generated from.
        window_size: Sliding-window length.
        batch_size: Batch size used by the generator.
        device: Device the input batches are moved to.

    Returns:
        The list returned by ``anomaly_limits``.
    """
    # NOTE(review): assumes targets and outputs are (batch, channels), so that
    # axis=0 leaves one threshold per channel — confirm against sequence_generator_last_event.
    # NOTE(review): the model mode (train/eval) is left untouched here — confirm the caller sets it.
    steps_per_epoch = (len(df_normalized) - window_size) // batch_size

    train_gen = sequence_generator_last_event(df_normalized.values, window_size, batch_size)
    reconstruction_errors = []

    # Inference only: no_grad avoids building an autograd graph for every batch.
    with torch.no_grad():
        for _ in tqdm(range(steps_per_epoch), desc="Calculando errores de reconstrucción"):
            batch_inputs, batch_targets = next(train_gen)
            batch_inputs = batch_inputs.to(device)

            reconstructed_batch, _mu, _logvar = autoencoder(batch_inputs)
            reconstructed_batch = reconstructed_batch.detach().cpu().numpy()
            # Convert the targets to numpy as well so the subtraction does not mix
            # a torch tensor with a numpy array (matches the autoencoder variant).
            batch_targets = batch_targets.cpu().numpy()

            # One squared-error array per sample of the batch.
            reconstruction_errors.extend(np.square(batch_targets - reconstructed_batch))

    return anomaly_limits(reconstruction_errors, percentile, axis=(0))

In [22]:
def calculate_channels_thresholds(model_type, percentile, autoencoder, df_normalized, window_size, batch_size, device):
    """Dispatch threshold calculation to the function matching ``model_type``.

    Args:
        model_type: ``"AutoEncoderFullWindow"``, ``"AutoEncoderLastEvent"`` or
            ``"VariationalAutoencoderFullWindow"``; any other value is handled
            by the last-event VAE implementation.
        percentile, autoencoder, df_normalized, window_size, batch_size, device:
            Forwarded unchanged to the selected implementation.

    Returns:
        Whatever the selected implementation returns.
    """
    shared_args = (percentile, autoencoder, df_normalized, window_size, batch_size, device)

    if model_type == "AutoEncoderFullWindow":
        threshold_fn = calculate_channels_thresholds_autoencoder_full
    elif model_type == "AutoEncoderLastEvent":
        threshold_fn = calculate_channels_thresholds_autoencoder_last
    elif model_type == "VariationalAutoencoderFullWindow":
        threshold_fn = calculate_channels_thresholds_vae_full
    else:
        threshold_fn = calculate_channels_thresholds_vae_last

    return threshold_fn(*shared_args)

## Training

In [23]:
# Delete any existing file at RESULT_PATH so this run starts from a clean results file.
if os.path.exists(RESULT_PATH):
    os.remove(RESULT_PATH)

# Empty results table: one row per rolling period is appended by the training loop.
results = pd.DataFrame(columns=['Precision', 'Recall', 'F0.5', 'tp', 'fp', 'fn', 'tnt', 'nt', 'tnrt', 
                                'Training start', 'Training end', 'Test start', 'Test end',
                                '#ESA Anomalies train', '#Predicted Anomalies train',
                                '#ESA Anomalies test', '#Predicted Anomalies test'])

In [24]:
# Disabled snippet: when uncommented it reloads the results at RESULT_PATH and
# moves START_DATE forward instead of starting from an empty table.
# if os.path.exists(RESULT_PATH):
#     results = pd.read_csv(RESULT_PATH)
#     START_DATE = pd.to_datetime("2001-05-01")

In [None]:
# Number of rolling periods: the training window advances one month at a time and
# each period needs MONTHS_TO_TRAIN + MONTHS_TO_TEST months inside [START_DATE, END_DATE].
num_periods = calculate_period_months(START_DATE, END_DATE) - \
    MONTHS_TO_TRAIN - MONTHS_TO_TEST + 1

for period in range(num_periods):
    # Window boundaries for this period; the test window starts where training ends.
    start_date_train = sum_months_to_date(START_DATE, period)
    end_date_train = sum_months_to_date(start_date_train, MONTHS_TO_TRAIN)
    start_date_test = end_date_train
    end_date_test = sum_months_to_date(start_date_test, MONTHS_TO_TEST)
    print(f"PERIOD {period+1}/{num_periods} - Start train: {start_date_train.strftime('%Y-%m-%d')} - Start val: {start_date_test.strftime('%Y-%m-%d')} - End val: {end_date_test.strftime('%Y-%m-%d')}")
    row = {'Training start': start_date_train,
           'Training end': end_date_train,
           'Test start': start_date_test, 
           'Test end': end_date_test}


    ### ********** TRAINING ********** ###
    print("* Preprocessing train data")
    # The scaler is fitted on the training window only and reused for the test window.
    scaler = MinMaxScaler()
    # NOTE(review): both bounds are inclusive, so a row stamped exactly at
    # end_date_train (== start_date_test) is part of the train AND the test slice.
    train_data = data[(start_date_train <= data.index) & (data.index <= end_date_train)]
    train_data_normalized = scaler.fit_transform(train_data)
    train_df_normalized = pd.DataFrame(train_data_normalized, index=train_data.index, columns=train_data.columns)

    print("* Training model")
    # A new model is trained from scratch for every period.
    model = train_model(MODEL_TYPE, train_df_normalized, EPOCHS, WINDOW_SIZE, BATCH_SIZE, LEARNING_RATE, device)

    print("* Calculating threshold")
    # Thresholds are derived from the reconstruction errors on the training window.
    threshold_list = calculate_channels_thresholds(MODEL_TYPE,
                                                   PERCENTILE,
                                                   model,
                                                   train_df_normalized,
                                                   WINDOW_SIZE,
                                                   BATCH_SIZE,
                                                   device)
    ### ****************************** ###


    ### ****** TRAINING METRICS ****** ###
    print("* Calculating predicted training number of anomalies")
    # NOTE(review): model.predict is defined in the project's model classes; the code
    # below treats its result as a frame of 0/1 flags per channel — confirm.
    train_prediction = model.predict(threshold_list, train_df_normalized, WINDOW_SIZE, BATCH_SIZE, device)
    train_anomalies = format_anomalies(train_prediction)
    row['#Predicted Anomalies train'] = len(train_anomalies)

    print("* Calculating ESA training number of anomalies")
    esa_anomalies_train = filter_esa_anomalies(esa_anomalies, start_date_train, end_date_train)
    row['#ESA Anomalies train'] = len(esa_anomalies_train)
    ### ****************************** ###


    ### ********* INFERENCE ********** ###
    print("* Preprocessing test data")
    test_data = data[(start_date_test <= data.index) & (data.index <= end_date_test)]
    # transform (not fit_transform): the test data is scaled with the training statistics.
    test_data_normalized = scaler.transform(test_data)
    test_df_normalized = pd.DataFrame(test_data_normalized, index=test_data.index, columns=test_data.columns)
    
    print("* Inference")
    test_prediction = model.predict(threshold_list, test_df_normalized, WINDOW_SIZE, BATCH_SIZE, device)
    test_anomalies = format_anomalies(test_prediction)
    row['#Predicted Anomalies test'] = len(test_anomalies)

    print("* Calculating ESA test number of anomalies")
    esa_anomalies_test = filter_esa_anomalies(esa_anomalies, start_date_test, end_date_test)
    row['#ESA Anomalies test'] = len(esa_anomalies_test)
    ### ****************************** ###


    ### ********** METRICS *********** ###
    print("* Calculating metrics")
    scores_calculator = ESAScores(betas=0.5, full_range=(start_date_test, end_date_test)) 
    # The scorer receives one [timestamp, flag] pair per test row (flag = any channel anomalous).
    anomalies_list = get_anomalies_list(test_prediction)
    scores_metrics = scores_calculator.score(esa_anomalies_test, anomalies_list)
    row['Precision'], row['Recall'], row['F0.5'] = scores_metrics["EW_precision"], scores_metrics["EW_recall"], scores_metrics["EW_F_0.50"]
    row['tp'], row['fp'], row['fn'] = scores_metrics["tp"], scores_metrics["fp"], scores_metrics["fn"]
    row['tnt'], row['nt'], row['tnrt'] = scores_metrics["tnt"], scores_metrics["nt"], scores_metrics["tnrt"]
    ### ****************************** ###


    ### ******** SAVE RESULTS ******** ###
    # The first row replaces the empty table; later rows are appended to it.
    results = pd.concat([results, pd.DataFrame([row])], ignore_index=True) if len(results) > 0 else pd.DataFrame([row])
    # if period % 5 == 0 or period == num_periods - 1:
    # period % 1 == 0 is always true, so the CSV is rewritten after every period.
    if period % 1 == 0 or period == num_periods - 1:
        print("* Saving result:", RESULT_PATH)
        results.to_csv(RESULT_PATH, index=False)
    print()
    ### ****************************** ###

results

## Calculate final metrics

In [26]:
# Aggregate the per-period counts over all test windows.
tp = results['tp'].sum()
fp = results['fp'].sum()
fn = results['fn'].sum()
tnt = results['tnt'].sum()
nt = results['nt'].sum()
# Global ratio computed from the summed counts (not an average of the per-period 'tnrt' values).
# NOTE(review): this divides by nt without guarding against nt == 0.
tnrt = tnt / nt

In [None]:
# Overall metrics computed from the aggregated counts with the project's metric helpers.
precision = precision_corrected_score(tp, fp, tnrt)
recall = recall_score(tp, fn)
f05 = f05_score(precision, recall)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F0.5: {f05:.4f}")

## Plot metrics

In [None]:
# Crear el plot
plt.figure(figsize=(10, 6))
plt.plot(results["Test start"], results["Precision"], label="Precision", marker="o", color="blue")
plt.plot(results["Test start"], results["Recall"], label="Recall", marker="s", color="green")
plt.plot(results["Test start"], results["F0.5"], label="F0.5", marker="^", color="orange")

# Configurar el plot
plt.title("Evolución de Precision, Recall y F0.5 a lo largo de los tests")
plt.xlabel("Fecha de Test")
plt.ylabel("Valor")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.xticks(rotation=45)
plt.tight_layout()

# Mostrar el plot
graphic_save_path = f"../graphics/multiphases_training/ALL_{EXPERIMENT_NAME}.jpg"
plt.savefig(graphic_save_path, dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Crear un subplot con 3 gráficos diferentes dentro del mismo plot
# One figure with three stacked subplots (one per metric) sharing the x axis.
fig, axes = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Precision
axes[0].plot(results["Test start"], results["Precision"], marker="o", color="blue", label="Precision")
axes[0].set_title("Precision")
axes[0].set_ylabel("Valor")
axes[0].grid(True, linestyle="--", alpha=0.6)
axes[0].legend()

# Recall
axes[1].plot(results["Test start"], results["Recall"], marker="s", color="green", label="Recall")
axes[1].set_title("Recall")
axes[1].set_ylabel("Valor")
axes[1].grid(True, linestyle="--", alpha=0.6)
axes[1].legend()

# F0.5
axes[2].plot(results["Test start"], results["F0.5"], marker="^", color="orange", label="F0.5")
axes[2].set_title("F0.5")
axes[2].set_ylabel("Valor")
axes[2].set_xlabel("Fecha de Test")
axes[2].grid(True, linestyle="--", alpha=0.6)
axes[2].legend()

# Tighten the subplot layout, then save the figure to disk and show it.
plt.tight_layout()
graphic_save_path = f"../graphics/multiphases_training/SEPARATED_{EXPERIMENT_NAME}.jpg"
plt.savefig(graphic_save_path, dpi=300, bbox_inches='tight')
plt.show()

## Store metrics

In [None]:
import wandb
import shutil
import os

# Remove the local "wandb" folder in the current working directory, if there is one,
# before logging in.
wandb_dir = os.path.join(os.getcwd(), "wandb")
try:
    if os.path.exists(wandb_dir):
        shutil.rmtree(wandb_dir)
        print("Local wandb folder has been removed.")
    else:
        print("Local wandb folder not found.")
except Exception as e:
    # Best effort: a failed cleanup is only reported and does not stop the login below.
    print(e)
wandb.login()

In [None]:
# Shorten the model type inside the experiment name for use as the wandb run id/name.
# The "Variational..." names are replaced first, before the plain autoencoder names.
run_name = EXPERIMENT_NAME.replace("VariationalAutoencoderFullWindow", "VAEFull").replace("VariationalAutoencoderLastEvent", "VAELast").replace("AutoEncoderFullWindow", "AEFull").replace("AutoEncoderLastEvent", "AELast")
run_name

In [None]:
# Start the wandb run that will hold this experiment's metrics.
run = wandb.init(
    # set the wandb project where this run will be logged
    project="MET-ESA",

    # The shortened experiment name is used both as run id and as display name.
    id=run_name,
    name=run_name,

    # track hyperparameters and run metadata
    config={
        "experiment_type": "multiphases_training",
        "mission": MISSION,
        "model_type": MODEL_TYPE,
        "timestamp": timestamp,
        "channels": CHANNELS,
        "start_date": START_DATE.strftime('%Y-%m-%d'),
        "end_date": END_DATE.strftime('%Y-%m-%d'),
        "months_to_train": MONTHS_TO_TRAIN,
        "months_to_test": MONTHS_TO_TEST,
        "window_size": WINDOW_SIZE,
        "percentile": PERCENTILE,
        "epochs": EPOCHS,
        "learning_rate": LEARNING_RATE,
    },
    allow_val_change=True,
    reinit=True,

    # NOTE(review): with resume="allow" and id=run_name, re-running this cell presumably
    # continues the run with the same id instead of creating a new one — confirm in the wandb docs.
    resume="allow",
    settings=wandb.Settings(init_timeout=300)
)

In [33]:
# Log every per-period results row, using the row index as the wandb step.
for i, row in results.iterrows():
    wandb.log(row.to_dict(), step=i)

In [34]:
# Log the overall metrics twice: as a one-row table keyed by the experiment name ...
table = wandb.Table(columns= ["model_name", "precision", "recall", "f0.5"])
table.add_data(EXPERIMENT_NAME, precision, recall, f05)
wandb.log({"metrics": table})

# ... and as plain scalar values.
wandb.log({"general_precision": precision, "general_recall": recall, "general_f0.5": f05})

In [None]:
# Close the wandb run; the trailing ";" suppresses the cell's output in the notebook.
wandb.finish();