In [None]:
import torch

def available_gpus():
    """Return the names of all CUDA devices visible to PyTorch.

    Returns:
        list[str]: One device name per CUDA device index, in index order.
        The list is empty when ``torch.cuda.device_count()`` reports 0.
    """
    names = []
    for index in range(torch.cuda.device_count()):
        names.append(torch.cuda.get_device_name(index))
    return names

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("GPUs disponibles:", available_gpus())

## Libraries

In [2]:
import os
from datetime import datetime
from libraries.utils import read_csv
import pandas as pd
import torch

from metrics_libraries.metrics_calculator import MetricsCalculator

## Parameters

In [3]:
# Experiment selection: mission number and training phase to evaluate.
MISSION = 1
PHASE = 4

# Window size and batch size passed to model.predict further down.
WINDOW_SIZE = 50
BATCH_SIZE = 256

# Channel selection mode; one of "allchannels", "subset" or "target".
CHANNELS = "target"

# Checkpoint to evaluate; alternative checkpoints are kept commented out below.
# NOTE(review): the file name embeds "Phase4" literally instead of using PHASE,
# so the two have to be kept in sync by hand.
MODEL_SAVE_PATH = (
    f"../models/Mission{MISSION}-AutoEnconderFullWindow/"
    "Phase4_target_window50_percentile99_epochs25_lr0.0001__2025-01-14_10-43-52.pth"
)
# MODEL_SAVE_PATH = f"../models/Mission{MISSION}-AutoEnconderLastEvent/Phase5_target_window50_percentile99_epochs25_lr0.0001__2025-01-14_00-02-38.pth"
# MODEL_SAVE_PATH = f"../models/Mission{MISSION}-VariationalAutoencoderFullWindow/Phase5_target_window50_percentile99_epochs25_lr0.0001__2025-01-14_00-02-38.pth"
# MODEL_SAVE_PATH = f"../models/Mission{MISSION}-VariationalAutoencoderLastEvent/Phase1_Channels18-28_window50_percentile99_epochs25_lr0.0001__2025-01-07_17-24-26.pth"

# Input metadata files and the CSV that accumulates the computed metrics.
CHANNELS_INFO_PATH = f"../data/Mission{MISSION}-ESA/channels.csv"
ESA_ANOMALIES_PATH = f"../esa-anomalies/anomalies_mission{MISSION}.csv"
METRICS_SAVE_PATH = "../metrics/metrics.csv"

In [4]:
# Channel range, only relevant when CHANNELS == "subset"; it depends on the mission.
if MISSION == 1:
    first_channel_number, last_channel_number = 41, 46
else:
    first_channel_number, last_channel_number = 18, 28

# The preprocessed file name embeds the channel selection and a mission-specific
# year suffix (2013 for mission 1, 2003 otherwise).
if CHANNELS == "subset":
    channels_tag = f"channels{first_channel_number}_{last_channel_number}"
else:
    channels_tag = CHANNELS
year_suffix = 2013 if MISSION == 1 else 2003

input_data_path = (
    f'../data/Mission{MISSION}-Preprocessed/'
    f'data_preprocessed_{channels_tag}_frequency-previous_2000_{year_suffix}.csv'
)

In [5]:
def _build_phase_dates(test_start, test_end, phase_boundaries, train_start="2000-01-01T00:00:00"):
    """Expand a compact phase schedule into the flat date dictionary used by this notebook.

    Args:
        test_start: ISO timestamp stored under "test_start_date".
        test_end: ISO timestamp stored under "test_end_date".
        phase_boundaries: Sequence of (train_end, val_end) ISO timestamps, one
            pair per phase, in phase order starting at phase 1.
        train_start: ISO timestamp at which every phase's training split begins.

    Returns:
        dict[str, str]: Keys "test_start_date", "test_end_date" and, for each
        phase N, "phaseN_start_date_train", "phaseN_end_date_train",
        "phaseN_start_date_val" and "phaseN_end_date_val". The validation split
        of a phase starts exactly where its training split ends.
    """
    dates = {"test_start_date": test_start, "test_end_date": test_end}
    for phase_number, (train_end, val_end) in enumerate(phase_boundaries, start=1):
        prefix = f"phase{phase_number}"
        dates[f"{prefix}_start_date_train"] = train_start
        dates[f"{prefix}_end_date_train"] = train_end
        dates[f"{prefix}_start_date_val"] = train_end
        dates[f"{prefix}_end_date_val"] = val_end
    return dates


# Mission 1: five phases, each given as (end of training / start of validation, end of validation).
mission1_phases_dates = _build_phase_dates(
    test_start="2007-01-01T00:00:00",
    test_end="2014-01-01T00:00:00",
    phase_boundaries=[
        ("2000-03-11T00:00:00", "2000-04-01T00:00:00"),
        ("2000-09-01T00:00:00", "2000-11-01T00:00:00"),
        ("2001-07-01T00:00:00", "2001-11-01T00:00:00"),
        ("2003-04-01T00:00:00", "2003-07-01T00:00:00"),
        ("2006-10-01T00:00:00", "2007-01-01T00:00:00"),
    ],
)

# Mission 2: four phases only (there is no phase 5 entry for this mission).
mission2_phases_dates = _build_phase_dates(
    test_start="2001-10-01T00:00:00",
    test_end="2003-07-01T00:00:00",
    phase_boundaries=[
        ("2000-01-24T00:00:00", "2000-02-01T00:00:00"),
        ("2000-05-01T00:00:00", "2000-06-01T00:00:00"),
        ("2000-09-01T00:00:00", "2000-11-01T00:00:00"),
        ("2001-07-01T00:00:00", "2001-10-01T00:00:00"),
    ],
)

# Lookup by mission number.
missions_phases_dates = {1: mission1_phases_dates, 2: mission2_phases_dates}

In [6]:
# Resolve the split boundaries for the selected mission and phase as pandas timestamps.
# A phase that is not defined for the mission raises KeyError here.
_phase_dates = missions_phases_dates[MISSION]
start_date_train = pd.to_datetime(_phase_dates[f"phase{PHASE}_start_date_train"])
end_date_train = pd.to_datetime(_phase_dates[f"phase{PHASE}_end_date_train"])
start_date_val = pd.to_datetime(_phase_dates[f"phase{PHASE}_start_date_val"])
end_date_val = pd.to_datetime(_phase_dates[f"phase{PHASE}_end_date_val"])
start_date_test = pd.to_datetime(_phase_dates["test_start_date"])
end_date_test = pd.to_datetime(_phase_dates["test_end_date"])

# Columns to keep from the input data:
#   "target"      -> channels flagged Target == "YES" in the channels CSV
#   "allchannels" -> None, meaning no column filtering
#   otherwise     -> the contiguous channel_<first>..channel_<last> range
if CHANNELS == "target":
    channels_info = pd.read_csv(CHANNELS_INFO_PATH)
    is_target = channels_info['Target'] == "YES"
    channels_list = list(channels_info.loc[is_target, 'Channel'])
elif CHANNELS == "allchannels":
    channels_list = None
else:
    channels_list = [f"channel_{i}" for i in range(first_channel_number, last_channel_number + 1)]

## Load model

In [7]:
# Load the checkpoint dict, which holds the full model object, the threshold(s) and the scaler.
#   map_location=device -> a checkpoint saved on a GPU can still be loaded on a CPU-only
#                          machine (the notebook falls back to CPU when CUDA is missing).
#   weights_only=False  -> the checkpoint stores arbitrary pickled objects (model, scaler);
#                          PyTorch >= 2.6 defaults to weights_only=True and would refuse them.
# SECURITY: full unpickling can execute arbitrary code. Only load checkpoints you trust.
checkpoint = torch.load(MODEL_SAVE_PATH, map_location=device, weights_only=False)
model = checkpoint['model']               # full model object
threshold_list = checkpoint['threshold']  # threshold metadata passed to model.predict
scaler = checkpoint['scaler']             # scaler applied to the data before prediction

## Load data

In [8]:
# Read the preprocessed data and, when a channel list was selected, keep only those columns.
data = read_csv(input_data_path, sep=";")
if channels_list is not None:
    data = data[channels_list]


def _rows_between(frame, start, end):
    """Return the rows of `frame` whose index lies in the half-open interval [start, end)."""
    in_window = (frame.index >= start) & (frame.index < end)
    return frame.loc[in_window]


# Slice the train, validation and test periods by index.
data_train = _rows_between(data, start_date_train, end_date_train)
data_val = _rows_between(data, start_date_val, end_date_val)
data_test = _rows_between(data, start_date_test, end_date_test)

In [9]:
def _scale_frame(frame):
    """Apply the checkpoint's scaler to `frame`, keeping its index and column labels."""
    return pd.DataFrame(scaler.transform(frame), index=frame.index, columns=frame.columns)


# Every split is transformed with the same scaler loaded from the checkpoint.
df_train_normalized = _scale_frame(data_train)
df_val_normalized = _scale_frame(data_val)
df_test_normalized = _scale_frame(data_test)

## Predict

In [None]:
# Run the loaded model on the normalized training split. The result is reused by the
# metrics section below; `.head()` as the last expression shows a preview as cell output.
anomalies_train = model.predict(threshold_list, df_train_normalized, WINDOW_SIZE, BATCH_SIZE, device)
anomalies_train.head()

In [None]:
# Same prediction call on the normalized validation split; preview shown as cell output.
anomalies_val = model.predict(threshold_list, df_val_normalized, WINDOW_SIZE, BATCH_SIZE, device)
anomalies_val.head()

In [None]:
# Same prediction call on the normalized test split; preview shown as cell output.
anomalies_test = model.predict(threshold_list, df_test_normalized, WINDOW_SIZE, BATCH_SIZE, device)
anomalies_test.head()

## Calculate metrics

In [13]:
# Metrics helper built from the ESA anomalies file, the channels file and the selected
# channel list (None when CHANNELS == "allchannels").
metrics_calculator = MetricsCalculator(ESA_ANOMALIES_PATH, CHANNELS_INFO_PATH, channels_list)

In [None]:
# Metrics for the training split, evaluated over the training date range, then printed as a table.
# The result is used as a dict later on (keys are added to it and `.items()` is called).
metrics_train = metrics_calculator.get_metrics(anomalies_train, start_date_train, end_date_train)
metrics_calculator.print_metrics_table(metrics_train)

In [None]:
# Metrics for the validation split, evaluated over the validation date range, then printed as a table.
metrics_val = metrics_calculator.get_metrics(anomalies_val, start_date_val, end_date_val)
metrics_calculator.print_metrics_table(metrics_val)

In [None]:
# Metrics for the test split, evaluated over the test date range, then printed as a table.
metrics_test = metrics_calculator.get_metrics(anomalies_test, start_date_test, end_date_test)
metrics_calculator.print_metrics_table(metrics_test)

## Save metrics to CSV

In [24]:
# Identifiers derived from the checkpoint path.
# Model type: the text after the last "-" in the checkpoint's parent directory name
# (e.g. "Mission1-AutoEnconderFullWindow" -> "AutoEnconderFullWindow").
__model_type = os.path.basename(os.path.dirname(MODEL_SAVE_PATH)).split("-")[-1]
# Model name: the checkpoint file name without its final extension. os.path.splitext only
# strips the last suffix, so dots inside the name (such as "lr0.0001") are preserved, and
# a name without any extension is kept whole instead of collapsing to "".
__model_name = os.path.splitext(os.path.basename(MODEL_SAVE_PATH))[0]
# Wall-clock time of this evaluation run, stored with the metrics.
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

In [25]:
# Tag each split's metrics dict, in place, with the run metadata that accompanies it.
# Key order is significant: later cells derive column order from these dicts.
for metrics, dataset in (
    (metrics_train, "train"),
    (metrics_val, "val"),
    (metrics_test, "test"),
):
    metrics.update({
        "mission": MISSION,
        "phase": PHASE,
        "model_type": __model_type,
        "data": dataset,
        "model_name": __model_name,
        "timestamp": timestamp,
        "window_size": WINDOW_SIZE,
        "channels": CHANNELS,
    })

In [26]:
# One single-row frame per split, stacked in train / val / test order.
df_train = pd.DataFrame([metrics_train])
df_val = pd.DataFrame([metrics_val])
df_test = pd.DataFrame([metrics_test])
df = pd.concat([df_train, df_val, df_test])

# Put the run-metadata columns first, followed by the metric columns in their existing order.
new_columns = ["mission", "phase", "model_type", "data", "model_name", "timestamp", "window_size", "channels"]
columns_order = new_columns + [col for col in df.columns if col not in new_columns]
df = df[columns_order]

# Append the rows to the metrics CSV.
# - Create the target directory first so a fresh checkout does not fail on a missing folder.
# - Write the header when the file is new OR exists but is empty; otherwise append rows only.
# NOTE: rows are appended positionally, so the column layout must match the existing file.
# Re-running this cell appends the same rows again.
metrics_dir = os.path.dirname(METRICS_SAVE_PATH)
if metrics_dir:
    os.makedirs(metrics_dir, exist_ok=True)
write_header = (not os.path.isfile(METRICS_SAVE_PATH)) or os.path.getsize(METRICS_SAVE_PATH) == 0
df.to_csv(METRICS_SAVE_PATH, mode='a', header=write_header, index=False)


## Log metrics to Weights & Biases

In [None]:
# NOTE(review): this import sits mid-notebook, so a missing `wandb` package only surfaces
# here, after the prediction and metrics cells have already run.
import wandb

# Log in to Weights & Biases before creating the run below.
wandb.login()

In [28]:
def _param_from_model_name(tokens, prefix):
    """Return the text following `prefix` in the first token that starts with it.

    Args:
        tokens: The "_"-separated pieces of the checkpoint file name.
        prefix: Tag to look for, e.g. "percentile", "epochs" or "lr".

    Returns:
        str: The remainder of the matching token (e.g. "99" for "percentile99").

    Raises:
        ValueError: If no token starts with `prefix`.
    """
    for token in tokens:
        if token.startswith(prefix):
            return token[len(prefix):]
    raise ValueError(f"No '{prefix}<value>' field found in model name tokens: {tokens}")


# Identifiers derived from the checkpoint path (recomputed so this section runs on its own).
__model_type = os.path.basename(os.path.dirname(MODEL_SAVE_PATH)).split("-")[-1]
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Channel label for the run config: the mode name itself, or the explicit channel range for "subset".
test_type = f"{CHANNELS}" if CHANNELS == "allchannels" or CHANNELS == "target" else f"Channels{first_channel_number}-{last_channel_number}"

# File name without its final extension; dots inside the name (e.g. "lr0.0001") are preserved.
__model_name = os.path.splitext(os.path.basename(MODEL_SAVE_PATH))[0]
parameters = __model_name.split("_")
# Look the fields up by prefix rather than by fixed position, so a change in the naming
# layout raises a clear error instead of silently slicing the wrong token.
# The values are kept as strings, exactly as they appear in the file name.
percentil = _param_from_model_name(parameters, "percentile")
epochs = _param_from_model_name(parameters, "epochs")
learning_rate = _param_from_model_name(parameters, "lr")

In [None]:
# Alternative naming kept for reference: appending the timestamp gives every execution its own run.
# run_name = f"Mission{MISSION}_{__model_type}_{__model_name}_{timestamp.replace(' ', '_').replace(':', '-')}"
# Name derived only from mission, model type and model name. It is also used as the run id
# in wandb.init below, so the same checkpoint always maps to the same run.
run_name = f"Mission{MISSION}_{__model_type}_{__model_name}"
run_name

In [None]:
# Hyperparameters and run metadata recorded in the W&B run config.
run_config = {
    "mission": MISSION,
    "phase": PHASE,
    "model_type": __model_type,
    "model_name": __model_name,
    "timestamp": timestamp,
    "channels": test_type,
    "window_size": WINDOW_SIZE,
    "percentile": percentil,
    "epochs": epochs,
    "learning_rate": learning_rate,
}

# Create the run in the "MET-ESA" project, or resume it when the id already exists
# (resume="allow"). The run name doubles as the run id.
# Previously used fixed id, kept for reference: id="oayk9f9m"
run = wandb.init(
    project="MET-ESA",
    id=run_name,
    name=run_name,
    config=run_config,
    resume="allow",
)

In [31]:
# Build one W&B table with a row per split.
# "model_name" and "data" lead the table. The metrics dicts may already carry those two
# keys (the metadata cell in the CSV section adds them), so they are skipped here to
# avoid duplicate column names.
leading_columns = ["model_name", "data"]
metric_columns = [key for key in metrics_train if key not in leading_columns]
table = wandb.Table(columns=leading_columns + metric_columns)
for split_name, split_metrics in (("train", metrics_train), ("val", metrics_val), ("test", metrics_test)):
    # Look values up by key so each row follows the column order even if the dicts'
    # key orders differ; a missing metric raises KeyError instead of misaligning the row.
    table.add_data(__model_name, split_name, *[split_metrics[key] for key in metric_columns])
wandb.log({"metrics": table})

In [32]:
def _with_prefix(prefix, values):
    """Return a new dict with every key of `values` rewritten as "<prefix>_<key>"."""
    return {f"{prefix}_{key}": value for key, value in values.items()}


# Flatten the three per-split dicts into one record so each metric is logged under a
# split-specific name (train_*, val_*, test_*).
metrics_train_renamed = _with_prefix("train", metrics_train)
metrics_val_renamed = _with_prefix("val", metrics_val)
metrics_test_renamed = _with_prefix("test", metrics_test)

flat_metrics = {}
for renamed in (metrics_train_renamed, metrics_val_renamed, metrics_test_renamed):
    flat_metrics.update(renamed)
wandb.log(flat_metrics)

In [None]:
# Close the W&B run opened by wandb.init above.
wandb.finish()