In [None]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colormaps
import matplotlib.dates as mdates

from model import SpotLSTM, Evaluate, CheckpointTracker

# Load data

In [None]:
# Read the pre-split datasets and the per-instance metadata table from disk.
# NOTE(review): unpickling can execute arbitrary code — only load files from
# a trusted source.
train_df = pd.read_pickle("data/train_df.pkl")
val_df = pd.read_pickle("data/val_df.pkl")
test_df = pd.read_pickle("data/test_df.pkl")
instance_info_df = pd.read_pickle("data/instance_info_df.pkl")

# From here on `train_df` also contains the validation rows; ignore_index
# gives the combined frame a fresh 0..n-1 index.
train_df = pd.concat([train_df, val_df], ignore_index=True)

# Quick sanity check of the test split: distinct instances and column summary.
print(f"Number of different id_instances: {test_df['id_instance'].nunique()}")
test_df.info()

# Model Evaluation

In [None]:
# Build the model from config.yaml, then hand it to CheckpointTracker.load,
# whose two-item result is unpacked into `config` and `loss`.
# NOTE(review): load() presumably restores saved weights into `model` —
# confirm against model.py.
model = SpotLSTM("config.yaml")
config, loss = CheckpointTracker().load(model)

In [None]:
# Evaluate the model on test_df. `ev` is reused by later cells
# (metrics export and prediction plotting).
# NOTE(review): the structure of `metrics` is defined by Evaluate.evaluate_all
# in model.py and is not visible here.
ev = Evaluate(model, "config.yaml")
metrics = ev.evaluate_all(test_df)

In [None]:
def dump_metrics_to_csv(
    evaluate_instance: Evaluate, instance_info_df: pd.DataFrame, output_dir
):
    """Write per-segment evaluation metrics, joined with instance properties, to CSV.

    One row is produced for every entry of every instance found in
    ``evaluate_instance.segmented_metrics``. Each row holds the instance id,
    that instance's properties looked up in ``instance_info_df`` and the
    metric values named by ``evaluate_instance.metrics``. The result is
    written to ``<output_dir>/evaluation_metrics.csv`` (overwriting any
    existing file) without the DataFrame index.

    Args:
        evaluate_instance: Object exposing ``metrics`` (iterable of metric
            names) and ``segmented_metrics`` (mapping of instance id to an
            iterable of per-segment mappings keyed by metric name).
        instance_info_df: Frame indexed by instance id that provides the
            columns ``region``, ``av_zone``, ``instance_type``,
            ``generation``, ``modifiers`` and ``size``.
        output_dir: Directory that receives the CSV. It is created when it
            does not exist yet.

    Raises:
        KeyError: If an instance id is missing from ``instance_info_df``, a
            property column is absent, or a segment lacks one of the metrics.
    """
    property_columns = [
        "region",
        "av_zone",
        "instance_type",
        "generation",
        "modifiers",
        "size",
    ]
    # Coerce to a list so tuples / dict views work as well as plain lists.
    metric_columns = list(evaluate_instance.metrics)
    column_order = ["instance_id", *property_columns, *metric_columns]

    # Flatten {instance_id: [segment_metrics, ...]} into one record per segment.
    rows = []
    for instance_id, metrics_list in evaluate_instance.segmented_metrics.items():
        instance_props = instance_info_df.loc[
            instance_id, property_columns
        ].to_dict()  # type: ignore

        rows.extend(
            {
                "instance_id": instance_id,
                **instance_props,
                **{col: metric[col] for col in metric_columns},
            }
            for metric in metrics_list
        )

    # Passing `columns` fixes the column order and keeps the header intact
    # even when there are no rows at all (previously a KeyError).
    metrics_df = pd.DataFrame(rows, columns=column_order)

    # to_csv does not create missing directories; an empty output_dir means
    # "current directory" and must not be passed to makedirs.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "evaluation_metrics.csv")
    metrics_df.to_csv(output_file, index=False)


# Export ev's segmented metrics, joined with the instance properties, to
# output/evaluation_metrics.csv.
dump_metrics_to_csv(ev, instance_info_df, "output")

In [None]:
def plot_time_series_predictions(
    historical_values: np.ndarray,
    true_values: np.ndarray,
    predictions_list: list[np.ndarray],
    step_size: int,
    timestamps: np.ndarray,
    title: str = "Time Series Predictions",
    save: bool = False,
):
    """Plot history, observed values and a sequence of forecasts on one date axis.

    Args:
        historical_values: Values drawn as a solid blue line against the first
            ``len(historical_values)`` entries of ``timestamps``.
        true_values: Values drawn as a solid green line against the remaining
            entries of ``timestamps``.
        predictions_list: Forecasts drawn as dashed lines. Forecast ``i``
            starts at position ``len(historical_values) + i * step_size`` of
            ``timestamps``; a forecast that would run past the end of
            ``timestamps`` is not drawn.
        step_size: Offset, in samples, between the starts of consecutive
            forecasts.
        timestamps: X values covering the history followed by the true values.
        title: Figure title; also the basis of the file name when saving.
        save: When true, write the figure as a 300 dpi PNG named after the
            lower-cased title with spaces replaced by underscores (relative
            to the current working directory).
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    # Day-resolution date labels, rotated so they do not overlap.
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
    fig.autofmt_xdate()

    n_history = len(historical_values)
    n_timestamps = len(timestamps)

    ax.plot(
        timestamps[:n_history],
        historical_values,
        "b-",
        label="Historical Values",
        linewidth=2,
    )
    ax.plot(
        timestamps[n_history:],
        true_values,
        "g-",
        label="True Values",
        linewidth=2,
    )

    # One evenly spaced viridis shade per forecast.
    cmap = colormaps.get_cmap("viridis")
    shades = np.linspace(0, 1, len(predictions_list))

    for k, forecast in enumerate(predictions_list):
        first = n_history + k * step_size
        last = first + len(forecast)

        # Forecasts reaching beyond the available timestamps are skipped.
        if last > n_timestamps:
            continue

        window = timestamps[first:last]
        ax.plot(
            window,
            forecast,
            "--",
            color=cmap(shades[k]),
            label=f"Prediction {k+1}",
            alpha=0.9,
        )
        # Faint vertical marker at the start of this forecast.
        ax.axvline(x=window[0], color="lightgray", linestyle="-", alpha=0.3)

    ax.grid(True, linestyle="--", alpha=0.7)
    ax.set_title(title, fontsize=14, pad=20)
    ax.set_xlabel("Fecha", fontsize=12)
    ax.set_ylabel("Valor", fontsize=12)
    ax.legend(loc="best")

    fig.tight_layout()

    if save:
        file_stem = title.lower().replace(" ", "_")
        fig.savefig(f"{file_stem}.png", dpi=300, bbox_inches="tight")

    plt.show()

In [None]:
# Pick one instance at random and plot its forecasts against the observed
# spot prices. sample() is called without random_state, so a different
# instance may be drawn on every run.
# NOTE(review): `id` shadows the builtin of the same name; the name is kept
# because other cells may read it — consider renaming it notebook-wide.
id = train_df["id_instance"].sample().values[0]
print(f"Plotting instance {id}")

results = ev.get_prediction_results(id)
# Only the second element of each result is plotted.
list_pred = [r[1] for r in results]
if not list_pred:
    # Fail with a readable message instead of an IndexError on list_pred[0].
    raise ValueError(f"No prediction results available for instance {id}")

# Show six forecast-lengths of training history before the test period.
history_len = len(list_pred[0]) * 6
# Consecutive forecasts are drawn this many samples apart.
# NOTE(review): presumably matches the stride used by Evaluate — confirm.
step_size = 28

# Filter each frame once and reuse the selection for prices and timestamps.
train_rows = train_df[train_df["id_instance"] == id]
test_rows = test_df[test_df["id_instance"] == id]

previous = train_rows["spot_price"].values[-history_len:]
target = test_rows["spot_price"].values
timestamps = np.concatenate(
    [
        train_rows["price_timestamp"].values[-history_len:],
        test_rows["price_timestamp"].values,
    ]
)
plot_time_series_predictions(previous, target, list_pred, step_size, timestamps)