In [7]:
import pandas as pd

from model import SpotLSTM, Evaluate, CheckpointTracker
from dataset import LoadSpotDataset

# Load data

In [8]:
# Load the spot-price history and the instance metadata from local pickles.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
prices_df = pd.read_pickle('data/prices_df.pkl')
instance_info_df = pd.read_pickle('data/instance_info_df.pkl')

# Select the index labels of instances whose family is 'c' and whose size does not
# contain 'metal', then keep only the price rows whose id_instance is among them.
# Presumably instance_info_df is indexed by the same ids as prices_df["id_instance"] — TODO confirm.
# NOTE(review): `~` over str.contains assumes "size" has no missing values
# (str.contains yields NaN for them) — confirm, or pass na=False.
compute_instances = instance_info_df[(instance_info_df["instance_family"] == 'c') & (~instance_info_df["size"].str.contains('metal'))].index
prices_df = prices_df[prices_df["id_instance"].isin(compute_instances)]
# Debug aid: uncomment the next line to restrict the run to a single instance id.
# prices_df = prices_df[prices_df["id_instance"] == 48207]

# One series per distinct id_instance left after filtering.
num_series = len(prices_df.groupby('id_instance'))
print(f"Processing {num_series} series.")

Processing 1378 series.


In [9]:
# Split the filtered prices into train and test frames. val_ratio=0.0 requests no
# validation share, and the middle (validation) return value is discarded.
lsd = LoadSpotDataset('config.yaml', 'data')
train_df, _, test_df = lsd.get_training_validation_test_split(prices_df, train_ratio=0.85, val_ratio=0.0)

# Time span covered by the training frame. `.days` keeps only the whole-day part
# of the difference, so a trailing partial day is not counted.
train_start_date = train_df['price_timestamp'].min()
train_end_date = train_df['price_timestamp'].max()
train_days = (train_end_date - train_start_date).days

# Same span computation for the test frame.
# NOTE(review): the recorded output shows the train end and the test start on the
# same timestamp — confirm the split does not place the boundary rows in both frames.
test_start_date = test_df['price_timestamp'].min()
test_end_date = test_df['price_timestamp'].max()
test_days = (test_end_date - test_start_date).days

print(f"Train DataFrame: Start Date = {train_start_date}, End Date = {train_end_date}, Number of Days = {train_days}")
print(f"Test DataFrame: Start Date = {test_start_date}, End Date = {test_end_date}, Number of Days = {test_days}")

Train DataFrame: Start Date = 2024-04-01 00:00:00+00:00, End Date = 2024-09-21 08:00:00+00:00, Number of Days = 173
Test DataFrame: Start Date = 2024-09-21 08:00:00+00:00, End Date = 2024-10-20 00:00:00+00:00, Number of Days = 28


# Model Evaluation

In [10]:
# Build the model from config.yaml, then load a saved checkpoint into it.
# Presumably the tracker restores the best saved weights — TODO confirm in model.py.
# `config` and `loss` are returned by the loader but not used in the cells shown here.
model = SpotLSTM('config.yaml')
config, loss = CheckpointTracker().load(model)

Successfully loaded checkpoint.


  checkpoint = torch.load(best_file, map_location=self.device)


In [11]:
# Evaluate the loaded model on the held-out test frame.
# `metrics` is later passed to dump_metrics_to_csv, which iterates it as a mapping
# of instance id -> list of per-segment metric dicts.
ev = Evaluate(model, 'config.yaml')
metrics = ev.evaluate_all(test_df)


Evaluation Configuration:
- Sequence length: 28
- Prediction length: 80
- Total instances: 1378


Evaluating instances: 100%|██████████| 1378/1378 [00:08<00:00, 169.72it/s]


Completed evaluation of 1378 instances





In [13]:
import os

def dump_metrics_to_csv(segmented_metrics, instance_info_df, output_dir):
    """Write segmented evaluation metrics, joined with instance properties, to a CSV.

    One CSV row is produced per (instance, metric segment) pair and saved as
    ``evaluation_metrics.csv`` inside ``output_dir``.

    Args:
        segmented_metrics: Mapping of instance id -> iterable of metric dicts.
            Each dict must provide the keys ``n_timestep``, ``rmse``, ``mape``,
            ``smape_std``, ``smape_cv``, ``smape`` and ``direction_accuracy``;
            any other keys are ignored.
        instance_info_df: DataFrame indexed by instance id that contains the
            columns ``region``, ``av_zone``, ``instance_type``, ``generation``,
            ``modifiers`` and ``size``.
        output_dir: Directory that receives the CSV. It is created (including
            parents) when it does not exist yet.

    Returns:
        None. The result is the file written to disk.

    Raises:
        KeyError: If an instance id is missing from ``instance_info_df`` or a
            metric dict lacks one of the required keys.
    """
    # Single source of truth for the CSV layout: the instance lookup, the row
    # construction and the final column order are all derived from these lists.
    instance_columns = [
        'region',
        'av_zone',
        'instance_type',
        'generation',
        'modifiers',
        'size',
    ]
    metric_columns = [
        "n_timestep",
        "rmse",
        "mape",
        "smape_std",
        "smape_cv",
        "smape",
        "direction_accuracy",
    ]
    column_order = ["instance_id", *instance_columns, *metric_columns]

    # to_csv does not create missing directories. An empty output_dir means the
    # current working directory, which makedirs would reject, so skip it then.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "evaluation_metrics.csv")

    # Flatten the nested metrics into one record per (instance, segment).
    rows = []
    for instance_id, metrics_list in segmented_metrics.items():
        # Instance properties are looked up once and repeated on every segment row.
        instance_props = instance_info_df.loc[instance_id, instance_columns].to_dict()

        for metric in metrics_list:
            rows.append({
                "instance_id": instance_id,
                **instance_props,
                **{key: metric[key] for key in metric_columns},
            })

    # Passing `columns` fixes the column order and still yields a header-only
    # CSV when there are no rows, instead of failing on a column-less frame.
    metrics_df = pd.DataFrame(rows, columns=column_order)
    metrics_df.to_csv(output_file, index=False)
    
# Persist the per-instance evaluation metrics as output/evaluation_metrics.csv.
dump_metrics_to_csv(metrics, instance_info_df, 'output')