In [1]:
import json

from model import SpotLSTM, Training
from dataset import LoadSpotDataset, SpotDataset
from utils import ResultPlotter

## Load variables from config file

In [2]:
lsd = LoadSpotDataset("config.yaml", "data")

prices_df, instance_info_df = lsd.load_data()

In [3]:
prices_df.head()

Unnamed: 0,price_timestamp,id_instance,spot_price
0,2024-04-01 00:00:00+00:00,8364,0.0438
1,2024-04-01 00:00:00+00:00,44263,2.2684
2,2024-04-01 00:00:00+00:00,39658,1.0328
3,2024-04-01 00:00:00+00:00,19681,12.5172
4,2024-04-01 00:00:00+00:00,39617,7.8889


In [4]:
instance_info_df.head()

Unnamed: 0_level_0,region,av_zone,instance_type,instance_family,generation,modifiers,size,vcpu,memory,architectures,product_description,on_demand_price
id_instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
48207,us-east-1,b,c6a.8xlarge,c,6,[a],8xlarge,32,65536,[x86_64],Linux/UNIX,1.224
6910,us-east-1,c,c6a.8xlarge,c,6,[a],8xlarge,32,65536,[x86_64],Linux/UNIX,1.224
32082,us-east-1,f,c6a.8xlarge,c,6,[a],8xlarge,32,65536,[x86_64],Linux/UNIX,1.224
39235,us-east-1,a,c6a.8xlarge,c,6,[a],8xlarge,32,65536,[x86_64],Linux/UNIX,1.224
17992,us-east-1,d,c6a.8xlarge,c,6,[a],8xlarge,32,65536,[x86_64],Linux/UNIX,1.224


In [5]:
compute_instances = instance_info_df[(instance_info_df["instance_family"] == 'c') & (~instance_info_df["size"].str.contains('metal'))].index
prices_df = prices_df[prices_df["id_instance"].isin(compute_instances)]

In [6]:
print(len(compute_instances))
prices_df.info()

1412
<class 'pandas.core.frame.DataFrame'>
Index: 1601058 entries, 1 to 8001851
Data columns (total 3 columns):
 #   Column           Non-Null Count    Dtype              
---  ------           --------------    -----              
 0   price_timestamp  1601058 non-null  datetime64[ns, UTC]
 1   id_instance      1601058 non-null  int64              
 2   spot_price       1601058 non-null  float64            
dtypes: datetime64[ns, UTC](1), float64(1), int64(1)
memory usage: 48.9 MB


In [7]:
train_df, val_df, test_df = lsd.get_training_validation_test_split(prices_df)

train_dataset = SpotDataset(train_df, 'config.yaml')
train_loader = train_dataset.get_data_loader()

val_dataset = SpotDataset(val_df, 'config.yaml')
val_loader = val_dataset.get_data_loader()

In [8]:
# Get start and end dates for train_df
train_start_date = train_df['price_timestamp'].min()
train_end_date = train_df['price_timestamp'].max()
train_days = (train_end_date - train_start_date).days

# Get start and end dates for val_df
val_start_date = val_df['price_timestamp'].min()
val_end_date = val_df['price_timestamp'].max()
val_days = (val_end_date - val_start_date).days

# Get start and end dates for test_df
test_start_date = test_df['price_timestamp'].min()
test_end_date = test_df['price_timestamp'].max()
test_days = (test_end_date - test_start_date).days

print(f"Train DataFrame: Start Date = {train_start_date}, End Date = {train_end_date}, Number of Days = {train_days}")
print(f"Validation DataFrame: Start Date = {val_start_date}, End Date = {val_end_date}, Number of Days = {val_days}")
print(f"Test DataFrame: Start Date = {test_start_date}, End Date = {test_end_date}, Number of Days = {test_days}")

Train DataFrame: Start Date = 2024-04-01 00:00:00+00:00, End Date = 2024-08-23 00:00:00+00:00, Number of Days = 144
Validation DataFrame: Start Date = 2024-08-23 00:00:00+00:00, End Date = 2024-09-21 08:00:00+00:00, Number of Days = 29
Test DataFrame: Start Date = 2024-09-21 08:00:00+00:00, End Date = 2024-10-20 00:00:00+00:00, Number of Days = 28


## Hyperparameter Tuning

### Learning Rate Range Test

In [9]:
def lr():
    from model import find_lr
    
    # Model configuration
    model_config = {
        "window_size": 20,
        "batch_size": 128,        # Smaller for better generalization
        "shuffle_buffer": 1000,
        "epochs": 150,           # More training time
        "steps_per_epoch": len(train_dataset),
        "init_learning_rate": 6e-7,
        "final_learning_rate": 1.2e-6,
        "weight_decay": 1.5e-5,
        "mse_weight": 0.8
    }
    model = SpotLSTM('config.yaml')

    log_lrs, losses = find_lr(model, train_loader, model_config)

    ResultPlotter().plot_learning_rate_finder(log_lrs, losses)
    
# lr()

### Model Training with Selected Learning Rate

In [11]:
model = SpotLSTM("config.yaml")

modelTraining = Training(model, 'config.yaml')
modelTraining.train_model(train_loader, val_loader)

  checkpoint = torch.load(best_file, map_location=device)


Successfully loaded checkpoint.

Training for 100 epochs



                                                                             

Epoch 1/100
T:0.0006;V:0.0006;L:4.4e-07;G:11.4;76.4s


                                                                             

Epoch 2/100
T:0.0007;V:0.0008;L:5.4e-07;G:34.0;77.6s


                                                                             

Epoch 3/100
T:0.0007;V:0.0006;L:7.2e-07;G:4.1;75.3s


                                                                             

Epoch 4/100
T:0.0007;V:0.0008;L:9.7e-07;G:31.7;75.4s


                                                                             

Epoch 5/100
T:0.0007;V:0.0010;L:1.3e-06;G:22.8;75.9s


                                                                             

Epoch 6/100
T:0.0008;V:0.0008;L:1.7e-06;G:17.7;74.9s


                                                                             

Epoch 7/100
T:0.0008;V:0.0010;L:2.1e-06;G:28.0;76.7s


                                                                             

Epoch 8/100
T:0.0009;V:0.0007;L:2.5e-06;G:11.4;74.6s


                                                                             

Epoch 9/100
T:0.0010;V:0.0009;L:3.1e-06;G:39.1;77.0s


                                                                            


Training interrupted by user
Saving current model state...


In [1]:
with open('output/training_history.json', 'r') as f:
    history = json.load(f)

ResultPlotter().plot_training_history(history)

FileNotFoundError: [Errno 2] No such file or directory: 'output/training_history.json'