 # TTM zero-shot and few-shot benchmarking on multiple datasets

  **Using TTM-512-96 model.**

## Imports

In [1]:
import math
import warnings

import matplotlib.pyplot as plt
import pandas as pd
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed

from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset
from tsfm_public.toolkit.lr_finder import optimal_lr_finder
from tsfm_public.toolkit.visualization import plot_predictions


# Suppress every Python warning for the rest of the session.
# Note that this blanket filter also hides deprecation warnings raised by the libraries imported above.
warnings.filterwarnings("ignore")

2024-10-04 08:58:19.045912: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-04 08:58:23.064504: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  warn(f"Failed to load image Python extension: {e}")


## Important arguments

In [2]:
# Fix the random seed up front; the same value is later passed to TrainingArguments.
SEED = 42
set_seed(SEED)

# Context and forecast lengths. They select the checkpoint branch and are
# forwarded to `load_dataset` for every benchmark dataset.
context_length, forecast_length = 512, 96

# When True, the backbone parameters are frozen before few-shot fine-tuning.
freeze_backbone = True

# Upper bound on fine-tuning epochs (early stopping may end a run sooner)
# and the number of data-loader worker processes used during fine-tuning.
EPOCHS = 50
NUM_WORKERS = 16

# Root folder that must contain every dataset named in `list_datasets`;
# change it to match your environment. It is handed to `load_dataset`
# (imported from `tsfm_public`) as `dataset_root_path`.
# NOTE(review): an earlier version of this comment pointed to `load_datasets()` in
# notebooks/hfdemo/tinytimemixer/utils/ttm_utils.py -- confirm whether that helper still exists.
DATA_ROOT_PATH = "/dccstor/tsfm23/datasets/"

# Plots, checkpoints and the CSV summary are all written under this directory.
OUT_DIR = "ttm_v2_results_benchmark_{}_{}/".format(context_length, forecast_length)

## List of benchmark datasets (TTM was not pre-trained on any of these)

In [3]:
# Datasets are benchmarked in this order. Names containing "ett" get a higher
# head dropout during fine-tuning, and "traffic" / "electricity" use smaller
# batch sizes (both handled inside the main benchmarking loop).
list_datasets = ["etth1", "etth2", "ettm1", "ettm2"] + ["weather", "electricity", "traffic"]

## Get model path

In [4]:
# Granite TTM checkpoints are hosted on the Hugging Face Hub:
# https://huggingface.co/ibm-granite/granite-timeseries-ttm-v1/tree/main
# Each (context_len, forecast_len) pair lives on its own branch, so the
# revision name below must be derived from the lengths chosen above.

hf_model_path = "ibm-granite/granite-timeseries-ttm-v1"
hf_model_branch = "_".join([str(context_length), str(forecast_length), "r2"])

## Main benchmarking loop

In [5]:
# Main benchmarking loop.
#
# For every dataset in `list_datasets` this cell:
#   1. evaluates the pretrained checkpoint zero-shot on the test split,
#   2. fine-tunes a fresh copy of the checkpoint on a 5% few-shot training split
#      (backbone frozen when `freeze_backbone` is set) and evaluates it on the test split,
#   3. appends the metrics to `all_results` and rewrites the results CSV, so that
#      partial results are on disk if a later dataset fails.
#
# Each list in `all_results` receives exactly one entry per dataset.
all_results = {
    "dataset": [],
    "zs_mse": [],
    "fs5_mse": [],
    "zs_eval_time": [],
    "fs5_mean_epoch_time": [],
    "fs5_total_train_time": [],
    "fs5_best_val_metric": [],
}
# Loop over data
for DATASET in list_datasets:
    print()
    print("=" * 100)
    print(
        f"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}"
    )
    print(f"Model will be loaded from {hf_model_path}/{hf_model_branch}")
    SUBDIR = f"{OUT_DIR}/{DATASET}"

    # Per-dataset batch size; datasets not listed here use 64.
    BATCH_SIZE = {"traffic": 8, "electricity": 32}.get(DATASET, 64)

    # Data prep: only the test split is needed for the zero-shot evaluation.
    _, _, dset_test = load_dataset(DATASET, context_length, forecast_length, dataset_root_path=DATA_ROOT_PATH)

    #############################################################
    ##### Use the pretrained model in zero-shot forecasting #####
    #############################################################
    # Load model
    zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)

    # zeroshot_trainer (used for evaluation only; it is never trained)
    zeroshot_trainer = Trainer(
        model=zeroshot_model,
        args=TrainingArguments(
            output_dir=f"{SUBDIR}/zeroshot",
            per_device_eval_batch_size=BATCH_SIZE,
            seed=SEED,
            # "none" (the string) disables logging integrations; leaving this unset
            # enables every installed integration.
            report_to="none",
        ),
        eval_dataset=dset_test,
    )

    # evaluate = zero-shot performance
    print("+" * 20, "Test MSE zero-shot", "+" * 20)
    zeroshot_output = zeroshot_trainer.evaluate(dset_test)
    print(zeroshot_output)
    print("+" * 60)

    # Plot
    plot_predictions(
        model=zeroshot_trainer.model,
        dset=dset_test,
        plot_dir=SUBDIR,
        num_plots=10,
        plot_prefix="test_zeroshot",
        channel=0,
    )
    plt.close()

    # write results (all zero-shot columns together so the lists stay aligned)
    all_results["dataset"].append(DATASET)
    all_results["zs_mse"].append(zeroshot_output["eval_loss"])
    all_results["zs_eval_time"].append(zeroshot_output["eval_runtime"])

    ################################################################
    ####### Use the pretrained model in few-shot 5% forecasting ####
    ################################################################
    # Only the 5% setting is benchmarked; adding another percentage here also
    # requires matching `fs{percent}_*` keys in `all_results`.
    for fewshot_percent in [5]:
        # Set learning rate
        learning_rate = None  # `None` value indicates that the optimal_lr_finder() will be used

        print("-" * 20, f"Running few-shot {fewshot_percent}%", "-" * 20)
        # Data prep: reload the splits with the training set reduced to the few-shot fraction
        dset_train, dset_val, dset_test = load_dataset(
            DATASET,
            context_length,
            forecast_length,
            fewshot_fraction=fewshot_percent / 100,
            dataset_root_path=DATA_ROOT_PATH,
        )

        # change head dropout to 0.7 for ett datasets; other datasets keep the checkpoint's setting
        head_kwargs = {"head_dropout": 0.7} if "ett" in DATASET else {}
        finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(
            hf_model_path, revision=hf_model_branch, **head_kwargs
        )

        if freeze_backbone:
            print(
                "Number of params before freezing backbone",
                count_parameters(finetune_forecast_model),
            )

            # Freeze the backbone of the model
            for param in finetune_forecast_model.backbone.parameters():
                param.requires_grad = False

            # Count params
            print(
                "Number of params after freezing the backbone",
                count_parameters(finetune_forecast_model),
            )

        if learning_rate is None:
            # The finder returns both the suggested LR and a model object, which replaces ours.
            learning_rate, finetune_forecast_model = optimal_lr_finder(
                finetune_forecast_model,
                dset_train,
                batch_size=BATCH_SIZE,
            )
            print("OPTIMAL SUGGESTED LEARNING RATE =", learning_rate)

        print(f"Using learning rate = {learning_rate}")
        finetune_forecast_args = TrainingArguments(
            output_dir=f"{SUBDIR}/fewshot_{fewshot_percent}",
            overwrite_output_dir=True,
            learning_rate=learning_rate,
            num_train_epochs=EPOCHS,
            do_eval=True,
            # NOTE(review): recent transformers releases rename this argument to
            # `eval_strategy`; confirm against the installed version.
            evaluation_strategy="epoch",
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            dataloader_num_workers=NUM_WORKERS,
            # Must be the string "none": passing None is treated as "all installed
            # integrations", which is not what a benchmark run wants.
            report_to="none",
            save_strategy="epoch",
            logging_strategy="epoch",
            save_total_limit=1,
            logging_dir=f"{SUBDIR}/fewshot_{fewshot_percent}",  # Make sure to specify a logging directory
            load_best_model_at_end=True,  # Load the best model when training ends
            metric_for_best_model="eval_loss",  # Metric to monitor for early stopping
            greater_is_better=False,  # For loss
            seed=SEED,
        )

        # Create the early stopping callback
        early_stopping_callback = EarlyStoppingCallback(
            early_stopping_patience=10,  # Number of epochs with no improvement after which to stop
            early_stopping_threshold=0.0,  # Minimum improvement required to consider as improvement
        )
        tracking_callback = TrackingCallback()

        # Optimizer and scheduler
        optimizer = AdamW(finetune_forecast_model.parameters(), lr=learning_rate)
        # steps_per_epoch is derived from the dataset size and BATCH_SIZE.
        # NOTE(review): this assumes one optimizer step per BATCH_SIZE samples
        # (single device, no gradient accumulation) -- confirm for multi-GPU runs.
        scheduler = OneCycleLR(
            optimizer,
            learning_rate,
            epochs=EPOCHS,
            steps_per_epoch=math.ceil(len(dset_train) / (BATCH_SIZE)),
        )

        finetune_forecast_trainer = Trainer(
            model=finetune_forecast_model,
            args=finetune_forecast_args,
            train_dataset=dset_train,
            eval_dataset=dset_val,
            callbacks=[early_stopping_callback, tracking_callback],
            optimizers=(optimizer, scheduler),
        )

        # Fine tune
        finetune_forecast_trainer.train()

        # Evaluation on the held-out test split
        print(
            "+" * 20,
            f"Test MSE after few-shot {fewshot_percent}% fine-tuning",
            "+" * 20,
        )
        fewshot_output = finetune_forecast_trainer.evaluate(dset_test)
        print(fewshot_output)
        print("+" * 60)

        # Plot
        plot_predictions(
            model=finetune_forecast_trainer.model,
            dset=dset_test,
            plot_dir=SUBDIR,
            num_plots=10,
            plot_prefix=f"test_fewshot_{fewshot_percent}",
            channel=0,
        )
        plt.close()

        # write results
        all_results[f"fs{fewshot_percent}_mse"].append(fewshot_output["eval_loss"])
        all_results[f"fs{fewshot_percent}_mean_epoch_time"].append(tracking_callback.mean_epoch_time)
        all_results[f"fs{fewshot_percent}_total_train_time"].append(tracking_callback.total_train_time)
        all_results[f"fs{fewshot_percent}_best_val_metric"].append(tracking_callback.best_eval_metric)

    # Rewrite the summary after every dataset so partial results are kept on disk.
    df_out = pd.DataFrame(all_results).round(3)
    print(df_out[["dataset", "zs_mse", "fs5_mse"]])
    df_out.to_csv(f"{OUT_DIR}/results_zero_few.csv")

INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96



Running zero-shot/few-shot for TTM-512 on dataset = etth1, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 8033, val = 2785, test = 2785


config.json:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.24M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/69.0 [00:00<?, ?B/s]

INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.3628121316432953, 'eval_model_preparation_time': 0.0026, 'eval_runtime': 5.9814, 'eval_samples_per_second': 465.613, 'eval_steps_per_second': 7.356}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96
INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 2785


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00017073526474706903
OPTIMAL SUGGESTED LEARNING RATE = 0.00017073526474706903
Using learning rate = 0.00017073526474706903


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.8127,0.664259
2,0.8322,0.664153
3,0.7938,0.66397
4,0.7722,0.66376
5,0.8112,0.663474
6,0.7692,0.663127
7,0.7496,0.66282
8,0.7192,0.662412
9,0.7166,0.662103
10,0.7098,0.661821


INFO:p-3887243:t-23136586168064:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 08:59:21 EDT)" (scheduled at 2024-10-04 08:59:21.016140-04:00)
INFO:p-3887243:t-23136586168064:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 08:59:36 EDT)" executed successfully
INFO:p-3887243:t-23136586168064:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 08:59:51 EDT)" (scheduled at 2024-10-04 08:59:36.016140-04:00)
INFO:p-3887243:t-23136586168064:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 08:59:51 EDT)" executed successfully
INFO:p-3887243:t-23150364181248:base.py:shutdown:Scheduler has been shut down
ERROR:p-3887243:t-23150364181248:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 0.9159166812896729 seconds, Total Train Time = 44.12124967575073
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.36197009682655334, 'eval_runtime': 1.0302, 'eval_samples_per_second': 2703.275, 'eval_steps_per_second': 42.709, 'epoch': 21.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96
INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 8033, val = 2785, test = 2785


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.362

Running zero-shot/few-shot for TTM-512 on dataset = etth2, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.2757423520088196, 'eval_model_preparation_time': 0.002, 'eval_runtime': 0.6214, 'eval_samples_per_second': 4482.024, 'eval_steps_per_second': 70.811}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96
INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 2785


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.0002477076355991711
OPTIMAL SUGGESTED LEARNING RATE = 0.0002477076355991711
Using learning rate = 0.0002477076355991711


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.3224,0.218503
2,0.3287,0.218445
3,0.307,0.218306
4,0.3034,0.218164
5,0.2858,0.2179
6,0.287,0.217582
7,0.2574,0.217252
8,0.2429,0.216912
9,0.2274,0.217226
10,0.2208,0.21833


INFO:p-3887243:t-23136672057088:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:00:11 EDT)" (scheduled at 2024-10-04 09:00:11.371807-04:00)
INFO:p-3887243:t-23136672057088:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:00:26 EDT)" executed successfully
INFO:p-3887243:t-23136672057088:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:00:41 EDT)" (scheduled at 2024-10-04 09:00:26.371807-04:00)
INFO:p-3887243:t-23136672057088:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:00:41 EDT)" executed successfully
INFO:p-3887243:t-23150364181248:base.py:shutdown:Scheduler has been shut down
ERROR:p-3887243:t-23150364181248:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 0.879707932472229 seconds, Total Train Time = 39.25970387458801
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.2727772295475006, 'eval_runtime': 1.3475, 'eval_samples_per_second': 2066.766, 'eval_steps_per_second': 32.653, 'epoch': 18.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.362
1   etth2   0.276    0.273

Running zero-shot/few-shot for TTM-512 on dataset = ettm1, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 33953, val = 11425, test = 11425
INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.3376680314540863, 'eval_model_preparation_time': 0.002, 'eval_runtime': 2.5227, 'eval_samples_per_second': 4528.917, 'eval_steps_per_second': 70.956}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96
INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 1607, val = 11425, test = 11425


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00035938136638046257
OPTIMAL SUGGESTED LEARNING RATE = 0.00035938136638046257
Using learning rate = 0.00035938136638046257


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.4244,0.407012
2,0.3734,0.41397
3,0.3389,0.425914
4,0.2984,0.441531
5,0.2803,0.451525
6,0.2663,0.446058
7,0.262,0.441062
8,0.2508,0.432998
9,0.2468,0.425966
10,0.2417,0.421164


INFO:p-3887243:t-23137808574208:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:00:59 EDT)" (scheduled at 2024-10-04 09:00:59.639080-04:00)
INFO:p-3887243:t-23137808574208:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:01:14 EDT)" executed successfully
INFO:p-3887243:t-23137808574208:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:01:29 EDT)" (scheduled at 2024-10-04 09:01:14.639080-04:00)
INFO:p-3887243:t-23137808574208:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:01:29 EDT)" executed successfully
INFO:p-3887243:t-23150364181248:base.py:shutdown:Scheduler has been shut down
ERROR:p-3887243:t-23150364181248:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 1.3348177129572087 seconds, Total Train Time = 41.04363250732422
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.3408427834510803, 'eval_runtime': 2.0507, 'eval_samples_per_second': 5571.326, 'eval_steps_per_second': 87.288, 'epoch': 11.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.362
1   etth2   0.276    0.273
2   ettm1   0.338    0.341

Running zero-shot/few-shot for TTM-512 on dataset = ettm2, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 33953, val = 11425, test = 11425
INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.17649634182453156, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 2.9051, 'eval_samples_per_second': 3932.716, 'eval_steps_per_second': 61.615}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96
INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 1607, val = 11425, test = 11425


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00035938136638046257
OPTIMAL SUGGESTED LEARNING RATE = 0.00035938136638046257
Using learning rate = 0.00035938136638046257


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.2881,0.122861
2,0.256,0.123697
3,0.2342,0.125028
4,0.2062,0.126568
5,0.1885,0.128257
6,0.18,0.131432
7,0.1681,0.132874
8,0.1625,0.135289
9,0.1569,0.134445
10,0.1535,0.138203


INFO:p-3887243:t-23137804371712:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:01:50 EDT)" (scheduled at 2024-10-04 09:01:50.649271-04:00)
INFO:p-3887243:t-23137804371712:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:02:05 EDT)" executed successfully
INFO:p-3887243:t-23137804371712:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:02:20 EDT)" (scheduled at 2024-10-04 09:02:05.649271-04:00)
INFO:p-3887243:t-23137804371712:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:02:20 EDT)" executed successfully
INFO:p-3887243:t-23150364181248:base.py:shutdown:Scheduler has been shut down
ERROR:p-3887243:t-23150364181248:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 1.3339608149095015 seconds, Total Train Time = 41.09284782409668
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.17622655630111694, 'eval_runtime': 2.1012, 'eval_samples_per_second': 5437.469, 'eval_steps_per_second': 85.191, 'epoch': 11.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.362
1   etth2   0.276    0.273
2   ettm1   0.338    0.341
3   ettm2   0.176    0.176

Running zero-shot/few-shot for TTM-512 on dataset = weather, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 36280, val = 5175, test = 10444
INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.15046171844005585, 'eval_model_preparation_time': 0.0022, 'eval_runtime': 3.6, 'eval_samples_per_second': 2901.142, 'eval_steps_per_second': 45.556}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96
INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 1723, val = 5175, test = 10444


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.0033516026509388406
OPTIMAL SUGGESTED LEARNING RATE = 0.0033516026509388406
Using learning rate = 0.0033516026509388406


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.16,0.405245
2,0.1537,0.412479
3,0.1476,0.424075
4,0.1403,0.47608
5,0.136,0.461959
6,0.1324,0.488006
7,0.1308,0.474276
8,0.1274,0.495313
9,0.1263,0.50316
10,0.129,0.461539


INFO:p-3887243:t-23143592994560:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:02:42 EDT)" (scheduled at 2024-10-04 09:02:42.924288-04:00)
INFO:p-3887243:t-23143592994560:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:02:57 EDT)" executed successfully
INFO:p-3887243:t-23143592994560:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:03:12 EDT)" (scheduled at 2024-10-04 09:02:57.924288-04:00)
INFO:p-3887243:t-23143592994560:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:03:12 EDT)" executed successfully
INFO:p-3887243:t-23150364181248:base.py:shutdown:Scheduler has been shut down
ERROR:p-3887243:t-23150364181248:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 1.5050957853143865 seconds, Total Train Time = 40.163811922073364
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.15043412148952484, 'eval_runtime': 2.5545, 'eval_samples_per_second': 4088.477, 'eval_steps_per_second': 64.201, 'epoch': 11.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96


   dataset  zs_mse  fs5_mse
0    etth1   0.363    0.362
1    etth2   0.276    0.273
2    ettm1   0.338    0.341
3    ettm2   0.176    0.176
4  weather   0.150    0.150

Running zero-shot/few-shot for TTM-512 on dataset = electricity, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 17805, val = 2537, test = 5165
INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.18014171719551086, 'eval_model_preparation_time': 0.002, 'eval_runtime': 14.0682, 'eval_samples_per_second': 367.141, 'eval_steps_per_second': 11.515}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96


-------------------- Running few-shot 5% --------------------


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 800, val = 2537, test = 5165


Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00017073526474706903
OPTIMAL SUGGESTED LEARNING RATE = 0.00017073526474706903
Using learning rate = 0.00017073526474706903


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.2114,0.154705
2,0.2089,0.152494
3,0.2064,0.149164
4,0.203,0.144169
5,0.1987,0.139577
6,0.1946,0.137131
7,0.1913,0.134782
8,0.188,0.132271
9,0.185,0.130578
10,0.1822,0.128892


INFO:p-3887243:t-23143549196032:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:03:54 EDT)" (scheduled at 2024-10-04 09:03:54.001639-04:00)
INFO:p-3887243:t-23143549196032:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:04:09 EDT)" executed successfully
INFO:p-3887243:t-23143549196032:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:04:24 EDT)" (scheduled at 2024-10-04 09:04:09.001639-04:00)
INFO:p-3887243:t-23143549196032:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:04:24 EDT)" executed successfully
INFO:p-3887243:t-23143549196032:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:04:39 EDT)" (scheduled at 2024-10-04 09:04:24.001639-04:00)
INFO:p-3887243:t-231435

[TrackingCallback] Mean Epoch Time = 3.212447304725647 seconds, Total Train Time = 452.76988768577576
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.14508052170276642, 'eval_runtime': 10.3136, 'eval_samples_per_second': 500.796, 'eval_steps_per_second': 15.707, 'epoch': 50.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96


       dataset  zs_mse  fs5_mse
0        etth1   0.363    0.362
1        etth2   0.276    0.273
2        ettm1   0.338    0.341
3        ettm2   0.176    0.176
4      weather   0.150    0.150
5  electricity   0.180    0.145

Running zero-shot/few-shot for TTM-512 on dataset = traffic, forecast_len = 96
Model will be loaded from ibm-granite/granite-timeseries-ttm-v1/512_96_r2


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 11673, val = 1661, test = 3413
INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.5177494287490845, 'eval_model_preparation_time': 0.002, 'eval_runtime': 24.4254, 'eval_samples_per_second': 139.731, 'eval_steps_per_second': 17.482}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96


-------------------- Running few-shot 5% --------------------


INFO:p-3887243:t-23150364181248:data_handling.py:load_dataset:Data lengths: train = 493, val = 1661, test = 3413


Number of params before freezing backbone 805280
Number of params after freezing the backbone 289696
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-3887243:t-23150364181248:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.0002477076355991711
OPTIMAL SUGGESTED LEARNING RATE = 0.0002477076355991711
Using learning rate = 0.0002477076355991711


INFO:p-3887243:t-23150364181248:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-3887243:t-23150364181248:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.2975,0.417052
2,0.2871,0.403708
3,0.2797,0.395805
4,0.2736,0.390183
5,0.268,0.384714
6,0.2616,0.376553
7,0.2553,0.370888
8,0.2493,0.365246
9,0.2436,0.358401
10,0.2386,0.35454


INFO:p-3887243:t-23143500941056:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:14 EDT)" (scheduled at 2024-10-04 09:12:14.166967-04:00)
INFO:p-3887243:t-23143500941056:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:29 EDT)" executed successfully
INFO:p-3887243:t-23143500941056:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:44 EDT)" (scheduled at 2024-10-04 09:12:29.166967-04:00)
INFO:p-3887243:t-23143500941056:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:44 EDT)" executed successfully
INFO:p-3887243:t-23143500941056:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:59 EDT)" (scheduled at 2024-10-04 09:12:44.166967-04:00)
INFO:p-3887243:t-231435

[TrackingCallback] Mean Epoch Time = 4.679463973045349 seconds, Total Train Time = 697.0795240402222
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.40992745757102966, 'eval_runtime': 18.0082, 'eval_samples_per_second': 189.525, 'eval_steps_per_second': 23.711, 'epoch': 50.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       dataset  zs_mse  fs5_mse
0        etth1   0.363    0.362
1        etth2   0.276    0.273
2        ettm1   0.338    0.341
3        ettm2   0.176    0.176
4      weather   0.150    0.150
5  electricity   0.180    0.145
6      traffic   0.518    0.410


## Benchmarking results*

*Slight differences from the results reported in the TTM paper are possible due to different training environments.

In [6]:
# Display the results table assembled by the benchmarking loop (one row per dataset).
df_out

Unnamed: 0,dataset,zs_mse,fs5_mse,zs_eval_time,fs5_mean_epoch_time,fs5_total_train_time,fs5_best_val_metric
0,etth1,0.363,0.362,5.981,0.916,44.121,0.662
1,etth2,0.276,0.273,0.621,0.88,39.26,0.217
2,ettm1,0.338,0.341,2.523,1.335,41.044,0.407
3,ettm2,0.176,0.176,2.905,1.334,41.093,0.123
4,weather,0.15,0.15,3.6,1.505,40.164,0.405
5,electricity,0.18,0.145,14.068,3.212,452.77,0.12
6,traffic,0.518,0.41,24.425,4.679,697.08,0.333
