In [2]:
%matplotlib inline
import external_ts.my_extract as my 
import numpy as np
import pandas as pd
import pickle as pkl
import importlib 
import timesfm
import sys 
import os
importlib.reload(my)

from tqdm import tqdm

# Stdout silencing helpers.
# _print_state remembers the stream that was active before blockPrint() ran
# and the open devnull handle, so enablePrint() can undo exactly what was done.
_print_state = {"saved": None, "sink": None}


# Disable
def blockPrint():
    """Redirect ``sys.stdout`` to ``os.devnull`` until ``enablePrint()`` is called.

    The stream that was active at call time is remembered so it can be put
    back later. Calling this again while output is already blocked is a no-op,
    so repeated calls never open more than one devnull handle.
    """
    if _print_state["sink"] is not None:
        return  # already silenced; keep the originally saved stream
    _print_state["saved"] = sys.stdout
    _print_state["sink"] = open(os.devnull, 'w')
    sys.stdout = _print_state["sink"]


# Restore
def enablePrint():
    """Undo ``blockPrint()``: restore the saved stream and close the devnull handle.

    The stream restored is whatever ``sys.stdout`` was when ``blockPrint()``
    was called, not ``sys.__stdout__``; the two differ whenever the host
    environment has installed its own stdout object. Calling this without a
    preceding ``blockPrint()`` does nothing.
    """
    sink = _print_state["sink"]
    if sink is None:
        return
    sys.stdout = _print_state["saved"]
    _print_state["saved"] = None
    _print_state["sink"] = None
    sink.close()  # release the file descriptor opened by blockPrint()

# Build the TimesFM model and load the pretrained 200M checkpoint.
# context_len equals the 512-step input window used for the non-illness
# datasets further down, and horizon_len equals the longest evaluated
# horizon (720), so a single forecast call can serve every horizon.
# NOTE(review): the recorded output of this cell ends in an XlaRuntimeError
# about an executable built for a V100 being run on a P40 -- presumably the
# "gpu" backend is spanning mixed GPU types on this host; confirm and restrict
# the visible devices before the model is created if so.
tfm_hparams = dict(
    context_len=512,
    horizon_len=720,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend="gpu",
)
tfm = timesfm.TimesFm(**tfm_hparams)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# (train, val, test) split ratios.
# NOTE(review): the ETT files use (1, 0, 0); presumably the loader applies its
# own fixed split for them -- confirm against my.get_timeseries_dataloaders.
_ett_split = (1, 0, 0)
_default_split = (0.7, 0.1, 0.2)

# One row per benchmark CSV (path relative to ./datasets/all_six_datasets/),
# paired with the split ratios it is loaded with.
_dataset_table = [
    ("ETT-small/ETTh1.csv", _ett_split),
    ("ETT-small/ETTh2.csv", _ett_split),
    ("ETT-small/ETTm1.csv", _ett_split),
    ("ETT-small/ETTm2.csv", _ett_split),
    ("electricity/electricity.csv", _default_split),
    ("traffic/traffic.csv", _default_split),
    ("weather/weather.csv", _default_split),
    ("exchange_rate/exchange_rate.csv", _default_split),
    ("illness/national_illness.csv", _default_split),
]

# Parallel lists consumed by the evaluation loop via zip().
filenames = [path for path, split in _dataset_table]
tvt_ratios = [split for path, split in _dataset_table]

# Default context length; the evaluation loop re-assigns it per dataset.
input_length = 512

# Forecast horizons: the illness dataset uses the short set, all others the long set.
other_horizons = [96, 192, 336, 720]
ili_horizons = [24, 36, 48, 60]


# Resume from an earlier run when a results cache exists; otherwise start
# with an empty mapping. Only a missing file is tolerated -- any other error
# while opening or unpickling propagates.
result_filename = "timesfm_results.pkl"
try:
    cache_fh = open(result_filename, "rb")
except FileNotFoundError:
    results_dict = {}  # first run: nothing cached yet
else:
    with cache_fh:
        results_dict = pkl.load(cache_fh)

# Rolling-window evaluation of TimesFM on the test portion of each dataset.
#
# For every dataset the loop:
#   1. loads the splits through my.get_timeseries_dataloaders and attaches the
#      raw CSV's "date" column to the test frame as "ds";
#   2. slides an input window of `input_length` rows over the test frame,
#      forecasts from each window, and records the MSE against the rows that
#      follow it for every horizon that still fits;
#   3. stores the per-window MSE lists in results_dict[(filename, horizon)]
#      and rewrites the pickle cache.
for filename, tvt_ratio in zip(filenames, tvt_ratios):
    print(filename)
    is_illness = "illness" in filename
    input_length = 96 if is_illness else 512
    horizons = ili_horizons if is_illness else other_horizons
    min_horizon = min(horizons)

    csv_path = f"./datasets/all_six_datasets/{filename}"
    train_loader, val_loader, test_loader = my.get_timeseries_dataloaders(
        csv_path,
        batch_size=32,
        seq_len=input_length,
        forecast_horizon=1,
        train_ratio=tvt_ratio[0],
        val_ratio=tvt_ratio[1],
        test_ratio=tvt_ratio[2]
    )
    train_data, val_data, test_data = train_loader.dataset, val_loader.dataset, test_loader.dataset
    train_df, val_df, test_df = pd.DataFrame(train_data.data), pd.DataFrame(val_data.data), pd.DataFrame(test_data.data)

    # Timestamps for the test rows come from the raw CSV, sliced with the
    # test_start/test_end bounds read from the *train* dataset object.
    raw_data = pd.read_csv(csv_path)
    test_df["ds"] = raw_data.iloc[train_data.test_start: train_data.test_end, :]["date"].to_numpy()
    test_df["ds"] = pd.to_datetime(test_df["ds"])

    # Skip datasets already in the cache; the illness dataset is always re-run.
    if (filename, min_horizon) not in results_dict or is_illness:
        mses = {h: list() for h in horizons}

        # Ground truth without the timestamp column. It does not change
        # between windows, so it is materialised once per dataset.
        truth = test_df.drop(columns=["ds"]).to_numpy()
        n_rows = len(test_df)

        # Every window start leaves room for at least the shortest horizon;
        # longer horizons that would run past the end are skipped below.
        for t in tqdm(range(n_rows - input_length - min_horizon + 1), desc=filename):
            test_df_melted = test_df.iloc[t:t+input_length].melt(["ds"], var_name="unique_id", value_name="values")

            # Silence the forecast call's stdout output. try/finally guarantees
            # stdout is restored even if the forecast raises.
            # NOTE(review): freq="H" is passed for every dataset, including
            # files whose names suggest other sampling rates -- confirm.
            blockPrint()
            try:
                preds = tfm.forecast_on_df(
                    test_df_melted,
                    freq="H",
                )
            finally:
                enablePrint()

            # Wide (time step x series) prediction matrix, built once per
            # window and sliced per horizon.
            # NOTE(review): pivot orders columns by sorted unique_id; this is
            # assumed to match the column order of `truth` -- confirm.
            pred_values = (
                preds[['unique_id', 'ds', 'timesfm']]
                .pivot(columns="unique_id", values="timesfm", index="ds")
                .to_numpy()
            )

            target_start = t + input_length
            for horizon in horizons:
                target_end = target_start + horizon
                if target_end > n_rows:
                    continue  # not enough future rows left for this horizon
                step_mse = ((pred_values[:horizon] - truth[target_start:target_end]) ** 2).mean()
                mses[horizon].append(step_mse)

        for horizon in horizons:
            results_dict[(filename, horizon)] = mses[horizon]

        # Write to a temporary file and swap it in, so an interrupted dump
        # cannot leave a truncated cache behind.
        tmp_result_filename = result_filename + ".tmp"
        with open(tmp_result_filename, "wb") as tmp_file:
            pkl.dump(results_dict, tmp_file)
        os.replace(tmp_result_filename, result_filename)


2024-09-13 16:51:30.224801: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-09-13 16:52:16.577718: W external/xla/xla/service/gpu/nvptx_compiler.cc:718] The NVIDIA driver's CUDA version is 12.0 which is older than the ptxas CUDA version (12.5.40). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 4.44 seconds.
Restoring checkpoint from /home/ajshen/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


E0913 16:52:22.746279    9556 pjrt_stream_executor_client.cc:2809] Execution of replica 0 failed: INVALID_ARGUMENT: executable is built for device CUDA:0 of type "Tesla V100-PCIE-16GB"; cannot run it on device CUDA:3 of type "Tesla P40"
E0913 16:52:22.746264    9554 pjrt_stream_executor_client.cc:2809] Execution of replica 0 failed: INVALID_ARGUMENT: executable is built for device CUDA:0 of type "Tesla V100-PCIE-16GB"; cannot run it on device CUDA:2 of type "Tesla P40"


XlaRuntimeError: INVALID_ARGUMENT: executable is built for device CUDA:0 of type "Tesla V100-PCIE-16GB"; cannot run it on device CUDA:2 of type "Tesla P40": while running replica 0 and partition 2 of a replicated computation (other replicas may have failed as well).

In [None]:
import pickle as pkl
import numpy as np
# Reload the cached results and print, for each key in the cache, the mean of
# the values stored under it.
with open("timesfm_results.pkl", "rb") as results_fh:
    results_dict = pkl.load(results_fh)

for result_key in results_dict:
    window_mses = results_dict[result_key]
    print(result_key, np.mean(window_mses))