In [6]:
import os
import pandas as pd
pd.options.plotting.backend = "plotly"
import re

list_of_metrics_dfs = []
directory_path = "./results"
for root, dirs, files in os.walk(directory_path):
    for dir_name in dirs:
        df = pd.read_csv(f"{os.path.join(root, dir_name)}/_metrics.csv")

        df["exp_name"] = dir_name

        # example1: long_term_forecast_ETTh1_96_96ETTh1_TiDE_ETTh1_ftMS_sl96_ll48_pl96_dm256_nh8_el2_dl2_df256_expand2_dc4_fc1_ebtimeF_dtTrue_test_0 
        # example2: long_term_forecast_ETTh1_96_96ETTh1_w501_ipentropy_TiDE_ETTh1_ftMS_sl96_ll48_pl96_dm256_nh8_el2_dl2_df256_expand2_dc4_fc1_ebtimeF_dtTrue_test_0
        
        split_index = dir_name.index("_ft")
        model_name = dir_name[:split_index].split("_")[-2]
        df["model_name"] = model_name

        try:
            split_index = dir_name.index("_w")
            pwindow = int(dir_name[split_index+1:].split("_")[0][1:])
            pconstructor = dir_name[split_index+1:].split("_")[1]
        except Exception as e:
            pwindow = None
            if "duplicated" in dir_name:
                pconstructor = "duplicated"
            elif "zeroed" in dir_name:
                pconstructor = "zeroed"
            else:
                pconstructor = "identity"
        
        pred_len = dir_name.split("long_term_forecast")[1].split("_")[2]
        df["pred_len"] = int(pred_len)
        lookback_len = dir_name.split("long_term_forecast")[1].split("_")[3]
        lookback_len_cleaned = ""
        for character in lookback_len:
            if character.isdigit():
                lookback_len_cleaned += character
            else:
                break
        
        df["lookback_len"] = int(lookback_len_cleaned)

        df["pwindow"] = pwindow
        df["pconstructor"] = pconstructor

        df["dataset"] = dir_name[dir_name.find("96ETTh1")+7: dir_name.find("_ft")].split("_")[-1]

        list_of_metrics_dfs += [df]

results_df = pd.concat(list_of_metrics_dfs).sort_values(by="mse", ascending=True)

# print(results_df["model_name"].unique())
# print(results_df["pconstructor"].unique())
# print(results_df["pwindow"].unique())

# results_df = results_df[
#     (results_df["pconstructor"] == "identity") & 
#     (results_df["pred_len"] == 96)
#     # (results_df["model_name"] == "TiDE")
# ]


results_df[["mse", "model_name", "pred_len", "pwindow", "pconstructor", "dataset"]].sort_values("mse")
# results_df

Unnamed: 0,mse,model_name,pred_len,pwindow,pconstructor,dataset
0,0.029947,iTransformer,96,1501,ipmutual,ETTm1
0,0.029961,iTransformer,96,1001,prollcorr.csv,ETTm1
0,0.029964,iTransformer,96,751,iprollcov.csv,ETTm1
0,0.029966,iTransformer,96,751,ipmutual,ETTm1
0,0.029969,iTransformer,96,1501,ipfarm.csv,ETTm1
...,...,...,...,...,...,...
0,0.292702,iTransformer,96,501,prollcov.csv,ECL
0,0.295578,iTransformer,96,1001,iprollcov.csv,ECL
0,0.302843,iTransformer,96,1251,iprollcov.csv,ECL
0,0.317247,iTransformer,96,751,iprollcov.csv,ECL


In [60]:
# count how many rows show better mse results than the identity baseline for each model
for model_name in results_df["model_name"].unique():
    model_df = results_df[results_df["model_name"] == model_name]
    identity_df = model_df[model_df["pconstructor"] == "identity"]
    if not identity_df.empty:
        identity_mse = identity_df["mse"].values[0]
        better_than_identity_count = (model_df["mse"] < identity_mse).sum()
        worse_than_identity_count = (model_df["mse"] > identity_mse).sum()
        print(f"{model_name}: {better_than_identity_count} better than identity | {worse_than_identity_count} worse than identity")
    else:
        print(f"{model_name}: No identity baseline found")


PatchTST: 10 better than identity | 42 worse than identity
TiDE: 0 better than identity | 30 worse than identity
iTransformer: 2 better than identity | 37 worse than identity
TimesNet: 18 better than identity | 62 worse than identity
TimeXer: 33 better than identity | 48 worse than identity
DLinear: 0 better than identity | 0 worse than identity


In [61]:
# check what combination of pconstructor and pwindow provides best enhancement for each model
list_of_best_enhancements = []

for model_name in results_df["model_name"].unique():
    model_df = results_df[results_df["model_name"] == model_name]
    identity_df = model_df[model_df["pconstructor"] == "identity"]
    if not identity_df.empty:
        identity_mse = identity_df["mse"].values[0]
        enhanced_df = model_df[model_df["mse"] < identity_mse]
        if not enhanced_df.empty:
            best_enhancement = enhanced_df.loc[enhanced_df["mse"].idxmin()]
            list_of_best_enhancements += [best_enhancement]
        else:
            print(f"{model_name}: No enhancements found")
    else:
        print(f"{model_name}: No identity baseline found")

full_best_enhancements = pd.concat(list_of_best_enhancements)
full_best_enhancements

TiDE: No enhancements found
DLinear: No enhancements found


Unnamed: 0,mae,mse,rmse,mape,mspe,exp_name,model_name,pred_len,lookback_len,pwindow,pconstructor
0,0.177786,0.055515,0.235617,0.138821,0.034883,long_term_forecast_ETTh1_96_96ETTh1_w1501_prol...,PatchTST,96,96,1501,prollcov
0,0.177873,0.055548,0.235687,0.138908,0.034925,long_term_forecast_ETTh1_96_96ETTh1_w1001_prol...,PatchTST,96,96,1001,prollcov
0,0.177807,0.055556,0.235704,0.138804,0.034867,long_term_forecast_ETTh1_96_96ETTh1_w1001_pent...,PatchTST,96,96,1001,pentropy
0,0.177797,0.055561,0.235713,0.138784,0.034859,long_term_forecast_ETTh1_96_96ETTh1_w1501_ipfa...,PatchTST,96,96,1501,ipfarm
0,0.177812,0.055561,0.235714,0.138820,0.034882,long_term_forecast_ETTh1_96_96ETTh1_w751_ipfar...,PatchTST,96,96,751,ipfarm
...,...,...,...,...,...,...,...,...,...,...,...
0,0.186280,0.058943,0.242781,0.146847,0.039208,long_term_forecast_ETTh1_96_96ETTh1_w1251_ipro...,TimeXer,96,96,1251,iprollcorr
0,0.186277,0.058970,0.242837,0.147244,0.039524,long_term_forecast_ETTh1_96_96ETTh1_w1251_prol...,TimeXer,96,96,1251,prollcov
0,0.186618,0.059019,0.242938,0.148034,0.040307,long_term_forecast_ETTh1_96_96ETTh1_w501_iprol...,TimeXer,96,96,501,iprollcorr
0,0.186587,0.059024,0.242949,0.147456,0.039578,long_term_forecast_ETTh1_96_96ETTh1_w1251_ipen...,TimeXer,96,96,1251,ipentropy


In [62]:
full_best_enhancements["pconstructor"].hist()

In [63]:
full_best_enhancements["pwindow"].hist()

In [64]:
full_best_enhancements["model_name"].hist()

In [72]:
dir_name = "long_term_forecast_ETTh1_96_96ETTh1_w751_prollcorr_TimesNet_ETTh1_ftMS_sl96_ll48_pl96_dm16_nh8_el2_dl1_df32_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0"
root = "./"

df = pd.read_csv(f"{os.path.join(root, dir_name)}/preds_vs_trues.csv")
print(max(df["cutoff_index"].unique().tolist()))
cutoff_index = 2000
df[df["cutoff_index"] == cutoff_index][["preds", "true"]].plot()

2784
