In [184]:
from pandas import json_normalize

In [185]:
from pathlib import Path

import numpy as np
import pandas as pd
import wandb

# W&B public API client; the run-fetching helpers below call api.runs(...) on it.
api = wandb.Api()


In [186]:
# Context identifier: used as the W&B project-name suffix
# ("garethmd/<model_name>-<context>") and as the results/<context>/ output folder.
context = "0"

In [187]:
# Run-summary fields that get cast to float and aggregated per model_id.
metrics = [
    # abs_* fields
    "abs_error", "abs_target_mean", "abs_target_sum",
    # mean_* fields
    "mean_mae", "mean_mape", "mean_mase", "mean_msmape", "mean_rmse", "mean_smape",
    # median_* fields
    "median_mae", "median_mase", "median_msmape", "median_rmse", "median_smape",
    # remaining fields
    "mse", "_runtime",
]

# Aggregation spec for groupby(...).agg(...): mean and std for every metric.
# Each metric gets its own list object.
metric_dict = dict((name, ["mean", "std"]) for name in metrics)
metric_dict

{'abs_error': ['mean', 'std'],
 'abs_target_mean': ['mean', 'std'],
 'abs_target_sum': ['mean', 'std'],
 'mean_mae': ['mean', 'std'],
 'mean_mape': ['mean', 'std'],
 'mean_mase': ['mean', 'std'],
 'mean_msmape': ['mean', 'std'],
 'mean_rmse': ['mean', 'std'],
 'mean_smape': ['mean', 'std'],
 'median_mae': ['mean', 'std'],
 'median_mase': ['mean', 'std'],
 'median_msmape': ['mean', 'std'],
 'median_rmse': ['mean', 'std'],
 'median_smape': ['mean', 'std'],
 'mse': ['mean', 'std'],
 '_runtime': ['mean', 'std']}

In [188]:
def get_config_df(model_name, context):
    """Return the flattened config of the first run of a W&B project.

    Runs are read from the project ``garethmd/{model_name}-{context}``. Each
    run's config is flattened with ``json_normalize`` after dropping keys that
    start with "_". Two columns are then added:

    * ``model_id``: ``{model_name}-{dataset}-{context_length}-{prediction_length}``
    * ``model_name``: the ``model_name`` argument

    Only the config is used; run summaries and run names are not fetched, so a
    config key called ``name``, ``summary`` or ``config`` cannot collide with
    helper columns.

    Parameters
    ----------
    model_name : str
        Model part of the project name; also the prefix of ``model_id``.
    context
        Suffix of the project name.

    Returns
    -------
    pandas.DataFrame
        At most one row (``head(1)``) holding the first run's config columns
        plus ``model_id`` and ``model_name``.

    Raises
    ------
    KeyError
        If the flattened configs have no ``dataset``, ``context_length`` or
        ``prediction_length`` column (this includes a project with no runs).
    """
    runs = api.runs(f"garethmd/{model_name}-{context}")

    # Keys starting with "_" are excluded from the config columns.
    configs = [
        {k: v for k, v in run.config.items() if not k.startswith("_")}
        for run in runs
    ]
    config_df = json_normalize(configs)

    config_df["model_id"] = (
        model_name
        + "-"
        + config_df["dataset"]
        + "-"
        + config_df["context_length"].astype(str)
        + "-"
        + config_df["prediction_length"].astype(str)
    )
    config_df["model_name"] = model_name

    return config_df.head(1)

In [189]:
# The W&B project path has the form <entity>/<project-name>.
def get_metrics_df(model_name, context):
    """Aggregate the run metrics of one W&B project into one row per model_id.

    Runs are read from the project ``garethmd/{model_name}-{context}``. Each
    run's config (minus keys starting with "_") and summary are flattened into
    columns. The columns named in the module-level ``metrics`` list are cast to
    float and aggregated per ``model_id`` as described by the module-level
    ``metric_dict``.

    Returns a DataFrame indexed by ``model_id`` whose columns are the flattened
    ``<metric>_<aggregation>`` names followed by ``name``, ``model_name``,
    ``model_architecture``, ``dataset``, ``context_length`` and
    ``prediction_length`` taken from ``groupby(...).first()``.
    """
    project_runs = api.runs(f"garethmd/{model_name}-{context}")

    # Collect one entry per run; nested dicts are flattened in the next step.
    raw_columns = {"summary": [], "config": [], "name": []}
    for run in project_runs:
        raw_columns["summary"].append(run.summary._json_dict)
        raw_columns["config"].append(
            {key: value for key, value in run.config.items() if not key.startswith("_")}
        )
        raw_columns["name"].append(run.name)
    runs_df = pd.DataFrame(raw_columns)

    # Flatten config first, then summary, and attach both to the run rows.
    # DataFrame.join raises if a flattened column name already exists.
    for nested in ("config", "summary"):
        flattened = json_normalize(runs_df[nested]).set_index(runs_df.index)
        runs_df = runs_df.join(flattened)
    runs_df = runs_df.drop(columns=["config", "summary"])

    id_prefix = model_name + "-"
    context_part = runs_df["context_length"].astype(str)
    horizon_part = runs_df["prediction_length"].astype(str)
    runs_df["model_id"] = id_prefix + runs_df["dataset"] + "-" + context_part + "-" + horizon_part
    runs_df["model_name"] = f"{model_name}-{context}"
    runs_df["model_architecture"] = model_name

    # Summary values are cast to float before aggregation.
    runs_df = runs_df.astype({metric: float for metric in metrics})

    by_model = runs_df.groupby("model_id")
    metrics_df = by_model.agg(metric_dict)
    # Collapse the (metric, aggregation) column pairs into "<metric>_<aggregation>".
    metrics_df.columns = ["_".join(parts).strip() for parts in metrics_df.columns.values]

    identity_columns = [
        "name",
        "model_name",
        "model_architecture",
        "dataset",
        "context_length",
        "prediction_length",
    ]
    identity_df = by_model.first()[identity_columns]
    return metrics_df.join(identity_df)

In [190]:
# Model projects evaluated for the current `context`.
benchmark_models = [
    "nhits-global",
    "tide-global",
    "nbeats-global",
    "patchtst-independent",
    "autoformer-independent",
    "nlinear-independent",
    "dlinear-independent",
    "patchtst-multivariate",
    "nlinear-multivariate",
    "dlinear-multivariate",
]

# One aggregated metrics frame per model, all fetched with the same context.
df_list = [get_metrics_df(model_name, context) for model_name in benchmark_models]

In [157]:
# Model projects fetched with context 1 ("autoformer-independent" is not included).
context_1_models = [
    "nhits-global",
    "tide-global",
    "nbeats-global",
    "dlinear-global",
    "nlinear-global",
    "patchtst-independent",
    "nlinear-independent",
    "dlinear-independent",
]
df_list_1 = [get_metrics_df(model_name, 1) for model_name in context_1_models]

# NOTE: not idempotent. Re-running this cell appends the same frames again;
# rebuild df_list first if this cell has to be executed a second time.
df_list = df_list + df_list_1

In [158]:
# Model projects fetched with context 2.
context_2_models = [
    "nhits-global",
    "tide-global",
    "nbeats-global",
    "dlinear-global",
    "nlinear-global",
    "patchtst-independent",
    "autoformer-independent",
    "nlinear-independent",
    "dlinear-independent",
]
df_list_2 = [get_metrics_df(model_name, 2) for model_name in context_2_models]

# NOTE: not idempotent. Re-running this cell appends the same frames again;
# rebuild df_list first if this cell has to be executed a second time.
df_list = df_list + df_list_2

In [193]:
# BENCHMARK FOR CONTEXT
# Stack the per-model frames and write one dataset x model_name table per metric.
global_df = pd.concat(df_list)

# DataFrame.to_csv does not create missing directories, so make sure the
# per-context output folder exists before the first file is written.
benchmark_dir = Path("results") / str(context)
benchmark_dir.mkdir(parents=True, exist_ok=True)

for metric in metrics:
    mean_column = f"{metric}_mean"
    global_df[["dataset", "model_name", mean_column]].pivot_table(
        index="dataset",
        columns="model_name",
        values=[mean_column],
    ).to_csv(benchmark_dir / f"{metric}.csv")

In [177]:
# LTSF COMPARISON
# This cell uses its own variable names so that the benchmark `global_df`
# (and `df_list`) built earlier are not overwritten; the comparison and
# runtime cells further down still read `global_df`.
ltsf_models = [
    "dlinear-local",
    "dlinear-global",
    "dlinear-independent",
    "dlinear-multivariate",
]

# Iterate the model names directly so no hand-maintained repeat count has to
# match the length of the list.
ltsf_df = pd.concat(
    [get_metrics_df(model_name, context) for model_name in ltsf_models]
)

metric = "mean_mase"
# Rows are (dataset, context_length), columns are model architectures; rows
# with a missing value for any architecture are dropped before export.
ltsf_df.loc[
    :,
    [
        "dataset",
        "model_name",
        "model_architecture",
        "context_length",
        f"{metric}_mean",
    ],
].pivot_table(
    index=["dataset", "context_length"],
    columns=["model_architecture"],
    values=[f"{metric}_mean"],
).dropna().to_csv(
    "results/linear_ltsf_comparison.csv"
)

In [164]:
# CONTEXT LENGTHS COMPARISON
metric = "mean_mase"
mean_column = f"{metric}_mean"

# Restrict the table to the three architectures being compared.
context_length_archs = ["dlinear-independent", "nlinear-independent", "nhits-global"]
is_compared_arch = global_df["model_architecture"].isin(context_length_archs)
context_length_rows = global_df.loc[
    is_compared_arch,
    ["dataset", "model_name", "model_architecture", "context_length", mean_column],
]

# Rows are (dataset, context_length), one column per architecture; rows with a
# missing value for any architecture are dropped before export.
context_length_table = context_length_rows.pivot_table(
    index=["dataset", "context_length"],
    columns=["model_architecture"],
    values=[mean_column],
).dropna()
context_length_table.to_csv("results/context_lengths.csv")

In [139]:
metric = "mean_mase"
mean_column = f"{metric}_mean"

# Keep only the three global architectures.
global_archs = ["nhits-global", "tide-global", "nbeats-global"]
is_global_arch = global_df["model_architecture"].isin(global_archs)
global_arch_rows = global_df.loc[
    is_global_arch,
    ["dataset", "model_name", "model_architecture", "context_length", mean_column],
]

# Displayed table: rows are (dataset, context_length), one column per architecture.
global_arch_rows.pivot_table(
    index=["dataset", "context_length"],
    columns=["model_architecture"],
    values=[mean_column],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_mase_mean,mean_mase_mean,mean_mase_mean
Unnamed: 0_level_1,model_architecture,nbeats-global,nhits-global,tide-global
dataset,context_length,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
australian_electricity_demand,420,1.093611,1.142679,1.217485
australian_electricity_demand,672,,1.236908,1.178688
bitcoin,9,6.407952,6.066285,
bitcoin,38,8.27517,6.029959,
bitcoin,60,,5.904967,
car_parts,15,0.752702,0.748139,0.747368
car_parts,24,,0.749708,0.746927
cif_2016,15,2.261364,1.622303,2.352136
cif_2016,24,,2.290737,2.785457
covid_deaths,9,7.910913,7.362239,9.068501


In [138]:
# Let pandas display up to 115 rows before truncating a frame.
pd.set_option("display.max_rows", 115)

In [53]:
# Export the mean and standard deviation of `_runtime` for every row of global_df.
runtime_columns = ["_runtime_mean", "_runtime_std"]
global_df.loc[:, runtime_columns].to_csv("results/runtime.csv")

Unnamed: 0_level_0,_runtime_mean
model_id,Unnamed: 1_level_1
nhits-global-australian_electricity_demand-420-336,105.919006
nhits-global-car_parts-15-12,53.062888
nhits-global-cif_2016-15-12,48.878760
nhits-global-covid_deaths-9-30,49.335870
nhits-global-dominick-10-8,252.621908
...,...
dlinear-multivariate-sunspot-9-30,10.057173
dlinear-multivariate-traffic_hourly-30-168,455.205816
dlinear-multivariate-traffic_weekly-65-8,5.023633
dlinear-multivariate-us_births-9-30,9.112481


In [52]:
# Show the column names of global_df as a plain Python list.
global_df.columns.tolist()

['abs_error_mean',
 'abs_error_std',
 'abs_target_mean_mean',
 'abs_target_mean_std',
 'abs_target_sum_mean',
 'abs_target_sum_std',
 'mean_mae_mean',
 'mean_mae_std',
 'mean_mape_mean',
 'mean_mape_std',
 'mean_mase_mean',
 'mean_mase_std',
 'mean_msmape_mean',
 'mean_msmape_std',
 'mean_rmse_mean',
 'mean_rmse_std',
 'mean_smape_mean',
 'mean_smape_std',
 'median_mae_mean',
 'median_mae_std',
 'median_mase_mean',
 'median_mase_std',
 'median_msmape_mean',
 'median_msmape_std',
 'median_rmse_mean',
 'median_rmse_std',
 'median_smape_mean',
 'median_smape_std',
 'mse_mean',
 'mse_std',
 '_runtime_mean',
 '_runtime_std',
 'name',
 'model_name',
 'dataset',
 'context_length',
 'prediction_length']