In [None]:
import pandas as pd

In [47]:
df = pd.read_csv("combined_results.csv")
df = df[df["split_method"] != "random_split"]

In [None]:
metric = "RMSE"
dfm   = df[df["metric"] == metric]

avg_per_task = dfm.groupby("task_id")["value"].mean()

avg_per_task = avg_per_task.reset_index(name=f"avg_{metric}")
avg_per_task.to_csv(f"avg_{metric}_per_task.csv", index=False)


Average Difference and Average Relative Difference RMSE

In [None]:
metric = "RMSE"
dfm   = df[df["metric"] == metric]

pivot_rmse = dfm.pivot_table(
    index="task_id",
    columns="model",
    values="value",
    aggfunc="mean"
)

# 4) (Optional) write out to CSV
pivot_rmse.to_csv(f"avg_{metric}_per_task_per_model.csv", float_format="%.3f")


In [42]:
def avg_relative_diff(pivot: pd.DataFrame, error_metric: bool = True) -> pd.Series:

    if error_metric:
        best_per_task = pivot.min(axis=1)   # best is minimum
        # (v - best)/best * 100
        rel_diff = (pivot.sub(best_per_task, axis=0)
                         .div(best_per_task, axis=0)
                         * 100)
    else:
        best_per_task = pivot.max(axis=1)   # best is maximum
        # (best - v)/best * 100
        rel_diff = (best_per_task.sub(pivot, axis=0)
                              .div(best_per_task, axis=0)
                              * 100)

    # average across tasks (skipping any NaNs)
    return rel_diff.mean(axis=0)


avg_diff_rmse = avg_relative_diff(pivot_rmse, error_metric=True)

print("Average relative difference (RMSE):")
print(avg_diff_rmse.round(2))


Average relative difference (RMSE):
model
ConstantPredictor          280.01
Engression                  78.69
FTTransformer               43.66
LGBMRegressor               35.15
LinearRegressor          92264.08
MLP                        133.95
RandomForestRegressor       39.83
ResNet                     109.58
dtype: float64


Average Difference and Average Relative Difference CRPS

In [None]:
metric1 = "CRPS"
dfm1   = df[df["metric"] == metric1]

pivot_crps = dfm1.pivot_table(
    index="task_id",
    columns="model",
    values="value",
    aggfunc="mean"
)

# 4) (Optional) write out to CSV
pivot_crps.to_csv(f"avg_{metric1}_per_task_per_model.csv", float_format="%.3f")


In [48]:
def avg_relative_diff(pivot: pd.DataFrame, error_metric: bool = True) -> pd.Series:

    if error_metric:
        best_per_task = pivot.min(axis=1)   # best is minimum
        # (v - best)/best * 100
        rel_diff = (pivot.sub(best_per_task, axis=0)
                         .div(best_per_task, axis=0)
                         * 100)
    else:
        best_per_task = pivot.max(axis=1)   # best is maximum
        # (best - v)/best * 100
        rel_diff = (best_per_task.sub(pivot, axis=0)
                              .div(best_per_task, axis=0)
                              * 100)

    # average across tasks (skipping any NaNs)
    return rel_diff.mean(axis=0)


avg_diff_crps = avg_relative_diff(pivot_crps, error_metric=True)

print("Average relative difference (CRPS):")
print(avg_diff_crps.round(2))


Average relative difference (CRPS):
model
ConstantPredictor          371.53
DGBT                        42.65
DRF                         62.20
Engression               15127.91
FTTransformer               46.80
LGBMRegressor               42.55
LinearRegressor          71983.54
MLP                         54.37
RandomForestRegressor       50.31
ResNet                     123.53
dtype: float64


Average Difference and Average Relative Difference LOGLOSS

In [52]:
metric2 = "LOGLOSS"
dfm2   = df[df["metric"] == metric2]

pivot_ll = dfm2.pivot_table(
    index="task_id",
    columns="model",
    values="value",
    aggfunc="mean"
)

# 4) (Optional) write out to CSV
pivot_ll.to_csv(f"avg_{metric2}_per_task_per_model.csv", float_format="%.3f")


In [51]:
def avg_relative_diff(pivot: pd.DataFrame, error_metric: bool = True) -> pd.Series:

    if error_metric:
        best_per_task = pivot.min(axis=1)   # best is minimum
        # (v - best)/best * 100
        rel_diff = (pivot.sub(best_per_task, axis=0)
                         .div(best_per_task, axis=0)
                         * 100)
    else:
        best_per_task = pivot.max(axis=1)   # best is maximum
        # (best - v)/best * 100
        rel_diff = (best_per_task.sub(pivot, axis=0)
                              .div(best_per_task, axis=0)
                              * 100)

    # average across tasks (skipping any NaNs)
    return rel_diff.mean(axis=0)


avg_diff_ll = avg_relative_diff(pivot_ll, error_metric=True)

print("Average relative difference (LogLoss):")
print(avg_diff_ll.round(2))


Average relative difference (LogLoss):
model
ConstantPredictor          371.53
DGBT                        42.65
DRF                         62.20
Engression               15127.91
FTTransformer               46.80
LGBMRegressor               42.55
LinearRegressor          71983.54
MLP                         54.37
RandomForestRegressor       50.31
ResNet                     123.53
dtype: float64


Average Difference and Average Relative Difference Accuracy

In [53]:
metric3 = "Accuracy"
dfm3   = df[df["metric"] == metric3]

pivot_acc = dfm3.pivot_table(
    index="task_id",
    columns="model",
    values="value",
    aggfunc="mean"
)

pivot_acc.to_csv(f"avg_{metric3}_per_task_per_model.csv", float_format="%.3f")


In [54]:
def avg_relative_diff(pivot: pd.DataFrame, error_metric: bool = True) -> pd.Series:

    if error_metric:
        best_per_task = pivot.min(axis=1)   # best is minimum
        # (v - best)/best * 100
        rel_diff = (pivot.sub(best_per_task, axis=0)
                         .div(best_per_task, axis=0)
                         * 100)
    else:
        best_per_task = pivot.max(axis=1)   # best is maximum
        # (best - v)/best * 100
        rel_diff = (best_per_task.sub(pivot, axis=0)
                              .div(best_per_task, axis=0)
                              * 100)

    # average across tasks (skipping any NaNs)
    return rel_diff.mean(axis=0)


avg_diff_accuracy = avg_relative_diff(pivot_acc, error_metric=False)

print("\nAverage relative difference (Accuracy):")
print(avg_diff_accuracy.round(2))



Average relative difference (Accuracy):
ConstantPredictor        NaN
Engression               NaN
FTTransformer            NaN
LGBMClassifier           NaN
LogisticRegressor        NaN
MLP                      NaN
RandomForestClassifier   NaN
ResNet                   NaN
361055                   NaN
361060                   NaN
361061                   NaN
361062                   NaN
361063                   NaN
361065                   NaN
361066                   NaN
361068                   NaN
361069                   NaN
361070                   NaN
361110                   NaN
361111                   NaN
361113                   NaN
361273                   NaN
361274                   NaN
361275                   NaN
361276                   NaN
361277                   NaN
361278                   NaN
361282                   NaN
361283                   NaN
361285                   NaN
361286                   NaN
dtype: float64
