In [1]:
from pathlib import Path
import sys

# Add project root to Python path
project_root = Path().absolute().parent
sys.path.append(str(project_root))

from utils.evaluation import Evaluator
from pathlib import Path
import yaml

def load_config(config_path: str):
    """Load configuration from YAML file."""
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    # Get the directory containing the base config
    config_dir = Path(config_path).parent

    # Load and merge model-specific configs
    model_configs = {}
    model_config_dir = config_dir / 'model_configs'

    for config_file in model_config_dir.glob('*.yaml'):
        with open(config_file, 'r') as f:
            model_configs[config_file.stem] = yaml.safe_load(f)

    # Add model configs to main config
    config['model_configs'] = model_configs

    return config

# Load config using path relative to project root
config_path = "config/base_config.yaml"
config = load_config(str(config_path))
evaluator = Evaluator(config)

# First merge all temporary results into results_log.json
evaluator.merge_results()

# Then convert the merged results to CSV
json_path ="results/results_log.json"
output_path = "results/summary.csv"
df = evaluator.generate_results_csv(json_path, output_path)

print(f"Results merged and saved to: {output_path}")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/yongboyu/Documents/PhD/PMF_Benchmark/results/results_log.json'

## Adjust the result format and layout

In [41]:
import pandas as pd

results_df = pd.read_csv("../results/merged_metrics.csv")

In [42]:
# dropout the duplicated rows in terms of dataset, horizon, and model, but only keep the latest one based on the timestamp
results_df = results_df.sort_values(by='timestamp').drop_duplicates(subset=['dataset', 'horizon', 'model'], keep='last')

In [43]:
results_df

Unnamed: 0,dataset,horizon,model,all_points_mae,all_points_rmse,last_point_mae,last_point_rmse,timestamp
51,BPI2017,7,univariate_regression_uni_random_forest,8.118182,10.366304,8.337517,14.868017,2025/3/10 18:35
52,BPI2017,7,univariate_regression_uni_xgboost,8.210440,10.760308,8.499555,15.615578,2025/3/10 18:55
202,BPI2019_1,7,univariate_regression_uni_random_forest,14.990082,21.834107,15.466607,81.038211,2025/3/10 20:36
118,Hospital_Billing,7,univariate_regression_uni_random_forest,2.201632,2.499325,2.462450,6.719690,2025/3/10 21:29
321,sepsis,7,univariate_regression_uni_random_forest,0.217632,0.245193,0.224137,0.492055,2025/3/10 22:04
...,...,...,...,...,...,...,...,...
266,RTFMP,7,statistical_prophet,3.081788,5.140460,3.222462,11.397139,2025/5/4 22:23
204,BPI2019_1,7,statistical_exp_smoothing,18.882726,27.422862,18.437111,87.768996,2025/5/5 15:06
162,BPI2019_1,28,statistical_exp_smoothing,18.870870,30.528025,17.768139,88.054391,2025/5/5 15:06
166,BPI2019_1,28,statistical_prophet,105.121012,185.225373,149.482981,1145.628324,2025/5/5 15:07


In [31]:
# divide the model column into two columns: model_type and model_name
model_type= ['baseline', 'statistical', 'regression', 'deep_learning', 'foundation', 'univariate_regression', 'univariate_dl', 'covariate_regression', 'covariate_dl']

# put the correponding model type in a new columnand the rest of the model name in another column
for model in model_type:
    results_df.loc[results_df['model'].str.contains(model), 'model_type'] = model
    results_df.loc[results_df['model'].str.contains(model), 'model_name'] = results_df['model'].str.replace(f"{model}_", "", regex=False)

In [32]:
results_df

Unnamed: 0,dataset,horizon,model,all_points_mae,all_points_rmse,last_point_mae,last_point_rmse,timestamp,model_type,model_name
51,BPI2017,7,univariate_regression_uni_random_forest,8.118182,10.366304,8.337517,14.868017,2025/3/10 18:35,univariate_regression,uni_random_forest
52,BPI2017,7,univariate_regression_uni_xgboost,8.210440,10.760308,8.499555,15.615578,2025/3/10 18:55,univariate_regression,uni_xgboost
202,BPI2019_1,7,univariate_regression_uni_random_forest,14.990082,21.834107,15.466607,81.038211,2025/3/10 20:36,univariate_regression,uni_random_forest
118,Hospital_Billing,7,univariate_regression_uni_random_forest,2.201632,2.499325,2.462450,6.719690,2025/3/10 21:29,univariate_regression,uni_random_forest
321,sepsis,7,univariate_regression_uni_random_forest,0.217632,0.245193,0.224137,0.492055,2025/3/10 22:04,univariate_regression,uni_random_forest
...,...,...,...,...,...,...,...,...,...,...
266,RTFMP,7,statistical_prophet,3.081788,5.140460,3.222462,11.397139,2025/5/4 22:23,statistical,prophet
204,BPI2019_1,7,statistical_exp_smoothing,18.882726,27.422862,18.437111,87.768996,2025/5/5 15:06,statistical,exp_smoothing
162,BPI2019_1,28,statistical_exp_smoothing,18.870870,30.528025,17.768139,88.054391,2025/5/5 15:06,statistical,exp_smoothing
166,BPI2019_1,28,statistical_prophet,105.121012,185.225373,149.482981,1145.628324,2025/5/5 15:07,statistical,prophet


In [40]:
# put the model_type and model_name columns after dataset and horizon
results_df = results_df[['dataset', 'horizon', 'model_type', 'model_name'] + [col for col in results_df.columns if col not in ['dataset', 'horizon', 'model_type', 'model_name']]]

# drop the model and timestamp column
results_df = results_df.drop(columns=['model', 'timestamp'])

KeyError: "['model'] not found in axis"

In [9]:
# create subset of the dataframe grouped by dataset and horizon
def create_subset(df, dataset, horizon):
    subset = df[(df['dataset'] == dataset) & (df['horizon'] == horizon)]
    return subset

In [10]:
# extract the unique datasets and horizons
datasets = results_df['dataset'].unique()
horizons = results_df['horizon'].unique()

In [33]:
# create a dictionary to store the subsets
subsets = {}
for dataset in datasets:
    for horizon in horizons:
        subset = create_subset(results_df, dataset, horizon)
        subsets[(dataset, horizon)] = subset

In [34]:
subsets.keys()

dict_keys([('BPI2017', 28), ('BPI2017', 7), ('Hospital_Billing', 28), ('Hospital_Billing', 7), ('BPI2019_1', 28), ('BPI2019_1', 7), ('RTFMP', 28), ('RTFMP', 7), ('sepsis', 28), ('sepsis', 7)])

In [35]:
# check the number of rows in each subset
for key, subset in subsets.items():
    print(f"Dataset: {key[0]}, Horizon: {key[1]}, Rows: {len(subset)}")

Dataset: BPI2017, Horizon: 28, Rows: 31
Dataset: BPI2017, Horizon: 7, Rows: 32
Dataset: Hospital_Billing, Horizon: 28, Rows: 31
Dataset: Hospital_Billing, Horizon: 7, Rows: 31
Dataset: BPI2019_1, Horizon: 28, Rows: 28
Dataset: BPI2019_1, Horizon: 7, Rows: 28
Dataset: RTFMP, Horizon: 28, Rows: 26
Dataset: RTFMP, Horizon: 7, Rows: 27
Dataset: sepsis, Horizon: 28, Rows: 31
Dataset: sepsis, Horizon: 7, Rows: 31


In [39]:
results_df

Unnamed: 0,dataset,horizon,model_type,model_name,all_points_mae,all_points_rmse,last_point_mae,last_point_rmse,timestamp
51,BPI2017,7,univariate_regression,uni_random_forest,8.118182,10.366304,8.337517,14.868017,2025/3/10 18:35
52,BPI2017,7,univariate_regression,uni_xgboost,8.210440,10.760308,8.499555,15.615578,2025/3/10 18:55
202,BPI2019_1,7,univariate_regression,uni_random_forest,14.990082,21.834107,15.466607,81.038211,2025/3/10 20:36
118,Hospital_Billing,7,univariate_regression,uni_random_forest,2.201632,2.499325,2.462450,6.719690,2025/3/10 21:29
321,sepsis,7,univariate_regression,uni_random_forest,0.217632,0.245193,0.224137,0.492055,2025/3/10 22:04
...,...,...,...,...,...,...,...,...,...
266,RTFMP,7,statistical,prophet,3.081788,5.140460,3.222462,11.397139,2025/5/4 22:23
204,BPI2019_1,7,statistical,exp_smoothing,18.882726,27.422862,18.437111,87.768996,2025/5/5 15:06
162,BPI2019_1,28,statistical,exp_smoothing,18.870870,30.528025,17.768139,88.054391,2025/5/5 15:06
166,BPI2019_1,28,statistical,prophet,105.121012,185.225373,149.482981,1145.628324,2025/5/5 15:07


In [23]:
# split the model column into two columns: model_type and model_name
model_type= ['baseline', 'statistical', 'regression', 'deep_learning', 'foundation', 'univariate_regression', 'univariate_dl', 'covariate_regression', 'covariate_dl']

# put the correponding model type in a new columnand the rest of the model name in another column
results_df['model_type'] = results_df['model'].apply(lambda x: next((m for m in model_type if m in x), None))
results_df['model_name'] = results_df['model'].apply(lambda x: x.split('_')[1] if '_' in x else x)

# for model in model_type:
#     results_df.loc[results_df['model'].str.contains(model), 'model_type'] = model

In [24]:
results_df

Unnamed: 0,dataset,horizon,model,all_points_mae,all_points_rmse,last_point_mae,last_point_rmse,timestamp,model_type,model_name
51,BPI2017,7,univariate_regression_uni_random_forest,8.118182,10.366304,8.337517,14.868017,2025/3/10 18:35,regression,regression
52,BPI2017,7,univariate_regression_uni_xgboost,8.210440,10.760308,8.499555,15.615578,2025/3/10 18:55,regression,regression
202,BPI2019_1,7,univariate_regression_uni_random_forest,14.990082,21.834107,15.466607,81.038211,2025/3/10 20:36,regression,regression
118,Hospital_Billing,7,univariate_regression_uni_random_forest,2.201632,2.499325,2.462450,6.719690,2025/3/10 21:29,regression,regression
321,sepsis,7,univariate_regression_uni_random_forest,0.217632,0.245193,0.224137,0.492055,2025/3/10 22:04,regression,regression
...,...,...,...,...,...,...,...,...,...,...
240,RTFMP,28,statistical_prophet,3.894544,8.505600,4.370770,18.069098,2025/5/4 22:13,statistical,prophet
266,RTFMP,7,statistical_prophet,3.081788,5.140460,3.222462,11.397139,2025/5/4 22:23,statistical,prophet
204,BPI2019_1,7,statistical_exp_smoothing,18.882726,27.422862,18.437111,87.768996,2025/5/5 15:06,statistical,exp
166,BPI2019_1,28,statistical_prophet,105.121012,185.225373,149.482981,1145.628324,2025/5/5 15:07,statistical,prophet
