In [1]:
import torch
import numpy as np
from copy import deepcopy

def read_file(kernel_type, dim, benchmark_index, nruns):
    file_name = 'tsp_botorch_'+kernel_type+'_EI_dim_'+str(dim)+'benchmark_index_'+str(benchmark_index)+'_nrun_'+str(nruns)+'.pkl'
    data = torch.load(file_name, weights_only=False)
    l = data['outputs']
    l = [float(i) for i in l]
    return l


def read_file_filename(file_name):
    data = torch.load(file_name, weights_only=False)
    l = data['outputs']
    l = [float(i) for i in l]
    return l


def read_file_no_anchor(kernel_type, dim, benchmark_index, nruns):
    file_name = 'tsp_botorch_'+kernel_type+'_EI_dim_'+str(dim)+'benchmark_index_no_anchor_'+str(benchmark_index)+'_nrun_'+str(nruns)+'.pkl'
    data = torch.load(file_name, weights_only=False)
    l = data['outputs']
    l = [float(i) for i in l]
    return l

In [21]:
import os

def analyse_trial(dim=10, benchmark_index=0, cut=200, trial_cut=20):
    folders = os.listdir('./results')
    nruns = 20
    results_dict = {}

    for folder in folders:
        if '.' in folder:
            continue
        results_dict[folder] = []
        for nrun in range(nruns):
            results_dict[folder].append(read_file_filename(os.path.join('./results', folder, folder+f'_nrun_{nrun}.pkl')))

    all_results = []
    for key in results_dict.keys():
        results_dict[key] = np.array(results_dict[key])
        all_results.append(results_dict[key][:trial_cut, :cut])
    # print(all_results[0].shape)
    # return all_results
    global_minimum = np.min(all_results)
    print(global_minimum)
    best_so_far = [np.minimum.accumulate(res, axis=1) for res in all_results]
    regrets = [bfs - global_minimum for bfs in best_so_far]
    for i, key in enumerate(results_dict.keys()):
        # results_dict[key] = regrets[i]
        results_dict[key] = all_results[i]
    return results_dict


import numpy as np
import pandas as pd
from sklearn.metrics import auc

import numpy as np
import pandas as pd
from sklearn.metrics import auc

import numpy as np
import pandas as pd
from sklearn.metrics import auc

def evaluate_algorithms(r: dict, f_opt=None, threshold=None):
    """
    r: dict of {algorithm_name: np.ndarray of shape (n_repeats, n_iterations)}
    f_opt: known global minimum value (float)
    threshold: optional regret threshold to measure how many iterations are needed
    
    Returns:
        pd.DataFrame with aggregated metrics for each algorithm
    """
    results = []

    for algo, regrets in r.items():
        regrets = np.array(regrets)  # shape: (n_repeats, n_iterations)
        n_repeats, n_iterations = regrets.shape

        best_so_far = np.minimum.accumulate(regrets, axis=1)  # shape: (n_repeats, n_iterations)
        final_best = best_so_far[:, -1]
        auc_vals = np.array([
            auc(np.arange(1, n_iterations+1), best_so_far[i])
            for i in range(n_repeats)
        ])

        metrics = {
            "algorithm": algo,
            "final_best_mean": np.mean(final_best),
            "final_best_std": np.std(final_best),
            "auc_best_so_far_mean": np.mean(auc_vals),
            "auc_best_so_far_std": np.std(auc_vals),
        }

        if f_opt is not None:
            simple_regrets = regrets - f_opt  # ✅ 对于最小化，目标值应减去最优值
            cumulative_regrets = np.cumsum(simple_regrets, axis=1)
            mean_simple = np.mean(simple_regrets, axis=1)
            final_simple = simple_regrets[:, -1]
            final_cum = cumulative_regrets[:, -1]

            metrics.update({
                "mean_simple_regret_mean": np.mean(mean_simple),
                "mean_simple_regret_std": np.std(mean_simple),
                "final_simple_regret_mean": np.mean(final_simple),
                "final_simple_regret_std": np.std(final_simple),
                "cumulative_regret_mean": np.mean(final_cum),
                "cumulative_regret_std": np.std(final_cum),
            })
        else:
            metrics.update({
                "mean_simple_regret_mean": None,
                "mean_simple_regret_std": None,
                "final_simple_regret_mean": None,
                "final_simple_regret_std": None,
                "cumulative_regret_mean": None,
                "cumulative_regret_std": None,
            })

        if threshold is not None:
            evals_to_threshold = []
            for i in range(n_repeats):
                for j in range(n_iterations):
                    if best_so_far[i, j] <= threshold:
                        evals_to_threshold.append(j + 1)
                        break
                else:
                    evals_to_threshold.append(n_iterations)
            evals_to_threshold = np.array(evals_to_threshold)
            metrics.update({
                "evals_to_threshold_mean": np.mean(evals_to_threshold),
                "evals_to_threshold_std": np.std(evals_to_threshold),
            })
        else:
            metrics.update({
                "evals_to_threshold_mean": None,
                "evals_to_threshold_std": None,
            })

        results.append(metrics)

    return pd.DataFrame(results)


r = analyse_trial(cut=200, trial_cut=20)
evaluate_algorithms(r, 14172.0)

14172.0


Unnamed: 0,algorithm,final_best_mean,final_best_std,auc_best_so_far_mean,auc_best_so_far_std,mean_simple_regret_mean,mean_simple_regret_std,final_simple_regret_mean,final_simple_regret_std,cumulative_regret_mean,cumulative_regret_std,evals_to_threshold_mean,evals_to_threshold_std
0,qap_botorch_merge_EI_benchmark_index_k5_hash_p...,26751.8,5515.443079,6337779.55,834651.519343,40177.2895,4872.198106,33206.0,11214.765856,8035457.9,974439.6,,
1,qap_botorch_merge_EI_benchmark_index_k34_with_...,24243.0,5746.568802,5932120.75,839942.851319,37062.754,7447.692664,29689.4,15914.752478,7412550.8,1489539.0,,
2,qap_botorch_merge_EI_benchmark_index_k5_hash_b...,25713.4,4784.025798,6268920.35,808438.199904,36665.4575,5080.486505,32015.5,15921.822985,7333091.5,1016097.0,,
3,qap_botorch_merge_EI_benchmark_index_k5_permut...,31146.7,4233.899917,6927978.8,602226.732131,47270.717,718.452376,49760.4,10945.903994,9454143.4,143690.5,,
4,qap_botorch_merge_EI_benchmark_index_k5_with_p...,27259.8,5476.901493,6305699.45,836049.569777,42888.375,7668.752072,41725.6,14865.956217,8577675.0,1533750.0,,
5,qap_botorch_mallows_EI_benchmark_index_3,21908.4,4101.909048,5499942.05,744154.991184,29343.471,7675.019482,24068.5,14282.984378,5868694.2,1535004.0,,
6,qap_botorch_merge_EI_benchmark_index_k4_pairwi...,22098.6,4536.022138,5725304.95,743481.970657,33526.9195,8083.18111,29777.7,14759.668821,6705383.9,1616636.0,,
7,qap_botorch_merge_EI_benchmark_index_3,28794.6,5557.929438,6582964.85,848461.864423,43255.7725,7797.406915,45409.3,16193.501391,8651154.5,1559481.0,,
8,qap_botorch_merge_EI_benchmark_index_k4_with_p...,22994.3,5584.717406,5802634.2,880702.995549,33183.3915,8108.600598,25998.4,10537.630049,6636678.3,1621720.0,,
9,qap_botorch_merge_EI_benchmark_index_k4_permut...,29830.4,4307.413173,6796543.75,586074.306303,47453.916,710.010153,48029.2,10950.875223,9490783.2,142002.0,,


In [5]:
r = analyse_trial()

for key in r.keys():
    print(f'{key} mean regret: ', r[key].mean())

14172.0
qap_botorch_merge_EI_benchmark_index_k5_permutation_pattern_3 mean regret:  20691.926
qap_botorch_merge_EI_benchmark_index_k5_with_permutation_pattern_3 mean regret:  17570.812
qap_botorch_mallows_EI_benchmark_index_3 mean regret:  13528.6465
qap_botorch_merge_EI_benchmark_index_3 mean regret:  18960.976
qap_botorch_merge_EI_benchmark_index_k4_with_permutation_pattern_3 mean regret:  15044.822
qap_botorch_merge_EI_benchmark_index_k4_permutation_pattern_3 mean regret:  20031.46
