In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell execution so that edits to their source are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [111]:
import argparse
import json
from pathlib import Path

import pandas as pd
import numpy as np

from mepf.experiments.postprocessing import (
    scatter_result,
    get_validity,
    get_valid_setup,
    load_scatter_results,
    load_valid_exp
)

# --- Paths ---------------------------------------------------------------
# `save_dir` is only referenced by the disabled `scatter_result` call below.
# Alternative location kept for reference:
# save_dir = Path.home() / 'Code' / 'entsearch' / 'results'
save_dir = Path.home() / 'mepf'
# Directory handed to `get_validity` and `load_valid_exp`.
res_dir = Path.home() / 'mepf' / 'data'

# --- Parameters ----------------------------------------------------------
# Tolerance on delta: forwarded to `get_validity` and reused when computing
# how many runs are kept in the per-setup statistics.
tol = .9

# Grid of experiment settings handed to `get_validity`; `grid['method']` is
# also the list of methods iterated over when printing the summary.
# `delta` takes the values 2**-1, 2**-2, ..., 2**-9.
grid = {
    "method": ["ES", "AS", "TS", "HTS", "E", "SE", "HSE"],
    "problem": ["dirichlet", "one", "two", "geometric"],
    "num_classes": [3, 10, 30, 100, 300, 1000],
    "delta": [2 ** -i for i in range(1, 10)],
    "constant": [0.5, 1, 3, 10, 24],
}

# --- Load ----------------------------------------------------------------
# NOTE(review): disabled call; presumably a one-off preprocessing step that
# must have been run before the loaders below can find their inputs - confirm.
# scatter_result(save_dir, res_dir, grid)
valid_df = get_validity(res_dir, grid, tol=tol)
valid_setup = get_valid_setup(valid_df)
if valid_setup is None:
    raise ValueError("No valid setup found")
res = load_valid_exp(res_dir, valid_setup)

# Runs whose `nb_queries` equals the -1 sentinel, or whose `success` equals
# False, are given an infinite query count so that they sort last.
# The column is cast to float first: writing np.inf into an integer column
# relies on silent upcasting, which pandas >= 2.1 deprecates.
res['nb_queries'] = res['nb_queries'].astype(float)
failed = res['nb_queries'].eq(-1) | res['success'].eq(False)
res.loc[failed, 'nb_queries'] = np.inf

In [112]:
# Distinct (problem, m, delta) combinations present in `valid_setup`, in the
# sorted order produced by groupby. `size()` is used only to materialise the
# group index: unlike `mean()` it does not aggregate the remaining columns, so
# it also works when some of them are non-numeric.
list(valid_setup.groupby(['problem', 'm', 'delta']).size().index.values)

[('dirichlet', 10, 0.25),
 ('dirichlet', 10, 0.5),
 ('geometric', 10, 0.25),
 ('geometric', 10, 0.5),
 ('one', 10, 0.5),
 ('two', 10, 0.25),
 ('two', 10, 0.5)]

In [105]:
# Setup to inspect: one (problem, m, delta) combination.
problem = 'geometric'
m = 10
delta = 2 ** -7

# NOTE(review): the constants 10 and 6 are not explained anywhere in this
# notebook; presumably they mirror how many runs were launched per setup -
# confirm against the experiment scripts.
num_exp = int(np.ceil(10 / delta)) * 6
# Number of rows kept (after sorting by `nb_queries`) when computing the
# per-method statistics: a fraction delta * (1 + tol) of `num_exp` is dropped.
num_best = int(num_exp * (1 - delta * (1 + tol)))

# Rows of `res` matching the chosen setup exactly.
cur = res[(res['delta'] == delta) & (res['m'] == m) & (res['problem'] == problem)]
# Fail loudly instead of letting the summary below print NaN statistics when
# the chosen combination is not among the loaded results.
if cur.empty:
    raise ValueError(
        f"No results for problem={problem!r}, m={m}, delta={delta}; "
        "pick one of the (problem, m, delta) combinations present in valid_setup"
    )

In [106]:
# One summary line per method for the selected setup:
#   <method> <share of runs with infinite query count> <mean> $\pm$ <std>
# where mean and std are taken over the `num_best` smallest query counts.
for method in grid['method']:
    queries = cur.loc[cur['method'] == method, 'nb_queries']
    failure_rate = queries.eq(np.inf).mean()
    # Infinite counts sort last, so they are the first to be cut off.
    kept = queries.sort_values().iloc[:num_best]
    print(method, f"{failure_rate:.5f}", rf"{kept.mean():.1f} $\pm$ {kept.std():.1f}")

ES 0.00156 377.0 $\pm$ 27.4
AS 0.00156 238.0 $\pm$ 16.3
TS 0.00156 165.4 $\pm$ 34.1
HTS 0.00156 169.9 $\pm$ 29.0
E 0.00625 123.0 $\pm$ 74.7
SE 0.00859 214.0 $\pm$ 117.8
