In [1]:
import pandas as pd
from pymer4 import Lmer
import scipy.stats as stats

from significance_analysis import conduct_analysis

# Read the example dataset (a CSV in the current working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer="./example_dataset.csv")

def GLRT(mod1, mod2):
    """Generalized likelihood-ratio test between two fitted models.

    Only two attributes are read from each model: ``logLike`` (a number) and
    ``coefs`` (anything supporting ``len``). Absolute differences are taken,
    so the order of the two arguments does not matter.

    Parameters
    ----------
    mod1, mod2
        Fitted model objects exposing ``logLike`` and ``coefs``.

    Returns
    -------
    dict
        ``"chi_square"``: twice the absolute log-likelihood difference.
        ``"df"``: absolute difference of ``len(coefs)`` between the models.
        ``"p"``: upper-tail chi-square probability of ``chi_square`` with
        ``df`` degrees of freedom.
    """
    chi_square = 2 * abs(mod1.logLike - mod2.logLike)
    delta_params = abs(len(mod1.coefs) - len(mod2.coefs))
    return {
        "chi_square": chi_square,
        "df": delta_params,
        # Use the survival function rather than ``1 - cdf``: the subtraction
        # rounds to exactly 0.0 once the cdf is within float precision of 1,
        # which hides how small a very small p-value actually is.
        "p": stats.chi2.sf(chi_square, df=delta_params),
    }

In [2]:
# First analysis: analyse the performance of the acquisition functions over all
# benchmarks and training rounds. Rows where the acquisition is
# "ExpectedImprovement" AND the benchmark is "Branin" are dropped; every other
# row is kept.
data2 = data.loc[
    ~((data["acquisition"] == "ExpectedImprovement") & (data["benchmark"] == "Branin"))
]
#conduct_analysis(data2, "mean", "acquisition", "benchmark",show_plots=False,summarize=False)

In [3]:
# NOTE: plain assignment, not a copy -- data3 and data refer to the same
# DataFrame, so the columns added below are visible through both names.
data3 = data

# Per-benchmark lookup tables, keyed by the values of the "benchmark" column.
dimension = dict(Branin=2, Hartmann6=6, Jahs_Bench=10, NN_HPO_Bench=10)
categorical = dict(
    Branin="Numerical",
    Hartmann6="Numerical",
    Jahs_Bench="Categorical",
    NN_HPO_Bench="Numerical",
)

# Attach both lookups as new columns. A benchmark name that is missing from a
# table raises KeyError instead of silently producing a missing value.
data3["benchmark_dim"] = data3["benchmark"].apply(lambda name: dimension[name])
data3["benchmark_cat"] = data3["benchmark"].apply(lambda name: categorical[name])


# Column-name settings. `metric` and `input_id` are passed to conduct_analysis
# in a later cell; `system_id` and `bin_id` are not used by any live code in
# the cells visible here.
metric="mean"
input_id="benchmark"
system_id="acquisition"
bin_id="budget"
# Disabled experiment: the Lmer model comparison below is wrapped in a
# triple-quoted string, so none of it executes. Because the string is the last
# expression of the cell, its repr is echoed as the cell output.
# NOTE(review): if this is ever re-enabled as written, the first complex_model
# formula references meta_cat before meta_cat is assigned further down, and
# input_id/system_id are reassigned to swapped values midway -- confirm the
# intended order before running it.
"""
# "Common"-Model assumes significant difference, which is why the system-identifier is included
complex_model = Lmer(
                formula=f"{metric}~{system_id}+{meta_cat}+(1|{input_id})", data=data3
            )
simple_model = Lmer(
                formula=f"{metric}~{system_id}+(1|{input_id})", data=data3
            )
            

#complex_model = Lmer(
#                formula=f"{metric}~{input_id}+{meta_cat}+(1|{system_id})", data=data3
#            )

meta_dim="benchmark_dim"
meta_cat="benchmark_cat"
input_id="acquisition"
system_id="benchmark"
complex_model = Lmer(
                formula=f"{metric}~{meta_dim}+{meta_cat}+(1|{input_id})", data=data3
            )
simple_model = Lmer(
                formula=f"{metric}~{meta_dim}+(1|{input_id})", data=data3
            )

# factors specifies names of system_identifier, i.e. Baseline, or Algorithm1
complex_model.fit(
    factors={system_id: list(data[system_id].unique())},
    REML=False,
    summarize=False,
)
simple_model.fit(
    factors={system_id: list(data[system_id].unique())},
    REML=False,
    summarize=False,
)
print(GLRT(simple_model, complex_model))
print(complex_model.summary())
print(complex_model.ranef)
print(simple_model.summary())
print(simple_model.ranef)
"""

'\n# "Common"-Model assumes significant difference, which is why the system-identifier is included\ncomplex_model = Lmer(\n                formula=f"{metric}~{system_id}+{meta_cat}+(1|{input_id})", data=data3\n            )\nsimple_model = Lmer(\n                formula=f"{metric}~{system_id}+(1|{input_id})", data=data3\n            )\n            \n\n#complex_model = Lmer(\n#                formula=f"{metric}~{input_id}+{meta_cat}+(1|{system_id})", data=data3\n#            )\n\nmeta_dim="benchmark_dim"\nmeta_cat="benchmark_cat"\ninput_id="acquisition"\nsystem_id="benchmark"\ncomplex_model = Lmer(\n                formula=f"{metric}~{meta_dim}+{meta_cat}+(1|{input_id})", data=data3\n            )\nsimple_model = Lmer(\n                formula=f"{metric}~{meta_dim}+(1|{input_id})", data=data3\n            )\n\n# factors specifies names of system_identifier, i.e. Baseline, or Algorithm1\ncomplex_model.fit(\n    factors={system_id: list(data[system_id].unique())},\n    REML=False,\n    su

In [8]:
# Family label for each acquisition function: the "q"-prefixed variants share
# the label of their un-prefixed counterpart.
acqu_dict1 = {
    "ExpectedImprovement": "EI",
    "ProbabilityOfImprovement": "PI",
    "UpperConfidenceBound": "UCB",
    "qExpectedImprovement": "EI",
    "qKnowledgeGradient": "KG",
    "qProbabilityOfImprovement": "PI",
    "qSimpleRegret": "SR",
    "qUpperConfidenceBound": "UCB",
    "randomSearch": "RS",
}

# Two-way category label ("AN" or "MC") for each acquisition function.
# NOTE(review): presumably analytic vs. Monte-Carlo; randomSearch is grouped
# under "AN" -- confirm that this is intended.
acqu_dict2 = {
    "ExpectedImprovement": "AN",
    "ProbabilityOfImprovement": "AN",
    "UpperConfidenceBound": "AN",
    "qExpectedImprovement": "MC",
    "qKnowledgeGradient": "MC",
    "qProbabilityOfImprovement": "MC",
    "qSimpleRegret": "MC",
    "qUpperConfidenceBound": "MC",
    "randomSearch": "AN",
}

# Derive both labels as new columns of data3; an acquisition name missing
# from a table raises KeyError.
data3["acquisition_fam"] = data3["acquisition"].apply(lambda acq: acqu_dict1[acq])
data3["acquisition_cat"] = data3["acquisition"].apply(lambda acq: acqu_dict2[acq])

def dict_keys_to_list(dict):
    """Group the keys of a mapping by their value.

    Parameters
    ----------
    dict
        Mapping whose values are hashable. The parameter name shadows the
        builtin; it is kept so existing keyword callers continue to work.

    Returns
    -------
    list of list
        One list of keys per distinct value. Groups appear in the order in
        which their value is first encountered, and keys inside a group keep
        the mapping's iteration order. An empty mapping yields ``[]``.
    """
    grouped = {}
    for key, value in dict.items():
        # setdefault creates the group on first sight of a value, which
        # replaces the manual membership test (and its redundant re-check).
        grouped.setdefault(value, []).append(key)
    return list(grouped.values())

# Alternative run on the acquisition family with explicit benchmark groups (disabled):
#print(conduct_analysis(data3,metric,"acquisition_fam",input_id,show_plots=False,show_contrasts=False, subset=[input_id,dict_keys_to_list(categorical)]))

# Compare the two acquisition categories. Judging by the recorded output, the
# subset argument makes the analysis run separately for each group of
# benchmarks that share a value in `categorical` -- confirm against
# conduct_analysis.
print(
    conduct_analysis(
        data3,
        metric,
        "acquisition_cat",
        input_id,
        show_plots=False,
        show_contrasts=False,
        subset=(input_id, categorical),
    )
)

Analysis for ['Branin', 'Hartmann6', 'NN_HPO_Bench']
P-value: 0.030374259978896423

As the p-value 0.030374259978896423 is smaller than 0.05, we can reject the Null-Hypothesis that the model that does not consider the acquisition_cat describes the data as well as the one that does. Therefore there is significant difference within acquisition_cat.

  acquisition_cat  Estimate  2.5_ci  97.5_ci     SE     DF
1              AN     0.605  -3.898    5.108  1.415  3.002
2              MC     0.519  -3.984    5.022  1.415  3.001
The best performing acquisition_cat is MC, all other perform significantly worse.

Analysis for ['Jahs_Bench']
boundary (singular) fit: see help('isSingular') 

boundary (singular) fit: see help('isSingular') 

P-value: 0.7130985659018074

As the p-value 0.7130985659018074 is not smaller than 0.05, we cannot reject the Null-Hypothesis that the model that does not consider the acquisition_cat describes the data as well as the one that does. Therefore there is no signifi