In [5]:
import pandas as pd
from pymer4 import Lmer
import scipy.stats as stats

from significance_analysis import conduct_analysis

# Load the example dataset from a CSV file; the path is relative to the
# current working directory. Later cells read at least the "acquisition"
# and "benchmark" columns of this frame.
data = pd.read_csv("./example_dataset.csv")

def GLRT(mod1, mod2):
    """Generalized likelihood-ratio test between two fitted models.

    The statistic is twice the absolute difference of the models'
    log-likelihoods; the degrees of freedom are the absolute difference in
    the lengths of their ``coefs`` attributes. The order of the two
    arguments therefore does not matter.

    Args:
        mod1: Fitted model exposing ``logLike`` and ``coefs``.
        mod2: Fitted model exposing ``logLike`` and ``coefs``.
            NOTE(review): the test is only meaningful when one model is
            nested in the other and both were fitted by maximum likelihood
            -- confirm at the call site.

    Returns:
        dict with keys ``"chi_square"`` (test statistic), ``"df"`` (degrees
        of freedom) and ``"p"`` (upper-tail chi-square probability). If both
        models have the same number of coefficients, ``df`` is 0 and the
        p-value is whatever scipy reports for a zero-df distribution.
    """
    chi_square = 2 * abs(mod1.logLike - mod2.logLike)
    delta_params = abs(len(mod1.coefs) - len(mod2.coefs))
    return {
        "chi_square": chi_square,
        "df": delta_params,
        # Use the survival function instead of `1 - cdf`: for large
        # statistics the cdf rounds to 1.0 and the subtraction collapses the
        # p-value to exactly 0.0, whereas `sf` keeps full precision.
        "p": stats.chi2.sf(chi_square, df=delta_params),
    }

In [6]:
# First Analysis: Analyse performance of acquisition functions over all benchmarks and training rounds
# Drop only the rows that are both ExpectedImprovement AND Branin; every other row is kept.
is_ei_on_branin = (data["acquisition"] == "ExpectedImprovement") & (data["benchmark"] == "Branin")
data2 = data[~is_ei_on_branin]
#conduct_analysis(data2, "mean", "acquisition", "benchmark",show_plots=False,summarize=False)

In [7]:
# Work on an independent copy: a plain `data3 = data` would only alias the
# frame, so the derived columns added below would silently appear on `data`
# as well and make earlier cells behave differently when re-run.
data3 = data.copy()

# Dimensionality assigned to each benchmark.
dimension = {
    "Branin": 2,
    "Hartmann6": 6,
    "Jahs_Bench": 10,
    "NN_HPO_Bench": 10,
}
# Search-space type label assigned to each benchmark.
categorical = {
    "Branin": "Numerical",
    "Hartmann6": "Numerical",
    "Jahs_Bench": "Categorical",
    "NN_HPO_Bench": "Numerical",
}

# Attach both pieces of metadata as columns. Direct dict indexing is kept on
# purpose so that an unknown benchmark name raises KeyError instead of
# producing a silent NaN.
data3["benchmark_dim"] = data3["benchmark"].apply(lambda x: dimension[x])
data3["benchmark_cat"] = data3["benchmark"].apply(lambda x: categorical[x])


# Column-role names consumed by the conduct_analysis(...) calls in later cells:
# the response column, the grouping ("input") column and the compared ("system") column.
metric="mean"
input_id="benchmark"
system_id="acquisition"
# Not referenced anywhere else in the visible part of the notebook.
bin_id="budget"
# NOTE: everything inside the triple-quoted string below is disabled code. It is
# never executed; because the string is the last expression of the cell, the
# notebook merely echoes it as the cell's output.
# NOTE(review): if this is ever re-enabled, be aware that (a) the first pair of
# Lmer formulas uses `meta_cat` before it is assigned further down, and (b) the
# block reassigns `input_id` / `system_id` with swapped roles, which would change
# what the later conduct_analysis(...) cells compare.
"""
# "Common"-Model assumes significant difference, which is why the system-identifier is included
complex_model = Lmer(
                formula=f"{metric}~{system_id}+{meta_cat}+(1|{input_id})", data=data3
            )
simple_model = Lmer(
                formula=f"{metric}~{system_id}+(1|{input_id})", data=data3
            )
            

#complex_model = Lmer(
#                formula=f"{metric}~{input_id}+{meta_cat}+(1|{system_id})", data=data3
#            )

meta_dim="benchmark_dim"
meta_cat="benchmark_cat"
input_id="acquisition"
system_id="benchmark"
complex_model = Lmer(
                formula=f"{metric}~{meta_dim}+{meta_cat}+(1|{input_id})", data=data3
            )
simple_model = Lmer(
                formula=f"{metric}~{meta_dim}+(1|{input_id})", data=data3
            )

# factors specifies names of system_identifier, i.e. Baseline, or Algorithm1
complex_model.fit(
    factors={system_id: list(data[system_id].unique())},
    REML=False,
    summarize=False,
)
simple_model.fit(
    factors={system_id: list(data[system_id].unique())},
    REML=False,
    summarize=False,
)
print(GLRT(simple_model, complex_model))
print(complex_model.summary())
print(complex_model.ranef)
print(simple_model.summary())
print(simple_model.ranef)
"""


'\n# "Common"-Model assumes significant difference, which is why the system-identifier is included\ncomplex_model = Lmer(\n                formula=f"{metric}~{system_id}+{meta_cat}+(1|{input_id})", data=data3\n            )\nsimple_model = Lmer(\n                formula=f"{metric}~{system_id}+(1|{input_id})", data=data3\n            )\n            \n\n#complex_model = Lmer(\n#                formula=f"{metric}~{input_id}+{meta_cat}+(1|{system_id})", data=data3\n#            )\n\nmeta_dim="benchmark_dim"\nmeta_cat="benchmark_cat"\ninput_id="acquisition"\nsystem_id="benchmark"\ncomplex_model = Lmer(\n                formula=f"{metric}~{meta_dim}+{meta_cat}+(1|{input_id})", data=data3\n            )\nsimple_model = Lmer(\n                formula=f"{metric}~{meta_dim}+(1|{input_id})", data=data3\n            )\n\n# factors specifies names of system_identifier, i.e. Baseline, or Algorithm1\ncomplex_model.fit(\n    factors={system_id: list(data[system_id].unique())},\n    REML=False,\n    su

In [8]:
# Acquisition function -> family label (the q-variants share the family of
# their non-q counterpart).
acqu_dict_fam = {
    'ExpectedImprovement': 'EI',
    'ProbabilityOfImprovement': 'PI',
    'UpperConfidenceBound': 'UCB',
    'qExpectedImprovement': 'EI',
    'qKnowledgeGradient': 'KG',
    'qProbabilityOfImprovement': 'PI',
    'qSimpleRegret': 'SR',
    'qUpperConfidenceBound': 'UCB',
    'randomSearch': 'RS',
}
# Acquisition function -> category label ('AN' or 'MC').
# NOTE(review): presumably analytic vs. Monte-Carlo -- confirm.
acqu_dict_cat = {
    'ExpectedImprovement': 'AN',
    'ProbabilityOfImprovement': 'AN',
    'UpperConfidenceBound': 'AN',
    'qExpectedImprovement': 'MC',
    'qKnowledgeGradient': 'MC',
    'qProbabilityOfImprovement': 'MC',
    'qSimpleRegret': 'MC',
    'qUpperConfidenceBound': 'MC',
    'randomSearch': 'AN',
}

# Add both labels as columns; an acquisition name missing from a mapping raises KeyError.
data3["acquisition_fam"] = data3["acquisition"].apply(lambda name: acqu_dict_fam[name])
data3["acquisition_cat"] = data3["acquisition"].apply(lambda name: acqu_dict_cat[name])



In [9]:
# Run the analysis with the benchmarks grouped according to the `categorical`
# mapping; plots, contrasts and model summaries are switched off.
categorical_means = conduct_analysis(
    data3,
    metric,
    system_id,
    input_id,
    show_plots=False,
    show_contrasts=False,
    summarize=False,
    subset=(input_id, categorical),
)

Analysis for ['Branin', 'Hartmann6', 'NN_HPO_Bench']
P-value: 1.1102230246251565e-16

As the p-value 1.1102230246251565e-16 is smaller than 0.05, we can reject the Null-Hypothesis that the model that does not consider the acquisition describes the data as well as the one that does. Therefore there is significant difference within acquisition.

P-values adjusted by tukey method for family of 36 estimates
The best performing acquisition is qExpectedImprovement, but ['ExpectedImprovement', 'UpperConfidenceBound', 'qKnowledgeGradient', 'qSimpleRegret', 'qUpperConfidenceBound'] are only insignificantly worse.

Analysis for ['Jahs_Bench']
boundary (singular) fit: see help('isSingular') 

boundary (singular) fit: see help('isSingular') 

P-value: 0.0

As the p-value 0.0 is smaller than 0.05, we can reject the Null-Hypothesis that the model that does not consider the acquisition describes the data as well as the one that does. Therefore there is significant difference within acquisition.

P-va

In [10]:
# Run the analysis with the benchmarks grouped according to the `dimension`
# mapping; plots, contrasts and model summaries are switched off.
dimensional_means = conduct_analysis(
    data3,
    metric,
    system_id,
    input_id,
    show_plots=False,
    show_contrasts=False,
    summarize=False,
    subset=(input_id, dimension),
)

Analysis for ['Branin']
boundary (singular) fit: see help('isSingular') 

boundary (singular) fit: see help('isSingular') 

P-value: 2.2334027233172904e-06

As the p-value 2.2334027233172904e-06 is smaller than 0.05, we can reject the Null-Hypothesis that the model that does not consider the acquisition describes the data as well as the one that does. Therefore there is significant difference within acquisition.

P-values adjusted by tukey method for family of 36 estimates
The best performing acquisition is ProbabilityOfImprovement, but ['ExpectedImprovement', 'UpperConfidenceBound', 'qExpectedImprovement', 'qKnowledgeGradient', 'qSimpleRegret', 'qUpperConfidenceBound'] are only insignificantly worse.

Analysis for ['Hartmann6']
boundary (singular) fit: see help('isSingular') 

boundary (singular) fit: see help('isSingular') 

P-value: 0.0

As the p-value 0.0 is smaller than 0.05, we can reject the Null-Hypothesis that the model that does not consider the acquisition describes the data

In [11]:
assert isinstance(categorical_means, dict) and isinstance(dimensional_means, dict)

# Build one frame per benchmark group and concatenate them in a single call.
# This replaces two copy-pasted loops that grew `training_set` by repeated
# pd.concat onto an empty frame, which is quadratic and relies on pandas'
# deprecated handling of empty frames when inferring result dtypes.
# Element [1] of each result is expected to be a DataFrame with the columns
# "acquisition", "Estimate" and "SE" (as the original code indexed it).
group_frames = [
    group_result[1]
    .loc[:, ["acquisition", "Estimate", "SE"]]
    .rename(columns={"Estimate": "mean"})
    .assign(benchmark_group=group_name)
    for group_means in (categorical_means, dimensional_means)
    for group_name, group_result in group_means.items()
]

if group_frames:
    # The per-group row index is kept as-is, exactly as before.
    training_set = pd.concat(group_frames)
else:
    # pd.concat rejects an empty list; fall back to the empty frame the
    # original code started from.
    training_set = pd.DataFrame(columns=["acquisition", "mean", "SE"])

# Attach the acquisition family / category labels; unknown names raise KeyError.
training_set["acquisition_fam"] = training_set["acquisition"].apply(lambda x: acqu_dict_fam[x])
training_set["acquisition_cat"] = training_set["acquisition"].apply(lambda x: acqu_dict_cat[x])
print(training_set)


                 acquisition    mean     SE                  benchmark_group acquisition_fam acquisition_cat
1        ExpectedImprovement   0.384  1.416  Branin__Hartmann6__NN_HPO_Bench              EI              AN
2   ProbabilityOfImprovement   0.608  1.416  Branin__Hartmann6__NN_HPO_Bench              PI              AN
3       UpperConfidenceBound   0.468  1.416  Branin__Hartmann6__NN_HPO_Bench             UCB              AN
4       qExpectedImprovement   0.335  1.416  Branin__Hartmann6__NN_HPO_Bench              EI              MC
5         qKnowledgeGradient   0.486  1.416  Branin__Hartmann6__NN_HPO_Bench              KG              MC
6  qProbabilityOfImprovement   0.774  1.416  Branin__Hartmann6__NN_HPO_Bench              PI              MC
7              qSimpleRegret   0.589  1.416  Branin__Hartmann6__NN_HPO_Bench              SR              MC
8      qUpperConfidenceBound   0.410  1.416  Branin__Hartmann6__NN_HPO_Bench             UCB              MC
9               ran

In [42]:
from keras import Sequential
from keras.layers import Dense
from sklearn.preprocessing import OneHotEncoder

def create_model(n_features=13):
    """Build and compile a small fully connected regression network.

    Architecture: Dense(16, relu) -> Dense(32, sigmoid) -> Dense(2, linear),
    compiled with mean-squared-error loss and the Adam optimizer.

    Args:
        n_features: Number of values in one input row. Defaults to 13, the
            width that was previously hard-coded, so ``create_model()``
            keeps working unchanged.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # `input_shape` describes a single sample without the batch axis. The
    # former `(None, 13)` declared rank-3 inputs (batch, steps, 13) although
    # the model is fed 2-D feature rows, which triggers shape-mismatch
    # warnings or errors depending on the Keras version.
    model.add(Dense(16, activation='relu', input_shape=(n_features,)))
    model.add(Dense(32, activation='sigmoid'))
    model.add(Dense(2, activation='linear'))

    # NOTE(review): "accuracy" is a classification metric and is not
    # informative for this continuous two-output regression. It is kept
    # because the calling cell unpacks and prints it; consider switching both
    # places to an error metric such as "mae".
    model.compile(loss='mse', optimizer='adam', metrics=["accuracy"])
    return model

# Instantiate the network defined by create_model().
model = create_model()

# One-hot encode the three categorical descriptors into a dense feature matrix.
ohe = OneHotEncoder()
x_data = ohe.fit_transform(
    training_set[['acquisition_cat', 'acquisition_fam', "benchmark_group"]]
).toarray()

# Regression targets: the estimated mean and its standard error.
y_data = training_set[['mean', 'SE']]

# Train the model without per-epoch logging.
model.fit(x_data, y_data, epochs=1000, verbose=0)

# Evaluate on the same rows the model was trained on (there is no hold-out split).
loss, accuracy = model.evaluate(x_data, y_data)
print('Loss:', loss)
print('Accuracy:', accuracy)

# Show predictions next to the true targets for the first ten rows.
print(model.predict(x_data[:10]))
y_data[:10]

Loss: 0.11963988095521927
Accuracy: 1.0
[[ 0.2793731   1.382649  ]
 [ 0.67498803  1.3733635 ]
 [ 0.46001232  1.3780336 ]
 [ 0.39606345  1.4025085 ]
 [ 0.4747377   1.451755  ]
 [ 0.7299327   1.4330691 ]
 [ 0.46569258  1.4321262 ]
 [ 0.3796398   1.4035348 ]
 [ 1.0583689   1.4520493 ]
 [21.7602      0.13514015]]


Unnamed: 0,mean,SE
1,0.384,1.416
2,0.608,1.416
3,0.468,1.416
4,0.335,1.416
5,0.486,1.416
6,0.774,1.416
7,0.589,1.416
8,0.41,1.416
9,0.96,1.416
1,21.886,0.106
