In [4]:
# Fit the mixed effects model
from patsy import dmatrix
from statsmodels.formula.api import mixedlm
from statsmodels.stats.multicomp import MultiComparison
import pandas as pd
import numpy as np
from pymer4.models import Lmer

# Notebook-wide pandas configuration.
# Silence the chained-assignment (SettingWithCopy) warning for this session.
pd.set_option("mode.chained_assignment", None)
# Effectively lift the truncation limits so printed frames/tables show in full.
pd.options.display.max_rows = 5000
pd.options.display.max_columns = 5000
pd.options.display.width = 10000

In [5]:
# Load the dataset that all models in this notebook are fitted on.
data = pd.read_csv("./dataset_copy_DELETEAFTER.csv")

# Column roles used to build the model formulas.
metric = "mean"            # response variable
input_id = "benchmark"     # grouping column passed as `groups`
system_id = "acquisition"  # fixed effect whose levels are compared
bin_id = "budget"          # numeric column that is cut into bins further down
bins = [0, 15, 30, 49]     # configured bin edges; the data min/max are added later


# Baseline model: a separate fixed-effect mean per system, grouped by input,
# fitted by maximum likelihood (reml=False) rather than REML.
differentMeans_model = mixedlm(
    formula=f"{metric} ~ {system_id}",
    data=data,
    groups=input_id,
)
diffModelFit = differentMeans_model.fit(reml=False)

print(diffModelFit.summary())
print(diffModelFit.random_effects)

# Make sure the edges span the observed range of `bin_id`: merge the data's
# minimum and maximum into the configured edges (the set removes duplicates).
bins_set = {*bins, data[bin_id].min(), data[bin_id].max()}
bins = sorted(bins_set)

# One label per pair of neighbouring edges, formatted as "<lower>_<upper>".
bin_labels = [f"{lower}_{upper}" for lower, upper in zip(bins, bins[1:])]


# Attach the bin class of every row as a new categorical column.
# include_lowest=True keeps rows equal to the smallest edge inside the first bin.
data[f"{bin_id}_bins"] = pd.cut(
    data[bin_id],
    bins=bins,
    labels=bin_labels,
    include_lowest=True,
)

# "Expanded" model: fixed effects for the system, the bin class and their
# interaction (system:bin), i.e. a separate mean for every (system, bin class)
# combination, plus a random intercept per input.
model_expanded = Lmer(
    f"{metric} ~  {system_id} + {bin_id}_bins + {system_id}:{bin_id}_bins + (1 | {input_id})",
    data=data,
)

# Both predictors are declared as factors; their levels are listed in order of
# first appearance in the data. Fitted by maximum likelihood (REML=False),
# without printing the summary.
model_expanded.fit(
    factors={
        column: list(data[column].unique())
        for column in (system_id, f"{bin_id}_bins")
    },
    REML=False,
    summarize=False,
)
# To inspect the fit, print model_expanded.ranef or model_expanded.summary().


                       Mixed Linear Model Regression Results
Model:                     MixedLM         Dependent Variable:         mean        
No. Observations:          180000          Method:                     ML          
No. Groups:                4               Scale:                      53.4669     
Min. group size:           45000           Log-Likelihood:             -613546.8042
Max. group size:           45000           Converged:                  Yes         
Mean group size:           45000.0                                                 
-----------------------------------------------------------------------------------
                                         Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-----------------------------------------------------------------------------------
Intercept                                 5.760    4.374  1.317 0.188 -2.813 14.332
acquisition[T.ProbabilityOfImprovement]   0.337    0.073  4.609 0.000  0.194  0.480
acquisition[T.U

In [6]:
# Estimated marginal means (EMMs) and their standard errors for each level of
# system_id, derived from the fixed effects of the baseline fit `diffModelFit`.

# The baseline formula has system_id as its only fixed effect, so the
# prediction grid needs exactly one row per system level.
grid = pd.DataFrame({system_id: np.asarray(data[system_id].unique())})
print("Grid:\n",grid)

# Fixed-effect coefficients (intercept + treatment contrasts).
betas = diffModelFit.fe_params
print("Coeffs:\n",betas)

# Design matrix X for the grid, using the same treatment coding as the model.
mat = dmatrix(f"C({system_id})", grid, return_type="matrix")
print("Matrix:\n",mat)

# EMM of each system = X @ beta.
emmeans = grid.copy()
emmeans["means"] = mat @ betas

# Keep only the fixed-effect part of the parameter covariance matrix: rows and
# columns whose name contains "Var" or "Cor" belong to the random-effects
# parameters and must not enter the SE of the fixed-effect predictions.
vcov = diffModelFit.cov_params()
vcov = vcov.loc[
    ~vcov.index.str.contains("Var|Cor"),
    ~vcov.columns.str.contains("Var|Cor"),
]

# SE of each EMM is sqrt(diag(X V X^T)). The transpose has to be multiplied in
# BEFORE taking the diagonal; taking diag(X V) first and then multiplying by
# X^T yields a different (wrong) vector and only runs when X is square.
design = np.asarray(mat)
emmeans["SE"] = np.sqrt(np.diagonal(design @ vcov.to_numpy() @ design.T))
print(emmeans)


Grid:
                  acquisition
0        ExpectedImprovement
1   ProbabilityOfImprovement
2       UpperConfidenceBound
3       qExpectedImprovement
4         qKnowledgeGradient
5  qProbabilityOfImprovement
6              qSimpleRegret
7      qUpperConfidenceBound
8               randomSearch
Coeffs:
 Intercept                                   5.759636
acquisition[T.ProbabilityOfImprovement]     0.336997
acquisition[T.UpperConfidenceBound]         0.118736
acquisition[T.qExpectedImprovement]         0.061894
acquisition[T.qKnowledgeGradient]           0.495266
acquisition[T.qProbabilityOfImprovement]    0.523160
acquisition[T.qSimpleRegret]                0.096439
acquisition[T.qUpperConfidenceBound]        0.152831
acquisition[T.randomSearch]                 0.892227
dtype: float64
Matrix:
 [[1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0.

In [58]:
# Draw synthetic samples around every estimated marginal mean and compare the
# systems pairwise with Tukey's HSD on those samples.
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Fixed seed so that the simulated samples, and therefore the Tukey table,
# are identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

grid[input_id]=data[input_id].unique()[0]
n=9
sample_size=8
# NOTE(review): a standard error relates to a standard deviation as
# SE = SD / sqrt(n_obs), so recovering SD would be SE * sqrt(n_obs). Dividing
# by n**2 (= 81) instead shrinks the spread and makes the simulated groups far
# easier to separate. Left unchanged here -- confirm the intended scaling.
emmeans["SD"] = emmeans["SE"]/(n**2)

# One row of `sample_size` normal draws per system, centred on its EMM.
# The column vectors broadcast against the (systems, sample_size) output shape.
simulated = rng.normal(
    loc=emmeans["means"].to_numpy()[:, None],
    scale=emmeans["SD"].to_numpy()[:, None],
    size=(len(emmeans), sample_size),
)

# Flatten row by row so that each system's draws are contiguous and line up
# with the repeated system names.
data_list = list(simulated.ravel())
predicted_values = pd.DataFrame(
    {
        "data": data_list,
        "names": np.repeat(list(emmeans[system_id]), sample_size),
    }
)

print(pairwise_tukeyhsd(predicted_values["data"], predicted_values["names"], alpha=0.05))


                   Multiple Comparison of Means - Tukey HSD, FWER=0.05                    
          group1                    group2          meandiff p-adj   lower   upper  reject
------------------------------------------------------------------------------------------
      ExpectedImprovement  ProbabilityOfImprovement   0.2949    0.0  0.2104  0.3795   True
      ExpectedImprovement      UpperConfidenceBound   0.0517 0.5732 -0.0329  0.1363  False
      ExpectedImprovement      qExpectedImprovement   0.0088    1.0 -0.0758  0.0934  False
      ExpectedImprovement        qKnowledgeGradient   0.4299    0.0  0.3453  0.5145   True
      ExpectedImprovement qProbabilityOfImprovement   0.4402    0.0  0.3556  0.5248   True
      ExpectedImprovement             qSimpleRegret   0.0285 0.9746 -0.0561  0.1131  False
      ExpectedImprovement     qUpperConfidenceBound   0.1307 0.0002  0.0461  0.2153   True
      ExpectedImprovement              randomSearch   0.8213    0.0  0.7367  0.9059   True