In [1]:
import os
import typing

import numpy as np
import pandas as pd
from scipy.stats import rankdata
from itertools import product,combinations,chain
from models import bt_plot,model,glrt
from dataset_management import get_dataset
from pymer4.models import Lmer


# Notebook-wide pandas options: turn off the chained-assignment check and raise
# the display limits so large frames are printed without truncation.
for option_name, option_value in (
    ("chained_assignment", None),
    ("display.max_rows", 5000),
    ("display.max_columns", 5000),
    ("display.width", 10000),
):
    pd.set_option(option_name, option_value)

# Results table analysed by the cells below (loaded through the project-local helper).
fig5_df = get_dataset(dataset_name="fig5_24_at25_bad_intsteps")

In [5]:
# Synthetic result tables used to sanity-check the significance tests.
# All four frames share one full-factorial design (algorithm x benchmark x seed x budget)
# and differ only in how the "value" column is generated.
SYNTHETIC_DATA_SEED=0
# Dedicated, seeded generator: a fresh kernel + "Run All" reproduces exactly the same frames.
rng=np.random.default_rng(SYNTHETIC_DATA_SEED)

algos=["1","2","3"]
seeds=[str(x) for x in range(50)]
benchmarks=["0","2","3","4"]
budgets=[1,2]
# The constant 0 is a placeholder for the "value" column, which is overwritten below.
combination = list(product(algos, benchmarks, [0],seeds,budgets))
design_columns=["algorithm", "benchmark", "value", "seed","budget"]

# Pure noise: the value depends on nothing.
random_df = pd.DataFrame(combination, columns=design_columns)
random_df["value"] = rng.normal(0, 0.1, size=len(random_df))

# Seed effect restricted to algorithms "1" and "2": for every fifth seed the mean is
# 0.5*seed, all other rows are centred on 0.5.
algo_by_seed_df = pd.DataFrame(combination, columns=design_columns)
seed_numbers = algo_by_seed_df["seed"].astype(int).to_numpy()
seed_sensitive = (algo_by_seed_df["algorithm"].isin(["1","2"]).to_numpy()) & (seed_numbers % 5 == 0)
algo_by_seed_df["value"] = rng.normal(np.where(seed_sensitive, seed_numbers*0.5, 0.5), 0.1)

# Algorithm x benchmark interaction: the mean is 0.5 * algorithm id * benchmark id.
algo_by_benchmark_df = pd.DataFrame(combination, columns=design_columns)
interaction_mean = 0.5*algo_by_benchmark_df["algorithm"].astype(int).to_numpy()*algo_by_benchmark_df["benchmark"].astype(int).to_numpy()
algo_by_benchmark_df["value"] = rng.normal(interaction_mean, 0.01)

# Budget main effect: the mean grows linearly with the budget.
value_by_budget_df=pd.DataFrame(combination, columns=design_columns)
value_by_budget_df["value"] = rng.normal(0.5*value_by_budget_df["budget"].to_numpy(), 0.1)

In [23]:

class model_builder():
    """Step-wise significance tests that decide which terms go into a model formula.

    Every test fits a smaller and a larger model with the project helper ``model`` and
    compares them with the project helper ``glrt``. The larger model counts as an
    improvement when the test's ``"p"`` entry is below ``alpha`` and its ``logLike`` is
    higher than that of the smaller model.

    Parameters:
        df: Result table containing the loss, system, benchmark and fidelity columns.
        loss_var: Name of the response column (left-hand side of every formula).
        system_var: Name of the column identifying the compared systems/algorithms.
        benchmark_var: Name of the benchmark column.
        fidelities: Names of fidelity columns to test; ``None`` means "no fidelities".
        alpha: Significance level used by all tests.

    Attributes:
        fidelity_sig: Per fidelity, -1 = not tested yet, 0 = not significant,
            1 = include as a plain term, 2 = include as interaction with the system.
    """

    def __init__(self,df:pd.DataFrame,loss_var:str="value",system_var="algorithm",benchmark_var="benchmark",fidelities=None,alpha:float=0.05):
        self.df=df
        self.loss_formula=f"{loss_var} ~ "
        self.exploratory_var=system_var
        self.benchmark_var=benchmark_var
        # The default used to be iterated as None, which raised a TypeError.
        self.fidelities=list(fidelities) if fidelities is not None else []
        self.alpha=alpha
        self.fidelity_sig={f: -1 for f in self.fidelities}

    def _improves(self,smaller,larger,names,verbose:bool=False)->bool:
        """Return True if `larger` fits significantly better than `smaller`.

        `names` is only forwarded to `glrt` when `verbose` is set, exactly as the
        individual tests did before this helper existed.
        """
        test_result=glrt(smaller,larger,names=names if verbose else None,returns=True)
        return bool(test_result["p"]<self.alpha and larger.logLike>smaller.logLike)

    def test_seed_dependency(self,verbose:bool=False):
        """Test whether a per-seed random effect of the system improves the fit.

        Returns:
            The ``Name`` entries of the non-residual random-effect rows whose variance is
            at least ten times the smallest variance and within a factor of ten of the
            largest one; an empty list if the seed model is not an improvement.
        """
        simple_model=model(formula=f"{self.loss_formula}+{self.exploratory_var}",data=self.df,factor_list=[self.exploratory_var],dummy=False)
        seed_model=model(formula=f"{self.loss_formula}+(0+{self.exploratory_var}|seed)",data=self.df,factor_list=[self.exploratory_var],dummy=False)
        if not self._improves(simple_model,seed_model,["simple","seed"],verbose):
            print("Seed is not significant")
            return []
        ranef_var=seed_model.ranef_var
        variances=ranef_var['Var']
        # Note: min/max are taken over all rows, including the residual row.
        influenced=ranef_var.loc[(variances/10 >= variances.min())&(ranef_var.index!='Residual')&(variances*10 >= variances.max())]['Name'].to_list()
        print(f"Seed is significant, likely influenced algorithms: {influenced}")
        return influenced

    def test_benchmark_information(self,rank_benchmarks:bool=False,verbose:bool=False):
        """Test, per benchmark, whether the system variable explains the loss at all.

        Parameters:
            rank_benchmarks: If set and at least one benchmark is uninformative, also fit
                a model with a per-system random effect of the benchmark on the full data
                and print its random-effect variances.
            verbose: Print the individual likelihood-ratio tests.

        Returns:
            Mapping ``benchmark -> bool`` (True = informative).
        """
        benchmark_info={}
        for benchmark in self.df[self.benchmark_var].unique():
            benchmark_df=self.df.loc[self.df[self.benchmark_var]==benchmark]
            simple_mod=model(formula=f"{self.loss_formula}+1",data=benchmark_df,factor_list=[self.exploratory_var],dummy=False)
            benchmark_mod=model(formula=f"{self.loss_formula}+{self.exploratory_var}",data=benchmark_df,factor_list=[self.exploratory_var],dummy=False)
            if verbose:
                print(f"\nBenchmark: {benchmark}")
            informative=self._improves(simple_mod,benchmark_mod,["simple","algorithm"],verbose)
            print(f"Benchmark {benchmark} is {'informative' if informative else 'uninformative'}.")
            benchmark_info[benchmark]=informative
        # Use the same verdict that was printed above instead of a second, slightly
        # different threshold check on the raw p-values.
        if rank_benchmarks and not all(benchmark_info.values()):
            all_benchmarks_mod=model(formula=f"{self.loss_formula}+(0+{self.benchmark_var}|{self.exploratory_var})",data=self.df,factor_list=[self.exploratory_var],dummy=False)
            print(all_benchmarks_mod.ranef_var)
        return benchmark_info

    def test_fidelity(self, fidelity_var:str,verbose:bool=False):
        """Decide whether `fidelity_var` enters the model, and in which form.

        Three models are fitted: the baseline (system plus a random benchmark intercept
        when there is more than one benchmark), baseline + fidelity, and
        baseline + system:fidelity. The outcome is stored in
        ``self.fidelity_sig[fidelity_var]`` (0 = none, 1 = plain term, 2 = interaction).
        """
        n_benchmarks=self.df[self.benchmark_var].nunique()
        benchmark_term=f" + (1|{self.benchmark_var})" if n_benchmarks>1 else ""
        simple_formula=f"{self.loss_formula} {self.exploratory_var}{benchmark_term}"
        # `dummy` is switched on exactly when there is a single benchmark, i.e. when the
        # formula has no random-effect term.
        dummy=n_benchmarks==1

        simple_mod=model(formula=simple_formula,data=self.df,factor_list=[self.exploratory_var],dummy=dummy)
        fidelity_mod=model(formula=f"{simple_formula} + {fidelity_var}",data=self.df,factor_list=[self.exploratory_var],dummy=dummy)
        single_significant=self._improves(simple_mod,fidelity_mod,["simple","fidelity"],verbose)
        fid_group_mod=model(formula=f"{simple_formula} + {self.exploratory_var}:{fidelity_var}",data=self.df,factor_list=[self.exploratory_var],dummy=dummy)
        interaction_significant=self._improves(simple_mod,fid_group_mod,["simple","fidelity_group"],verbose)

        if single_significant and interaction_significant:
            # Both help on their own: let a direct comparison pick the form.
            if self._improves(fidelity_mod,fid_group_mod,["fidelity","fidelity_group"],verbose):
                verdict=2
                message=f"Fidelity {fidelity_var} as single and interaction effect are both significant, but interaction is more significant."
            else:
                verdict=1
                message=f"Fidelity {fidelity_var} as single and interaction effect both significant, but as single factor is more significant."
        elif single_significant:
            verdict=1
            message=f"Fidelity {fidelity_var} as single factor significant."
        elif interaction_significant:
            verdict=2
            message=f"Fidelity {fidelity_var} as interaction is significant."
        else:
            verdict=0
            message=f"Fidelity {fidelity_var} is not significant."
        if verbose:
            print(message)
        self.fidelity_sig[fidelity_var]=verdict

    def full_test(self, verbose:bool=False):
        """Run the seed test, the benchmark test and the test of every fidelity."""
        self.test_seed_dependency(verbose=verbose)
        self.test_benchmark_information(verbose=verbose)
        for f in self.fidelities:
            self.test_fidelity(f,verbose=verbose)

    def build_model(self):
        """Print the model formula implied by the fidelity tests.

        Fidelities that have not been tested yet are tested first (verbosely).
        """
        # Iterate over a snapshot because test_fidelity writes into the same dict.
        for fidelity,sig in list(self.fidelity_sig.items()):
            if sig==-1:
                self.test_fidelity(fidelity,verbose=True)
        model_formula=f"{self.loss_formula} + {self.exploratory_var} + (1|{self.benchmark_var})"+"".join([f" + {self.exploratory_var}:{f}" if self.fidelity_sig[f]==2 else f" + {f}" for f in self.fidelities if self.fidelity_sig[f]>0])
        print(model_formula)

# Derive the model formula for the fig5 data. "used_fidelity" has not been tested at
# this point, so build_model() runs its fidelity test (verbosely) before printing.
builder=model_builder(fig5_df,system_var="algorithm",benchmark_var="benchmark",fidelities=["used_fidelity"])
builder.build_model()

simple (-244290.11) << fidelity (-243312.56)
Chi-Square: 1955.0920213969657, P-Value: 0.0
simple (-244290.11) << fidelity_group (-243001.25)
Chi-Square: 2577.7240843258332, P-Value: 0.0
fidelity (-243312.56) << fidelity_group (-243001.25)
Chi-Square: 622.6320629288675, P-Value: 0.0
value ~  algorithm + (1|benchmark) + used_fidelity + algorithm:used_fidelity
fidelity (-243312.56) << fidelity_both (-243001.25)
Chi-Square: 622.6320629238035, P-Value: 0.0
Fidelity used_fidelity as single and interaction effect are both significant, but interaction is more significant.
value ~  + algorithm + (1|benchmark) + algorithm:used_fidelity


In [None]:
# Trying to get grouping on ordinal variables:
# the fidelity enters the model as a numeric covariate and, converted to strings, as a
# categorical "<fidelity>_group" factor, so algorithms can be compared per fidelity level.

system_id="algorithm"
fidelity_var="used_fidelity"
fidelity_group_var=f"{fidelity_var}_group"
MAX_FIDELITY_EXCLUSIVE=6  # only rows with a fidelity below this value are analysed

# Filter first and copy once, so the new column is written to an independent frame
# rather than to a slice of fig5_df.
data=fig5_df.loc[fig5_df[fidelity_var]<MAX_FIDELITY_EXCLUSIVE].copy()
data[fidelity_group_var]=data[fidelity_var].apply(str)

# Lmer is already imported from pymer4.models in the notebook's first cell.
# NOTE(review): the formula contains the fidelity both as a number and as a factor; the
# saved output of this cell reports a rank-deficient fixed-effect matrix, which may be
# caused by this overlap - confirm whether the numeric term is intended.
mod1=Lmer(formula=f"value~{system_id}+{fidelity_var}+{fidelity_group_var}+{system_id}:{fidelity_group_var}+(1|benchmark)",data=data)
factor_list = {
    system_id: list(data[system_id].unique()),
    fidelity_group_var: list(data[fidelity_group_var].unique()),
}
print(factor_list)
mod1.fit(factors=factor_list, REML=False, summarize=False, verbose=True)
print(mod1.summary())
print(mod1.fixef)
# Use system_id here as well (it was hard-coded to "algorithm"), so changing the
# variable at the top of the cell changes the whole analysis consistently.
print(mod1.post_hoc(marginal_vars=system_id,grouping_vars=fidelity_group_var))

{'algorithm': ['PB', 'RS', 'HB'], 'used_fidelity_group': ['1.0', '2.0', '3.0', '4.0', '5.0']}
Fitting linear model using lmer with Wald confidence intervals...



R[write to console]: fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

R[write to console]: fixed-effect model matrix is rank deficient so dropping 1 column / coefficient



Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: value~algorithm+used_fidelity+used_fidelity_group+algorithm:used_fidelity_group+(1|benchmark)

Family: gaussian	 Inference: parametric

Number of observations: 18000	 Groups: {'benchmark': 12.0}

Log-likelihood: -62793.362 	 AIC: 125620.724

Random effects:

                  Name     Var    Std
benchmark  (Intercept)  35.643  5.970
Residual                62.468  7.904

No random effect correlations specified

Fixed effects:

                                 Estimate  2.5_ci  97.5_ci     SE         DF  T-stat  P-val  Sig
(Intercept)                        16.021  12.595   19.447  1.748     12.664   9.166    0.0  ***
algorithm1                         -9.578 -10.210   -8.945  0.323  17988.000 -29.683    0.0  ***
algorithm2                         -6.308  -6.940   -5.675  0.323  17988.000 -19.549    0.0  ***
used_fidelity                      -2.781  -2.939   -2.623  0.081  17988.000 -34.476    0.0  ***
used_fidelity_gro

  values = self.values.round(decimals)  # type: ignore[union-attr]


In [None]:
# 4-row plot over one subset of the data: every algorithm except "RS", restricted to
# the rows whose "bench_prior" is "LC-167190_at25".
# NOTE(review): the saved output following this cell shows a TypeError
# ('float' + 'NoneType') - confirm whether it originates from one of the rows below.
is_not_rs=fig5_df["algorithm"]!="RS"
is_lc_167190=fig5_df["bench_prior"]=="LC-167190_at25"
plot_1=bt_plot(
    [[0,25]],
    rows=4,
    dataset=fig5_df.loc[(is_not_rs)&(is_lc_167190)],
    algorithm_var="algorithm",
    budget_var="used_fidelity",
)
# (row index, second positional argument of change_row, loss shown in that row)
row_settings=[
    (0,None,"value"),
    (1,"algorithm","rel_rank"),
    (2,"algorithm","value"),
    (3,"algorithm","regret"),
]
for row_index,row_spec,row_loss in row_settings:
    plot_1.change_row(row_index,row_spec,globality=False,loss=row_loss)
plot_1.show()

TypeError: unsupported operand type(s) for +: 'float' and 'NoneType'

                            Name         Var        Std
algorithm      benchmarkJAHS-C10  135.685244  11.648401
algorithm       benchmarkJAHS-CH   52.165100   7.222541
algorithm       benchmarkJAHS-FM   35.441897   5.953310
algorithm     benchmarkLC-126026    0.254739   0.504716
algorithm     benchmarkLC-167190    0.125850   0.354753
algorithm     benchmarkLC-168330    0.016117   0.126953
algorithm     benchmarkLC-168910    0.046651   0.215989
algorithm     benchmarkLC-189906    0.143302   0.378552
algorithm  benchmarkPD1-Cifar100    0.045919   0.214287
algorithm  benchmarkPD1-ImageNet    0.065133   0.255213
algorithm      benchmarkPD1-LM1B    0.017931   0.133906
algorithm       benchmarkPD1-WMT    0.016730   0.129346
Residual                           16.604217   4.074827

In [None]:


def test_feature(self,feature:str):
    """Compare four ways of adding `feature` to the current model.

    The candidate terms are: a plain term, a random intercept ``(1|feature)``, a
    random effect of the system without intercept ``(0+system|feature)`` and with
    intercept ``(1+system|feature)``. Each candidate model is fitted and summarised,
    then likelihood-ratio tests are run for baseline vs. the plain term and for every
    pair of candidates. Results are only printed (by `glrt`); nothing is returned.

    Reads ``self.effect_types``, ``self.included_features`` and ``self.factors``.
    NOTE(review): none of these attributes is set by the `model_builder.__init__`
    visible in this notebook - confirm where they are expected to come from.
    """
    formulae=[feature,f"(1|{feature})",f"(0+{self.exploratory_var}|{feature})",f"(1+{self.exploratory_var}|{feature})"]
    included_terms=[self.effect_types[name] for name in self.included_features]

    # NOTE(review): the baseline joins the included terms with " + <system> + " while the
    # candidate formulas below join them with " + "; with more than one included feature
    # the two differ. Left unchanged because the intent cannot be confirmed from here.
    base_formula=self.loss_formula+f" + {self.exploratory_var} + ".join(included_terms)
    # The baseline is fitted once; an earlier, immediately overwritten fit with a
    # hard-coded "algorithm" factor list was removed.
    base_model=model(formula=base_formula,data=self.df,factor_list=[factor for factor in self.factors if factor in self.included_features],dummy=False)

    candidate_factors=[factor for factor in self.factors if factor in self.included_features+[feature]]
    new_models:list[Lmer]=[]
    for formula in formulae:
        new_formula=self.loss_formula+" + ".join(included_terms)+f" + {formula}"
        new_models.append(model(formula=new_formula,data=self.df,factor_list=candidate_factors,dummy=False))
        print(new_models[-1].summary())
        # Random-effect tables are printed only when the fit is an Lmer instance.
        if type(new_models[-1]) is Lmer:
            print(new_models[-1].ranef)
            print(new_models[-1].ranef_var)

    labels=[f"baseline + {formula}" for formula in formulae]
    glrt(base_model,new_models[0],names=["baseline",labels[0]],returns=True)
    # Every pair of candidates, in the order (0,1),(0,2),(0,3),(1,2),(1,3),(2,3).
    for first,second in combinations(range(len(new_models)),2):
        glrt(new_models[first],new_models[second],names=[labels[first],labels[second]],returns=True)

# Attach the function defined above to the class so that instances can call it as a method.
model_builder.test_feature=test_feature

# Not active yet: idea for reporting the outcome of a single test.
        # if test_results["p"]<0.05:
        #     print(f"Feature {feature} is significant")

# Run the feature test on three synthetic datasets.
# NOTE(review): the model_builder class defined earlier in this notebook accepts neither
# `features` nor `factors`; the saved output of this cell shows the resulting TypeError.
# These calls presumably target a newer constructor - confirm before re-running.
print("############ first case: No seed influence ############")
builder=model_builder(random_df,system_var="algorithm",features=["seed"],factors=["seed","algorithm"])
builder.test_feature(feature="seed")

print("\n ############ second case: Linear seed influence ############")
builder2=model_builder(algo_by_seed_df,system_var="algorithm",features=["seed"],factors=["seed","algorithm"])
builder2.test_feature(feature="seed")

# NOTE(review): `seed2_df` is not defined in any cell visible here - verify which frame
# the third case is meant to use.
print("\n ############ third case: Modulo seed influence ############")
builder3=model_builder(seed2_df,system_var="algorithm",features=["seed"],factors=["seed","algorithm"])
builder3.test_feature(feature="seed")


############ first case: No seed influence ############


TypeError: model_builder.__init__() got an unexpected keyword argument 'features'