In [1]:
import os
import typing

import numpy as np
import pandas as pd
from scipy.stats import rankdata
from itertools import product
from models import bt_plot,model,glrt
from pymer4.models import Lmer


# Notebook-wide pandas settings: take no action on chained assignment and
# raise the display limits so wide/long result tables are printed in full.
for option_name, option_value in (
    ("chained_assignment", None),
    ("display.max_rows", 5000),
    ("display.max_columns", 5000),
    ("display.width", 10000),
):
    pd.set_option(option_name, option_value)

In [2]:
algos = ["algo_1", "algo_2"]
seeds = list(range(50))
benchmarks = ["bench_1", "bench_2"]
budgets = [1, 2]

# Dedicated, seeded generator: the toy data (and therefore every model fitted
# on them) are identical after "Restart Kernel -> Run All", and the global
# NumPy random state is left untouched.
TOY_DATA_SEED = 0
toy_rng = np.random.default_rng(TOY_DATA_SEED)

# One row per (algorithm, benchmark, seed, budget); "value" starts as a 0
# placeholder and is filled in below.
combinations = product(algos, benchmarks, [0], seeds, budgets)
toy_df = pd.DataFrame(combinations, columns=["algorithm", "benchmark", "value", "seed", "budget"])


def gen_value_rand(row):
    """Draw a value from N(0, 0.1), ignoring the contents of `row`."""
    return toy_rng.normal(0, 0.1)


def gen_value_seed(row):
    """Draw a value from N(0.05 * seed, 0.1): the mean grows linearly with the seed."""
    return toy_rng.normal(row["seed"]*0.05, 0.1)


def gen_value_seed2(row):
    """Draw a value from N(0.5 * (seed % 5), 0.1): the mean cycles with period 5."""
    return toy_rng.normal(row["seed"]%5*0.5, 0.1)


# Case 1: the value does not depend on the seed (drawn once; the original
# cell drew this column twice in a row).
toy_df["value"] = toy_df.apply(gen_value_rand, axis=1)

# Case 2: same design, value depends linearly on the seed.
toy_df2 = toy_df.copy()
toy_df2["value"] = toy_df2.apply(gen_value_seed, axis=1)

# Case 3: same design, value depends on the seed modulo 5.
toy_df3 = toy_df.copy()
toy_df3["value"] = toy_df3.apply(gen_value_seed2, axis=1)

# plot_1=bt_plot([[0,1]],3,toy_df,"algorithm","budget")
# plot_1.change_row(0,None,False)
# plot_1.change_row(1,f"algorithm + seed",False)
# plot_1.show()

In [5]:

class model_builder():
    """Compare several ways of adding one feature to a baseline model formula.

    The baseline formula is ``"<loss_var> ~ <included terms>"``, where the
    included terms start out as just the exploratory variable. `model` and
    `glrt` come from the project's `models` module; presumably `model` fits a
    (mixed-effects) model for a formula and `glrt` prints a likelihood-ratio
    comparison of two fitted models -- confirm against that module.

    Attributes:
        df: Data frame every model is fitted on.
        loss_formula: Left-hand side of all formulas, e.g. ``"value ~ "``.
        features: Candidate feature names (stored, not read by this class).
        included_features: Names of the terms that make up the baseline.
        factors: Column names to pass as factors when they are included.
        effect_types: Maps each included feature to its formula term.
        exploratory_var: Variable used as the baseline term and as the
            random-slope variable in the candidate terms.
    """

    def __init__(self, df: pd.DataFrame, loss_var: str = "value", exploratory_var="algorithm",
                 features: typing.Optional[typing.List[str]] = None,
                 factors: typing.Optional[typing.List[str]] = None):
        """Store the data and set up a baseline containing only `exploratory_var`.

        Args:
            df: Data to fit the models on.
            loss_var: Name of the response column.
            exploratory_var: Name of the variable of interest.
            features: Candidate feature names; defaults to an empty list.
            factors: Column names to treat as factors; defaults to an empty list.
        """
        self.df = df
        self.loss_formula = f"{loss_var} ~ "
        # Fresh lists per instance: a shared mutable default (`=[]`) would leak
        # state between builders.
        self.features = list(features) if features is not None else []
        self.factors = list(factors) if factors is not None else []
        # The baseline is built from the exploratory variable itself rather
        # than from a hard-coded "algorithm".
        self.included_features = [exploratory_var]
        self.effect_types = {exploratory_var: exploratory_var}
        self.exploratory_var = exploratory_var

    def test_feature(self, feature: str):
        """Fit the baseline plus four candidate models for `feature` and compare them.

        Candidate terms added to the baseline formula, in order:
        ``feature``, ``(1|feature)``, ``(0+exploratory_var|feature)`` and
        ``(1+exploratory_var|feature)``. The baseline is compared with the
        first candidate, then every pair of candidates is compared, and
        finally `ranef_var` of the three candidates with a ``(...|feature)``
        term is printed.

        Args:
            feature: Column name of the feature to test.

        Returns:
            None. All results are printed.
        """
        candidate_terms = [
            feature,
            f"(1|{feature})",
            f"(0+{self.exploratory_var}|{feature})",
            f"(1+{self.exploratory_var}|{feature})",
        ]

        # Join with a plain " + " so the baseline is built exactly like the
        # candidate formulas; the previous joiner re-inserted the exploratory
        # variable between every pair of included terms.
        base_formula = self.loss_formula + " + ".join(
            self.effect_types[name] for name in self.included_features
        )
        # NOTE(review): `feature` itself is never added to the factor list,
        # even when it is listed in `self.factors` -- confirm this is intended.
        factor_list = [factor for factor in self.factors if factor in self.included_features]

        base_model = model(formula=base_formula, data=self.df, factor_list=list(factor_list), dummy=False)
        new_models = [
            model(formula=f"{base_formula} + {term}", data=self.df, factor_list=list(factor_list), dummy=False)
            for term in candidate_terms
        ]
        labels = [f"baseline + {term}" for term in candidate_terms]

        # Baseline vs. the first candidate, then every candidate pair (i < j).
        # NOTE(review): several candidate pairs are not nested in each other;
        # check how `glrt` handles such comparisons.
        glrt(base_model, new_models[0], names=["baseline", labels[0]], returns=True)
        for i in range(len(new_models)):
            for j in range(i + 1, len(new_models)):
                glrt(new_models[i], new_models[j], names=[labels[i], labels[j]], returns=True)

        # `ranef_var` of the three candidates that contain a (...|feature) term.
        for fitted_model in new_models[1:]:
            print(fitted_model.ranef_var)

        # TODO: act on the comparison results (e.g. p < 0.05) instead of only
        # printing them.

# Run the same feature test on the three toy data sets. Each entry pairs the
# banner printed before the run with the frame to analyse.
seed_cases = (
    ("############ first case: No seed influence ############", toy_df),
    ("\n ############ second case: Linear seed influence ############", toy_df2),
    ("\n############ third case: Modulo seed influence ############", toy_df3),
)

case_builders = []
for case_banner, case_df in seed_cases:
    print(case_banner)
    case_builder = model_builder(case_df, exploratory_var="algorithm", features=["seed"])
    case_builder.test_feature(feature="seed")
    case_builders.append(case_builder)

# Keep the original per-case names available to later cells.
builder, builder2, builder3 = case_builders

############ first case: No seed influence ############
boundary (singular) fit: see help('isSingular') 

boundary (singular) fit: see help('isSingular') 

baseline (358.53) == baseline + seed (358.6)
Chi-Square: 0.13179679514587406, P-Value: 0.7165762864903529
baseline + seed (358.6) == baseline + (1|seed) (358.59)
Chi-Square: 0.01943027327490654, P-Value: 0.889139986678746
baseline + seed (358.6) == baseline + (0+algorithm|seed) (358.89)
Chi-Square: 0.577063066783694, P-Value: 0.4474658130108483
baseline + seed (358.6) == baseline + (1+algorithm|seed) (358.89)
Chi-Square: 0.57706306783939, P-Value: 0.44746581259538787
baseline + (1|seed) (358.59) << baseline + (0+algorithm|seed) (358.89)
Chi-Square: 0.5964933400586006, P-Value: nan
baseline + (1|seed) (358.59) << baseline + (1+algorithm|seed) (358.89)
Chi-Square: 0.5964933411142965, P-Value: nan
baseline + (0+algorithm|seed) (358.89) << baseline + (1+algorithm|seed) (358.89)
Chi-Square: 1.0556959750829265e-09, P-Value: nan
          