In [1]:
# Silence every Python warning for the rest of the session so the demo output stays compact.
import warnings
warnings.filterwarnings('ignore')

import os
import sys
import logging
import pandas as pd
import numpy as np
import yaml

# Put the parent of the current working directory on the import path;
# presumably that is the repository root holding the `auto_ab` package (TODO confirm).
sys.path.append(os.path.dirname(os.path.abspath('')))

from auto_ab.splitter.params import SplitBuilderParams
from auto_ab.prepilot.params import PrepilotParams
from auto_ab.prepilot.prepilot_experiment_builder import PrepilotExperimentBuilder
from auto_ab.prepilot.prepilot_split_builder import PrepilotSplitBuilder
from auto_ab.auto_ab.abtest import ABTest
from auto_ab.auto_ab.params import ABTestParams

# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level = logging.INFO)

# Reload imported modules automatically before each cell executes,
# so edits to the library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Demo dataset used by every prepilot case below.
df = pd.read_csv('./data/ab_data.csv')

# Parse the prepilot demo config. A malformed YAML file is reported and
# otherwise ignored, which leaves `ab_config` unbound; a missing file still raises.
try:
    with open("./config/prepilot_demo_config.yaml", "r") as stream:
        ab_config = yaml.safe_load(stream)
except yaml.YAMLError as yaml_error:
    print(yaml_error)

In [3]:
from auto_ab.auto_ab.params import DataParams, HypothesisParams

# Column mapping for the dataset, gathered in one place and unpacked into DataParams.
data_fields = dict(
    id_col='id',
    group_col='groups',
    control_name='control',
    treatment_name='target',
    strata_col='country',
    target='height_now',
    target_flg='bought',
    predictors=['weight_now'],
    numerator='clicks',
    denominator='sessions',
    covariate='height_prev',
    target_prev='height_prev',
    predictors_prev=['weight_prev'],
    cluster_col='kl-divergence',
    clustering_cols=['col1', 'col2', 'col3'],
    is_grouped=True,
)
data_params = DataParams(**data_fields)

# Hypothesis settings: error rates, split proportions, and the metric under test.
hypothesis_fields = dict(
    alpha=0.05,
    beta=0.2,
    alternative='two-sided',
    split_ratios=(0.5, 0.5),
    strategy='simple_test',
    strata='country',
    strata_weights={'US': 0.8, 'UK': 0.2},
    metric_type='solid',
    metric_name='mean',
    metric=np.mean,
    n_boot_samples=200,
    n_buckets=50,
)
hypothesis_params = HypothesisParams(**hypothesis_fields)

# Bundle both parameter objects for the experiment builder.
ab_params = ABTestParams(data_params, hypothesis_params)

In [4]:
# Group names to split into; sizes are left as None here.
group_sizes = {'control': None, 'target': None}

# Splitter configuration for the numeric-metric cases.
split_builder_params = SplitBuilderParams(
    map_group_names_to_sizes=group_sizes,
    main_strata_col="moda_city",
    split_metric_col="height_now",
    id_col="id",
    cols=["height_prev"],
    cat_cols=["country"],
    pvalue=0.05,
    n_top_cat=5,
    stat_test="ttest_ind",
    n_bins_rto=6,
    bin_min_size=500,
)

# Кейс №7. Препилот для числовой переменной

In [5]:
# Case 7 grid: three injected effects, group sizes from 35000 to 50000 in
# steps of 5000, no variance reduction, bootstrap confidence-interval test.
case7_settings = dict(
    metrics_names=['height_now'],
    injects=[1.0001, 1.0002, 1.0003],
    min_group_size=35000,
    max_group_size=50000,
    step=5000,
    variance_reduction=None,
    use_buckets=False,
    stat_test=ABTest.test_hypothesis_boot_confint,
    iterations_number=10,
    max_beta_score=0.9,
    min_beta_score=0.02,
)
prepilot_params = PrepilotParams(**case7_settings)

In [6]:
# Assemble the prepilot experiment from the dataset and the three parameter objects.
prepilot = PrepilotExperimentBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

In [7]:
# Run the prepilot; collect() returns two results, unpacked as (beta, alpha).
(beta, alpha) = prepilot.collect()

In [8]:
# Display the `beta` table returned by prepilot.collect():
# rows are metric/MDE pairs, columns are group-size pairs.
beta

Unnamed: 0_level_0,split_rate,"(35000, 35000)","(40000, 40000)","(45000, 45000)","(50000, 50000)"
metric,MDE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
height_now,0.01%,>=0.9,>=0.9,>=0.9,>=0.9
height_now,0.02%,>=0.9,>=0.9,>=0.9,>=0.9
height_now,0.03%,0.8,>=0.9,>=0.9,0.7


In [9]:
# Display the `alpha` table returned by prepilot.collect():
# one row per metric, one column per group-size pair.
alpha

split_rate,"(35000, 35000)","(40000, 40000)","(45000, 45000)","(50000, 50000)"
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
height_now,0.0,0.0,0.0,0.0


# Кейс №8. Препилот для числовой переменной со снижением дисперсии

In [None]:
# Case 8 grid: same metric as case 7, but with ABTest.cuped as the variance
# reduction step, a denser list of injected effects, and 50 iterations.
case8_injects = [
    1.0001, 1.0002, 1.0003, 1.0004, 1.00042,
    1.00044, 1.00046, 1.00048, 1.0005, 1.001,
]
case8_settings = dict(
    metrics_names=['height_now'],
    injects=case8_injects,
    min_group_size=35000,
    max_group_size=45000,
    step=5000,
    variance_reduction=ABTest.cuped,
    use_buckets=False,
    stat_test=ABTest.test_hypothesis_boot_confint,
    iterations_number=50,
    max_beta_score=0.9,
    min_beta_score=0.02,
)
prepilot_params = PrepilotParams(**case8_settings)

In [None]:
# Rebuild the experiment so it picks up the case 8 prepilot parameters.
prepilot = PrepilotExperimentBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

In [None]:
# Run the prepilot; collect() returns two results, unpacked as (beta, alpha).
(beta, alpha) = prepilot.collect()

In [None]:
# Display the `beta` result of the preceding prepilot.collect() call.
beta

In [None]:
# Display the `alpha` result of the preceding prepilot.collect() call.
alpha

# Кейс №9. Препилот для бинарной переменной

In [None]:
# Reload the demo dataset for the binary-metric case. The path matches the
# initial load at the top of the notebook, so the notebook works from one
# working directory (the bare 'ab_data.csv' pointed at a different location).
df = pd.read_csv('./data/ab_data.csv')

In [None]:
# Data description for the binary-metric case: the target switches to
# 'conversion'. control_name / treatment_name are set to the same group labels
# as the first DataParams cell and the splitter's map_group_names_to_sizes.
data_params = DataParams(
    id_col='id',
    group_col='groups',
    control_name='control',
    treatment_name='target',
    strata_col='country',
    target='conversion',
    target_flg='bought',
    predictors=['weight_now'],
    numerator='clicks',
    denominator='sessions',
    covariate='height_prev',
    target_prev='height_prev',
    predictors_prev=['weight_prev'],
    cluster_col='kl-divergence',
    clustering_cols=['col1', 'col2', 'col3'],
    is_grouped=True
)

# ABTestParams takes the data and hypothesis parameter objects, as in the
# executed cell earlier in this notebook. The previous five-argument call
# referenced simulation_params, result_params and splitter_params, none of
# which are defined here, so it raised NameError on a fresh kernel.
# hypothesis_params is reused unchanged from the earlier cell.
ab_params = ABTestParams(data_params, hypothesis_params)

In [None]:
# Splitter configuration for the binary-metric case. Keyword names follow the
# SplitBuilderParams call that was executed earlier in this notebook
# (main_strata_col / id_col); the older region_col / customer_col spellings
# did not match it.
# NOTE(review): n_bins_rto and bin_min_size are passed in the earlier cell but
# not here - confirm they have defaults in SplitBuilderParams.
split_builder_params = SplitBuilderParams(
    map_group_names_to_sizes={
        'control': None,
        'target': None
    },
    main_strata_col = "moda_city",
    split_metric_col = "conversion",
    id_col = "id",
    cols = [],
    cat_cols=[],
    pvalue=0.05,
    n_top_cat=100,
    stat_test="ttest_ind"
)

In [None]:
# Case 9 grid: 'conversion' metric, nine injected effects, group sizes from
# 35000 to 55000 in steps of 5000, no variance reduction, 50 iterations.
prepilot_params = PrepilotParams(
    metrics_names=['conversion'],
    injects=[1.001,1.002,1.0022,1.0024,1.0026,1.0028,1.003,1.005,1.01],
    min_group_size=35000, 
    max_group_size=55000, 
    step=5000,
    variance_reduction = None,
    use_buckets = False,
    stat_test = ABTest.test_hypothesis_boot_confint,
    iterations_number = 50,
    max_beta_score=0.9,
    min_beta_score=0.01,
)

# Build ABTestParams from the two parameter objects that exist in this
# notebook. The previous call also passed simulation_params, result_params and
# splitter_params, which are never defined and raised NameError.
ab_params = ABTestParams(data_params, hypothesis_params)

In [None]:
# Rebuild the experiment with the binary-metric data, prepilot and splitter parameters.
prepilot = PrepilotExperimentBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

In [None]:
# Run the prepilot; collect() returns two results, unpacked as (beta, alpha).
(beta, alpha) = prepilot.collect()

In [None]:
# Display the `beta` result of the preceding prepilot.collect() call.
beta

In [None]:
# Display the `alpha` result of the preceding prepilot.collect() call.
alpha