In [19]:
import warnings
warnings.filterwarnings('ignore')

import os
import sys
import logging
import pandas as pd
import numpy as np
import yaml

#sys.path.append(os.path.dirname(os.path.abspath('')))
sys.path.append('../')

from abacus.splitter.params import SplitBuilderParams
from abacus.mde_researcher.params import MdeParams
from abacus.mde_researcher.mde_research_builder import MdeResearchBuilder
from abacus.mde_researcher.multiple_split_builder import MultipleSplitBuilder
from abacus.auto_ab.abtest import ABTest
from abacus.auto_ab.params import ABTestParams
from abacus.auto_ab.params import DataParams, HypothesisParams

logging.basicConfig(level = logging.INFO)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Load the demo dataset that is passed to MdeResearchBuilder in the cases below.
df = pd.read_csv('./data/ab_data.csv')

# Parse the pre-pilot YAML config.
# A parse failure is printed and then re-raised: swallowing it would leave
# `ab_config` unbound and surface later as an unrelated NameError.
with open("./config/prepilot_demo_config.yaml", "r") as stream:
    try:
        ab_config = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
        raise

In [21]:
# Column mapping handed to ABTestParams for the numeric-metric cases.
# NOTE(review): the grouping comments below are inferred from the argument
# names only — confirm against the DataParams definition.
data_params = DataParams(
    # identifiers and group labels
    id_col="id",
    group_col="groups",
    control_name="control",
    treatment_name="target",
    strata_col="country",
    # current-period metric columns
    target="height_now",
    target_flg="bought",
    predictors=["weight_now"],
    numerator="clicks",
    denominator="sessions",
    # previous-period columns
    covariate="height_prev",
    target_prev="height_prev",
    predictors_prev=["weight_prev"],
    # clustering
    cluster_col="kl-divergence",
    clustering_cols=["col1", "col2", "col3"],
    is_grouped=True,
)

# Hypothesis settings shared by every ABTestParams built in this notebook.
# NOTE(review): grouping comments are inferred from the argument names —
# confirm against the HypothesisParams definition.
hypothesis_params = HypothesisParams(
    # error rates and test direction
    alpha=0.05,
    beta=0.2,
    alternative="two-sided",
    # split and stratification
    split_ratios=(0.5, 0.5),
    strategy="simple_test",
    strata="country",
    strata_weights={"US": 0.8, "UK": 0.2},
    # metric definition
    metric_type="solid",
    metric_name="mean",
    metric=np.mean,
    # resampling sizes
    n_boot_samples=200,
    n_buckets=50,
)

# Bundle the column mapping and the hypothesis settings into one object;
# it is passed to MdeResearchBuilder further down.
ab_params = ABTestParams(
    data_params,
    hypothesis_params,
)

In [22]:
# Split configuration passed to MdeResearchBuilder.
# Both groups are registered with a size of None.
# NOTE(review): how None sizes are resolved is decided inside the library — confirm.
split_builder_params = SplitBuilderParams(
    map_group_names_to_sizes={"control": None, "target": None},
    main_strata_col="moda_city",
    split_metric_col="height_now",
    id_col="id",
    cols=["height_prev"],
    cat_cols=["country"],
    pvalue=0.05,
    n_bins=6,
    min_cluster_size=500,
)

# Кейс №7. Препилот для числовой переменной

In [16]:
# Case 7: pre-pilot settings for the numeric metric 'height_now',
# without variance reduction and without buckets.
# NOTE(review): injects look like multiplicative effect sizes and the
# min/max/step triple like a grid of group sizes — confirm against MdeParams.
prepilot_params = MdeParams(
    metrics_names=["height_now"],
    injects=[1.0001, 1.0002, 1.0003],
    min_group_size=35000,
    max_group_size=50000,
    step=5000,
    variance_reduction=None,
    use_buckets=False,
    stat_test=ABTest.test_boot_confint,
    iterations_number=10,
    max_beta_score=0.9,
    min_beta_score=0.02,
)

In [17]:
# Assemble the research builder from the dataset and the three parameter objects.
prepilot = MdeResearchBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

In [18]:
# Run the pre-pilot; collect() returns a pair that is unpacked into beta and alpha.
beta, alpha = prepilot.collect()

INFO:numexpr.utils:NumExpr defaulting to 4 threads.


ValidationError: 2 validation errors for ABTestParams
data_params
  instance of DataParams, tuple or dict expected (type=type_error.dataclass; class_name=DataParams)
hypothesis_params
  instance of HypothesisParams, tuple or dict expected (type=type_error.dataclass; class_name=HypothesisParams)

In [None]:
# Display the first object returned by prepilot.collect().
beta

In [None]:
# Display the second object returned by prepilot.collect().
alpha

# Кейс №8. Препилот для числовой переменной со снижением дисперсии

In [None]:
# Case 8: pre-pilot settings for the numeric metric 'height_now',
# with ABTest.cuped passed as the variance-reduction step.
# NOTE(review): injects look like multiplicative effect sizes and the
# min/max/step triple like a grid of group sizes — confirm against MdeParams.
prepilot_params = MdeParams(
    metrics_names=["height_now"],
    injects=[
        1.0001, 1.0002, 1.0003, 1.0004,
        1.00042, 1.00044, 1.00046, 1.00048,
        1.0005, 1.001,
    ],
    min_group_size=35000,
    max_group_size=45000,
    step=5000,
    variance_reduction=ABTest.cuped,
    use_buckets=False,
    stat_test=ABTest.test_boot_confint,
    iterations_number=50,
    max_beta_score=0.9,
    min_beta_score=0.02,
)

In [None]:
# Assemble the research builder from the dataset and the three parameter objects.
prepilot = MdeResearchBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

In [None]:
# Run the pre-pilot; collect() returns a pair that is unpacked into beta and alpha.
beta, alpha = prepilot.collect()

In [None]:
# Display the first object returned by prepilot.collect().
beta

In [None]:
# Display the second object returned by prepilot.collect().
alpha

# Кейс №9. Препилот для бинарной переменной

In [23]:
# Read the demo dataset again so this case starts from the file on disk.
df = pd.read_csv("./data/ab_data.csv")

In [24]:
# Case 9: column mapping for the binary metric 'conversion'.
# control_name / treatment_name are passed explicitly so the group labels match
# the earlier DataParams cell and the 'control' / 'target' groups declared in
# split_builder_params, instead of falling back to the library defaults.
# NOTE(review): confirm the DataParams defaults for these two fields.
data_params = DataParams(
    id_col='id',
    group_col='groups',
    control_name='control',
    treatment_name='target',
    strata_col='country',
    target='conversion',
    target_flg='bought',
    predictors=['weight_now'],
    numerator='clicks',
    denominator='sessions',
    covariate='height_prev',
    target_prev='height_prev',
    predictors_prev=['weight_prev'],
    cluster_col='kl-divergence',
    clustering_cols=['col1', 'col2', 'col3'],
    is_grouped=True
)

# hypothesis_params is reused unchanged from the earlier cell that defines it.
ab_params = ABTestParams(data_params, hypothesis_params)

In [25]:
# Split configuration passed to MdeResearchBuilder.
# Both groups are registered with a size of None.
# NOTE(review): how None sizes are resolved is decided inside the library — confirm.
split_builder_params = SplitBuilderParams(
    map_group_names_to_sizes={"control": None, "target": None},
    main_strata_col="moda_city",
    split_metric_col="height_now",
    id_col="id",
    cols=["height_prev"],
    cat_cols=["country"],
    pvalue=0.05,
    n_bins=6,
    min_cluster_size=500,
)

In [26]:
# Case 9: pre-pilot settings for the binary metric 'conversion',
# without variance reduction and without buckets.
# ab_params is not rebuilt here: it is already constructed from the same
# data_params / hypothesis_params in the cell that defines data_params.
# NOTE(review): injects look like multiplicative effect sizes and the
# min/max/step triple like a grid of group sizes — confirm against MdeParams.
prepilot_params = MdeParams(
    metrics_names=['conversion'],
    injects=[1.001, 1.002, 1.0022, 1.0024, 1.0026, 1.0028, 1.003, 1.005, 1.01],
    min_group_size=35000,
    max_group_size=55000,
    step=5000,
    variance_reduction=None,
    use_buckets=False,
    stat_test=ABTest.test_boot_confint,
    iterations_number=50,
    max_beta_score=0.9,
    min_beta_score=0.01,
)

In [27]:
# Assemble the research builder from the dataset and the three parameter objects.
prepilot = MdeResearchBuilder(
    df,
    ab_params,
    prepilot_params,
    split_builder_params,
)

In [28]:
# Run the pre-pilot; collect() returns a pair that is unpacked into beta and alpha.
beta, alpha = prepilot.collect()

ValidationError: 2 validation errors for ABTestParams
data_params
  instance of DataParams, tuple or dict expected (type=type_error.dataclass; class_name=DataParams)
hypothesis_params
  instance of HypothesisParams, tuple or dict expected (type=type_error.dataclass; class_name=HypothesisParams)

In [None]:
# Display the first object returned by prepilot.collect().
beta

In [None]:
# Display the second object returned by prepilot.collect().
alpha