# How to use the auto_ab library

In [None]:
import sys, yaml, os, json
import pandas as pd
import numpy as np

# Make the parent directory importable so the local auto_ab package can be found.
sys.path.append('../')
from auto_ab import ABTest, Splitter, VarianceReduction, Graphics

## Loading config file

In [None]:
# Load the YAML configuration that the following cells read their settings from.
try:
    # The project root is the parent of the current working directory.
    project_dir = os.path.dirname(os.path.abspath(''))
    config_file = os.path.join(project_dir, 'config.yaml')
    # Explicit encoding so the file is decoded the same way on every platform.
    with open(config_file, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)
except yaml.YAMLError as exc:
    # The file was opened but its content is not valid YAML.
    print(exc)
    sys.exit(1)
except Exception as e:
    # Report the underlying error (missing file, permissions, ...) instead of
    # hiding it behind a generic message.
    print(f'Error reading the config file: {e}')
    sys.exit(1)

## Loading dataset

In [None]:
# Read the example dataset from the project directory; the 'id' column becomes the index.
# NOTE(review): a later cell passes config['data']['id_col'] to ab.use_dataset —
# confirm that this still works when 'id' is the index rather than a regular column.
dataset_path = os.path.join(project_dir, 'data/data.csv')
data = pd.read_csv(dataset_path, index_col='id')
# Last expression of the cell: displays the first rows of the dataset.
data.head()

## Initialization of splitter

If you are going to run an MDE simulation, the **split_rate** parameter can be omitted.

In [None]:
# Build the splitter from the split rate stored in the config.
splitter_cfg = config['splitter']
splitter = Splitter(split_rate=splitter_cfg['split_rate'])

## Initialization of A/B-test

Here
- **alpha** — significance level
- **alternative** — alternative hypothesis: 'less', 'more' or 'two-sided'

In [None]:
# Create the A/B test object from the hypothesis settings in the config:
# the significance level and the alternative ('less', 'more' or 'two-sided').
hypothesis_cfg = config['hypothesis']
ab = ABTest(
    alpha=hypothesis_cfg['alpha'],
    alternative=hypothesis_cfg['alternative'],
)

### Set loaded dataset as analyzed

In [None]:
# Register the loaded DataFrame with the test, naming the id and target columns
# as configured.
data_cfg = config['data']
ab.use_dataset(
    data,
    id_col=data_cfg['id_col'],
    target=data_cfg['target'],
)

### Set previously defined splitter for test

In [None]:
# Assign the previously created Splitter instance to the A/B test object.
ab.splitter = splitter

### Set list of split rates for MDE exploration

In [None]:
# Split rates to explore during the MDE simulation, taken from the config.
candidate_split_rates = config['simulation']['split_rates']
ab.split_rates = candidate_split_rates

### Set list of increments for MDE exploration

Here
- **inc_var** — list of increments, e.g. [1, 2, 3, 4, 5]
- **extra_params** — extra parameters for the increment, currently not used in the analysis

In [None]:
# Pass the increments to explore (and their extra parameters) to the test.
increment_cfg = config['simulation']['increment']
ab.set_increment(
    inc_var=increment_cfg['vars'],
    extra_params=increment_cfg['extra_params'],
)

### Create metric which you want to compare

In the example below, we want to compare the 10th percentile of the control and treatment distributions.

In [None]:
def metric(X: np.ndarray) -> float:
    """Return the 10th percentile (0.1 quantile) of the sample *X*.

    Args:
        X: Array of observed values (e.g. the control or treatment sample).

    Returns:
        The 0.1 quantile of ``X`` as computed by ``np.quantile``.
    """
    return np.quantile(X, 0.1)

### Run the MDE simulation to find the best split rate–increment combination

Here
- **n_iter** — number of iterations of simulation
- **n_boot_samples** — set if you choose bootstrap hypothesis testing
- **metric_type** — metric type: ratio or solid (continuous)
- **metric** — Python function as tested metric (quantile, median, mean, etc)
- **strategy** — strategy of hypothesis testing
- **strata** — strata column name for variance reduction
- **strata_weights** — weights of each unique value in strata column as a dictionary
- **to_csv** — whether to save the result to a CSV file
- **csv_path** — path of the CSV file to be created

In [None]:
# Collect the simulation arguments from the config (plus the metric function
# defined above) and run the MDE simulation with them.
simulation_kwargs = {
    'n_iter': config['simulation']['n_iter'],
    'n_boot_samples': config['hypothesis']['n_boot_samples'],
    'metric_type': config['metric']['metric_type'],
    'metric': metric,
    'strategy': config['hypothesis']['strategy'],
    'strata': config['hypothesis']['strata'],
    'strata_weights': config['hypothesis']['strata_weights'],
    'to_csv': config['result']['to_csv'],
    'csv_path': config['result']['csv_path'],
}
res = ab.mde_simulation(**simulation_kwargs)

### Print simulation log

Here
- **first key** — split rate
- **second key** — increment
- **value** — share of rejected H0

In [None]:
# Pretty-print the simulation result as JSON with a 4-space indent.
formatted_log = json.dumps(res, indent=4)
print(formatted_log)

### Visualize simulation log in plot

In [None]:
# Plot the simulation log from the CSV file named in the config.
# NOTE(review): presumably this file only exists when the simulation was run
# with to_csv enabled — confirm before relying on this cell.
gf = Graphics()
simulation_csv = config['result']['csv_path']
gf.plot_simulation_log(simulation_csv)