In [10]:
from datetime import datetime, timedelta
from pybrew.analytics import base_alpha, base_beta, google_analytics_io, \
    google_analytics_view_id, to_dataframe, ga_target_audience_segment, business_cycle, parallel_test_groups, ga_segment_stats_io
from pybrew import pipe

# Print the current UTC time in ISO 8601 form as a run marker for this notebook.
# utcnow() yields a naive datetime (no tzinfo), so the printed string carries
# no UTC offset suffix.
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; consider
# datetime.now(timezone.utc) when this notebook is next touched.
print(datetime.utcnow().isoformat())

import json

def to_dict(x):
    """Convert a named, list-like object into a plain ``{name: [values]}`` dict.

    ``x`` must expose a ``names`` attribute and be iterable, yielding one
    element per name (in this notebook it is the object returned by the
    ``%R`` magic).

    Falsy elements (for example empty vectors) are skipped *together with
    their name*. Filtering only the values, as was done before, shifted every
    later value onto the wrong key as soon as one element was empty.

    Returns a dict mapping each kept name to ``list(element)``.
    """
    # Pair names with values first, then filter, so the pairing never drifts.
    return {name: list(value) for name, value in zip(x.names, x) if value}

def pprint(x):
    """Print ``x`` as JSON text with sorted keys and a four-space indent."""
    rendered = json.dumps(x, indent=4, sort_keys=True)
    print(rendered)

2020-02-09T20:40:26.608622


In [11]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


---

# Pre Split Test Analysis
### Can we detect a difference in the KPI within one business cycle?

We select "checkout visits" as a proxy KPI

- H0 - A and B have the same percent of `/checkout.html` visits
- Ha - B has more `/checkout.html` visits than A

We will use `power.prop.test` to estimate *sample* and *effect* sizes before starting the test

We will use `prop.test` to calculate the p-value at the end of the test

## Alpha

In [12]:
# Significance level for the experiment; it is later passed to R's
# power.prop.test as `sig.level`.
alpha = base_alpha()
# Bare expression so the notebook displays the chosen value.
alpha

0.05

## Beta (minimum acceptable test power)

In [13]:
# NOTE: in this notebook `beta` is used as the minimum acceptable power:
# the estimated power is later required to be >= beta, and (1 - beta) is the
# quantity compared against alpha here.
beta = base_beta()
# Sanity check on the configured thresholds: alpha must be smaller than (1 - beta).
assert alpha < (1 - beta), "We want to avoid false positive errors more than false negative"
# Bare expression so the notebook displays the chosen value.
beta

0.8

## Number of parallel testing groups

In [14]:
# Number of groups that run in parallel; the baseline visitor count is later
# divided by this to get the per-group sample size.
groups = parallel_test_groups()
# Bare expression so the notebook displays the chosen value.
groups

2

## Estimating baseline conversion and sample size

In [15]:
# Baseline window: starts on a fixed date and spans whatever business_cycle()
# returns (presumably a timedelta -- TODO confirm against pybrew).
start = datetime.fromisoformat('2019-11-06')
end = start + business_cycle()

# Fetch stats for the target-audience segment over the window. The result is
# indexed below by "n" (visitor count) and "conversion" (a fraction that is
# multiplied by 100 for display).
baseline = ga_segment_stats_io(
    google_analytics_io(),
    start,
    end,
    [ga_target_audience_segment()]
)

# Human-readable summary of the baseline for the rendered notebook.
print(f'''

Baseline is {baseline["n"]} visitors in a business cycle with conversion rate {baseline["conversion"]*100:.02f}%

''')



Baseline is 192 visitors in a business cycle with conversion rate 7.29%




## Expected uplift in conversion rate

In [16]:
# Target conversion rate for group B: three times the measured baseline.
# NOTE(review): the multiplier 3 is a hard-coded assumption about the uplift;
# the power estimate in the following cells depends directly on it.
expected_conversion = baseline["conversion"] * 3

# Human-readable statement of the target for the rendered notebook.
print(f'''
We are expecting to deliver conversion rate at least of {expected_conversion*100:.2f}% in group B
''')


We are expecting to deliver conversion rate at least of 21.88% in group B



## Estimating test power

In [17]:
def power_prop_test(n, p1, p2, alpha):
    """Run R's ``power.prop.test`` via the ``%R`` line magic and return it as a dict.

    Args:
        n: value passed as R's ``n`` (R's own note on the result reads
            "n is number in *each* group").
        p1: proportion passed as R's ``p1``.
        p2: proportion passed as R's ``p2``.
        alpha: value passed as R's ``sig.level``.

    Returns:
        The R result converted with ``to_dict``. ``power`` is not supplied in
        the call, so it is the quantity R solves for and reports back.
    """
    # The $n, $p1, $p2 and $alpha placeholders are filled in from the Python
    # arguments of the same name before the expression is handed to R, so the
    # parameter names must stay in sync with the placeholders.
    # NOTE(review): the call is two-sided, while the notebook's stated Ha
    # ("B has more visits than A") is one-sided -- confirm this is intended.
    result = %R power.prop.test(n=$n, p1=$p1, p2=$p2, alternative='two.sided', sig.level=$alpha)
    return to_dict(result)

# Estimate the power achievable with the baseline traffic split evenly across
# the parallel groups: per-group n, baseline rate as p1, target rate as p2.
estimation = power_prop_test(
    baseline["n"] / groups, 
    baseline["conversion"], 
    expected_conversion,
    alpha
)

# Dump the full R result as JSON for inspection.
pprint(estimation)

{
    "alternative": [
        "two.sided"
    ],
    "method": [
        "Two-sample comparison of proportions power calculation"
    ],
    "n": [
        96.0
    ],
    "note": [
        "n is number in *each* group"
    ],
    "p1": [
        0.07291666666666667
    ],
    "p2": [
        0.21875
    ],
    "power": [
        0.8219061918939755
    ],
    "sig.level": [
        0.05
    ]
}


## Can we run the test?

In [18]:
# Go/no-go gate: each entry of `estimation` is a list, so take the first
# element of "power" and require it to reach the configured minimum (`beta`).
assert estimation['power'][0] >= beta, "We can not conduct the test, consider a different KPI or a bigger uplift"
print('Yes, we can run this test!')

Yes, we can run this test!
