In [45]:
import datetime
from pybrew.analytics import base_alpha, base_beta, google_analytics_io, \
    google_analytics_view_id, to_dataframe, ga_target_audience_segment, business_cycle
from pybrew import pipe

# Print the current UTC time so the recorded output shows when this cell was run.
print(datetime.datetime.utcnow().isoformat())

# Object returned by google_analytics_io(); its reports().batchGet(...) is used
# further down to query Google Analytics.
analytics = google_analytics_io()

import json

def to_dict(x):
    """Convert a named sequence of vectors into a plain ``{name: list}`` dict.

    ``x`` must expose a ``names`` attribute and be iterable, yielding one
    element per name (in this notebook: the result of an ``%R`` call).

    Falsy elements (e.g. empty vectors) are omitted *together with their
    name*. Filtering is done on (name, value) pairs so that dropping one
    element can never shift the remaining values onto the wrong names.

    Returns a dict mapping each kept name to ``list(value)``.
    """
    return {name: list(value) for name, value in zip(x.names, x) if value}

def pprint(x):
    """Print ``x`` serialized as JSON with sorted keys and 4-space indentation."""
    rendered = json.dumps(x, indent=4, sort_keys=True)
    print(rendered)

2020-02-09T18:08:22.271889


In [46]:
# Load rpy2's IPython extension; it provides the %R magic used later in this notebook.
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


---

# Pre Split Test Analysis
### Can we detect a difference in the KPI within one business cycle?

We select "checkout visits" as a proxy KPI

- H0 - A and B have the same percent of `/checkout.html` visits
- Ha - B has more `/checkout.html` visits than A

We will use `power.prop.test` to estimate *sample* and *effect* sizes before starting the test

We will use `prop.test` to calculate the p-value at the end of the test

## Alpha

In [47]:
# Significance level for the test; passed to R's power.prop.test as sig.level below.
alpha = base_alpha()
alpha

0.05

## Beta

In [48]:
# NOTE(review): despite its name, `beta` is later compared against the estimated
# *power* (final assert of this notebook) and the recorded value is 0.8, so it
# appears to hold the target power rather than the type II error rate — confirm.
beta = base_beta()
assert alpha < (1 - beta), "We want to avoid false positive errors more than false negative"
beta

0.8

## Expected uplift in conversion rate

In [49]:
# Relative uplift: the power estimate below uses p2 = p1 * (1 + expected_uplift),
# so 1.2 means +120% (p2 = 2.2 * p1).
# NOTE(review): if a 20% uplift was intended this should be 0.2 — confirm.
expected_uplift = 1.2

## Number of parallel testing groups

In [50]:
# Number of test groups; the expected traffic is divided by this to get the
# per-group sample size passed to the power estimate.
groups = 2

## Estimating baseline conversion and sample size

In [51]:
def unique_pageviews_of_target_audience_io(analytics, start, end):
    """Fetch unique pageviews per page path for the target-audience segment.

    Issues a single ``batchGet`` report request for ``ga:uniquePageviews``
    broken down by ``ga:segment`` and ``ga:pagePath`` over the date range
    ``start``..``end`` (both are passed through ``str``). Everything from the
    first ``?`` onwards is stripped from each page path, then the counts are
    summed per (path, segment) pair.

    Returns the summed frame, grouped by ``ga:pagePath`` and ``ga:segment``.
    """
    def strip_query(path):
        # Keep only the part of the path before the first '?'.
        return path.split('?')[0]

    batch_get = analytics.reports().batchGet
    report_request = {
        'viewId': google_analytics_view_id(),
        'dateRanges': [{'startDate': str(start), 'endDate': str(end)}],
        'metrics': [{'expression': 'ga:uniquePageviews'}],
        'dimensions': [{'name': 'ga:segment'}, {'name': 'ga:pagePath'}],
        'segments': [ga_target_audience_segment()],
    }
    response = batch_get(body={'reportRequests': [report_request]}).execute()

    pageviews = to_dataframe(response).astype({'ga:uniquePageviews': 'int32'})
    pageviews['ga:pagePath'] = pageviews['ga:pagePath'].apply(strip_query)
    by_path_and_segment = pageviews.groupby(['ga:pagePath', 'ga:segment'])
    return by_path_and_segment.sum()

# Baseline window: starts on 2019-11-06 and spans one business_cycle().
start = datetime.datetime.fromisoformat('2019-11-06')
end = start + business_cycle()

upv = unique_pageviews_of_target_audience_io(analytics, start.date(), end.date())

# Unique pageviews of '/' are treated as the number of visitors, and unique
# pageviews of '/checkout.html' as conversions (both for 'Target Audience').
n_base = upv['ga:uniquePageviews']['/', 'Target Audience']
n_conversion_base = upv['ga:uniquePageviews']['/checkout.html', 'Target Audience']
conversion_base = n_conversion_base / n_base

print(f'''

We expecting {n_base} visitors in a business cycle with baseline conversion rate {conversion_base*100:.02f}%

''')



We expecting 192 visitors in a business cycle with baseline conversion rate 7.29%




## Estimating test power

In [52]:
def power_prop_test(n, p1, p2, alpha):
    """Run R's ``power.prop.test`` through the ``%R`` magic and return it as a dict.

    The arguments are interpolated into the R call: ``n`` as ``n``, ``p1`` and
    ``p2`` as the two proportions, and ``alpha`` as ``sig.level``. ``power`` is
    not supplied in the call. The R result is converted with ``to_dict``.

    NOTE(review): the call uses alternative='two.sided', while the notebook's
    stated Ha ("B has more visits than A") is directional — confirm which one
    is intended.
    """
    result = %R power.prop.test(n=$n, p1=$p1, p2=$p2, alternative='two.sided', sig.level=$alpha)
    return to_dict(result)

# Arguments, in order: per-group sample size (expected traffic split across the
# groups), baseline conversion rate, conversion rate after the expected relative
# uplift, and the significance level.
estimation = power_prop_test(
    n_base / groups, 
    conversion_base, 
    conversion_base * (1 + expected_uplift),
    alpha
)

pprint(estimation)

{
    "alternative": [
        "two.sided"
    ],
    "method": [
        "Two-sample comparison of proportions power calculation"
    ],
    "n": [
        96.0
    ],
    "note": [
        "n is number in *each* group"
    ],
    "p1": [
        0.07291666666666667
    ],
    "p2": [
        0.16041666666666668
    ],
    "power": [
        0.471204474720374
    ],
    "sig.level": [
        0.05
    ]
}


## Can we run the test?

In [53]:
# Gate: the test may only be run if the estimated power reaches the target held in `beta`.
assert estimation['power'][0] >= beta, "We can not conduct the test, consider a different KPI or a bigger uplift"

AssertionError: We can not conduct the test, consider a different KPI or a bigger uplift