In [1]:
from datetime import datetime, timedelta, timezone
from pybrew.analytics import base_alpha, base_beta, google_analytics_io, \
    google_analytics_view_id, to_dataframe, ga_target_audience_segment, business_cycle, parallel_test_groups, \
    git_sha_io, git_branch_io, unique_pageviews_of_sha_io, utc_time_from_sha_io, ga_segment_stats_io, ga_sha_segment, \
    unique_pageviews_of_segments_io, github_branch_sha_io, deep_get, github_branch_info_io, s2t, pull_request_io, last_split_action_in_pull_request
from pybrew import pipe

print(datetime.utcnow().isoformat())

import json

def to_dict(x):
    return dict(zip(x.names, [list(y) for y in list(x) if y]))

def pprint(x):
    print(json.dumps(x, sort_keys=True, indent=4))
    
analytics = google_analytics_io()

2020-02-13T08:45:43.132739


In [2]:
%load_ext rpy2.ipython

# Split Test
### Are we sure that this branch will boost KPI?

We select "checkout visits" as a poxy KPI

- H0 - A and B have the same percent of `/checkout.html` visits
- Ha - B has more `/checkout.html` visits than A

We will use `power.prop.test` to estimate *sample* and *effect* sizes before starting the test

We will use `prop.test` to calculate p-value in the end of the test

## Alpha

In [3]:
alpha = base_alpha()
alpha

0.05

## Number of parallel testing groups

In [4]:
groups = parallel_test_groups()
groups

2

## Estimating baseline conversion and sample size

In [5]:
start = datetime.fromisoformat('2019-11-06')
end = start + business_cycle()

baseline = ga_segment_stats_io(
    analytics,
    start,
    end,
    [ga_target_audience_segment()]
)

print(f'''

Baseline is {baseline["n"]} visitors in a business cycle with conversion rate {baseline["conversion"]*100:.02f}%

''')



Baseline is 147 visitors in a business cycle with conversion rate 9.52%




## How many visitors should each group have to be significant?

In [6]:
min_n = baseline["n"] / groups
min_n

73.5

## What SHA and branches are we testing?

In [7]:
branch_a = github_branch_info_io('master')
branch_b = github_branch_info_io()

targets = {
    'A': {
        'sha': branch_a['sha'],
        'branch': branch_a['branch'],
        'time': s2t(branch_a['commit']['committer']['date']),
    },
    'B': {
        'sha': branch_b['sha'],
        'branch': branch_b['branch'],
        'time': s2t(branch_b['commit']['committer']['date']),
    }
}

targets

{'A': {'sha': '5320116e21ea0695f6063b654f94907478283acd',
  'branch': 'master',
  'time': datetime.datetime(2020, 2, 12, 12, 50, 47)},
 'B': {'sha': '5320116e21ea0695f6063b654f94907478283acd',
  'branch': 'master',
  'time': datetime.datetime(2020, 2, 12, 12, 50, 47)}}

## Current results

In [10]:
analytics = google_analytics_io()

pr = pull_request_io(targets['B']['branch'])
action = last_split_action_in_pull_request(pr)
assert 'createdAt' in action, 'Split test hasnt been started'

start = action['createdAt']
end = start + business_cycle()

print('start', start, 'end', end)

def get_results_io(target, start, end):
    return ga_segment_stats_io(
        analytics,
        start,
        end,
        [ga_sha_segment(target['sha'])]
    )

results = {k: {**v, **{'results': get_results_io(v, start, end)}}  for k, v in targets.items()}
results

start 2020-02-13 08:21:13 end 2020-02-20 08:21:13


{'A': {'sha': '5320116e21ea0695f6063b654f94907478283acd',
  'branch': 'master',
  'time': datetime.datetime(2020, 2, 12, 12, 50, 47),
  'results': {'n': 4, 'n_conversion': 0, 'conversion': 0.0}},
 'B': {'sha': '5320116e21ea0695f6063b654f94907478283acd',
  'branch': 'master',
  'time': datetime.datetime(2020, 2, 12, 12, 50, 47),
  'results': {'n': 4, 'n_conversion': 0, 'conversion': 0.0}}}

## Conclusion

In [9]:
def prop_test(a, b):
    a_successes = a['results']['n_conversion']
    a_trials = a['results']['n']
    
    b_successes = b['results']['n_conversion']
    b_trials = b['results']['n']
    
    assert a_trials > 0 and b_trials > 0, "No data, please wait till branches have visits"
    
    result = %R prop.test(c($a_successes, $b_successes), c($a_trials, $b_trials))
    return to_dict(result)

test_results = prop_test(results['A'], results['B'])

pprint(test_results)

assert test_results['p.value'][0] <= alpha, "Test results are not significant"
print("The test result are significant")

assert results["B"]["results"]["conversion"] > results["A"]["results"]["conversion"], \
    f'''Branch {results["B"]["branch"]} is worse than {results["A"]["branch"]}, we can reject the new brach'''

print(
f'''
Branch {results["B"]["branch"]} is better than {results["A"]["branch"]}! We can merge the new branch!
'''
)

{
    "alternative": [
        "2-sample test for equality of proportions without continuity correction"
    ],
    "conf.int": [
        "two.sided"
    ],
    "estimate": [
        0.0,
        0.0
    ],
    "method": [
        "c(0, 0) out of c(4, 4)"
    ],
    "null.value": [
        0.0,
        0.0
    ],
    "p.value": [
        NaN
    ],
    "parameter": [
        1.0
    ],
    "statistic": [
        NaN
    ]
}


AssertionError: Test results are not significant