In [42]:
import random
from collections import namedtuple
import numpy as np
import scipy as sp
import scipy.stats

In [24]:
# One simulated site visit: the experiment group it was assigned to
# ("control" or "treatment") and the purchase amount (0.0 when nothing
# was bought).
Visit = namedtuple("Visit", ("group", "amount"))

In [30]:
def generate_random_data(N, control_probs, treatment_probs):
    """Simulate ``N`` visits.

    Every visit is produced by ``generate_random_visit`` with the given
    control and treatment settings.

    Args:
        N: Number of visits to generate.
        control_probs: Settings passed through for the control group.
        treatment_probs: Settings passed through for the treatment group.

    Returns:
        list: The ``N`` generated visits, in generation order.
    """
    visits = []
    for _ in range(N):
        visits.append(generate_random_visit(control_probs, treatment_probs))
    return visits

In [25]:
def generate_random_visit(control_probs, treatment_probs):
    """Simulate one visit, assigned to a group by a fair coin flip.

    Args:
        control_probs: Settings used when the visit lands in "control".
        treatment_probs: Settings used when the visit lands in "treatment".

    Returns:
        The result of ``generate_visit`` for the chosen group.
    """
    # A uniform draw below 0.5 selects control; anything else is treatment.
    in_control = random.random() < 0.5
    if in_control:
        group_name, probs = "control", control_probs
    else:
        group_name, probs = "treatment", treatment_probs
    return generate_visit(group_name, probs)
    

In [26]:
def generate_visit(group_name, probs):
    """Simulate a single visit for the named group.

    Args:
        group_name: Label stored on the resulting ``Visit``.
        probs: Mapping with two entries: ``'p_buy'``, the threshold a uniform
            random draw must fall below for a purchase to happen, and
            ``'amount_generator'``, a zero-argument callable that returns the
            purchase amount.

    Returns:
        Visit: ``Visit(group_name, amount)`` where ``amount`` is the generated
        value for a purchase and ``0.0`` otherwise.
    """
    purchase_probability = probs['p_buy']
    made_purchase = random.random() < purchase_probability
    # The amount generator is only invoked when a purchase actually happens.
    amount = probs['amount_generator']() if made_purchase else 0.0
    return Visit(group_name, amount)

In [56]:
def analyze_experiment(visits):
    """Summarise an A/B experiment for the control and treatment groups.

    The visits are tallied with ``get_summary_data``; for each group the
    purchase indicator list (``'events'``) is passed to ``mean_confidence``
    (using its default confidence level) and the total purchase amount is
    divided by the number of purchasers.

    Args:
        visits: Iterable of visit records accepted by ``get_summary_data``.

    Returns:
        dict: ``{"control": {...}, "treatment": {...}}`` where each inner
        dict holds:

        * ``'mean_completion_rate'``: mean of the group's purchase indicators,
        * ``'confidence_interval'``: ``[lower, upper]`` bounds around it,
        * ``'expected_order_amount'``: total amount / number of purchasers,
          or ``nan`` when the group has no purchasers.
    """
    data = get_summary_data(visits)
    output = {}
    for group in ("control", "treatment"):
        stats = data[group]
        rate, lower, upper = mean_confidence(stats['events'])
        purchasers = stats['purchasers']
        # With no purchasers the average order amount is undefined; report
        # NaN rather than failing with ZeroDivisionError.
        if purchasers:
            average_order = stats['amount'] / float(purchasers)
        else:
            average_order = float('nan')
        output[group] = {
            'mean_completion_rate': rate,
            'confidence_interval': [lower, upper],
            'expected_order_amount': average_order,
        }
    return output

In [59]:
# Run the analysis on the simulated visits in `data`.
# NOTE: `data` comes from the generate_random_data cell (In [58]) and this call
# relies on get_summary_data / mean_confidence, all of which appear further
# down in this notebook and must have been executed first.
analyze_experiment(data)

{'control': {'confidence_interval': [0.35920377063426467, 0.40179622936573534],
  'expected_order_amount': 49.9979660679797,
  'mean_completion_rate': 0.3805},
 'treatment': {'confidence_interval': [0.42270370666588358,
   0.46629629333411643],
  'expected_order_amount': 39.80238125479275,
  'mean_completion_rate': 0.44450000000000001}}

In [43]:
def get_summary_data(visits):
    """Tally visits per experiment group.

    Args:
        visits: Iterable of records exposing ``group`` (either ``"control"``
            or ``"treatment"``) and ``amount`` attributes.

    Returns:
        dict: One entry per group, each a dict with:

        * ``'purchasers'``: number of visits with ``amount > 0.0``,
        * ``'total'``: number of visits,
        * ``'amount'``: sum of all amounts,
        * ``'events'``: one ``1``/``0`` per visit (purchase or not), in order.
    """
    data = {
        group: {"purchasers": 0, "total": 0, "amount": 0.0, "events": []}
        for group in ("control", "treatment")
    }
    for visit in visits:
        group, amount = visit.group, visit.amount
        tally = data[group]
        tally['total'] += 1
        tally['amount'] += amount
        purchased = amount > 0.0
        if purchased:
            tally['purchasers'] += 1
        tally['events'].append(1 if purchased else 0)
    return data

In [46]:
def mean_confidence(data, confidence=0.95):
    """Return the sample mean and a Student-t confidence interval around it.

    Args:
        data: Sequence of numeric observations.
        confidence: Two-sided confidence level of the interval
            (default ``0.95``).

    Returns:
        tuple: ``(mean, lower, upper)`` where ``lower``/``upper`` are the mean
        minus/plus the half-width ``sem * t.ppf((1 + confidence) / 2, n - 1)``.
    """
    a = np.asarray(data, dtype=float)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    # Use the public ``ppf`` rather than the private ``_ppf``: the private
    # method skips argument validation and is not part of SciPy's public API.
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h

In [58]:
# Simulate 4000 visits using the control/treatment settings.
# NOTE(review): the `random` module is not seeded anywhere in the visible
# notebook, so every run produces different data (and different outputs).
data = generate_random_data(4000, control_probs, treatment_probs)

In [45]:
# Inspect the raw per-group tallies for the simulated visits.
# NOTE: each group's 'events' list holds one entry per visit, so the
# displayed output of this cell is very long.
get_summary_data(data)

{'control': {'amount': 37822.886452415536,
  'events': [0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   1,
   0,
   1,
   0,
   0,
   0,
   1,
   1,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   1,
   0,
   1,
   0,
   1,
   0,
   0,
   1,
   0,
   1,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   1,
   0,
   1,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   1,
   0,
   1,
   1,
   0,
   1,
   0,
   1,
   1,
   0,
   1,
   1,
   0,
   1,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   1,
   1,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   1,
   0,
   1,
   0,
   0,
   1,
   1,
   1,
   1,
   1,
   1,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   1,
   0,
   1,
   1,
   1,
   1,
   0,
   1,
   1,
   0,
   1,
   0,
   0,
   0,
   0,
   1,
   1,
   1,
   1,
   1,
   1,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   1,

In [27]:
# Simulation settings per experiment group. 'p_buy' is the purchase
# probability threshold; 'amount_generator' draws a purchase amount from a
# Gaussian (mean 50 for control, 40 for treatment; std dev 5) and never
# returns less than 0.99.
control_probs = dict(
    p_buy=0.4,
    amount_generator=lambda: max(0.99, random.gauss(50.0, 5.0)),
)
treatment_probs = dict(
    p_buy=0.45,
    amount_generator=lambda: max(0.99, random.gauss(40.0, 5.0)),
)

In [28]:
# Smoke test: draw a single simulated visit. The group and amount are random,
# so the displayed result changes from run to run.
generate_random_visit(control_probs, treatment_probs)

Visit(group='control', amount=38.64897118095759)