In [1]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import scipy.stats

%matplotlib inline

In [2]:
def generate(gen, groups_count, group_size,  p0, delta):
    """Simulate grouped 0/1 outcomes whose success rate varies per group.

    Each of the ``groups_count`` groups gets its own success probability,
    drawn uniformly from ``[p0 - delta, p0 + delta]``. Every group then
    contributes ``group_size`` outcomes that are 1.0 when a fresh uniform
    draw falls below that group's probability and 0.0 otherwise.

    Args:
        gen: random generator exposing ``uniform(low, high, size)``
            (the notebook passes a ``np.random.RandomState``).
        groups_count: number of groups.
        group_size: number of observations per group.
        p0: centre of the per-group success probability range.
        delta: half-width of the per-group success probability range.

    Returns:
        1-D float array of length ``groups_count * group_size``; the
        observations of one group are stored contiguously.
    """
    low, high = p0 - delta, p0 + delta
    # Group probabilities are drawn before the per-observation uniforms so
    # the sequence of values consumed from `gen` is well defined.
    group_probs = gen.uniform(low, high, size=groups_count)
    draws = gen.uniform(size=(groups_count, group_size))
    # Broadcast each group's probability across that group's row of draws.
    successes = draws < group_probs[:, None]
    return successes.astype(float).ravel()

In [6]:
def create_batched_ttest(batch_size):
    """Build a two-sample test that compares per-batch means.

    Despite the name, the returned callable runs a two-sided
    Mann-Whitney U test (``scipy.stats.mannwhitneyu``), not a t-test.

    Args:
        batch_size: number of consecutive observations averaged into one
            batch. The length of each sample must be a multiple of it,
            otherwise the reshape raises ``ValueError``.

    Returns:
        A function ``func(data_fst, data_snd)`` returning the p-value of
        the test applied to the batch means of the two samples.
    """
    def batch_means(samples):
        # Rows are consecutive, non-overlapping batches; average each row.
        batches_count = len(samples) // batch_size
        return samples.reshape((batches_count, batch_size)).mean(axis=1)

    def func(data_fst, data_snd):
        means_fst = batch_means(data_fst)
        means_snd = batch_means(data_snd)
        result = scipy.stats.mannwhitneyu(
            means_fst, means_snd, alternative='two-sided'
        )
        return result.pvalue

    return func
    

def eval_hits_ratio(test, effect, n_trials=1000, groups_count=200,
                    group_size=50, p0=0.7, delta=0.1, alpha=0.05, seed=7):
    """Estimate how often ``test`` reports a significant difference.

    Repeatedly simulates a control sample and a treatment sample with
    ``generate`` and counts the trials in which the p-value returned by
    ``test`` is strictly below ``alpha``. With ``effect == 0`` the result
    estimates the false-positive rate; otherwise it estimates the power.

    The defaults reproduce the previously hard-coded experiment, so
    ``eval_hits_ratio(test, effect)`` returns the same value as before.

    Args:
        test: callable ``test(data_fst, data_snd) -> p-value``.
        effect: amount added to ``p0`` for the second (treatment) sample.
        n_trials: number of simulated experiments; must be positive.
        groups_count: number of groups per sample.
        group_size: number of observations per group.
        p0: baseline success probability of the first (control) sample.
        delta: half-width of the per-group probability range.
        alpha: significance threshold the p-value is compared against.
        seed: seed for the ``np.random.RandomState`` driving the simulation.

    Returns:
        Fraction of trials (float in ``[0, 1]``) with ``p-value < alpha``.

    Raises:
        ValueError: if ``n_trials`` is not positive.
    """
    if n_trials <= 0:
        raise ValueError(
            'n_trials must be a positive integer, got {}'.format(n_trials)
        )

    # A single generator is shared by all trials so the whole run is
    # reproducible from `seed`.
    gen = np.random.RandomState(seed)
    hits = 0

    for _ in range(n_trials):
        # Control is generated before treatment on every trial; keeping this
        # order preserves the random stream and therefore the results.
        data_fst = generate(gen, groups_count, group_size, p0, delta)
        data_snd = generate(gen, groups_count, group_size, p0 + effect, delta)

        if test(data_fst, data_snd) < alpha:
            hits += 1

    # float() keeps true division even under Python 2 semantics.
    return hits / float(n_trials)

In [7]:
# Sweep every (effect, batch_size) pair and report the share of simulated
# experiments in which the batched test rejects at the 0.05 level.
# With effect == 0 that share is the false-positive rate; otherwise it is power.
for effect in (0., 0.001, 0.01, 0.02, 0.05):
    print('effect: {}'.format(effect))
    for batch_size in (1, 20, 100, 200, 1000, 2000):
        batched_test = create_batched_ttest(batch_size)
        hits_ratio = eval_hits_ratio(batched_test, effect)
        print('\tbatch_size: {:<12}hits: {}'.format(batch_size, hits_ratio))

effect: 0.0
	batch_size: 1           hits: 0.139
	batch_size: 20          hits: 0.095
	batch_size: 100         hits: 0.055
	batch_size: 200         hits: 0.055
	batch_size: 1000        hits: 0.053
	batch_size: 2000        hits: 0.029
effect: 0.001
	batch_size: 1           hits: 0.144
	batch_size: 20          hits: 0.102
	batch_size: 100         hits: 0.053
	batch_size: 200         hits: 0.054
	batch_size: 1000        hits: 0.055
	batch_size: 2000        hits: 0.03
effect: 0.01
	batch_size: 1           hits: 0.365
	batch_size: 20          hits: 0.287
	batch_size: 100         hits: 0.193
	batch_size: 200         hits: 0.204
	batch_size: 1000        hits: 0.171
	batch_size: 2000        hits: 0.124
effect: 0.02
	batch_size: 1           hits: 0.816
	batch_size: 20          hits: 0.738
	batch_size: 100         hits: 0.621
	batch_size: 200         hits: 0.617
	batch_size: 1000        hits: 0.555
	batch_size: 2000        hits: 0.437
effect: 0.05
	batch_size: 1           hits: 1.0
	batch_size: 