In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from datetime import date

In [10]:
def create_group_generator(metrics, sample_size, n_iter):
    """Генератор случайных групп.

    :param metrics (pd.DataFame): таблица с метриками, columns=['user_id', 'metric'].
    :param sample_size (int): размер групп (количество пользователей в группе).
    :param n_iter (int): количество итераций генерирования случайных групп.
    :return (np.array, np.array): два массива со значениями метрик в группах.
    """
    user_ids = metrics['user_id'].unique()
    for _ in range(n_iter):
        a_user_ids, b_user_ids = np.random.choice(user_ids, (2, sample_size), False)
        a_metric_values = metrics.loc[metrics['user_id'].isin(a_user_ids), 'metric'].values
        b_metric_values = metrics.loc[metrics['user_id'].isin(b_user_ids), 'metric'].values
        yield a_metric_values, b_metric_values

metrics = pd.DataFrame({'user_id': [1, 2, 3, 4], 'metric': [5, 6, 8, 9.1] })
sample_size = 2
n_iter = 3
group_generator = create_group_generator(metrics, sample_size, n_iter)

In [35]:
from scipy.stats import norm, ttest_ind

def estimate_errors(group_generator, effect_add_type, effect, alpha):
    
    pvalues_aa = []        # список со значениями pvalue
    pvalues_ab = []        # список со значениями pvalue
    first_type_error = 0   # оценкa вероятности ошибки I рода
    second_type_error = 0  # оценкa вероятности ошибки II рода
    
    """
        group_generator : генератор значений метрик для двух групп.
        effect_add_type : способ добавления эффекта для группы B.
        effect          : размер эффекта в процентах.
        alpha           : уровень значимости
    """
    
    for control_values, test_values in group_generator:
        
        _, local_aa_pvalue = ttest_ind(test_values, control_values)
        pvalues_aa.append(round(local_aa_pvalue, 3))
        if local_aa_pvalue <= alpha:
            first_type_error += 1
        
        if effect_add_type == 'all_const':
            test_values += (test_values.mean() * effect / 100)
        elif effect_add_type == 'all_percent':
            test_values *= (1 + effect / 100)
                
        _, local_ab_pvalue = ttest_ind(test_values, control_values)
        pvalues_ab.append(round(local_ab_pvalue, 3))
        if local_ab_pvalue <= alpha:
            second_type_error += 1
            
    first_type_error /= len(pvalues_aa)
    first_type_error = 1 - first_type_error
        
    second_type_error /= len(pvalues_ab)
    second_type_error = 1 - second_type_error
        
    return pvalues_aa, pvalues_ab, first_type_error, second_type_error

In [33]:
sample_size, n_iter, effect, alpha = 100, 10, 6, 0.05

group_generator = (
    (np.arange(sample_size, dtype=float), np.arange(sample_size, dtype=float) + x,)
    for x in range(n_iter)
)
effect_add_type = 'all_const'
pvalues_aa, pvalues_ab, first_type_error, second_type_error = estimate_errors(
    group_generator, effect_add_type, effect, alpha
)
# pvalues_aa = [1.0, 0.808, 0.626, 0.466, 0.331, 0.224, 0.145, 0.09, 0.053, 0.029]
# pvalues_ab = [0.47, 0.327, 0.216, 0.135, 0.08, 0.045, 0.024, 0.012, 0.006, 0.003]
# first_type_error = 0.1
# second_type_error = 0.5

print(pvalues_aa)
print(pvalues_ab)
print(first_type_error)
print(second_type_error)

[1.0, 0.808, 0.626, 0.466, 0.331, 0.224, 0.145, 0.09, 0.053, 0.029]
[0.47, 0.327, 0.216, 0.135, 0.08, 0.045, 0.024, 0.012, 0.006, 0.003]
0
0.5


In [34]:
sample_size, n_iter, effect, alpha = 100, 10, 6, 0.05

group_generator = (
    (np.arange(sample_size, dtype=float), np.arange(sample_size, dtype=float) + x,)
    for x in range(n_iter)
)
effect_add_type = 'all_percent'
pvalues_aa, pvalues_ab, first_type_error, second_type_error = estimate_errors(
    group_generator, effect_add_type, effect, alpha
)
# pvalues_aa = [1.0, 0.808, 0.626, 0.466, 0.331, 0.224, 0.145, 0.09, 0.053, 0.029]
# pvalues_ab = [0.483, 0.342, 0.23, 0.147, 0.09, 0.052, 0.028, 0.015, 0.007, 0.003]
# first_type_error = 0.1
# second_type_error = 0.6

print(pvalues_aa)
print(pvalues_ab)
print(first_type_error)
print(second_type_error)

[1.0, 0.808, 0.626, 0.466, 0.331, 0.224, 0.145, 0.09, 0.053, 0.029]
[0.483, 0.342, 0.23, 0.147, 0.09, 0.052, 0.028, 0.015, 0.007, 0.003]
0
0.6
