In [1]:
import scipy.stats as sps
from tqdm.notebook import tqdm as tqdm_notebook 
from collections import defaultdict
from statsmodels.stats.proportion import proportion_confint
import numpy as np
import itertools
from scipy.stats import ttest_ind,kstest

### Пример переобучения

In [2]:
# Monte Carlo check of a 95% normal-approximation confidence interval for the
# mean of a CUPED-style adjusted metric, where the covariate is centred with
# the mean of the SAME sample. The true mean of the metric is 1000
# (exponential with scale=1000 plus zero-mean normal noise).
bad_cnt = 0

N = 10000
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_dist = sps.norm(loc=0, scale=100)
for i in tqdm_notebook(range(N)):
    control_before = before_dist.rvs(1000)
    control = control_before + noise_dist.rvs(1000)

    # Algebraically this equals noise + mean(control_before): the per-row
    # spread of the covariate is removed, but the error of its sample mean
    # stays in the estimate, so `std` below understates the real uncertainty.
    control_cup = control - (control_before - np.mean(control_before))
    std = np.std(control_cup) / np.sqrt(len(control_cup))
    mean = np.mean(control_cup)

    left_bound, right_bound = sps.norm(loc=mean, scale=std).ppf([0.025, 0.975])

    # Count the runs in which the interval does not cover the true mean.
    if left_bound > 1000 or right_bound < 1000:
        bad_cnt += 1

# Wilson interval for the observed miss rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise and prints values such as 84.85000000000001%.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 84.15% случаев; доверительный интервал: [83.42%, 84.85000000000001%]


### Если распределение известно

In [3]:
# Monte Carlo check of a 95% normal-approximation confidence interval for the
# mean of a CUPED-style adjusted metric, where the covariate is centred with
# its known theoretical mean (1000 for an exponential with scale=1000).
bad_cnt = 0

N = 10000
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_dist = sps.norm(loc=0, scale=100)
for i in tqdm_notebook(range(N)):
    control_before = before_dist.rvs(1000)
    control = control_before + noise_dist.rvs(1000)

    # Centre the covariate with the constant 1000 instead of a sample estimate.
    control_cup = control - (control_before - 1000)
    std = np.std(control_cup) / np.sqrt(len(control_cup))
    mean = np.mean(control_cup)

    left_bound, right_bound = sps.norm(loc=mean, scale=std).ppf([0.025, 0.975])

    # Count the runs in which the interval does not cover the true mean.
    if left_bound > 1000 or right_bound < 1000:
        bad_cnt += 1

# Wilson interval for the observed miss rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise and prints values such as 4.6899999999999995%.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 5.1% случаев; доверительный интервал: [4.6899999999999995%, 5.55%]


### Допустим, я не знаю параметры распределения, но у меня много данных


In [4]:
# Monte Carlo check of a 95% normal-approximation confidence interval for the
# mean of a CUPED-style adjusted metric, where the covariate is centred with
# the mean of a separate, much larger sample (1,000,000 draws).
bad_cnt = 0

N = 10000
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_dist = sps.norm(loc=0, scale=100)
control_before_general = before_dist.rvs(1000 * 1000)
# The reference mean is loop-invariant; computing it inside the loop would
# re-scan one million values on every one of the N iterations.
general_mean = np.mean(control_before_general)
for i in tqdm_notebook(range(N)):
    control_before = before_dist.rvs(1000)
    control = control_before + noise_dist.rvs(1000)

    # Centre the covariate with the mean estimated on the independent sample.
    control_cup = control - (control_before - general_mean)
    std = np.std(control_cup) / np.sqrt(len(control_cup))
    mean = np.mean(control_cup)

    left_bound, right_bound = sps.norm(loc=mean, scale=std).ppf([0.025, 0.975])

    # Count the runs in which the interval does not cover the true mean.
    if left_bound > 1000 or right_bound < 1000:
        bad_cnt += 1

# Wilson interval for the observed miss rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise and prints values such as 4.9399999999999995%.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 4.9399999999999995% случаев; доверительный интервал: [4.53%, 5.38%]


### Вот как это будет выглядеть в реальной жизни в самом плохом случае


In [5]:
# A/A simulation: the whole "population" of 2000 pre-period values is split at
# random into groups A and B, both groups get the same zero-mean noise, and the
# CUPED-style adjusted metrics are compared with a two-sample KS test.
# `bad_cnt` counts the runs with p-value < 0.05.
bad_cnt = 0

N = 10000
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_dist = sps.norm(loc=0, scale=100)
for i in tqdm_notebook(range(N)):
    control_before_full = before_dist.rvs(1000 * 2)
    len_full = len(control_before_full)

    # One random permutation cut in half gives two disjoint random groups.
    # This replaces the `x not in ndarray` comprehension, which was quadratic
    # and hard-coded the population size as 2000.
    shuffled_indexes = np.random.permutation(len_full)
    indexes_to_take_a = shuffled_indexes[:len_full // 2]
    indexes_to_take_b = shuffled_indexes[len_full // 2:]

    control_before_a = control_before_full[indexes_to_take_a]
    control_before_b = control_before_full[indexes_to_take_b]

    control_after_a = control_before_a + noise_dist.rvs(len(control_before_a))
    control_after_b = control_before_b + noise_dist.rvs(len(control_before_b))

    # Both groups are centred with the same mean of the full pre-period sample.
    full_mean = np.mean(control_before_full)
    control_cup_a = control_after_a - (control_before_a - full_mean)
    control_cup_b = control_after_b - (control_before_b - full_mean)

    if kstest(control_cup_a, control_cup_b).pvalue < 0.05:
        bad_cnt += 1

# Wilson interval for the observed rejection rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise and prints values such as 4.130000000000001%.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 4.52% случаев; доверительный интервал: [4.130000000000001%, 4.9399999999999995%]


### Вот как это будет выглядеть в реальной жизни: допустим, на каждую группу приходится 10% трафика (1000 из 10000 пользователей)


In [6]:
# A/A simulation on a sub-sample: out of a "population" of 10000 pre-period
# values, `samples_in_ab` users take part in the test and are split evenly
# into groups A and B. Both groups get the same zero-mean noise, and the
# CUPED-style adjusted metrics (centred with the mean of the whole population)
# are compared with a two-sample KS test. `bad_cnt` counts runs with p < 0.05.
bad_cnt = 0

N = 10000
samples_in_ab = 2000
group_size = samples_in_ab // 2
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_dist = sps.norm(loc=0, scale=100)
for i in tqdm_notebook(range(N)):
    control_before_full = before_dist.rvs(1000 * 10)

    # Draw the test participants without replacement from THIS population and
    # cut the (randomly ordered) draw in half. The previous version read
    # `len_full`, which is only defined by an earlier cell, and built group B
    # with a quadratic `x not in ndarray` scan whose size was right only while
    # len_full happened to equal samples_in_ab.
    indexes_in_ab = np.random.choice(len(control_before_full), samples_in_ab, replace=False)
    indexes_to_take_a = indexes_in_ab[:group_size]
    indexes_to_take_b = indexes_in_ab[group_size:]

    control_before_a = control_before_full[indexes_to_take_a]
    control_before_b = control_before_full[indexes_to_take_b]

    control_after_a = control_before_a + noise_dist.rvs(len(control_before_a))
    control_after_b = control_before_b + noise_dist.rvs(len(control_before_b))

    # Both groups are centred with the same mean of the full population.
    full_mean = np.mean(control_before_full)
    control_cup_a = control_after_a - (control_before_a - full_mean)
    control_cup_b = control_after_b - (control_before_b - full_mean)

    if kstest(control_cup_a, control_cup_b).pvalue < 0.05:
        bad_cnt += 1

# Wilson interval for the observed rejection rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise and prints values such as 5.1499999999999995%.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 4.72% случаев; доверительный интервал: [4.32%, 5.1499999999999995%]


### Добавим шум (сдвиг среднего на 10 в группе B) и посмотрим, как справится CUPED


In [7]:

# A/B simulation with CUPED-style adjustment: out of a "population" of 10000
# pre-period values, `samples_in_ab` users are split evenly into groups A and
# B. Group B's post-period noise has mean 10 instead of 0. The adjusted
# metrics are compared with a two-sample KS test; `bad_cnt` counts the runs
# with p < 0.05, i.e. the runs in which the groups were declared different.
bad_cnt = 0

N = 10000
samples_in_ab = 2000
group_size = samples_in_ab // 2
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_a_dist = sps.norm(loc=0, scale=100)
noise_b_dist = sps.norm(loc=10, scale=100)  # group B is shifted by 10 on average
for i in tqdm_notebook(range(N)):
    control_before_full = before_dist.rvs(1000 * 10)

    # Draw the test participants without replacement from THIS population and
    # cut the (randomly ordered) draw in half. The previous version read
    # `len_full`, which is only defined by an earlier cell, and built group B
    # with a quadratic `x not in ndarray` scan whose size was right only while
    # len_full happened to equal samples_in_ab.
    indexes_in_ab = np.random.choice(len(control_before_full), samples_in_ab, replace=False)
    indexes_to_take_a = indexes_in_ab[:group_size]
    indexes_to_take_b = indexes_in_ab[group_size:]

    control_before_a = control_before_full[indexes_to_take_a]
    control_before_b = control_before_full[indexes_to_take_b]

    control_after_a = control_before_a + noise_a_dist.rvs(len(control_before_a))
    control_after_b = control_before_b + noise_b_dist.rvs(len(control_before_b))

    # Both groups are centred with the same mean of the full population.
    full_mean = np.mean(control_before_full)
    control_cup_a = control_after_a - (control_before_a - full_mean)
    control_cup_b = control_after_b - (control_before_b - full_mean)

    if kstest(control_cup_a, control_cup_b).pvalue < 0.05:
        bad_cnt += 1

# Wilson interval for the observed rejection rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise in the printed numbers.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 48.83% случаев; доверительный интервал: [47.85%, 49.81%]


### То же самое, но без CUPED

In [8]:
# A/B simulation WITHOUT the CUPED-style adjustment: out of a "population" of
# 10000 pre-period values, `samples_in_ab` users are split evenly into groups
# A and B. Group B's post-period noise has mean 10 instead of 0. The raw
# post-period metrics are compared with a two-sample KS test; `bad_cnt` counts
# the runs with p < 0.05, i.e. the runs in which the groups were declared
# different.
bad_cnt = 0

N = 10000
samples_in_ab = 2000
group_size = samples_in_ab // 2
# Frozen distributions do not change between iterations, so build them once.
before_dist = sps.expon(scale=1000)
noise_a_dist = sps.norm(loc=0, scale=100)
noise_b_dist = sps.norm(loc=10, scale=100)  # group B is shifted by 10 on average
for i in tqdm_notebook(range(N)):
    control_before_full = before_dist.rvs(1000 * 10)

    # Draw the test participants without replacement from THIS population and
    # cut the (randomly ordered) draw in half. The previous version read
    # `len_full`, which is only defined by an earlier cell, and built group B
    # with a quadratic `x not in ndarray` scan whose size was right only while
    # len_full happened to equal samples_in_ab.
    indexes_in_ab = np.random.choice(len(control_before_full), samples_in_ab, replace=False)
    indexes_to_take_a = indexes_in_ab[:group_size]
    indexes_to_take_b = indexes_in_ab[group_size:]

    control_before_a = control_before_full[indexes_to_take_a]
    control_before_b = control_before_full[indexes_to_take_b]

    control_after_a = control_before_a + noise_a_dist.rvs(len(control_before_a))
    control_after_b = control_before_b + noise_b_dist.rvs(len(control_before_b))

    # No covariate adjustment here: the raw post-period values are compared.
    control_cup_a = control_after_a
    control_cup_b = control_after_b

    if kstest(control_cup_a, control_cup_b).pvalue < 0.05:
        bad_cnt += 1

# Wilson interval for the observed rejection rate.
left_real_level, right_real_level = proportion_confint(count=bad_cnt, nobs=N, alpha=0.05, method='wilson')
# Convert to percent BEFORE rounding: round(x, 4) * 100 re-introduces binary
# floating-point noise and prints values such as 4.9799999999999995%.
print(f"Не попал в {round(bad_cnt / N * 100, 2)}% случаев;"
      f" доверительный интервал: [{round(left_real_level * 100, 2)}%, {round(right_real_level * 100, 2)}%]")

  0%|          | 0/10000 [00:00<?, ?it/s]

Не попал в 5.41% случаев; доверительный интервал: [4.9799999999999995%, 5.87%]
