### PLACEHOLDER
Write intro about AB tests

### CASE 1

<div class="alert alert-info">

`Case 1`: Changing the color of a button on a web page
The purpose of the A/B test is to determine which button color (the control group is the existing color, the experimental group is the new color) leads to more clicks and an increase in conversion.

</div>

In [217]:
import numpy as np
from scipy import stats
import pandas as pd


###########################################
## still need to accept several indicators
###########################################
def stratified_sampling(data, indicator, strats):

    """
    Function takes whole population in, and divides it into two equal groups, that will have similar distribution of chosen specific indicator
    """

    sorted_data = data.sort_values(by=indicator, ascending=False)
    strat_size = int(len(data) / strats)
    strats = [sorted_data.iloc[i:i+strat_size] for i in range(0, len(sorted_data), strat_size)]

    c_population = pd.DataFrame(columns=[feat for feat in strats[0].columns])
    c_population = c_population.astype(int)
    t_population = c_population.copy()


    for i in range(len(strats)):
        c_population = pd.concat([c_population, strats[i].sample(round(len(strats[i])/2))], ignore_index=True)
        
    cg_ids = list(c_population['customer_id'])
    filt = sorted_data['customer_id'].isin(cg_ids)
    t_population = sorted_data[~filt]
    
    return c_population, t_population

# create dummy data set (with clients and their parameters)
dataset = pd.DataFrame({
    "customer_id": range(1, 1000001),
    "customer_age": np.random.randint(18, 65, size=1000000),
    "customer_gender": np.random.choice(["Male", "Female"], size=1000000),
    "average_delay": np.random.randint(0, 90, size=1000000)#
})

# Divide our users into 2 identical (by chosen indicator, equal by size) groups
c_population, t_population = stratified_sampling(dataset, "average_delay", strats=20)


###########################################
## Add scalability, automatic column seeker
###########################################
class ABTest():
    def __init__(self, control_users, test_users):
        self.control_users = control_users
        self.test_users = test_users

    def calculate_p_value(self):
        _, p_value = stats.ttest_ind(self.control_users, self.test_users)
        return p_value

# dataframes: control/testing groups
control_group = pd.DataFrame({
    "customer_id": range(1, 1001),
    "customer_age": np.random.randint(18, 65, size=1000),
    "customer_gender": np.random.choice(["Male", "Female"], size=1000),
    "average_delay": np.random.randint(20, 90, size=1000)#
})

test_group = pd.DataFrame({
    "customer_id": range(1, 1001),
    "customer_age": np.random.randint(18, 65, size=1000),
    "customer_gender": np.random.choice(["Male", "Female"], size=1000),
    "average_delay": np.random.randint(0, 45, size=1000)#
})


# Run AB-test calculate p-value

test = ABTest(control_group["average_delay"], test_group["average_delay"])

p_value = test.calculate_p_value()

# Выводы
if p_value < 0.05:
    print("Success:\n{}".format(p_value))
else:
    print("Failure:\n{}".format(p_value))


Успешный тест: изменение цвета кнопки на сайте приводит к статистически значимому увеличению средней задержки:
1.2977947783785543e-290


In [218]:
display("control group:", c_population.describe())
display("test group:", t_population.describe())
display("control users:", control_group.describe())
display("test users:", test_group.describe())

'control group:'

Unnamed: 0,customer_id,customer_age,average_delay
count,500000.0,500000.0,500000.0
mean,500198.647958,41.010578,44.493774
std,288771.633373,13.549271,25.986409
min,2.0,18.0,0.0
25%,249738.75,29.0,22.0
50%,500521.5,41.0,44.0
75%,750478.0,53.0,67.0
max,999999.0,64.0,89.0


'test group:'

Unnamed: 0,customer_id,customer_age,average_delay
count,500000.0,500000.0,500000.0
mean,499802.352042,40.995118,44.496696
std,288579.045037,13.573677,25.986838
min,1.0,18.0,0.0
25%,250275.5,29.0,22.0
50%,499509.5,41.0,44.0
75%,749473.25,53.0,67.0
max,1000000.0,64.0,89.0


'control users:'

Unnamed: 0,customer_id,customer_age,average_delay
count,1000.0,1000.0,1000.0
mean,500.5,40.928,55.303
std,288.819436,13.399865,20.206571
min,1.0,18.0,20.0
25%,250.75,29.0,39.0
50%,500.5,41.0,56.0
75%,750.25,53.0,73.0
max,1000.0,64.0,89.0


'test users:'

Unnamed: 0,customer_id,customer_age,average_delay
count,1000.0,1000.0,1000.0
mean,500.5,40.794,22.348
std,288.819436,13.512222,12.954023
min,1.0,18.0,0.0
25%,250.75,29.0,11.0
50%,500.5,41.0,22.5
75%,750.25,52.0,33.25
max,1000.0,64.0,44.0
