In [10]:
### WORK ISSUE ###

#Facebook recently introduced a new bidding type, 
#'average bidding', as an alternative to the existing bidding type called 'maximum bidding'.

#One of our clients, bombabomba.com, decided to test this new feature and wants to run an A/B test
#to see whether average bidding yields more conversions than maximum bidding.

#The A/B test has been running for 1 month, and bombabomba.com is now waiting for you to analyze
#the results. The ultimate success criterion for bombabomba.com is Purchase.
#Therefore, the statistical testing should focus on the Purchase metric.



In [9]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
!pip install statsmodels
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, \
    pearsonr, spearmanr, kendalltau, f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest

# Notebook-wide pandas display settings: show every column, cap printed rows at 10,
# and render floats with five decimal places.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10
pd.options.display.float_format = lambda value: '%.5f' % value



In [3]:
# Workbook holding both experiment arms, one sheet per group.
# NOTE(review): this is a machine-specific absolute path - point it at your local copy of the file.
AB_TEST_WORKBOOK = r"C:\Users\PC\Desktop\Miuul_VBO\6. Hafta\Odev_Dataset\ab_testing.xlsx"

# Passing a list of sheet names parses the workbook once and returns {sheet_name: DataFrame},
# instead of opening and parsing the same file once per sheet.
ab_sheets = pd.read_excel(AB_TEST_WORKBOOK, sheet_name=["Control Group", "Test Group"])
df_control = ab_sheets["Control Group"]
df_test = ab_sheets["Test Group"]

In [4]:
# Tag every row with the experiment arm it came from, so the arm stays identifiable
# once the two frames are combined.
df_control = df_control.assign(Group="Control")
df_test = df_test.assign(Group="Test")

In [5]:
# Stack the two arms into one long frame. ignore_index=True gives the result a fresh
# 0..n-1 index; keeping the source indexes would repeat each row label once per arm,
# which makes label-based lookups on the combined frame ambiguous.
df = pd.concat([df_control, df_test], axis=0, ignore_index=True)

In [6]:
# Per-group summary statistics for each metric, transposed so the groups become columns.
# The metric columns are selected with a list: tuple-style selection on a GroupBy
# (groupby(...)["a", "b"]) is deprecated and is rejected by pandas 2.x.
(
    df.groupby("Group")[["Purchase", "Click", "Impression", "Earning"]]
    .agg(["mean", "min", "max", "sum", "median"])
    .T
)

  df.groupby("Group")["Purchase","Click","Impression","Earning"].agg(["mean","min","max","sum","median"]).T


Unnamed: 0,Group,Control,Test
Purchase,mean,550.89406,582.10610
Purchase,min,267.02894,311.62952
Purchase,max,801.79502,889.91046
Purchase,sum,22035.76235,23284.24387
Purchase,median,531.20631,551.35573
...,...,...,...
Earning,mean,1908.56830,2514.89073
Earning,min,1253.98952,1939.61124
Earning,max,2497.29522,3171.48971
Earning,sum,76342.73199,100595.62931


In [7]:
def AB_Test(dataframe, group, target, alpha=0.05, control_label="Control", test_label="Test"):
    """Compare a metric between two experiment groups and summarise the outcome.

    Procedure:
      1. Shapiro-Wilk normality test on each group.
      2. If neither group rejects normality: Levene's test for equal variances,
         then an independent two-sample t-test (pooled variance when Levene does
         not reject, Welch's variant otherwise).
      3. If either group rejects normality: two-sided Mann-Whitney U test.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Long-format data containing the rows of both groups.
    group : str
        Name of the column holding the group labels.
    target : str
        Name of the numeric column to compare.
    alpha : float, default 0.05
        Significance level applied to every test above.
    control_label, test_label : default "Control" and "Test"
        Values of ``group`` that identify the two samples.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns "Test Type", "Homogeneity" (parametric
        path only), "AB Hypothesis", "p-value" and "Comment".

    Raises
    ------
    ValueError
        If ``alpha`` is not strictly between 0 and 1, or if either group has
        fewer than three non-missing observations.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    # Split A/B. Missing values are dropped: they would turn every p-value into NaN,
    # and NaN < alpha is silently False, which would read as "normal / no difference".
    control = dataframe.loc[dataframe[group] == control_label, target].dropna()
    test = dataframe.loc[dataframe[group] == test_label, target].dropna()

    for label, sample in ((control_label, control), (test_label, test)):
        if len(sample) < 3:
            raise ValueError(
                f"group {label!r} has {len(sample)} non-missing values in {target!r}; "
                "at least 3 are required"
            )

    # Normality assumption. H0: the data follow a normal distribution.
    control_rejects_normality = shapiro(control)[1] < alpha
    test_rejects_normality = shapiro(test)[1] < alpha
    parametric = not (control_rejects_normality or test_rejects_normality)

    if parametric:
        # Homogeneity-of-variance assumption. H0: the group variances are equal.
        variances_differ = levene(control, test)[1] < alpha
        # Pooled-variance t-test when the variances look equal, Welch's t-test otherwise.
        p_value = ttest_ind(control, test, equal_var=not variances_differ)[1]
    else:
        variances_differ = None
        # State the alternative explicitly so the p-value always matches the two-sided
        # hypothesis printed below; older SciPy releases defaulted to a deprecated mode
        # that reported half of the two-sided p-value.
        p_value = mannwhitneyu(control, test, alternative="two-sided")[1]

    reject_h0 = p_value < alpha

    # Assemble the one-row report; key order defines the column order.
    summary = {"Test Type": "Parametric" if parametric else "Non-Parametric"}
    if parametric:
        summary["Homogeneity"] = "No" if variances_differ else "Yes"
    summary["AB Hypothesis"] = "Reject H0" if reject_h0 else "Fail to Reject H0"
    summary["p-value"] = p_value
    summary["Comment"] = "A/B groups are not similar!" if reject_h0 else "A/B groups are similar!"

    # Print Hypothesis
    print("# A/B Testing Hypothesis")
    print("H0: A == B")
    print("H1: A != B", "\n")

    return pd.DataFrame([summary])

In [8]:
# Run the A/B comparison on Purchase, the metric the client treats as the success criterion.
AB_Test(
    dataframe=df,
    group="Group",
    target="Purchase",
)

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,Homogeneity,AB Hypothesis,p-value,Comment
0,Parametric,Yes,Fail to Reject H0,0.34933,A/B groups are similar!
