In [1]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)



import os
# Walk the Kaggle input directory and print the full path of every file found in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


/kaggle/input/ab-testing/ab_testing.xlsx


In [2]:
# Display settings: show every column, cap printed rows at 10,
# and render floats with five decimal places.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10
pd.options.display.float_format = lambda value: '%.5f' % value

In [3]:
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, pearsonr, spearmanr, kendalltau, \
    f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest

In [4]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.9-py2.py3-none-any.whl (242 kB)
     |████████████████████████████████| 242 kB 4.8 MB/s            
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.9
Note: you may need to restart the kernel to use updated packages.


# BUSINESS PROBLEM

Facebook recently introduced a new bidding type, 'average bidding', as an alternative to the existing bidding type called 'maximum bidding'.

One of our customers, bombabomba.com, decided to test this new feature and would like to do an A/B test to see if average bidding converts more than maximum bidding.

Problem:

The A/B test has been going on for 1 month and bombabomba.com is now waiting for you to analyze the results of this A/B test.

Hint: The success criterion for bombabomba.com is Purchase.


> # About average cost bidding


Average-cost bidding allows you to tell us what you want your average cost per result to be, rather than the maximum amount you're willing to pay for any given result.

If you use this type of bidding, we'll try to get you as many results as possible while keeping your average cost per result less than or equal to your desired average. The cost for a given individual result might be more or less than that average, but over time, as you accumulate more results, the average cost for each should even out to that number or less.

For example, if you set your average cost per App Install at GBP 10, we'll show your ad to people who are likely to install your app. Sometimes we may need to spend GBP 8 to find someone to install your app, other times we may need to spend GBP 12, but over time we'll aim to have your charges average out to GBP 10 per App Install, starting from the time you set your bid to GBP 10.



> # About maximum cost bidding


Maximum cost bidding allows you to tell us the maximum amount you're willing to pay for a result.

If you use this type of bidding, we'll try to get you as many results as possible at a price equal to or less than your bid. However, in limited cases, costs may end up greater than your bid. For example, this might happen if we overestimate how many people will convert through your ad early in your campaign before we've got data on its performance and try to make up for that, or with ad sets that have smaller budgets and infrequent conversions or App Installs.

The above explanations are taken from the link below.

Reference: https://en-gb.facebook.com/business/help/447834205249495/248901692118939


# Variables

Impression : Ad views count
    
Click: Number of clicks on the displayed ad
    
Purchase: Number of products purchased after ads clicked
    
Earning: Earnings after purchased products 

In [5]:
# Treatment (experimental) group: exposed to the new bidding type (average bidding).
# Read from the "Test Group" sheet of the workbook.

df_test_group = pd.read_excel("/kaggle/input/ab-testing/ab_testing.xlsx", sheet_name="Test Group")
df_test_group.head()

Unnamed: 0,Impression,Click,Purchase,Earning
0,120103.5038,3216.54796,702.16035,1939.61124
1,134775.94336,3635.08242,834.05429,2929.40582
2,107806.62079,3057.14356,422.93426,2526.24488
3,116445.27553,4650.47391,429.03353,2281.42857
4,145082.51684,5201.38772,749.86044,2781.69752


In [6]:
# Control group: exposed to the existing bidding type (maximum bidding).
# Read from the "Control Group" sheet of the workbook.

df_control_group = pd.read_excel("/kaggle/input/ab-testing/ab_testing.xlsx", sheet_name="Control Group")
df_control_group.head()

Unnamed: 0,Impression,Click,Purchase,Earning
0,82529.45927,6090.07732,665.21125,2311.27714
1,98050.45193,3382.86179,315.08489,1742.80686
2,82696.02355,4167.96575,458.08374,1797.82745
3,109914.4004,4910.88224,487.09077,1696.22918
4,108457.76263,5987.65581,441.03405,1543.72018


In [7]:
# Add a "Group" column whose value is "Control" on every row of df_control_group,
# so each observation carries the name of the group it belongs to.

df_control_group["Group"] = "Control"
df_control_group.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Group
0,82529.45927,6090.07732,665.21125,2311.27714,Control
1,98050.45193,3382.86179,315.08489,1742.80686,Control
2,82696.02355,4167.96575,458.08374,1797.82745,Control
3,109914.4004,4910.88224,487.09077,1696.22918,Control
4,108457.76263,5987.65581,441.03405,1543.72018,Control


In [8]:
# Add a "Group" column whose value is "Test" on every row of df_test_group,
# so each observation carries the name of the group it belongs to.

df_test_group["Group"] = "Test"
df_test_group.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Group
0,120103.5038,3216.54796,702.16035,1939.61124,Test
1,134775.94336,3635.08242,834.05429,2929.40582,Test
2,107806.62079,3057.14356,422.93426,2526.24488,Test
3,116445.27553,4650.47391,429.03353,2281.42857,Test
4,145082.51684,5201.38772,749.86044,2781.69752,Test


In [9]:
# Stack the control rows and the test rows into a single frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 row index.

final_df = pd.concat([df_control_group, df_test_group], ignore_index=True)
final_df.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Group
0,82529.45927,6090.07732,665.21125,2311.27714,Control
1,98050.45193,3382.86179,315.08489,1742.80686,Control
2,82696.02355,4167.96575,458.08374,1797.82745,Control
3,109914.4004,4910.88224,487.09077,1696.22918,Control
4,108457.76263,5987.65581,441.03405,1543.72018,Control


In [10]:
final_df.shape

(80, 5)

In [11]:
pd.set_option('display.max_rows', None)

# Descriptive statistics of every raw metric, per group.
# The columns are selected with a list ([[...]]): selecting several columns with a
# bare tuple after groupby is deprecated (FutureWarning) and fails in pandas >= 2.0.
final_df.groupby("Group")[["Purchase", "Click", "Impression", "Earning"]].agg(["mean", "min", "max", "sum", "median"]).T

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Group,Control,Test
Purchase,mean,550.89406,582.1061
Purchase,min,267.02894,311.62952
Purchase,max,801.79502,889.91046
Purchase,sum,22035.76235,23284.24387
Purchase,median,531.20631,551.35573
Click,mean,5100.65737,3967.54976
Click,min,2189.75316,1836.62986
Click,max,7959.12507,6019.69508
Click,sum,204026.2949,158701.99043
Click,median,5001.2206,3931.3598


Looking at the descriptive statistics, the new design appears to have a positive impact on impressions, earnings and purchases. However, without statistical tests I cannot say for sure whether the new design really has a positive effect on these metrics.

> # A/B TESTING

Before we create a hypothesis test, let's do the assumption checks.

These assumptions are Normality and Variance Homogeneity:

> # Assumption of the Normality 

H0: Data follow a normal distribution.

H1: Data do not follow a normal distribution
    
If the p-value is less than 0.05, H0 is rejected. This means that data do not follow a normal distribution. 

We use the Shapiro Test to check the Normality Assumption.  


> Let's test whether the purchase variable is normally distributed in the control group and in the test group

In [12]:
# Shapiro-Wilk normality test on Purchase, control group only.
purchase_control = final_df.loc[final_df["Group"] == "Control", "Purchase"]
test_stat, pvalue = shapiro(purchase_control)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 0.9773, p-value = 0.5891


In [13]:
# Shapiro-Wilk normality test on Purchase, test group only.
purchase_test = final_df.loc[final_df["Group"] == "Test", "Purchase"]
test_stat, pvalue = shapiro(purchase_test)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 0.9589, p-value = 0.1541


We fail to reject H0 for both groups (both p-values are greater than 0.05). The purchase variable is consistent with a normal distribution in the control group and in the test group.

> # Assumption of The Variance Homogeneity

H0: Variances are homogeneous 
    
H1: Variances are heterogenous
    
If the p-value is less than 0.05, H0 is rejected. This means that variances are heterogenous.

We use the Levene Test to check the assumption of the homogeneity of the variances.  

> Let's test the homogeneity of the variances for purchase variable on the control and test group 

In [14]:
# Levene's test for equality of variances of Purchase (test group vs. control group).
purchase_test = final_df.loc[final_df["Group"] == "Test", "Purchase"]
purchase_control = final_df.loc[final_df["Group"] == "Control", "Purchase"]
test_stat, pvalue = levene(purchase_test, purchase_control)

print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 2.6393, p-value = 0.1083


We fail to reject H0 (the p-value is greater than 0.05). The variances of the purchase variable are homogeneous across the two groups.

> # Independent T-Test for Two Samples

The independent t-test, also called the two sample t-test, independent-samples t-test or student's t-test, is an inferential statistical test that determines whether there is a statistically significant difference between the means in two unrelated groups.

> Assumptions: 
    
1.) Data in each group must be obtained via a random sample from the population. 

2.) Data in each group are normally distributed. 

3.) Data values are continuous. 

4.) The variances for the two independent groups are equal.

> Null and alternative hypotheses for the independent t-test:
    
H0: u1 = u2
    
There is not a statistically significant difference between the means in two unrelated groups.

H1: u1 ≠ u2
    
There is a statistically significant difference between the means in two unrelated groups.

> We will use an independent two-sample t-test for the purchase variable, because it is normally distributed in both groups and its variances are homogeneous.

In [15]:
# Independent two-sample t-test on Purchase (test vs. control), assuming equal variances.
purchase_test = final_df.loc[final_df["Group"] == "Test", "Purchase"]
purchase_control = final_df.loc[final_df["Group"] == "Control", "Purchase"]
test_stat, pvalue = ttest_ind(purchase_test, purchase_control, equal_var=True)

print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 0.9416, p-value = 0.3493


When we look at the p-value of the test result for the purchase variable, we see that it is greater than 0.05. This means that there is no statistically significant difference between the means of the control group and the test group. The descriptive statistics of the purchase variable suggested that the new design had a positive effect on purchasing, but the test shows that a difference of this size could have occurred by chance. :)

> Notes: 

1.) If the data do not follow a normal distribution, we use the Mann-Whitney U test.

2.) If the data follow a normal distribution but the variances are not homogeneous, we use Welch's t-test.



Let's functionalize all the above operations.

In [16]:

def AB_Test(dataframe, group, target, control_label="Control", test_label="Test", alpha=0.05):
    """Compare ``target`` between two groups using an automatically selected test.

    Procedure:
      1. Shapiro-Wilk normality test on each group.
      2. If normality is not rejected for either group (parametric path):
         Levene's test for equal variances, followed by Student's t-test when the
         variances are homogeneous, or Welch's t-test when they are not.
      3. Otherwise (non-parametric path): two-sided Mann-Whitney U test.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding both groups.
    group : str
        Name of the column that contains the group labels.
    target : str
        Name of the numeric column to compare.
    control_label, test_label : optional
        Values of ``group`` that identify the two samples
        (defaults: "Control" and "Test").
    alpha : float, optional
        Significance level applied to every test (default 0.05).

    Returns
    -------
    pandas.DataFrame
        A one-row frame with the columns "Test Type", "Homogeneity" (parametric
        path only), "AB Hypothesis", "p-value" and "Comment".

    Raises
    ------
    ValueError
        If either group contains fewer than 3 observations, for example because
        the label does not occur in ``group``.
    """
    # Local import so the function does not depend on how the notebook imported scipy.
    import scipy.stats as stats

    # Split A/B
    control = dataframe.loc[dataframe[group] == control_label, target]  # old design
    test = dataframe.loc[dataframe[group] == test_label, target]  # new design

    # An empty or tiny sample usually means a wrong label; fail loudly instead of
    # letting the tests produce an obscure error or a NaN p-value.
    for label, sample in ((control_label, control), (test_label, test)):
        if len(sample) < 3:
            raise ValueError(
                "Group %r has %d observation(s) in column %r; at least 3 are required."
                % (label, len(sample), group)
            )

    # Normality assumption.
    # H0: data follow a normal distribution -> rejected when p < alpha.
    control_rejects_normality = stats.shapiro(control)[1] < alpha
    test_rejects_normality = stats.shapiro(test)[1] < alpha
    parametric = not control_rejects_normality and not test_rejects_normality

    homogeneous = None
    if parametric:
        # Variance homogeneity assumption.
        # H0: variances are homogeneous -> rejected when p < alpha.
        homogeneous = not (stats.levene(control, test)[1] < alpha)
        # equal_var=True is Student's t-test, equal_var=False is Welch's t-test.
        p_value = stats.ttest_ind(control, test, equal_var=homogeneous)[1]
    else:
        # H1 is two-sided (A != B). Older SciPy versions return a one-sided p-value
        # unless the alternative is stated explicitly.
        p_value = stats.mannwhitneyu(control, test, alternative="two-sided")[1]

    # H0: M1 == M2, H1: M1 != M2
    reject_h0 = p_value < alpha

    # Result (dict insertion order defines the column order).
    result = {"Test Type": ["Parametric" if parametric else "Non-Parametric"]}
    if parametric:
        result["Homogeneity"] = ["Yes" if homogeneous else "No"]
    result["AB Hypothesis"] = ["Reject H0" if reject_h0 else "Fail to Reject H0"]
    result["p-value"] = [p_value]
    result["Comment"] = ["A/B groups are not similar!" if reject_h0 else "A/B groups are similar!"]
    temp = pd.DataFrame(result)

    # Print Hypothesis
    print("# A/B Testing Hypothesis")
    print("H0: A == B")
    print("H1: A != B", "\n")

    return temp


# > Application Step


> # 1.FOR PURCHASE 

In [17]:
AB_Test(dataframe=final_df, group="Group", target="Purchase")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,Homogeneity,AB Hypothesis,p-value,Comment
0,Parametric,Yes,Fail to Reject H0,0.34933,A/B groups are similar!


> # 2.FOR CLICK

In [18]:
AB_Test(dataframe=final_df, group="Group", target="Click")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,Homogeneity,AB Hypothesis,p-value,Comment
0,Parametric,No,Reject H0,3e-05,A/B groups are not similar!


> # 3.FOR IMPRESSION

In [19]:
AB_Test(dataframe=final_df, group="Group", target="Impression")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,Homogeneity,AB Hypothesis,p-value,Comment
0,Parametric,Yes,Reject H0,5e-05,A/B groups are not similar!


> #  4.FOR EARNING

In [20]:
AB_Test(dataframe=final_df, group="Group", target="Earning")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,Homogeneity,AB Hypothesis,p-value,Comment
0,Parametric,Yes,Reject H0,0.0,A/B groups are not similar!


Let's create other metrics!!

> #  5.click_per_impression

In [21]:
# Clicks divided by impressions, computed row by row and stored as a new column.
final_df["click_per_impression"] = final_df["Click"] / final_df["Impression"]
final_df.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Group,click_per_impression
0,82529.45927,6090.07732,665.21125,2311.27714,Control,0.07379
1,98050.45193,3382.86179,315.08489,1742.80686,Control,0.0345
2,82696.02355,4167.96575,458.08374,1797.82745,Control,0.0504
3,109914.4004,4910.88224,487.09077,1696.22918,Control,0.04468
4,108457.76263,5987.65581,441.03405,1543.72018,Control,0.05521


In [22]:
final_df.groupby("Group")["click_per_impression"].agg(["mean","min","max","sum","median"])

Unnamed: 0_level_0,mean,min,max,sum,median
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Control,0.05362,0.02076,0.16207,2.14473,0.0488
Test,0.03418,0.01473,0.07575,1.36704,0.03136


In [23]:
AB_Test(dataframe=final_df, group="Group", target="click_per_impression")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,AB Hypothesis,p-value,Comment
0,Non-Parametric,Reject H0,0.0,A/B groups are not similar!


> #  6.purchases_per_impression

In [24]:
# Purchases divided by impressions, computed row by row and stored as a new column.
final_df["purchases_per_impression"] = final_df["Purchase"] / final_df["Impression"]
final_df.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Group,click_per_impression,purchases_per_impression
0,82529.45927,6090.07732,665.21125,2311.27714,Control,0.07379,0.00806
1,98050.45193,3382.86179,315.08489,1742.80686,Control,0.0345,0.00321
2,82696.02355,4167.96575,458.08374,1797.82745,Control,0.0504,0.00554
3,109914.4004,4910.88224,487.09077,1696.22918,Control,0.04468,0.00443
4,108457.76263,5987.65581,441.03405,1543.72018,Control,0.05521,0.00407


In [25]:
final_df.groupby("Group")["purchases_per_impression"].agg(["mean","min","max","sum","median"])

Unnamed: 0_level_0,mean,min,max,sum,median
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Control,0.00558,0.00223,0.00891,0.2232,0.00544
Test,0.00492,0.00227,0.00924,0.19688,0.00483


In [26]:
AB_Test(dataframe=final_df, group="Group", target="purchases_per_impression")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,Homogeneity,AB Hypothesis,p-value,Comment
0,Parametric,Yes,Fail to Reject H0,0.05571,A/B groups are similar!


> #  7.purchases_per_click

In [27]:
# Purchases divided by clicks, computed row by row and stored as a new column.
final_df["purchases_per_click"] = final_df["Purchase"] / final_df["Click"]
final_df.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Group,click_per_impression,purchases_per_impression,purchases_per_click
0,82529.45927,6090.07732,665.21125,2311.27714,Control,0.07379,0.00806,0.10923
1,98050.45193,3382.86179,315.08489,1742.80686,Control,0.0345,0.00321,0.09314
2,82696.02355,4167.96575,458.08374,1797.82745,Control,0.0504,0.00554,0.10991
3,109914.4004,4910.88224,487.09077,1696.22918,Control,0.04468,0.00443,0.09919
4,108457.76263,5987.65581,441.03405,1543.72018,Control,0.05521,0.00407,0.07366


In [28]:
final_df.groupby("Group")["purchases_per_click"].agg(["mean","min","max","sum","median"])

Unnamed: 0_level_0,mean,min,max,sum,median
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Control,0.11593,0.0404,0.30436,4.63702,0.10957
Test,0.15657,0.06365,0.44789,6.26265,0.14618


In [29]:
AB_Test(dataframe=final_df, group="Group", target="purchases_per_click")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,AB Hypothesis,p-value,Comment
0,Non-Parametric,Reject H0,0.00105,A/B groups are not similar!


# Should the company switch to the new design?

 # > Results

* Purchases haven't changed with the new design.

* The conversion rate (purchases per impression) hasn't changed significantly with the new design (p-value = 0.05571).

* A significant difference was observed between the two groups in terms of purchases per click, clicks per impression, clicks, impressions, and earnings.


> We need more data to decide whether to switch to the new design.