In [1]:
# Comparison of Bidding Methods Conversion with AB Test

In [2]:
# Business Problem

In [3]:
# Facebook recently launched an alternative to the existing "maximumbidding" type of bidding
# introduced a new type of bidding, "average bidding". One of our clients is bombabomba.com,
# decided to test this new feature and found thataveragebidding converts more than maximumbidding
# He wants to do an A/B test to see if it brings 
# A/B testing has been going on for 1 month and
# bombabomba.com is now waiting for you to analyze the results of this A/B test for Bombabomba.com
# the ultimate measure of success is Purchase. Therefore, for statistical tests, the focus should be on the Purchasemetric.

In [4]:
# Dataset Story

In [5]:
# What users see and click on in this data set containing a company's website information
# includes information such as the number of advertisements, as well as information on earnings from them.
#There are two separate data sets: Control and Test group.These data sets are located on separate sheets ofab_testing.xlsxexcel.
# Maximum Bidding was applied to the control group and Average Bidding to the test group.

# impression: Number of ad views
# Click: Number of clicks on the displayed ad
# Purchase Number of products purchased after clicked ads
# Earning: Earnings from purchased products

In [6]:
!pip install statsmodels



In [7]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, pearsonr, spearmanr, kendalltau, f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
import warnings
warnings.filterwarnings("ignore")

In [95]:
data_control = pd.read_csv("ab_testing_control.csv", sep = ";")

In [96]:
data_control.head()

Unnamed: 0,Impression,Click,Purchase,Earning,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,82529,6090,665,2311,,,,,,,,,,
1,98050,3383,315,1743,,,,,,,,,,
2,82696,4168,458,1798,,,,,,,,,,
3,109914,4911,487,1696,,,,,,,,,,
4,108458,5988,441,1544,,,,,,,,,,


In [97]:
data_control= data_control.drop(columns=['Unnamed: 4', 'Unnamed: 5','Unnamed: 6','Unnamed: 7','Unnamed: 8',
                         'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'])

In [98]:
df_control= data_control.copy()

In [99]:
df_control.head()

Unnamed: 0,Impression,Click,Purchase,Earning
0,82529,6090,665,2311
1,98050,3383,315,1743
2,82696,4168,458,1798
3,109914,4911,487,1696
4,108458,5988,441,1544


In [100]:
data_test = pd.read_csv("ab_testing_test.csv", sep = ";")

In [101]:
data_test= data_test.drop(columns=['Unnamed: 4', 'Unnamed: 5','Unnamed: 6','Unnamed: 7','Unnamed: 8',
                         'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'])

In [102]:
df_test= data_test.copy()

In [103]:
df_test.head()

Unnamed: 0,Impression,Click,Purchase,Earning
0,120104,3217,702,1940
1,134776,3635,834,2929
2,107807,3057,423,2526
3,116445,4650,429,2281
4,145083,5201,750,2782


In [104]:
# Data Preparation and Analysis

In [105]:
def check_df(dataframe):
    print("-----------------------------SHAPE--------------------------")
    print(dataframe.shape)
    print("------------------------------TYPES-------------------------")
    print(dataframe.dtypes)
    print("------------------------------NULL--------------------------")
    print(dataframe.isnull().sum())
    print("------------------------------DESCRIBE----------------------")
    print(dataframe.describe().T)

In [106]:
check_df(df_control)

-----------------------------SHAPE--------------------------
(40, 4)
------------------------------TYPES-------------------------
Impression    int64
Click         int64
Purchase      int64
Earning       int64
dtype: object
------------------------------NULL--------------------------
Impression    0
Click         0
Purchase      0
Earning       0
dtype: int64
------------------------------DESCRIBE----------------------
              count         mean         std         min         25%  \
Impression 40.00000 101711.45000 20302.12298 45476.00000 85726.75000   
Click      40.00000   5100.62500  1329.95777  2190.00000  4124.25000   
Purchase   40.00000    550.90000   134.11052   267.00000   470.50000   
Earning    40.00000   1908.57500   302.86833  1254.00000  1685.75000   

                   50%          75%          max  
Impression 99790.50000 115212.50000 147539.00000  
Click       5001.50000   5923.50000   7959.00000  
Purchase     531.50000    638.00000    802.00000  
Earning     

In [107]:
check_df(df_test)

-----------------------------SHAPE--------------------------
(40, 4)
------------------------------TYPES-------------------------
Impression    int64
Click         int64
Purchase      int64
Earning       int64
dtype: object
------------------------------NULL--------------------------
Impression    0
Click         0
Purchase      0
Earning       0
dtype: int64
------------------------------DESCRIBE----------------------
              count         mean         std         min          25%  \
Impression 40.00000 120512.42500 18807.46662 79034.00000 112692.25000   
Click      40.00000   3967.55000   923.07177  1837.00000   3376.50000   
Purchase   40.00000    582.05000   161.17516   312.00000    444.75000   
Earning    40.00000   2514.92500   282.70791  1940.00000   2280.25000   

                    50%          75%          max  
Impression 119291.50000 132050.75000 158606.00000  
Click        3931.50000   4660.25000   6020.00000  
Purchase      551.00000    699.75000    890.00000  
Ear

In [108]:
df_control["group"] = "control"
df_test["group"] = "test"

In [109]:
df = pd.concat([df_control, df_test], axis = 0, ignore_index = False)

In [112]:
# Defining the Hypothesis of A/B Testing

In [113]:
# H0 : M1=M2
# There is no difference between the purchase averages of the Control group and the Test group.

# H1 : M1!=M2
# There is a difference between the purchase averages of the Control group and the Test group.

In [116]:
df.groupby("group").agg({"Purchase": "mean"})

Unnamed: 0_level_0,Purchase
group,Unnamed: 1_level_1
control,550.9
test,582.05


In [117]:
# Performing Hypothesis Testing

In [None]:
# Normality Assumption and Homogeneity of Variance check.

In [118]:
# Normality Assumption
# H0: The assumption of normal distribution is satisfied.
# H1: Assumption of normal distribution is not met.
# p < 0.05 H0 can be rejected
# p > 0.05 H0 cannot be rejected

In [122]:
test_stat, pvalue = shapiro(df.loc[df["group"] == "control", "Purchase"])
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 0.9774, p-value = 0.5929


In [123]:
# p-value > 0.05 : The H0 cannot be rejected.
# The normality assumption is satisfied.

In [124]:
#Homogeneity of Variance
# H0: The variances are homogeneous.
# H1: The variances are not homogeneous.
# p < 0.05 H0 can be rejected
# p > 0.05 H0 cannot be rejected

In [126]:
test_stat, pvalue = levene(df.loc[df["group"] == "control", "Purchase"],
                          df.loc[df["group"] == "test", "Purchase"])
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 2.6403, p-value = 0.1082


In [127]:
# p-value > 0.05 : The H0 cannot be rejected.
# The variances are homogeneous.

In [128]:
# An independent two-sample t-test (parametric test) is conducted as the assumptions are satisfied.

In [130]:
test_stat, pvalue = ttest_ind(df.loc[df["group"] == "control", "Purchase"],
                             df.loc[df["group"] == "test", "Purchase"],
                             equal_var = True)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = -0.9396, p-value = 0.3503


In [132]:
# p-value > 0.05 : The H0 cannot be rejected.
# H0 : M1=M2
# There is no difference between the purchase averages of the Control group and the Test group.