## Import Library

In [226]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import scipy
from scipy import stats
from scipy.stats import chisquare
from statsmodels.stats import proportion
from statsmodels.stats.proportion import confint_proportions_2indep


## Import Data

In [227]:
data = pd.read_csv("/kaggle/input/ad-ab-testing/AdSmartABdata - AdSmartABdata.csv")

In [228]:
data.head()

Unnamed: 0,auction_id,experiment,date,hour,device_make,platform_os,browser,yes,no
0,0008ef63-77a7-448b-bd1e-075f42c55e39,exposed,2020-07-10,8,Generic Smartphone,6,Chrome Mobile,0,0
1,000eabc5-17ce-4137-8efe-44734d914446,exposed,2020-07-07,10,Generic Smartphone,6,Chrome Mobile,0,0
2,0016d14a-ae18-4a02-a204-6ba53b52f2ed,exposed,2020-07-05,2,E5823,6,Chrome Mobile WebView,0,1
3,00187412-2932-4542-a8ef-3633901c98d9,control,2020-07-03,15,Samsung SM-A705FN,6,Facebook,0,0
4,001a7785-d3fe-4e11-a344-c8735acacc2c,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,0


## Data Quality

### Missing Value

In [229]:
data.isna().sum()

auction_id     0
experiment     0
date           0
hour           0
device_make    0
platform_os    0
browser        0
yes            0
no             0
dtype: int64

There are no missing value in this data.

### Uniqueness

In [230]:
data.duplicated(['auction_id']).sum()

0

There are no duplicate in this data.

### Invalid Values

In [231]:
unique_experiment = data['experiment'].unique()
print('unique experiment:', sorted(unique_experiment))

unique experiment: ['control', 'exposed']


In [232]:
unique_date = data['date'].unique()
print('unique date:', sorted(unique_date))

unique date: ['2020-07-03', '2020-07-04', '2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08', '2020-07-09', '2020-07-10']


In [233]:
unique_hour = data['hour'].unique()
print('unique hour:', sorted(unique_hour))

unique hour: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]


In [234]:
unique_os = data['platform_os'].unique()
print('unique os:', unique_os)

unique os: [6 5 7]


In [235]:
unique_device = data['device_make'].unique()
print('unique device:', sorted(unique_device))

unique device: ['5008Y_EEA', '5099Y', '6039Y', 'A0001', 'ALE-L21', 'ANE-LX1', 'ATU-L11', 'Armor_3', 'Asus I01WD', 'BBB100-2', 'BBF100-1', 'BKL-L09', 'BLA-L09', 'C2105', 'C6903', 'CLT-L09', 'CLT-L29', 'COL-L29', 'COR-L29', 'CPH1851', 'CUBOT_X18_Plus', 'D5503', 'D5803', 'D6503', 'D6603', 'DLI-L22', 'DUA-L22', 'DUB-LX1', 'Doro 8030', 'E5823', 'E6653', 'ELE-L09', 'ELE-L29', 'EML-L09', 'EML-L29', 'EVA-L09', 'EVR-N29', 'F3111', 'F3311', 'F5321', 'F8331', 'FIG-LX1', 'FRD-L09', 'G3121', 'G3221', 'G3311', 'Generic Smartphone', 'H3113', 'H3311', 'HTC 10', 'HTC Desire $2', 'HTC M10h', 'HTC One', 'HTC One $2', 'HTC U11', 'HTC U11 $2', 'HTC U12+', 'HUAWEI $2', 'Huawei Browser', 'I3312', 'IMO $2', 'K7', 'LDN-L01', 'LG-$2', 'LYA-L09', 'LYA-L29', 'Lenovo A1010a20', 'Lenovo A7020a48', 'Lenovo P2a42', 'Lumia 950', 'MAR-LX1A', 'MHA-L09', 'MRD-LX1', 'Moto $2', 'Moto$2', 'NEM-L51', 'Nexus 5', 'Nokia undefined$2$3', 'Nokia$2$3', 'OnePlus ONE A2003', 'OnePlus ONE E1003', 'OnePlus ONEPLUS A3000', 'OnePlus ONE

In [236]:
unique_browser = data['browser'].unique()
print('unique browser:', sorted(unique_browser))

unique browser: ['Android', 'Chrome', 'Chrome Mobile', 'Chrome Mobile WebView', 'Chrome Mobile iOS', 'Edge Mobile', 'Facebook', 'Firefox Mobile', 'Mobile Safari', 'Mobile Safari UI/WKWebView', 'Opera Mini', 'Opera Mobile', 'Pinterest', 'Puffin', 'Samsung Internet']


In [237]:
unique_yes = data['yes'].unique()
print('unique yes:', unique_yes)

unique yes: [0 1]


In [238]:
unique_no = data['no'].unique()
print('unique no:', unique_no)

unique no: [0 1]


### Sample Ratio Mismatch

In [239]:
count_user = data.groupby(['experiment'])['auction_id'].count()
count_user = pd.DataFrame(count_user)
count_user['percentage'] = count_user/count_user['auction_id'].sum()
count_user.columns = ['user', 'percentage']
count_user

Unnamed: 0_level_0,user,percentage
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1
control,4071,0.504024
exposed,4006,0.495976


#### Chi Square Test

**1. Define the null and alternative hypothesis ($H_0$ and $H_1$)**

$H_0$  : No SRM detected

$H_1$  : SRM detected

**2. Calculate chi-square statistics**

$$ \chi^2 = \sum \frac{\left ( \text{observed - expected} \right )^2}{\text{expected}} $$

Where :
- Observed: the control and variation traffic volumes (sample size), respectively
- Expected: the expected values for control and treatment — i.e. the total observed divided by 2

In [240]:
observed = count_user['user']
total_traffic= sum(observed)
expected = [ total_traffic/2, total_traffic/2 ]

In [241]:
#calculate chi-square statistics
chi = chisquare(observed, f_exp=expected)
print(chi)

Power_divergenceResult(statistic=0.5230902562832735, pvalue=0.4695264353014863)


**3. Define decision rules**

In making statistical test decisions, we can use:
- Comparison of chi-square statistics with critical value
     -  $\chi^2 > \chi^2_{\alpha,df}$ → reject $H_0$

- Comparison of p-value with alpha
   - pvalue < $\alpha$ → reject $H_0$


Normally, one would look for a p-value of 0.05 or less to proof of SRM. The problem with 0.05 is that it’s not strict enough for our purposes. Using this might give us a false signal. What we need is to be stricter for our test. So we use significance level 1%.

degree of freedom (df) is calculated as: 
$$ df = (rows − 1) × (columns − 1) $$ 


In [242]:
# Comparison of chi-square statistics with critical value
# We must calculate the critical first

# critical value is the chi-square value at alpha
alpha = 0.01
df=(2-1)*(2-1)  

chi_critical = scipy.stats.chi2.ppf(1 - alpha, df)
print(f"Critical value: {chi_critical:.3f}")

Critical value: 6.635


In [243]:
#Make decisions from chi-square statistics and critical value
if chi[0] > chi_critical:
  print("Reject H0 : SRM may be present.")
else:
  print("Fail to reject H0 : No SRM")

Fail to reject H0 : No SRM


From the Chi-Square test,we can conclude that there are no Sample Ratio Mismatch.

## Analyzing Data

In [244]:
def split_data(data):
    """
    Function to split data into variant control & treatment
    :param data: <pandas DataFrame> sample data
    :return control_df: <pandas DataFrame> data control
    :return treatment_df: <pandas DataFrame> data treatment
    """
    data = data.copy()

    control_df = data[data["experiment"] == "control"]
    treatment_df = data[data["experiment"] == "exposed"]

    return control_df, treatment_df


In [245]:
def extract_evaluation(data):
    """
    Extract key evaluation criterion within the data
    - number of session
    - number of conversion
    - conversion rate
    :param data: <pandas DataFrame> sample data
    :return results: <dict> contain number of sess., number of conv., & conversion rate
    """
    # Number of session
    n_session = int(data.shape[0])

    # Number of conversion
    n_conversion = int(data["yes"].sum())

    # Conversion rate (in percent)
    conversion_rate = (n_conversion/n_session) * 100

    # Return results
    results = {}
    results["session"] = n_session
    results["conversion"] = n_conversion
    results["CVR (%)"] = np.round(conversion_rate, 2)

    return results


In [257]:
def calculate_pval(control_res, treatment_res, value=0, alternative='two-sided'):
    """
    Calculate the p-value from two-sided hypothesis test
    :param control_res: <dict> control group summary
    :param treatment_res: <dict> treatment group summary
    :param value: <int> the different between treatment and control proportion
    :param alternative: <str> The alternative hypothesis can be either two-sided or 
     one of the one- sided tests, smaller means that the alternative hypothesis 
     is prop < value and larger means prop > value
    :return pval: <float> p-value
    """
    # Group results
    counts = [treatment_res["conversion"], control_res["conversion"]]
    obs = [treatment_res["session"], control_res["session"]]

    _, pval = proportion.proportions_ztest(count = counts,
                                           nobs = obs,
                                           value = value,
                                           alternative = alternative)
    
    return pval


In [247]:
def calculate_ci(control_res, treatment_res):
    """
    Calculate the p-value from two-sided hypothesis test
    :param control_res: <dict> control group summary
    :param treatment_res: <dict> treatment group summary
    :return pval: <float> p-value
    """
    # Group results
#     counts = [, ]
#     obs = [, ]

    ci = confint_proportions_2indep(count1 = treatment_res["conversion"], nobs1 = treatment_res["session"],
                                                 count2 = control_res["conversion"], nobs2 = control_res["session"], 
                                                 compare='diff', alpha=0.05) 
    return np.round(ci,4)


In [248]:
def summarize_data(data, value= 0, alternative='two-sided'):
    """
    Function to create experimentation summary
    :param data: <pandas DataFrame> sample data
    :param value: <int> the different between treatment and control proportion
    :param alternative: <str> The alternative hypothesis can be either two-sided or 
     one of the one- sided tests, smaller means that the alternative hypothesis 
     is prop < value and larger means prop > value
    :return summary_data: <pandas DataFrame> summary data
    """
    # 1. Split the data
    control_df, treatment_df = split_data(data = data)

    # 2. Calculate evaluation criterion
    control_res = extract_evaluation(data = control_df)
    treatment_res = extract_evaluation(data = treatment_df)
    
    # 3. Calculate lift over baseline (uplift) -- absolute in percent
    lift = treatment_res["CVR (%)"] - control_res["CVR (%)"]
    control_res["lift"] = "---"
    treatment_res["lift"] = np.round(lift, 2)

    # 4. Calculate the p-value
    pval = calculate_pval(control_res = control_res,
                          treatment_res = treatment_res,
                          value = value,
                          alternative = alternative)
    control_res["p-value"] = "---"
    treatment_res["p-value"] = np.round(pval, 2)
    
    # 5. Calculate the Confidence Interval
    ci= calculate_ci(control_res = control_res,
                     treatment_res = treatment_res)
    control_res["confidence interval (%)"] = "---"
    treatment_res["confidence interval (%)"] = ci*100
    
    # 6. Generate dataframe
    summary_df = pd.DataFrame(data = [control_res,
                                      treatment_res],
                              index = ["control", "treatment"])

    return summary_df

In [249]:
summarize_data(data = data,value = 0.02, alternative = 'larger')

Unnamed: 0,session,conversion,CVR (%),lift,p-value,confidence interval (%)
control,4071,264,6.48,---,---,---
treatment,4006,308,7.69,1.21,0.92,"[0.08, 2.33]"


#### Breakdown by User Platform OS

In [250]:
summarize_data(data = data[data.platform_os==6],value = 0.02, alternative = 'larger')

Unnamed: 0,session,conversion,CVR (%),lift,p-value,confidence interval (%)
control,3763,260,6.91,---,---,---
treatment,3885,307,7.9,0.99,0.95,"[-0.18, 2.17]"


In [251]:
summarize_data(data = data[data.platform_os==5],value = 0.02, alternative = 'larger')

Unnamed: 0,session,conversion,CVR (%),lift,p-value,confidence interval (%)
control,308,4,1.3,---,---,---
treatment,120,1,0.83,-0.47,0.98,"[-2.5700000000000003, 3.35]"


#### Breakdown by User Device

In [252]:
data.groupby(['device_make','experiment'])['yes'].agg(['count','sum']).sort_values('count')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,sum
device_make,experiment,Unnamed: 2_level_1,Unnamed: 3_level_1
5008Y_EEA,control,1,0
Samsung SM-A750GN,exposed,1,0
Samsung SM-A805F,exposed,1,0
Samsung SM-A908B,control,1,0
Samsung SM-C9000,control,1,0
...,...,...,...
Samsung SM-G960F,control,104,11
iPhone,exposed,122,1
iPhone,control,311,4
Generic Smartphone,exposed,2332,182


In [253]:
data.device_make.value_counts()

Generic Smartphone     4743
iPhone                  433
Samsung SM-G960F        203
Samsung SM-G973F        154
Samsung SM-G950F        148
                       ... 
D5803                     1
Samsung SM-G6100          1
HTC M10h                  1
Samsung SM-G925I          1
XiaoMi Redmi Note 5       1
Name: device_make, Length: 269, dtype: int64

In [254]:
summarize_data(data = data[data.device_make=='Generic Smartphone'],value = 0.02, alternative = 'larger')

Unnamed: 0,session,conversion,CVR (%),lift,p-value,confidence interval (%)
control,2411,139,5.77,---,---,---
treatment,2332,182,7.8,2.03,0.48,"[0.61, 3.4799999999999995]"


In [255]:
summarize_data(data = data[data.device_make=='iPhone'],value = 0.02, alternative = 'larger')

Unnamed: 0,session,conversion,CVR (%),lift,p-value,confidence interval (%)
control,311,4,1.29,---,---,---
treatment,122,1,0.82,-0.47,0.98,"[-2.55, 3.29]"


In [256]:
summarize_data(data = data[data.device_make=='Samsung SM-G960F'],value = 0.02, alternative = 'larger')

Unnamed: 0,session,conversion,CVR (%),lift,p-value,confidence interval (%)
control,104,11,10.58,---,---,---
treatment,99,10,10.1,-0.48,0.72,"[-9.13, 8.309999999999999]"
