# A/B Testing - Statistical Testing for SmartAd

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.special import binom
from scipy.stats import chi2_contingency, mannwhitneyu, norm, t, ttest_ind

%matplotlib inline

In [3]:
import os, sys
import seaborn as sns

In [4]:

# Move the working directory up one level (presumably to the project root, so
# that relative paths like 'data/...' resolve) and make the project's code
# folders importable.
# The guards keep this cell idempotent: without them every re-run would climb
# one more directory level and push duplicate entries onto sys.path.
if 'path_parent' not in globals():
    path_parent = os.path.dirname(os.getcwd())
    os.chdir(path_parent)

# Same insertion order as before: '/scripts' ends up ahead of '/notebooks'.
for _subdir in ('/notebooks', '/scripts'):
    _code_dir = path_parent + _subdir
    if _code_dir not in sys.path:
        sys.path.insert(0, _code_dir)

In [5]:
# Read the experiment log; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='data/AdSmartABdata.csv')

In [6]:
# Preview the first five rows to check the columns and value formats.
df.head(n=5)

Unnamed: 0,auction_id,experiment,date,hour,device_make,platform_os,browser,yes,no
0,0008ef63-77a7-448b-bd1e-075f42c55e39,exposed,2020-07-10,8,Generic Smartphone,6,Chrome Mobile,0,0
1,000eabc5-17ce-4137-8efe-44734d914446,exposed,2020-07-07,10,Generic Smartphone,6,Chrome Mobile,0,0
2,0016d14a-ae18-4a02-a204-6ba53b52f2ed,exposed,2020-07-05,2,E5823,6,Chrome Mobile WebView,0,1
3,00187412-2932-4542-a8ef-3633901c98d9,control,2020-07-03,15,Samsung SM-A705FN,6,Facebook,0,0
4,001a7785-d3fe-4e11-a344-c8735acacc2c,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,0


In [7]:
# Number of rows assigned to each experiment group.
df['experiment'].value_counts()

control    4071
exposed    4006
Name: experiment, dtype: int64

In [8]:
# Per-group count of rows whose 'yes' flag equals 1.
df.loc[df['yes'].eq(1), 'experiment'].value_counts()

exposed    308
control    264
Name: experiment, dtype: int64

In [9]:
# Per-group count of rows whose 'no' flag equals 1.
df.loc[df['no'].eq(1), 'experiment'].value_counts()

exposed    349
control    322
Name: experiment, dtype: int64

In [10]:
# Split the log into one frame per experiment group, keeping the original row index.
control = df.loc[df['experiment'].eq('control')]
exposed = df.loc[df['experiment'].eq('exposed')]

In [11]:
# Show the control-group subset as the cell output.
control

Unnamed: 0,auction_id,experiment,date,hour,device_make,platform_os,browser,yes,no
3,00187412-2932-4542-a8ef-3633901c98d9,control,2020-07-03,15,Samsung SM-A705FN,6,Facebook,0,0
4,001a7785-d3fe-4e11-a344-c8735acacc2c,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,0
5,0027ce48-d3c6-4935-bb12-dfb5d5627857,control,2020-07-03,15,Samsung SM-G960F,6,Facebook,0,0
6,002e308b-1a07-49d6-8560-0fbcdcd71e4b,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,0
7,00393fb9-ca32-40c0-bfcb-1bd83f319820,control,2020-07-09,5,Samsung SM-G973F,6,Facebook,0,0
...,...,...,...,...,...,...,...,...,...
8069,ffca1153-c182-4f32-9e90-2a6008417497,control,2020-07-10,16,Generic Smartphone,6,Chrome Mobile,0,1
8070,ffcea781-a6e7-4f98-9d90-f95377270476,control,2020-07-03,15,Samsung SM-N976B,6,Facebook,0,0
8073,ffea3210-2c3e-426f-a77d-0aa72e73b20f,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,0
8074,ffeaa0f1-1d72-4ba9-afb4-314b3b00a7c7,control,2020-07-04,9,Generic Smartphone,6,Chrome Mobile,0,0


In [12]:
import HelperFunctions as hf

In [13]:
# Tally the control group's "yes" and "no" answers.
# Summing the boolean mask counts the matching rows directly and gives 0 when
# nobody chose that answer; indexing value_counts().values[0] would raise
# IndexError on such an empty result.
control_yes = (control['yes'] == 1).sum()
control_no = (control['no'] == 1).sum()

In [14]:
# Tally the exposed group's "yes" and "no" answers.
# Summing the boolean mask counts the matching rows directly and gives 0 when
# nobody chose that answer; indexing value_counts().values[0] would raise
# IndexError on such an empty result.
exposed_yes = (exposed['yes'] == 1).sum()
exposed_no = (exposed['no'] == 1).sum()

In [21]:
# Assemble the 2x2 table of answers (rows) by experiment group (columns).
counts = pd.DataFrame(
    {
        "Control": [control_yes, control_no],
        "Exposed": [exposed_yes, exposed_no],
    },
    index=["Yes", "No"],
)

### Contingency table

In [24]:
# Render the table, then expose the same numbers as a plain NumPy array.
display(counts)
counts.to_numpy()

Unnamed: 0,Control,Exposed
Yes,264,308
No,322,349


array([[264, 308],
       [322, 349]], dtype=int64)

In [25]:
# Pearson's chi-squared test of independence on the answer-by-group table.
# correction=False switches off Yates' continuity correction, so the plain
# Pearson statistic is reported. Only the first two results (statistic and
# p-value) are kept.
chi2_val, p_val = chi2_contingency(counts, correction=False)[:2]

# A chi-squared test is not a t-test, so the heading names the test actually run.
print("- Pearson's chi-squared test:")
print(f"   - χ2 value: {chi2_val:.3f}")
print(f"   - p-value: {p_val*100:.1f}%")

- Pearson's chi-squared t-test:
   - χ2 value: 0.417
   - p-value: 51.8%
