# Classical AB Testing

## Imports

In [36]:
import os
import sys

import numpy as np
import pandas as pd
import scipy
import scipy.stats as scs
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# from ABTesting import DfHelper


In [2]:
# Notebook-wide pandas display settings: show all columns at full cell width,
# keep wide frames on one line, and print floats with two decimals.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_colwidth', None),
    ("expand_frame_repr", False),
    ('display.float_format', '{:.2f}'.format),
):
    pd.set_option(option_name, option_value)

In [3]:
# Make the project's ../scripts directory importable so df_helper can be loaded.
# The membership check keeps sys.path free of duplicate entries when this cell
# is executed more than once in the same kernel.
scripts_dir = os.path.abspath(os.path.join('../scripts'))
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)
from df_helper import DfHelper

## Data reading

In [4]:
# Load the experiment export through the project helper and preview the first rows.
# NOTE(review): `missing_values` is defined here but is not passed to the reader
# anywhere in the visible notebook — confirm whether read_csv should receive it.
missing_values = ["n/a", "na", "undefined"]
helper = DfHelper()
df = helper.read_csv("../data/AdSmartABdata.csv")
df.head(5)

2021-07-21 14:26:02,153 — DfHelper — DEBUG — file read as csv


Unnamed: 0,auction_id,experiment,date,hour,device_make,platform_os,browser,yes,no
0,0008ef63-77a7-448b-bd1e-075f42c55e39,exposed,2020-07-10,8,Generic Smartphone,6,Chrome Mobile,0,0
1,000eabc5-17ce-4137-8efe-44734d914446,exposed,2020-07-07,10,Generic Smartphone,6,Chrome Mobile,0,0
2,0016d14a-ae18-4a02-a204-6ba53b52f2ed,exposed,2020-07-05,2,E5823,6,Chrome Mobile WebView,0,1
3,00187412-2932-4542-a8ef-3633901c98d9,control,2020-07-03,15,Samsung SM-A705FN,6,Facebook,0,0
4,001a7785-d3fe-4e11-a344-c8735acacc2c,control,2020-07-03,15,Generic Smartphone,6,Chrome Mobile,0,0


## Data Understanding

In [5]:
# (rows, columns) of the frame as loaded.
df.shape

(8077, 9)

In [6]:
# Column dtypes and non-null counts — a quick check for missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8077 entries, 0 to 8076
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   auction_id   8077 non-null   object
 1   experiment   8077 non-null   object
 2   date         8077 non-null   object
 3   hour         8077 non-null   int64 
 4   device_make  8077 non-null   object
 5   platform_os  8077 non-null   int64 
 6   browser      8077 non-null   object
 7   yes          8077 non-null   int64 
 8   no           8077 non-null   int64 
dtypes: int64(4), object(5)
memory usage: 568.0+ KB


In [7]:
# Summary statistics for the numeric columns.
df.describe()

Unnamed: 0,hour,platform_os,yes,no
count,8077.0,8077.0,8077.0,8077.0
mean,11.62,5.95,0.07,0.08
std,5.73,0.22,0.26,0.28
min,0.0,5.0,0.0,0.0
25%,7.0,6.0,0.0,0.0
50%,13.0,6.0,0.0,0.0
75%,15.0,6.0,0.0,0.0
max,23.0,7.0,1.0,1.0


## Data Exploration

## Data Transformation

We start by selecting the columns needed for the test: the auction id, the experiment group, and the two response flags (`yes` and `no`).

In [14]:
# Keep only the identifier, the experiment arm, and the two answer flags.
needed_columns = ['auction_id', 'experiment', 'yes', 'no']
clean_df = df[needed_columns]
clean_df.shape

(8077, 4)

Next, we drop users who did not respond to the questionnaire (rows where both `yes` and `no` are 0).

In [15]:
# Keep only respondents: drop rows where neither `yes` nor `no` was ticked.
responded_filter = "not (yes == 0 & no == 0)"
clean_df = clean_df.query(responded_filter)
clean_df.shape

(1243, 4)

Transform the two response columns into a single binary variable, `aware` (1 if the user answered yes, 0 otherwise).

In [16]:
# Collapse the yes/no flags into one binary outcome: `aware` is 1 where
# `yes` equals 1 and 0 everywhere else.
# The method chain returns a new frame instead of writing into the filtered
# frame, which avoids SettingWithCopyWarning and the in-place column drop.
# NOTE: this cell consumes `yes`/`no`, so re-running it without re-running the
# cells above raises KeyError (same as before this change).
clean_df = (
    clean_df
    .assign(aware=lambda frame: (frame['yes'] == 1).astype('int64'))
    .drop(columns=['yes', 'no'])
    .set_index('auction_id')
)
clean_df.head(5)

Unnamed: 0_level_0,experiment,aware
auction_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0016d14a-ae18-4a02-a204-6ba53b52f2ed,exposed,0
008aafdf-deef-4482-8fec-d98e3da054da,exposed,1
00a1384a-5118-4d1b-925b-6cdada50318d,exposed,0
00b6fadb-10bd-49e3-a778-290da82f7a8d,control,1
00ebf4a8-060f-4b99-93ac-c62724399483,control,0


## Calculate conversion rates

In [17]:
# Per-group summary computed in one grouped pass instead of three pivot tables:
#   aware     -> number of "yes" answers (sum of the 0/1 flag)
#   total     -> number of respondents
#   rate      -> share of respondents who are aware (mean of the 0/1 flag)
#   not aware -> respondents who answered "no"
summary_df = (
    clean_df
    .groupby('experiment')['aware']
    .agg(aware='sum', total='count', rate='mean')
)
summary_df['not aware'] = summary_df['total'] - summary_df['aware']
# Fix the column order used by the displayed table.
summary_df = summary_df[['aware', 'not aware', 'total', 'rate']]
summary_df

Unnamed: 0_level_0,aware,not aware,total,rate
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
control,264,322,586,0.45
exposed,308,349,657,0.47


In [18]:
# Conversion rate of each group and the observed difference (exposed minus control).
c_rate = summary_df.loc['control', 'rate']
e_rate = summary_df.loc['exposed', 'rate']
dif_cr = e_rate - c_rate
dif_cr

0.018285619295484168

There is a difference of about 1.83 percentage points in conversion rates between the two groups (exposed minus control). It is a good sign, but on its own it is not enough evidence for us to confidently go with the new design.

## Calculate z-score and p-value.


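$$z = \frac{x - \mu}{\sigma}$$

For the difference between two conversion rates this becomes

$$z = \frac{\hat{p}_e - \hat{p}_c}{\sqrt{\sigma_e^2 + \sigma_c^2}}, \qquad \sigma_g = \sqrt{\frac{\hat{p}_g\,(1 - \hat{p}_g)}{n_g}}$$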


In [40]:
def z_score_calc(e_rate, c_rate, std_dev_e, std_dev_c):
    """Return the z statistic for the difference between two rates.

    Args:
        e_rate: rate observed in the exposed group.
        c_rate: rate observed in the control group.
        std_dev_e: standard deviation (standard error) of the exposed rate.
        std_dev_c: standard deviation (standard error) of the control rate.

    Returns:
        (e_rate - c_rate) divided by sqrt(std_dev_e**2 + std_dev_c**2).
    """
    rate_gap = e_rate - c_rate
    combined_std = np.sqrt(std_dev_e**2 + std_dev_c**2)
    return rate_gap / combined_std

In [32]:
# Number of respondents in each group, read from the summary table.
c_count = summary_df.loc['control', 'total']
e_count = summary_df.loc['exposed', 'total']

In [33]:
# Standard error of each group's conversion rate: sqrt(rate * (1 - rate) / n).
c_rate_variance = c_rate * (1 - c_rate) / c_count
e_rate_variance = e_rate * (1 - e_rate) / e_count
std_dev_c = np.sqrt(c_rate_variance)
std_dev_e = np.sqrt(e_rate_variance)


In [41]:
# Use the helper defined as z_score_calc: the visible notebook defines no
# callable named z_score, so the previous call raised NameError on a fresh kernel.
z_score = z_score_calc(e_rate, c_rate, std_dev_e, std_dev_c)

In [42]:
# One-sided p-value: upper-tail probability of the standard normal at z_score.
p_value = scs.norm.sf(z_score)
p_value

0.2591726745827285

In [None]:
# NOTE(review): leftover scratch expression — it references the survival
# function without calling it and the result is not used; safe to delete.
scs.norm().sf

In [None]:
# NOTE(review): duplicate leftover scratch expression — not called and not
# used; safe to delete.
scs.norm().sf