# A/B Testing for ShoeFly.com

In [1]:
import pandas as pd

In [2]:
ad_clicks = pd.read_csv('ad_clicks.csv')
ad_clicks.head()

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B


In [4]:
#  which ad platform is getting you the most views.
ad_clicks.groupby('utm_source').user_id.count().reset_index()

Unnamed: 0,utm_source,user_id
0,email,255
1,facebook,504
2,google,680
3,twitter,215


In [5]:
# check if ad_click_timestamp is not null
ad_clicks['is_click'] = ad_clicks.ad_click_timestamp.isnull()
ad_clicks.head()

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group,is_click
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A,False
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B,True
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A,True
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B,True
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B,True


In [7]:
# We want to know the percent of people who clicked on ads from each utm_source.
clicks_by_source = ad_clicks.groupby(['utm_source', 'is_click']).user_id.count().reset_index()
clicks_by_source

Unnamed: 0,utm_source,is_click,user_id
0,email,False,80
1,email,True,175
2,facebook,False,180
3,facebook,True,324
4,google,False,239
5,google,True,441
6,twitter,False,66
7,twitter,True,149


In [8]:
# create a pivot data
clicks_pivot = clicks_by_source.pivot(index='utm_source', columns='is_click', values='user_id').reset_index()
clicks_pivot

is_click,utm_source,False,True
0,email,80,175
1,facebook,180,324
2,google,239,441
3,twitter,66,149


In [10]:
clicks_pivot['percent_clicked'] = clicks_pivot[True] / (clicks_pivot[True] + clicks_pivot[False])
clicks_pivot

is_click,utm_source,False,True,percent_clicked
0,email,80,175,0.686275
1,facebook,180,324,0.642857
2,google,239,441,0.648529
3,twitter,66,149,0.693023


## Analyzing an A/B Test


In [11]:
# Were approximately the same number of people shown both adds?
ad_clicks.groupby('experimental_group').user_id.count().reset_index()

Unnamed: 0,experimental_group,user_id
0,A,827
1,B,827


In [15]:
# Using the column is_click that we defined earlier, check to see if a greater percentage of users clicked on Ad A or Ad B.
ad_clicks.groupby(['experimental_group', 'is_click']).user_id.count().reset_index()\
    .pivot(index='experimental_group', columns='is_click', values='user_id').reset_index()

is_click,experimental_group,False,True
0,A,310,517
1,B,255,572


Start by creating two DataFrames: a_clicks and b_clicks, which contain only the results for A group and B group, respectively.

In [17]:
a_clicks = ad_clicks[ad_clicks.experimental_group=='A']
b_clicks = ad_clicks[ad_clicks.experimental_group=='B']
a_clicks.head()

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group,is_click
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A,False
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A,True
5,013b0072-7b72-40e7-b698-98b4d0c9967f,facebook,1 - Monday,,A,True
6,0153d85b-7660-4c39-92eb-1e1acd023280,google,4 - Thursday,,A,True
7,01555297-d6e6-49ae-aeba-1b196fdbb09f,google,3 - Wednesday,,A,True


In [21]:
a_clicks_pivot = a_clicks.groupby(['is_click', 'day']).user_id.count().reset_index()\
    .pivot(index='day', columns='is_click', values='user_id').reset_index()
a_clicks_pivot['percent_clicked'] = a_clicks_pivot[True] / a_clicks_pivot[True] + a_clicks_pivot[False]
a_clicks_pivot

is_click,day,False,True,percent_clicked
0,1 - Monday,43,70,44.0
1,2 - Tuesday,43,76,44.0
2,3 - Wednesday,38,86,39.0
3,4 - Thursday,47,69,48.0
4,5 - Friday,51,77,52.0
5,6 - Saturday,45,73,46.0
6,7 - Sunday,43,66,44.0


In [22]:
b_clicks_pivot = b_clicks.groupby(['is_click', 'day']).user_id.count().reset_index()\
    .pivot(index='day', columns='is_click', values='user_id').reset_index()
b_clicks_pivot['percent_clicked'] = b_clicks_pivot[True] / b_clicks_pivot[True] + b_clicks_pivot[False]
b_clicks_pivot

is_click,day,False,True,percent_clicked
0,1 - Monday,32,81,33.0
1,2 - Tuesday,45,74,46.0
2,3 - Wednesday,35,89,36.0
3,4 - Thursday,29,87,30.0
4,5 - Friday,38,90,39.0
5,6 - Saturday,42,76,43.0
6,7 - Sunday,34,75,35.0
