# ANALYSIS PROJECT ON A/B TESTING USING PYTHON

In [127]:
    #Import the necessary libarires
    
    import pandas as pd
    import datetime 
    from datetime import date, timedelta
    import plotly.graph_objects as go
    import plotly.express as px
    import plotly.io as pio
    #pio.templates.defaults = "plotly_white"

# Load the data

In [128]:
#Due to nature of file, while loading the data specify the delimiter to make file easy to read

control = pd.read_csv('control_group.csv', sep=";")
test = pd.read_csv('test_group.csv', sep=";")

# Explore The Data

In [129]:
print(control.head())

      Campaign Name       Date  Spend [USD]  # of Impressions     Reach  \
0  Control Campaign  1.08.2019         2280           82702.0   56930.0   
1  Control Campaign  2.08.2019         1757          121040.0  102513.0   
2  Control Campaign  3.08.2019         2343          131711.0  110862.0   
3  Control Campaign  4.08.2019         1940           72878.0   61235.0   
4  Control Campaign  5.08.2019         1835               NaN       NaN   

   # of Website Clicks  # of Searches  # of View Content  # of Add to Cart  \
0               7016.0         2290.0             2159.0            1819.0   
1               8110.0         2033.0             1841.0            1219.0   
2               6508.0         1737.0             1549.0            1134.0   
3               3065.0         1042.0              982.0            1183.0   
4                  NaN            NaN                NaN               NaN   

   # of Purchase  
0          618.0  
1          511.0  
2          372.0  
3   

In [130]:
print(test.head())

   Campaign Name       Date  Spend [USD]  # of Impressions  Reach  \
0  Test Campaign  1.08.2019         3008             39550  35820   
1  Test Campaign  2.08.2019         2542            100719  91236   
2  Test Campaign  3.08.2019         2365             70263  45198   
3  Test Campaign  4.08.2019         2710             78451  25937   
4  Test Campaign  5.08.2019         2297            114295  95138   

   # of Website Clicks  # of Searches  # of View Content  # of Add to Cart  \
0                 3038           1946               1069               894   
1                 4657           2359               1548               879   
2                 7885           2572               2367              1268   
3                 4216           2216               1437               566   
4                 5863           2106                858               956   

   # of Purchase  
0            255  
1            677  
2            578  
3            340  
4            768  


# Check for null values in datasets


In [131]:
print(control.isnull().sum())

Campaign Name          0
Date                   0
Spend [USD]            0
# of Impressions       1
Reach                  1
# of Website Clicks    1
# of Searches          1
# of View Content      1
# of Add to Cart       1
# of Purchase          1
dtype: int64


In [132]:
print(test.isnull().sum())

Campaign Name          0
Date                   0
Spend [USD]            0
# of Impressions       0
Reach                  0
# of Website Clicks    0
# of Searches          0
# of View Content      0
# of Add to Cart       0
# of Purchase          0
dtype: int64


# Rename columns for convenience while reading data

In [133]:
control.columns = ["Campaign Name", "Date", "Amount Spent in Dollars", "Impressions", "Reach", "Website Clicks", "Searches", "Content", "Add to Chart", "Purchases"]

In [134]:
test.columns = ["Campaign Name", "Date", "Amount Spent in Dollars", "Impressions", "Reach", "Website Clicks", "Searches", "Content", "Add to Chart", "Purchases"]

In [135]:
control.columns

Index(['Campaign Name', 'Date', 'Amount Spent in Dollars', 'Impressions',
       'Reach', 'Website Clicks', 'Searches', 'Content', 'Add to Chart',
       'Purchases'],
      dtype='object')

In [136]:
test.columns

Index(['Campaign Name', 'Date', 'Amount Spent in Dollars', 'Impressions',
       'Reach', 'Website Clicks', 'Searches', 'Content', 'Add to Chart',
       'Purchases'],
      dtype='object')

In [137]:
#Checking to ensure everything looks good

print(control.head())

      Campaign Name       Date  Amount Spent in Dollars  Impressions  \
0  Control Campaign  1.08.2019                     2280      82702.0   
1  Control Campaign  2.08.2019                     1757     121040.0   
2  Control Campaign  3.08.2019                     2343     131711.0   
3  Control Campaign  4.08.2019                     1940      72878.0   
4  Control Campaign  5.08.2019                     1835          NaN   

      Reach  Website Clicks  Searches  Content  Add to Chart  Purchases  
0   56930.0          7016.0    2290.0   2159.0        1819.0      618.0  
1  102513.0          8110.0    2033.0   1841.0        1219.0      511.0  
2  110862.0          6508.0    1737.0   1549.0        1134.0      372.0  
3   61235.0          3065.0    1042.0    982.0        1183.0      340.0  
4       NaN             NaN       NaN      NaN           NaN        NaN  


In [138]:
## Now to Replace the null values in the contol dataset with the mean values of individual columns

In [139]:
control["Impressions"].fillna(value=control["Impressions"].mean(), inplace=True)

In [140]:
control["Reach"].fillna(value=control["Reach"].mean(), inplace=True)
control["Website Clicks"].fillna(value=control["Website Clicks"].mean(), inplace=True)
control["Searches"].fillna(value=control["Searches"].mean(), inplace=True)
control["Content"].fillna(value=control["Content"].mean(), inplace=True)
control["Add to Chart"].fillna(value=control["Add to Chart"].mean(), inplace=True)
control["Purchases"].fillna(value=control["Purchases"].mean(), inplace=True)

In [141]:
#Checking to enusre everything is alright again
print(control.head())

      Campaign Name       Date  Amount Spent in Dollars    Impressions  \
0  Control Campaign  1.08.2019                     2280   82702.000000   
1  Control Campaign  2.08.2019                     1757  121040.000000   
2  Control Campaign  3.08.2019                     2343  131711.000000   
3  Control Campaign  4.08.2019                     1940   72878.000000   
4  Control Campaign  5.08.2019                     1835  109559.758621   

           Reach  Website Clicks     Searches      Content  Add to Chart  \
0   56930.000000     7016.000000  2290.000000  2159.000000        1819.0   
1  102513.000000     8110.000000  2033.000000  1841.000000        1219.0   
2  110862.000000     6508.000000  1737.000000  1549.000000        1134.0   
3   61235.000000     3065.000000  1042.000000   982.000000        1183.0   
4   88844.931034     5320.793103  2221.310345  1943.793103        1300.0   

    Purchases  
0  618.000000  
1  511.000000  
2  372.000000  
3  340.000000  
4  522.793103  


In [142]:
# Now to Merge both datasets to create the A/B Testing Dataset
#ab_data = pd.merge(control, test, on="Date", how='outer')
ab_data = control.merge(test,how ="outer").sort_values(["Date"])
ab_data = ab_data.reset_index(drop=True)


You are merging on int and float columns where the float values are not equal to their int representation



In [143]:
print(ab_data.head())

      Campaign Name        Date  Amount Spent in Dollars  Impressions  \
0  Control Campaign   1.08.2019                     2280      82702.0   
1     Test Campaign   1.08.2019                     3008      39550.0   
2     Test Campaign  10.08.2019                     2790      95054.0   
3  Control Campaign  10.08.2019                     2149     117624.0   
4     Test Campaign  11.08.2019                     2420      83633.0   

     Reach  Website Clicks  Searches  Content  Add to Chart  Purchases  
0  56930.0          7016.0    2290.0   2159.0        1819.0      618.0  
1  35820.0          3038.0    1946.0   1069.0         894.0      255.0  
2  79632.0          8125.0    2312.0   1804.0         424.0      275.0  
3  91257.0          2277.0    2475.0   1984.0        1629.0      734.0  
4  71286.0          3750.0    2893.0   2617.0        1075.0      668.0  


In [144]:
print(ab_data["Campaign Name"].value_counts())

Control Campaign    30
Test Campaign       30
Name: Campaign Name, dtype: int64


In [145]:
#Now lets start analysing to find revelant insights about the dataset

Turnout_rate_graph = px.scatter(data_frame=ab_data, x="Amount Spent in Dollars", y="Impressions", color="Campaign Name", 
                                    trendline="ols", title = 'Relationship Between Amount Spent for Marketing to Number of Impressions Made')

In [146]:
Turnout_rate_graph.show()

From the graph it shows that as amount spent for ads increased, there was a slight increases in the numbers of impressions for the 'Control Campaign'

In [147]:
Payout_rate_graph = px.scatter(data_frame=ab_data, x="Amount Spent in Dollars", y="Purchases", color="Campaign Name", 
                               trendline="ols", title = 'Relationship Between Amount Spent for Marketing to Purchases Made')

In [148]:
Payout_rate_graph.show()

From the graph its clear that there were slight differences between two campaigns 
that validates that Amount of Dollar invested in ads resulted to more purchases.

# Now lets see the total amount spent on Amount Spent and Purchases for Both Campaigns

In [150]:
label = ["Amount Spent in Control Campaign", 
         "Amount Spent in Test Campaign"]
counts = [sum(control["Amount Spent in Dollars"]), 
          sum(test["Amount Spent in Dollars"])]
colors = ['Green','Red']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Amount Spent')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line= dict(color="black", width=3)))

Its clear that More Money was spent on the Test Campaign than the Control Campaign

In [151]:
label = ["Total Number of Purchases in Control Campaign", 
         "Total Number of Purchases in Test Campaign"]
counts = [round(sum(control["Purchases"]),0), sum(test["Purchases"])]
colors = ['Green','Red']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Total Number of Purchases')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line= dict(color="black", width=3)))

Despite more money spent on Test Campaign, Control Campaign still resulted in more purchases although a relatively Small Change

# Now let's check for the turnover Ratio from Add to Chart to Purchases Made

In [152]:
Turnover_Ratio_graph = px.scatter(data_frame=ab_data, x="Add to Chart", y="Purchases", color="Campaign Name", 
                                  trendline="ols",title = 'Relationship Between Orders Made to Actual Purchases Made')

In [153]:
Turnover_Ratio_graph.show()

Turnover Rate in the test Camapign is higher than that of the Contol Campaign, even though more purchases were made in the Contol Campaign

This would have been a more interesting study if there was a dataset to show the exact amount of revenue that was generated from both campaigns.. We could do this but assumptions would have to be made whereby we assume that all product(s) carries the same value.


But let's Dive deeper

In [154]:
Viewd_Ratio = px.scatter(data_frame=ab_data, x="Website Clicks", y="Content", color="Campaign Name", 
                             trendline="ols", title = 'Relationship Between Number of Website Clicks to Contents Viewed on Website')

In [155]:
Viewd_Ratio.show()

For the control camapign, more website clicks resulted into more viewed contents by the consumer, compared to the Test Campaign

# CONCLUSION

I love my things orderly to gain more insights about the turns of events with this Data

 TEST CAMPAIGN
1 More Money Spent                                 
,2 Less Ratio of Number of impressions              
  compared to the control campaign
,3 Less Viewed Ratio                                
,4 High Turn-Over Ratio in terms of                 
  number of clicks to number of purchases
,5 Slightly less Purchases Made                       


 CONTROL CAMPAIGN
1 Less Money compared to Test Campaign
,2 More Number of impression due to Amount spent
,3 More Viewed ratio, more viewed contents to website clicks
,4 Low Turnover Ratio Compared to Test Campaign
,5 Relatively high purchases made



We can say from this analysis due to the high turn-over ratio and Less Viewed Ratio in the test campaign, we could say that the Test campaign can be used to market a particlular product while the Control Campaign could be used to market several products due to it relatively higher number of impressions and more viewed ratio.
