In [1]:
import numpy as np
import pandas as pd
import pandas as pd
import datetime
from datetime import date, timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Make plotly's "plotly_dark" template the default so figures created later
# in the notebook share one theme without setting it per chart.
pio.templates.default = "plotly_dark"

### Background Info, EDA and Cleaning of the Data

<span style="font-size: Large;">

- This A/B test is about an Instagram campaign: there are 2 types of ads, and we need to find which ad performs better, i.e. which one gives better sales and traffic.

- The data is cleaned to make sure it is in the same format for both the control and treatment groups.

</span>





In [2]:
# Load the raw campaign data for each arm of the experiment.
# Paths are relative to the notebook's working directory.
control_csv_path = "data/control_group.csv"
treatment_csv_path = "data/treatment_group.csv"

Control_group = pd.read_csv(control_csv_path)
treatment_group = pd.read_csv(treatment_csv_path)

In [3]:
# Data Cleaning
# Both frames get the same readable column names so they can be compared and
# later concatenated column-for-column. The list is defined once so the two
# groups cannot drift apart.
COLUMN_NAMES = [
    "Group Name", "Date", "Amount Spent",
    "Number of Impressions", "Reach", "Website Clicks",
    "Searches Received", "Viewed Content", "Added to Cart",
    "Purchases",
]

Control_group.columns = COLUMN_NAMES
treatment_group.columns = COLUMN_NAMES

# Overwrite the group label with a short, consistent name per arm
Control_group["Group Name"] = "Control"
treatment_group["Group Name"] = "Treatment"

# Change date to date format (the source strings are day.month.year)
Control_group["Date"] = pd.to_datetime(Control_group["Date"], format="%d.%m.%Y")
treatment_group["Date"] = pd.to_datetime(treatment_group["Date"], format="%d.%m.%Y")

# Change all other columns to numeric.
# Assign by column label rather than through `.iloc[:, 2:] = ...`: an iloc
# assignment writes into the existing columns in place and can keep their old
# dtype, whereas label assignment replaces the columns so the converted dtype
# is the one that sticks.
numeric_columns = COLUMN_NAMES[2:]
Control_group[numeric_columns] = Control_group[numeric_columns].apply(pd.to_numeric)
treatment_group[numeric_columns] = treatment_group[numeric_columns].apply(pd.to_numeric)




In [4]:
# Count missing values per column in each group; the cell displays the two
# count Series side by side as a tuple.
control_na_counts = Control_group.isna().sum()
treatment_na_counts = treatment_group.isna().sum()

control_na_counts, treatment_na_counts


(Group Name               0
 Date                     0
 Amount Spent             0
 Number of Impressions    1
 Reach                    1
 Website Clicks           1
 Searches Received        1
 Viewed Content           1
 Added to Cart            1
 Purchases                1
 dtype: int64,
 Group Name               0
 Date                     0
 Amount Spent             0
 Number of Impressions    0
 Reach                    0
 Website Clicks           0
 Searches Received        0
 Viewed Content           0
 Added to Cart            0
 Purchases                0
 dtype: int64)

### Null Values

<span style="font-size: Large;">

- As there is one row with null values in the Control group, we will remove it, because our Overall Evaluation metrics are Reach per Amount Spent, Viewed Content per Amount Spent and Purchases per Amount Spent.

- If we instead filled the gaps with the mean of the other values, the results would not be accurate.

</span>


In [5]:
# Get the date(s) on which the control group is missing at least one value.
# These are derived from the data instead of being hard-coded, so the cell
# keeps working if the input files change.
missing_dates = Control_group.loc[Control_group.isna().any(axis=1), "Date"]

# Remove those dates from BOTH groups so the two samples cover the same days.
# `.copy()` makes each result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
Control_group = Control_group[~Control_group["Date"].isin(missing_dates)].copy()
treatment_group = treatment_group[~treatment_group["Date"].isin(missing_dates)].copy()


In [6]:
print(Control_group.shape, treatment_group.shape)
print(Control_group.dtypes, treatment_group.dtypes)

# Change datatype of floats to int in control group.
# Every metric column is cast, so the mapping is built from a single list.
integer_columns = [
    "Amount Spent",
    "Number of Impressions",
    "Reach",
    "Website Clicks",
    "Searches Received",
    "Viewed Content",
    "Added to Cart",
    "Purchases",
]
Control_group = Control_group.astype({column: int for column in integer_columns})


(29, 10) (29, 10)
Group Name                       object
Date                     datetime64[ns]
Amount Spent                      int64
Number of Impressions           float64
Reach                           float64
Website Clicks                  float64
Searches Received               float64
Viewed Content                  float64
Added to Cart                   float64
Purchases                       float64
dtype: object Group Name                       object
Date                     datetime64[ns]
Amount Spent                      int64
Number of Impressions             int64
Reach                             int64
Website Clicks                    int64
Searches Received                 int64
Viewed Content                    int64
Added to Cart                     int64
Purchases                         int64
dtype: object


Now that the cleaning and EDA of the data is done, we are going to discuss the plan.



<span style="font-size: Large;">

### We are using the following Guide to perform A/B Test

#### Major steps in Running the A/B Tests
- Prerequisites
- Experiment Design
- Running Experiment
- Results to Decision
- Post Launch Monitoring

</span>

<span style="font-size: small;"><em>for more info : [Emma Ding: A/B Testing Fundamentals](https://www.youtube.com/watch?v=VpTlNRUcIDo&t=11s)</em></span>





<span style="font-size: Large;">
## Prerequisites

This is the first step, where we need to establish the Objective and the Overall Evaluation Criterion (OEC).
- Objective and Key Metrics
    - Objective is to increase the purchases and Traffic
    - Key Metric
        - Reach / Amount Spent
        - Viewed Content / Amount Spent
        - Purchases/ Amount Spent
- Product Groups 
    - Control and Treatment

</span>

In [7]:
# Add the three key metrics to each group: Reach, Viewed Content and Purchases,
# each divided by the amount spent on that row.
for group_frame in (Control_group, treatment_group):
    for metric in ("Reach", "Viewed Content", "Purchases"):
        group_frame[f"{metric} per amount spent"] = (
            group_frame[metric] / group_frame["Amount Spent"]
        )





<span style="font-size: Large;">

## Experiment Design

This is the second step, where we decide whether to target specific users or all users.
- Users to target
    - Objective is to increase the purchases and Traffic

    - <img src="reports/figures/User&apos;s Path to Purchase.png" alt="User's path to purchase" width="500" height="370">

    - Based on the above, we are targeting the users who viewed content.

</span>




<span style="font-size: Large;">

## Running Experiment

This is the third step, where we establish how long to run the experiment. As a basic rule of thumb, we consider these four things:
- ramp up plan
- day of week effect
- seasonality effect
- Primacy & Novelty effect

</span>

<span style="font-size: small;"><em>for more info go through the doc section or source video mentioned above.</em></span>



<span style="font-size: Large;">

## Results and Decision

This is the fourth step, where we decide which variant is better.
- Sanity Checks : make sure the data is reliable 
- When making decisions we need to consider
    - Tradeoffs between different metrics
    - Cost of Launching
    - Opportunity cost

</span>


In [8]:
# Display the control group to check the cleaned values together with the
# newly added "... per amount spent" columns.
Control_group

Unnamed: 0,Group Name,Date,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Viewed Content,Added to Cart,Purchases,Reach per amount spent,Viewed Content per amount spent,Purchases per amount spent
0,Control,2019-08-01,2280,82702,56930,7016,2290,2159,1819,618,24.969298,0.94693,0.271053
1,Control,2019-08-02,1757,121040,102513,8110,2033,1841,1219,511,58.345475,1.047809,0.290837
2,Control,2019-08-03,2343,131711,110862,6508,1737,1549,1134,372,47.316261,0.661118,0.158771
3,Control,2019-08-04,1940,72878,61235,3065,1042,982,1183,340,31.564433,0.506186,0.175258
5,Control,2019-08-06,3083,109076,87998,4028,1709,1249,784,764,28.542978,0.405125,0.247811
6,Control,2019-08-07,2544,142123,127852,2640,1388,1106,1166,499,50.256289,0.434748,0.196148
7,Control,2019-08-08,1900,90939,65217,7260,3047,2746,930,462,34.324737,1.445263,0.243158
8,Control,2019-08-09,2813,121332,94896,6198,2487,2179,645,501,33.734803,0.774618,0.178102
9,Control,2019-08-10,2149,117624,91257,2277,2475,1984,1629,734,42.464867,0.92322,0.341554
10,Control,2019-08-11,2490,115247,95843,8137,2941,2486,1887,475,38.491165,0.998394,0.190763


In [9]:
# Stack the control rows on top of the treatment rows. `ignore_index=True`
# discards the per-group row labels and numbers the combined rows 0..n-1.
Combined_dataframe = pd.concat(
    [Control_group, treatment_group],
    axis=0,
    ignore_index=True,
)
Combined_dataframe.head()

Unnamed: 0,Group Name,Date,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Viewed Content,Added to Cart,Purchases,Reach per amount spent,Viewed Content per amount spent,Purchases per amount spent
0,Control,2019-08-01,2280,82702,56930,7016,2290,2159,1819,618,24.969298,0.94693,0.271053
1,Control,2019-08-02,1757,121040,102513,8110,2033,1841,1219,511,58.345475,1.047809,0.290837
2,Control,2019-08-03,2343,131711,110862,6508,1737,1549,1134,372,47.316261,0.661118,0.158771
3,Control,2019-08-04,1940,72878,61235,3065,1042,982,1183,340,31.564433,0.506186,0.175258
4,Control,2019-08-06,3083,109076,87998,4028,1709,1249,784,764,28.542978,0.405125,0.247811


In [10]:
# Share of the total amount spent that went to each group.
# The slice order is pinned so Control is always listed before Treatment.
group_order = ["Control", "Treatment"]

fig = px.pie(
    data_frame=Combined_dataframe,
    names="Group Name",
    values="Amount Spent",
    title="Total Amount Spent in Control Group vs Treatment Group",
    category_orders={"Group Name": group_order},
)
fig.show()

In [11]:
# Reach against amount spent, one colour and one OLS trendline per group.
# Marker size also encodes the amount spent.
# NOTE: plotly's "ols" trendline needs the statsmodels package installed.
reach_scatter_options = dict(
    x="Reach",
    y="Amount Spent",
    size="Amount Spent",
    color="Group Name",
    trendline="ols",
    title="Reach vs Amount Spent in Control and Treatment Group",
)
fig = px.scatter(Combined_dataframe, **reach_scatter_options)
fig.show()

In [12]:
# Viewed content against amount spent, one colour and one OLS trendline per
# group. Marker size also encodes the amount spent.
viewed_scatter_options = dict(
    x="Viewed Content",
    y="Amount Spent",
    size="Amount Spent",
    color="Group Name",
    trendline="ols",
    title="Viewed Content vs Amount Spent in control and treatment group",
)
fig = px.scatter(Combined_dataframe, **viewed_scatter_options)
fig.show()

In [13]:
# Purchases against amount spent, one colour and one OLS trendline per group.
# Marker size also encodes the amount spent.
purchases_scatter_options = dict(
    x="Purchases",
    y="Amount Spent",
    size="Amount Spent",
    color="Group Name",
    trendline="ols",
    title="Purchases vs Amount Spent in control and treatment group",
)
fig = px.scatter(Combined_dataframe, **purchases_scatter_options)
fig.show()

In [14]:
# Mean of each per-amount-spent metric, one row per group.
per_spend_columns = [
    "Purchases per amount spent",
    "Viewed Content per amount spent",
    "Reach per amount spent",
]

Combined_dataframe.groupby("Group Name")[per_spend_columns].mean()

Unnamed: 0_level_0,Purchases per amount spent,Viewed Content per amount spent,Reach per amount spent
Group Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Control,0.232042,0.866409,39.168313
Treatment,0.202172,0.749462,20.54074


In [15]:

# Mean of each per-amount-spent metric per group; the later chart cells also
# read from this `conclusion` frame.
conclusion = Combined_dataframe.groupby("Group Name")[
    ["Purchases per amount spent", "Viewed Content per amount spent", "Reach per amount spent"]
].mean()

# Bars appear in this order on the x axis; each label maps to its metric column.
bar_labels = ["Viewed Content", "Purchases", "Reach"]
bar_columns = [
    "Viewed Content per amount spent",
    "Purchases per amount spent",
    "Reach per amount spent",
]

fig = go.Figure()

# One bar trace per group, Control first, so the groups sit side by side.
for group_name in ("Control", "Treatment"):
    fig.add_trace(go.Bar(
        x=bar_labels,
        y=[conclusion.loc[group_name, column] for column in bar_columns],
        name=group_name,
    ))

fig.update_layout(
    title="Performance Metrics per Amount Spent",
    barmode="group",
    xaxis_title="Metrics",
    yaxis_title="Value"
)

fig.show()



In [16]:
# using conclusion dataframe plot individual metrics for Viewed Content:
# one bar per group showing the mean "Viewed Content per amount spent".

fig = go.Figure()

fig.add_trace(go.Bar(
    x=["Viewed Content"],
    y=[conclusion.loc["Control", "Viewed Content per amount spent"]],
    name="Control",
))

fig.add_trace(go.Bar(
    x=["Viewed Content"],
    y=[conclusion.loc["Treatment", "Viewed Content per amount spent"]],
    name="Treatment",
))

fig.update_layout(
    title="Mean of Viewed Content per Amount Spent",
    barmode="group",
    xaxis_title="Metrics",
    yaxis_title="Value"
)

# Render explicitly, as the other chart cells do, instead of relying on the
# notebook echoing the figure returned by update_layout().
fig.show()


In [17]:
# using conclusion dataframe plot individual metrics for purchases:
# one bar per group showing the mean "Purchases per amount spent".
purchases_column = "Purchases per amount spent"

fig = go.Figure()

# Control is added first so it is drawn to the left of Treatment.
for group_name in ("Control", "Treatment"):
    fig.add_trace(go.Bar(
        x=["Purchases"],
        y=[conclusion.loc[group_name, purchases_column]],
        name=group_name,
    ))

fig.update_layout(
    title="Mean of Purchases per Amount Spent",
    barmode="group",
    xaxis_title="Metrics",
    yaxis_title="Value"
)

fig.show()


<span style="font-size: Large;">

## Conclusion

Looking at the charts above, we can reach a conclusion. Before doing so, let's summarise what we found:
- The amount spent in Control is 66,818 USD and in Treatment is 74,595 USD
- Performance Metrics
    - Reach : for each dollar spent, Control reached about 39.2 users and Treatment about 20.5 users
    - Users who Viewed Content : for each dollar spent, Control : 0.866 users and Treatment : 0.749 users
    - Users who Purchased : for each dollar spent, Control : 0.232 users and Treatment : 0.202 users

 Based on the above, we can say that the Control group is performing better than the Treatment group. Note that this comparison is based on the group means only; a statistical significance test would be needed to confirm that the differences are not due to chance.

</span>
