# Introduction: #3 Promotional Data

This notebook includes the data preparation for discovering successful characteristics and popular item categories for future promotions.

The objective of this section is to create three datasets:
* `promotion_metrics.csv`: performance metrics for each promotion and sessions without promotions
* `promotion_view_item_category.csv`: percentage of unique items by item category among all unique items viewed by customers in each promotion and without promotions
* `promotion_trend.csv`: data for visualizing engagement trend for each promotion.

Based on those datasets and further analysis, we are able to identify the most popular item categories for each promotion, and key traits of successful promotions.

## Imports
We are using a typical data science stack such as `pandas`, `numpy`, `matplotlib`, etc.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime as dt
from datetime import date, timedelta
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 50)
plt.rcParams['figure.figsize'] = [10,6]

## Read in Data 

In [2]:
ecommerce_all = pd.read_csv('ecommerce_all.csv')
ecommerce_all['user_pseudo_id'] = ecommerce_all['user_pseudo_id'].astype(str)
ecommerce_all['ga_session_id'] = ecommerce_all['user_pseudo_id'].astype(str) + ecommerce_all['ga_session_id'].astype(str)
ecommerce_all.head()

Unnamed: 0,event_date,event_time,user_pseudo_id,continent,sub_continent,country,region,city,device_category,mobile_brand_name,mobile_model_name,operating_system,web_browser,revenue,traffic_name,traffic_medium,traffic_source,ga_session_id,event_name,event_value_in_usd,page_title,page_location,engagement_time_sec,total_item_quantity,purchase_revenue_in_usd,unique_items,transaction_id,item_id,item_name,item_brand,item_variant,item_category,price_in_usd,quantity,item_revenue_in_usd,promotion_name
0,20201103,2020-11-03 11:04:19,8825068.227430915,Europe,Southern Europe,(not set),(not set),(not set),desktop,Apple,Safari,Web,Chrome,0.0,(organic),organic,google,8825068.227430915,view_item,,Eco-Friendly | Google Merchandise Store,https://shop.googlemerchandisestore.com/Google...,3.075,,,12,(not set),GGOEGXXX1378,Google Cotopaxi Shell,(not set),(not set),Eco-Friendly,,,,(not set)
1,20201103,2020-11-03 07:43:31,10673312.893252771,Americas,Northern America,United States,Georgia,(not set),desktop,Google,Chrome,Web,Chrome,0.0,<Other>,cpc,google,10673312.893252771,view_item,,Hats | Apparel | Google Merchandise Store,https://shop.googlemerchandisestore.com/Google...,3.354,,,12,(not set),GGOEYHPB121110,YouTube Leather Strap Hat Black,(not set),(not set),Home/Apparel/Hats/,,,,(not set)
2,20201103,2020-11-03 13:50:36,15626554.576808143,Americas,Northern America,United States,New York,New York,mobile,Apple,iPhone,Web,Safari,0.0,(organic),organic,google,15626554.576808143,view_item,,Home,https://shop.googlemerchandisestore.com/store....,7.324,1.0,,12,(not set),GGOEGXXX1381,Google Men's Softshell Moss,(not set),(not set),Home/Shop by Brand/Google/,,,,(not set)
3,20201103,2020-11-03 22:34:53,16462431.22835943,Americas,South America,Colombia,(not set),(not set),mobile,Apple,iPhone,Web,Safari,0.0,(direct),(none),(direct),16462431.228359437,view_item,,Men's / Unisex | Apparel | Google Merchandise ...,https://shop.googlemerchandisestore.com/Google...,22.694,,,12,(not set),GGOEGXXX1038,Google F/C Long Sleeve Tee Charcoal,(not set),(not set),Home/Apparel/Men's / Unisex/,,,,(not set)
4,20201103,2020-11-03 21:31:24,16765047.752179116,Americas,South America,Ecuador,Pichincha,Quito,desktop,Google,Chrome,Windows,Chrome,0.0,(referral),referral,shop.googlemerchandisestore.com,16765047.752179116,view_item,,Home,https://shop.googlemerchandisestore.com/store....,2.811,,,12,(not set),GGOEGXXX1347,Google Sherpa Zip Hoodie Navy,(not set),(not set),Home/Apparel/Men's / Unisex/,,,,(not set)


In [3]:
### add column promotion_or_not to identify whether customer responded to internal promotions in this session
temp = ecommerce_all.copy()
temp['promotion_or_not'] = temp['promotion_name'].apply(lambda x: 1 if ((x == 'Google Mural Collection') | 
                                                                        (x == 'Act Responsible') |
                                                                        (x == 'Reach New Heights') |
                                                                        (x == 'Complete Your Collection')) else 0)
promotion_or_not = temp.groupby(['event_date', 'user_pseudo_id', 'ga_session_id'])['promotion_or_not'].agg('sum').\
                   reset_index().rename(columns={'promotion_or_not':'promotion_or_not'})
promotion_or_not['promotion_or_not'] = promotion_or_not['promotion_or_not'].apply(lambda x: 'No' if x == 0 else 'Yes')

# dataframe showing whether customer reponded to internal promotions for each session
session_promotion_or_not = ecommerce_all[['event_date', 'user_pseudo_id', 'ga_session_id']].drop_duplicates().merge(promotion_or_not)


### classify sessions with promotion and sessions without promotion
ecommerce_all['unique_session_id'] = ecommerce_all['event_date'].astype(str) + ecommerce_all['ga_session_id'].astype(str)
session_promotion_or_not['unique_session_id'] = session_promotion_or_not['event_date'].astype(str) + session_promotion_or_not['ga_session_id'].astype(str)
sessions_with_promotion_list = session_promotion_or_not[session_promotion_or_not['promotion_or_not'] == 'Yes']['unique_session_id'].tolist()
sessions_without_promotion_list = session_promotion_or_not[session_promotion_or_not['promotion_or_not'] == 'No']['unique_session_id'].tolist()

# sessions with promotion
sessions_with_promotion = ecommerce_all[ecommerce_all.unique_session_id.isin(sessions_with_promotion_list)]
# sessions without promotion
sessions_without_promotion = ecommerce_all[ecommerce_all.unique_session_id.isin(sessions_without_promotion_list)]

## #1 Sessions with Promotions
Calculate performance metrics for each promotion.

### Promotion Click-Through Rate (CTR)
* `total_sessions`: number of sessions engaged with this promotion
* `view_promotion_sessions`: number of sessions viewed this promotion
* `respond_promotion_sessions`: number of sessions responded this promotion
* `promotion_ctr`: percentage of sessions responded this promotion among all sessions viewed this promotions

In [4]:
sessions_with_promotion = sessions_with_promotion[(sessions_with_promotion.promotion_name == 'Reach New Heights') | (sessions_with_promotion.promotion_name == 'Act Responsible') |
                                                  (sessions_with_promotion.promotion_name == 'Complete Your Collection') | (sessions_with_promotion.promotion_name == 'Google Mural Collection')]

# view_promotion_sessions
view_promotion_sessions = sessions_with_promotion[sessions_with_promotion.event_name != 'select_promotion'].groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                          reset_index().rename(columns={'ga_session_id':'view_promotion_sessions'})

# respond_promotion_sessions
respond_promotion_sessions = sessions_with_promotion[sessions_with_promotion.event_name == 'select_promotion'].groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                             reset_index().rename(columns={'ga_session_id':'respond_promotion_sessions'})

# final data
promotion_ctr = view_promotion_sessions.merge(respond_promotion_sessions).fillna(0)
promotion_ctr['promotion_ctr'] = promotion_ctr['respond_promotion_sessions'] / promotion_ctr['view_promotion_sessions'] 
promotion_ctr

Unnamed: 0,promotion_name,view_promotion_sessions,respond_promotion_sessions,promotion_ctr
0,Act Responsible,5477,2842,0.518897
1,Complete Your Collection,2953,1546,0.523535
2,Google Mural Collection,2260,1003,0.443805
3,Reach New Heights,97137,2812,0.028949


### Add-to-Cart Rate & Conversion Rate (CVR) with Promotions
* `add_to_cart_sessions`: number of sessions added items to cart in this promotion
* `purchase_sessions`: number of sessions purchased items in this promotion
* `add_to_cart_rate`: percentage of sessions added items to cart among all sessions viewed this promotion
* `session_cvr`: percentage of sessions purchased items among all sessions viewed this promotion

In [5]:
# add_to_cart_sessions
add_to_cart_sessions = sessions_with_promotion[sessions_with_promotion.event_name == 'add_to_cart'].groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                       reset_index().rename(columns={'ga_session_id':'add_to_cart_sessions'})
# purchase_sessions
purchase_sessions = sessions_with_promotion[sessions_with_promotion.event_name == 'purchase'].groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                    reset_index().rename(columns={'ga_session_id':'purchase_sessions'})

# final data
session_cvr = promotion_ctr.merge(add_to_cart_sessions).merge(purchase_sessions)
session_cvr['add_to_cart_rate'] = session_cvr['add_to_cart_sessions'] / session_cvr['view_promotion_sessions'] 
session_cvr['session_cvr'] = session_cvr['purchase_sessions'] / session_cvr['view_promotion_sessions'] 
session_cvr

Unnamed: 0,promotion_name,view_promotion_sessions,respond_promotion_sessions,promotion_ctr,add_to_cart_sessions,purchase_sessions,add_to_cart_rate,session_cvr
0,Act Responsible,5477,2842,0.518897,1193,402,0.21782,0.073398
1,Complete Your Collection,2953,1546,0.523535,998,328,0.337961,0.111073
2,Google Mural Collection,2260,1003,0.443805,947,359,0.419027,0.15885
3,Reach New Heights,97137,2812,0.028949,2519,947,0.025932,0.009749


### Profit from each Promotion
* `total_item_quantity`: __(if purchased)__ total quantity of purchased items in this promotion
* `total_unique_items`: __(if purchased)__ number of unique items purchased in this promotion
* `total_purchase_revenue`: __(if purchased)__ total purchase revenue in this promotion

In [6]:
purchase_sessions_with_promotion = sessions_with_promotion[(sessions_with_promotion.event_name == 'purchase') \
                                                           & (sessions_with_promotion.event_value_in_usd.isnull() == False)][['sub_continent', 'promotion_name','ga_session_id','total_item_quantity','purchase_revenue_in_usd','unique_items']].drop_duplicates()

# total_item_quantity
total_item_quantity = purchase_sessions_with_promotion.groupby(['promotion_name'])['total_item_quantity'].agg('sum').reset_index()

# total_unique_items
total_unique_items = purchase_sessions_with_promotion.groupby(['promotion_name'])['unique_items'].agg('sum').reset_index().\
                     rename(columns={'unique_items':'total_unique_items'})

# total_purchase_revenue
total_purchase_revenue = purchase_sessions_with_promotion.groupby(['promotion_name'])['purchase_revenue_in_usd'].agg('sum').reset_index().\
                         rename(columns={'purchase_revenue_in_usd':'total_purchase_revenue'})

# final data
revenue = total_item_quantity.merge(total_unique_items).merge(total_purchase_revenue)
revenue

Unnamed: 0,promotion_name,total_item_quantity,total_unique_items,total_purchase_revenue
0,Act Responsible,1854.0,1165,28172.0
1,Complete Your Collection,1393.0,925,20352.0
2,Google Mural Collection,1676.0,1209,25207.0
3,Reach New Heights,4008.0,2651,66789.0


### View Item Category for each Promotion
* Percentage of unique items (`COUNTD(item_name)`) by item category among all unique items viewed in each promotion __(e.g. there’s a total of 608 unique items viewed in "Act Responsible" promotion, and among those unique items, 3 unique items are in "Accessories" category, which accounted for 0.5% of the total unique items viewed (3/608) in this promotion)__

In [7]:
view_sessions_with_promotion = sessions_with_promotion[((sessions_with_promotion.event_name == 'view_item') | (sessions_with_promotion.event_name == 'select_item'))\
                                                       & (sessions_with_promotion.item_category.isnull() == False)]

# add column item_category to categorize items based on categories on the website
item_category_mapping_df = pd.read_csv('item_category_mapping_df.csv')
view_sessions_with_promotion = view_sessions_with_promotion.merge(item_category_mapping_df, on=['item_category'], how='left')

# unique items by item category among all unique items viewed in each promotion
view_item_category_sessions_with_promotion = view_sessions_with_promotion.pivot_table(values='item_name', index='promotion_name', columns='item_category_new', aggfunc=lambda x: len(x.unique())).reset_index().fillna(0)
view_item_category_sessions_with_promotion

item_category_new,promotion_name,Accessories,Apparel,Campus Collection,Collection,Lifestyle,New,Sale,Shop by Brand,Stationery
0,Act Responsible,3.0,92.0,45.0,25.0,98.0,36.0,126.0,133.0,50.0
1,Complete Your Collection,2.0,95.0,48.0,20.0,104.0,26.0,118.0,104.0,52.0
2,Google Mural Collection,0.0,96.0,53.0,27.0,105.0,22.0,119.0,63.0,52.0
3,Reach New Heights,1.0,101.0,67.0,41.0,130.0,35.0,156.0,181.0,56.0


In [8]:
# calculate total unique items viewed by customers in each promotion
view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] = view_item_category_sessions_with_promotion.sum(axis=1)

# percentage of unique products by item category among all unique products viewed in each promotion
view_item_category_sessions_with_promotion['Accessories'] = view_item_category_sessions_with_promotion['Accessories'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['Apparel'] = view_item_category_sessions_with_promotion['Apparel'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['Campus Collection'] = view_item_category_sessions_with_promotion['Campus Collection'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion']
view_item_category_sessions_with_promotion['Collection'] = view_item_category_sessions_with_promotion['Collection'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
# view_item_category_sessions_with_promotion['Gift Cards'] = view_item_category_sessions_with_promotion['Gift Cards'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['Lifestyle'] = view_item_category_sessions_with_promotion['Lifestyle'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['New'] = view_item_category_sessions_with_promotion['New'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['Sale'] = view_item_category_sessions_with_promotion['Sale'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['Shop by Brand'] = view_item_category_sessions_with_promotion['Shop by Brand'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 
view_item_category_sessions_with_promotion['Stationery'] = view_item_category_sessions_with_promotion['Stationery'] / view_item_category_sessions_with_promotion['total_unique_items_by_promotion'] 

view_item_category_sessions_with_promotion

item_category_new,promotion_name,Accessories,Apparel,Campus Collection,Collection,Lifestyle,New,Sale,Shop by Brand,Stationery,total_unique_items_by_promotion
0,Act Responsible,0.004934,0.151316,0.074013,0.041118,0.161184,0.059211,0.207237,0.21875,0.082237,608.0
1,Complete Your Collection,0.003515,0.16696,0.084359,0.035149,0.182777,0.045694,0.207381,0.182777,0.091388,569.0
2,Google Mural Collection,0.0,0.178771,0.098696,0.050279,0.195531,0.040968,0.221601,0.117318,0.096834,537.0
3,Reach New Heights,0.001302,0.13151,0.08724,0.053385,0.169271,0.045573,0.203125,0.235677,0.072917,768.0


### Finalize Metrics

In [9]:
promotion_df = session_cvr.merge(revenue, on=['promotion_name'])
promotion_df

Unnamed: 0,promotion_name,view_promotion_sessions,respond_promotion_sessions,promotion_ctr,add_to_cart_sessions,purchase_sessions,add_to_cart_rate,session_cvr,total_item_quantity,total_unique_items,total_purchase_revenue
0,Act Responsible,5477,2842,0.518897,1193,402,0.21782,0.073398,1854.0,1165,28172.0
1,Complete Your Collection,2953,1546,0.523535,998,328,0.337961,0.111073,1393.0,925,20352.0
2,Google Mural Collection,2260,1003,0.443805,947,359,0.419027,0.15885,1676.0,1209,25207.0
3,Reach New Heights,97137,2812,0.028949,2519,947,0.025932,0.009749,4008.0,2651,66789.0


In [10]:
print('Item quantity per transaction: ', promotion_df['total_item_quantity'] / promotion_df['purchase_sessions'])
print('Purchase revenue per transaction: ', promotion_df['total_purchase_revenue'] / promotion_df['purchase_sessions'])

Item quantity per transaction:  0    4.611940
1    4.246951
2    4.668524
3    4.232313
dtype: float64
Purchase revenue per transaction:  0    70.079602
1    62.048780
2    70.214485
3    70.526927
dtype: float64


##  #2 Sessions without Promotions
Calculate performance metrics for sessions without promotions.

### Add-to-Cart Rate & Conversion Rate (CVR) without Promotions
* `total_sessions`: number of sessions without promotions
* `add_to_cart_sessions`: number of sessions added items to cart
* `purchase_sessions`: number of sessions purchased items
* `add_to_cart_rate`: percentage of sessions added items to cart among all sessions
* `session_cvr`: percentage of sessions purchased items among all sessions

In [11]:
sessions_without_promotion.loc[(sessions_without_promotion.promotion_name == 'Not available in demo dataset') | (ecommerce_all.promotion_name.isnull()), 'promotion_name'] = '(not set)'

# total_sessions
total_sessions = sessions_without_promotion.groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                 reset_index().rename(columns={'ga_session_id':'total_sessions'})

# add_to_cart_sessions
add_to_cart_sessions = sessions_without_promotion[sessions_without_promotion.event_name == 'add_to_cart'].groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                       reset_index().rename(columns={'ga_session_id':'add_to_cart_sessions'})
# purchase_sessions
purchase_sessions = sessions_without_promotion[sessions_without_promotion.event_name == 'purchase'].groupby(['promotion_name'])['ga_session_id'].agg('nunique').\
                    reset_index().rename(columns={'ga_session_id':'purchase_sessions'})

# final data
session_cvr = total_sessions.merge(add_to_cart_sessions).merge(purchase_sessions)
session_cvr['add_to_cart_rate'] = session_cvr['add_to_cart_sessions'] / session_cvr['total_sessions'] 
session_cvr['session_cvr'] = session_cvr['purchase_sessions'] / session_cvr['total_sessions'] 
session_cvr

Unnamed: 0,promotion_name,total_sessions,add_to_cart_sessions,purchase_sessions,add_to_cart_rate,session_cvr
0,(not set),27457,3683,877,0.134137,0.031941


### Profit
* `total_item_quantity`: __(if purchased)__ total quantity of purchased items
* `total_unique_items`: __(if purchased)__ number of unique items purchased
* `total_purchase_revenue`: __(if purchased)__ total purchase revenue

In [12]:
purchase_sessions_without_promotion = sessions_without_promotion[(sessions_without_promotion.event_name == 'purchase') \
                                                                 & (sessions_without_promotion.event_value_in_usd.isnull() == False)][['sub_continent', 'promotion_name','ga_session_id','total_item_quantity','purchase_revenue_in_usd','unique_items']].drop_duplicates()

# total_item_quantity
total_item_quantity = purchase_sessions_without_promotion.groupby(['promotion_name'])['total_item_quantity'].agg('sum').reset_index()

# total_unique_items
total_unique_items = purchase_sessions_without_promotion.groupby(['promotion_name'])['unique_items'].agg('sum').reset_index().\
                     rename(columns={'unique_items':'total_unique_items'})

# total_purchase_revenue
total_purchase_revenue = purchase_sessions_without_promotion.groupby(['promotion_name'])['purchase_revenue_in_usd'].agg('sum').reset_index().\
                         rename(columns={'purchase_revenue_in_usd':'total_purchase_revenue'})

# final data
revenue = total_item_quantity.merge(total_unique_items).merge(total_purchase_revenue)
revenue

Unnamed: 0,promotion_name,total_item_quantity,total_unique_items,total_purchase_revenue
0,(not set),4030.0,2461,58826.0


### View Item Category
* Percentage of unique items (`COUNTD(item_name)`) by item category among all unique items viewed by customers __(e.g. there’s a total of 1,129 unique items viewed by customers, and among those unique items, 54 unique items are in "Accessories" category, which accounted for 4.7% of the total unique items viewed (54/1129))__

In [13]:
view_sessions_without_promotion = sessions_without_promotion[((sessions_without_promotion.event_name == 'view_item') | (sessions_with_promotion.event_name == 'select_item'))\
                                                             & (sessions_without_promotion.item_category.isnull() == False)]

# add column item_category to categorize items based on categories on the website
item_category_mapping_df = pd.read_csv('item_category_mapping_df.csv')
view_sessions_without_promotion = view_sessions_without_promotion.merge(item_category_mapping_df, on=['item_category'], how='left')

# unique items by item category among all unique items viewed without promotions
view_item_category_sessions_without_promotion = view_sessions_without_promotion.pivot_table(values='item_name', index='promotion_name', columns='item_category_new', aggfunc=lambda x: len(x.unique())).reset_index()
view_item_category_sessions_without_promotion

item_category_new,promotion_name,Accessories,Apparel,Campus Collection,Collection,Gift Cards,Lifestyle,New,Sale,Shop by Brand,Stationery
0,(not set),54,127,85,44,5,146,54,176,375,63


In [14]:
# calculate total unique items viewed by customers without promotions
view_item_category_sessions_without_promotion['total_unique_items'] = view_item_category_sessions_without_promotion.sum(axis=1)

# percentage of unique items by item category among all unique items viewed without promotions
view_item_category_sessions_without_promotion['Accessories'] = view_item_category_sessions_without_promotion['Accessories'] / view_item_category_sessions_without_promotion['total_unique_items']
view_item_category_sessions_without_promotion['Apparel'] = view_item_category_sessions_without_promotion['Apparel'] / view_item_category_sessions_without_promotion['total_unique_items']
view_item_category_sessions_without_promotion['Campus Collection'] = view_item_category_sessions_without_promotion['Campus Collection'] / view_item_category_sessions_without_promotion['total_unique_items']
view_item_category_sessions_without_promotion['Collection'] = view_item_category_sessions_without_promotion['Collection'] / view_item_category_sessions_without_promotion['total_unique_items'] 
view_item_category_sessions_without_promotion['Gift Cards'] = view_item_category_sessions_without_promotion['Gift Cards'] / view_item_category_sessions_without_promotion['total_unique_items'] 
view_item_category_sessions_without_promotion['Lifestyle'] = view_item_category_sessions_without_promotion['Lifestyle'] / view_item_category_sessions_without_promotion['total_unique_items'] 
view_item_category_sessions_without_promotion['New'] = view_item_category_sessions_without_promotion['New'] / view_item_category_sessions_without_promotion['total_unique_items'] 
view_item_category_sessions_without_promotion['Sale'] = view_item_category_sessions_without_promotion['Sale'] / view_item_category_sessions_without_promotion['total_unique_items'] 
view_item_category_sessions_without_promotion['Shop by Brand'] = view_item_category_sessions_without_promotion['Shop by Brand'] / view_item_category_sessions_without_promotion['total_unique_items'] 
view_item_category_sessions_without_promotion['Stationery'] = view_item_category_sessions_without_promotion['Stationery'] / view_item_category_sessions_without_promotion['total_unique_items'] 

view_item_category_sessions_without_promotion

item_category_new,promotion_name,Accessories,Apparel,Campus Collection,Collection,Gift Cards,Lifestyle,New,Sale,Shop by Brand,Stationery,total_unique_items
0,(not set),0.04783,0.112489,0.075288,0.038973,0.004429,0.129318,0.04783,0.15589,0.332152,0.055802,1129


### Finalize Metrics

In [15]:
non_promotion_df = session_cvr.merge(revenue)
non_promotion_df

Unnamed: 0,promotion_name,total_sessions,add_to_cart_sessions,purchase_sessions,add_to_cart_rate,session_cvr,total_item_quantity,total_unique_items,total_purchase_revenue
0,(not set),27457,3683,877,0.134137,0.031941,4030.0,2461,58826.0


In [16]:
print('Average item quantity per transaction: ', non_promotion_df['total_item_quantity'] / non_promotion_df['purchase_sessions'])
print('Average purchase revenue per transaction: ', non_promotion_df['total_purchase_revenue'] / non_promotion_df['purchase_sessions'])

Average item quantity per transaction:  0    4.595211
dtype: float64
Average purchase revenue per transaction:  0    67.076397
dtype: float64


## #3 Promotional Data (Sessions with & without Promotions)
Concat __metrics of each promotion__ with __metrics of sessions without promotions__ to get final data.

### Metrics
* `promotion_ctr`: Promotion Click-Through Rate (CTR)
* `add_to_cart_rate`: Add-to-Cart Rate
* `session_cvr`: Conversion Rate (CVR)
* `total_item_quantity`: to get Average Item Quantity Per Transaction
* `total_purchase_revenue`: to get Average Purchase Revenue Per Transaction

In [17]:
metrics = pd.concat([promotion_df, non_promotion_df.rename(columns={'total_sessions':'view_promotion_sessions'})])
metrics.head()

Unnamed: 0,promotion_name,view_promotion_sessions,respond_promotion_sessions,promotion_ctr,add_to_cart_sessions,purchase_sessions,add_to_cart_rate,session_cvr,total_item_quantity,total_unique_items,total_purchase_revenue
0,Act Responsible,5477,2842.0,0.518897,1193,402,0.21782,0.073398,1854.0,1165,28172.0
1,Complete Your Collection,2953,1546.0,0.523535,998,328,0.337961,0.111073,1393.0,925,20352.0
2,Google Mural Collection,2260,1003.0,0.443805,947,359,0.419027,0.15885,1676.0,1209,25207.0
3,Reach New Heights,97137,2812.0,0.028949,2519,947,0.025932,0.009749,4008.0,2651,66789.0
0,(not set),27457,,,3683,877,0.134137,0.031941,4030.0,2461,58826.0


In [18]:
metrics.to_csv('promotion_metrics.csv', index=False)

### View Item Category
* Percentage of unique items by item category among all unique items viewed by customers in each promotion

In [19]:
view_item_category = pd.concat([view_item_category_sessions_with_promotion.rename(columns={'total_unique_items_by_promotion':'total_unique_items'}), \
                                view_item_category_sessions_without_promotion]).fillna(0)
view_item_category = view_item_category[['promotion_name', 'Accessories', 'Apparel', 'Campus Collection', 'Collection', 'Lifestyle', 'New', 'Sale', \
                                         'Shop by Brand', 'Stationery', 'Gift Cards', 'total_unique_items']]
view_item_category.head()

item_category_new,promotion_name,Accessories,Apparel,Campus Collection,Collection,Lifestyle,New,Sale,Shop by Brand,Stationery,Gift Cards,total_unique_items
0,Act Responsible,0.004934,0.151316,0.074013,0.041118,0.161184,0.059211,0.207237,0.21875,0.082237,0.0,608.0
1,Complete Your Collection,0.003515,0.16696,0.084359,0.035149,0.182777,0.045694,0.207381,0.182777,0.091388,0.0,569.0
2,Google Mural Collection,0.0,0.178771,0.098696,0.050279,0.195531,0.040968,0.221601,0.117318,0.096834,0.0,537.0
3,Reach New Heights,0.001302,0.13151,0.08724,0.053385,0.169271,0.045573,0.203125,0.235677,0.072917,0.0,768.0
0,(not set),0.04783,0.112489,0.075288,0.038973,0.129318,0.04783,0.15589,0.332152,0.055802,0.004429,1129.0


In [20]:
view_item_category.to_csv('promotion_view_item_category.csv', index=False)

## #4 Timing Trend Data
Generate data for visualizing trend of each promotion.

In [21]:
# sessions with promotions
sessions_with_promotion_trend = sessions_with_promotion[sessions_with_promotion['promotion_name'] != '(not set)']\
                                [['event_date', 'event_time', 'continent', 'sub_continent', 'country', 'user_pseudo_id', 'ga_session_id', 'unique_session_id', 'promotion_name']].drop_duplicates()
# sessions without promotions
sessions_without_promotion_trend = sessions_without_promotion[['event_date', 'continent', 'sub_continent', 'country', 'user_pseudo_id', 'ga_session_id', 'unique_session_id', 'promotion_name']].drop_duplicates()

# concatenation & remove promotion_name is null data
promotion_trend = pd.concat([sessions_with_promotion_trend, sessions_without_promotion_trend])
promotion_trend.loc[(promotion_trend.promotion_name == 'Not available in demo dataset') | (promotion_trend.promotion_name.isnull()), 'promotion_name'] = '(not set)'
promotion_trend = promotion_trend[promotion_trend.promotion_name.isnull() == False]

promotion_trend.head()

Unnamed: 0,event_date,event_time,continent,sub_continent,country,user_pseudo_id,ga_session_id,unique_session_id,promotion_name
30,20201126,2020-11-26 17:09:57,Americas,Northern America,United States,35129808.30226189,35129808.3022619,2020112635129808.5,Google Mural Collection
77,20201129,2020-11-29 19:24:20,Asia,Western Asia,Palestine,60946114.09003512,60946114.09003512,2020112960946114.0,Google Mural Collection
84,20201129,2020-11-29 11:37:33,Asia,Southern Asia,India,1890293.7758196,1890293.7758196848,202011291890293.75,Act Responsible
129,20201217,2020-12-17 16:37:54,Asia,Western Asia,Saudi Arabia,9419371.207277443,9419371.207277443,202012179419371.2,Reach New Heights
130,20201217,2020-12-17 20:11:07,Asia,Southern Asia,India,59672566.62594916,59672566.62594917,2020121759672566.5,Reach New Heights


In [22]:
promotion_trend.to_csv('promotion_trend.csv', index=False)