<a href="https://colab.research.google.com/github/apiasak/datascience-portfolio/blob/main/code/ecommercetools_Example_usage_with_Retail_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# EcommerceTools

https://github.com/practical-data-science/ecommercetools

In [2]:
!pip install ecommercetools --upgrade --quiet

In [3]:
import pandas as pd

## EcommerceTools > Transactions

In [4]:
from ecommercetools import utilities
from ecommercetools import transactions

### 1. Load a sample transaction items dataset

In [15]:
transaction_items = utilities.load_sample_data()
transaction_items.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   order_id     541909 non-null  object        
 1   sku          541909 non-null  object        
 2   description  540455 non-null  object        
 3   quantity     541909 non-null  int64         
 4   order_date   541909 non-null  datetime64[ns]
 5   unit_price   541909 non-null  float64       
 6   customer_id  406829 non-null  float64       
 7   country      541909 non-null  object        
 8   line_price   541909 non-null  float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(4)
memory usage: 37.2+ MB


### 2. Format a custom transaction items dataset

In [7]:
sales = pd.read_excel('https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx')

In [16]:
df = sales.rename(columns={
        'InvoiceDate': 'order_date',
        'InvoiceNo': 'order_id',
        'CustomerID': 'customer_id',
        'StockCode': 'sku',
        'Quantity': 'quantity',
        'UnitPrice': 'unit_price'
    })

In [18]:
df['line_price'] = round(df['quantity'] * df['unit_price'], 2)

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   order_id     541909 non-null  object        
 1   sku          541909 non-null  object        
 2   Description  540455 non-null  object        
 3   quantity     541909 non-null  int64         
 4   order_date   541909 non-null  datetime64[ns]
 5   unit_price   541909 non-null  float64       
 6   customer_id  406829 non-null  float64       
 7   Country      541909 non-null  object        
 8   line_price   541909 non-null  float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(4)
memory usage: 37.2+ MB


In [21]:
transaction_items = df

### 3. Create a transactions dataset

In [20]:
transactions_df = transactions.get_transactions(df)
transactions_df.head()

Unnamed: 0,order_id,order_date,customer_id,skus,items,revenue,replacement,order_number
0,536365,2010-12-01 08:26:00,17850.0,7,40,139.12,0,1
1,536366,2010-12-01 08:28:00,17850.0,2,12,22.2,0,2
2,536367,2010-12-01 08:34:00,13047.0,12,83,278.73,0,1
3,536368,2010-12-01 08:34:00,13047.0,4,15,70.05,0,2
4,536369,2010-12-01 08:35:00,13047.0,1,3,17.85,0,3


# EcommerceTools > Products

In [22]:
from ecommercetools import products

### 1. Create a products dataset

In [23]:
products_df = products.get_products(transaction_items)
products_df.head()

Unnamed: 0,sku,first_order_date,last_order_date,customers,orders,items,revenue,avg_unit_price,avg_quantity,avg_revenue,avg_orders,product_tenure,product_recency
0,10002,2010-12-01 08:45:00,2011-04-28 15:05:00,40,73,1037,759.89,1.056849,14.205479,10.409452,1.82,4280,4131
1,10080,2011-02-27 13:47:00,2011-11-21 17:04:00,19,24,495,119.09,0.376667,20.625,4.962083,1.26,4192,3924
2,10120,2010-12-03 11:19:00,2011-12-04 13:15:00,25,29,193,40.53,0.21,6.433333,1.351,1.16,4278,3912
3,10125,2010-12-01 12:23:00,2011-12-09 10:13:00,49,91,1296,994.84,0.859681,13.787234,10.583404,1.86,4280,3907
4,10133,2010-12-01 12:15:00,2011-11-16 15:32:00,101,198,2775,1540.02,0.6458,13.875,7.7001,1.96,4280,3929


### 2. Calculate product consumption and repurchase rate

In [24]:
repurchase_rates = products.get_repurchase_rates(transaction_items)
repurchase_rates.head(3).T

Unnamed: 0,0,1,2
sku,10002,10080,10120
revenue,759.89,119.09,40.53
items,1037,495,193
orders,73,24,29
customers,40,19,25
avg_unit_price,1.056849,0.376667,0.21
avg_line_price,10.409452,4.962083,1.351
avg_items_per_order,14.205479,20.625,6.655172
avg_items_per_customer,25.925,26.052632,7.72
purchased_individually,0.0,0.0,9.0


# EcommerceTools > Operations

In [37]:
from ecommercetools import operations

### 1. Create an ABC inventory classification

In [38]:
inventory_classification = operations.get_inventory_classification(transaction_items)
inventory_classification.head()

Unnamed: 0,sku,abc_class,abc_rank
4059,DOT,A,1
1271,22423,A,2
2517,47566,A,3
3670,85123A,A,4
3659,85099B,A,5


In [39]:
inventory_classification.groupby(by=['abc_class'])['sku'].nunique()

abc_class
A     736
B     411
C    2923
Name: sku, dtype: int64

# EcommerceTools > Customers

In [40]:
from ecommercetools import customers

### 1. Create a customers dataset

In [41]:
customers_df = customers.get_customers(transaction_items)
customers_df.head()

Unnamed: 0,customer_id,revenue,orders,skus,items,first_order_date,last_order_date,avg_items,avg_order_value,tenure,recency,cohort
0,12346.0,0.0,2,1,0,2011-01-18 10:01:00,2011-01-18 10:17:00,0.0,0.0,4232,4232,20111
1,12347.0,4310.0,7,7,2458,2010-12-07 14:57:00,2011-12-07 15:52:00,351.14,615.71,4273,3908,20104
2,12348.0,1797.24,4,4,2341,2010-12-16 19:09:00,2011-09-25 13:13:00,585.25,449.31,4264,3982,20104
3,12349.0,1757.55,1,1,631,2011-11-21 09:51:00,2011-11-21 09:51:00,631.0,1757.55,3925,3925,20114
4,12350.0,334.4,1,1,197,2011-02-02 16:01:00,2011-02-02 16:01:00,197.0,334.4,4216,4216,20111


### 2. Create a customer cohort analysis dataset

In [42]:
cohorts_df = customers.get_cohorts(transaction_items, period='M')
cohorts_df.head()

Unnamed: 0,customer_id,order_id,order_date,acquisition_cohort,order_cohort
0,17850.0,536365,2010-12-01 08:26:00,2010-12,2010-12
7,17850.0,536366,2010-12-01 08:28:00,2010-12,2010-12
9,13047.0,536367,2010-12-01 08:34:00,2010-12,2010-12
21,13047.0,536368,2010-12-01 08:34:00,2010-12,2010-12
25,13047.0,536369,2010-12-01 08:35:00,2010-12,2010-12


### 3. Create a customer cohort analysis matrix

In [45]:
cohort_matrix_df = customers.get_cohort_matrix(transaction_items, period='M', percentage=True)
cohort_matrix_df

periods,0,1,2,3,4,5,6,7,8,9,10,11,12
acquisition_cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-12,1.0,0.381857,0.334388,0.387131,0.359705,0.396624,0.379747,0.35443,0.35443,0.394515,0.373418,0.5,0.274262
2011-01,1.0,0.239905,0.28266,0.24228,0.327791,0.299287,0.261283,0.256532,0.311164,0.346793,0.368171,0.149644,
2011-02,1.0,0.247368,0.192105,0.278947,0.268421,0.247368,0.255263,0.281579,0.257895,0.313158,0.092105,,
2011-03,1.0,0.190909,0.254545,0.218182,0.231818,0.177273,0.263636,0.238636,0.288636,0.088636,,,
2011-04,1.0,0.227425,0.220736,0.210702,0.207358,0.237458,0.230769,0.26087,0.083612,,,,
2011-05,1.0,0.236559,0.172043,0.172043,0.215054,0.243728,0.265233,0.103943,,,,,
2011-06,1.0,0.208511,0.187234,0.27234,0.246809,0.33617,0.102128,,,,,,
2011-07,1.0,0.209424,0.204188,0.230366,0.272251,0.115183,,,,,,,
2011-08,1.0,0.251497,0.251497,0.251497,0.137725,,,,,,,,
2011-09,1.0,0.298658,0.325503,0.120805,,,,,,,,,


In [46]:
cohort_matrix_df = customers.get_cohort_matrix(transaction_items, period='M', percentage=False)
cohort_matrix_df

periods,0,1,2,3,4,5,6,7,8,9,10,11,12
acquisition_cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-12,948.0,362.0,317.0,367.0,341.0,376.0,360.0,336.0,336.0,374.0,354.0,474.0,260.0
2011-01,421.0,101.0,119.0,102.0,138.0,126.0,110.0,108.0,131.0,146.0,155.0,63.0,
2011-02,380.0,94.0,73.0,106.0,102.0,94.0,97.0,107.0,98.0,119.0,35.0,,
2011-03,440.0,84.0,112.0,96.0,102.0,78.0,116.0,105.0,127.0,39.0,,,
2011-04,299.0,68.0,66.0,63.0,62.0,71.0,69.0,78.0,25.0,,,,
2011-05,279.0,66.0,48.0,48.0,60.0,68.0,74.0,29.0,,,,,
2011-06,235.0,49.0,44.0,64.0,58.0,79.0,24.0,,,,,,
2011-07,191.0,40.0,39.0,44.0,52.0,22.0,,,,,,,
2011-08,167.0,42.0,42.0,42.0,23.0,,,,,,,,
2011-09,298.0,89.0,97.0,36.0,,,,,,,,,


### 4. Create a customer "retention" dataset

In [48]:
retention_df = customers.get_retention(transactions_df)
retention_df

Unnamed: 0,acquisition_cohort,order_cohort,customers,periods
0,2010-12,2010-12,948,0
1,2010-12,2011-01,362,1
2,2010-12,2011-02,317,2
3,2010-12,2011-03,367,3
4,2010-12,2011-04,341,4
...,...,...,...,...
86,2011-10,2011-11,93,1
87,2011-10,2011-12,46,2
88,2011-11,2011-11,321,0
89,2011-11,2011-12,43,1


### 5. Create an RFM dataset

In [49]:
rfm_df = customers.get_rfm_segments(customers_df)
rfm_df.head()

Unnamed: 0,customer_id,acquisition_date,recency_date,recency,frequency,monetary,heterogeneity,tenure,r,f,m,h,rfm,rfm_score,rfm_segment_name
0,12346.0,2011-01-18 10:01:00,2011-01-18 10:17:00,4232,2,0.0,1,4232,1,1,1,1,111,3,Risky
1,12350.0,2011-02-02 16:01:00,2011-02-02 16:01:00,4216,1,334.4,1,4216,1,1,1,1,111,3,Risky
2,12373.0,2011-02-01 13:10:00,2011-02-01 13:10:00,4218,1,364.6,1,4218,1,1,1,1,111,3,Risky
3,12377.0,2010-12-20 09:37:00,2011-01-28 15:45:00,4221,2,1628.12,2,4261,1,1,1,1,111,3,Risky
4,12386.0,2010-12-08 09:53:00,2011-01-06 12:37:00,4244,2,401.9,2,4273,1,1,1,1,111,3,Risky


In [50]:
rfm_df.groupby(by=['rfm_segment_name'])['customer_id'].nunique()

rfm_segment_name
Hold and improve     443
Loyal               3079
Potential loyal      531
Risky                303
Star                  16
Name: customer_id, dtype: int64

### 5.1 Create an RFM (H) dataset

In [51]:
rfm_df.groupby(by=['heterogeneity'])['customer_id'].nunique()

heterogeneity
1     1394
2      869
3      520
4      430
5      282
6      200
7      144
8      122
9       83
10      83
11      46
12      33
13      31
14      19
15      21
16      23
17      14
18      13
19      13
20       1
21       4
22       5
23       3
24       3
25       2
26       1
29       1
30       2
31       1
33       1
34       1
35       1
43       1
44       1
46       1
60       1
69       1
79       1
Name: customer_id, dtype: int64

### 6. Create a purchase latency dataset

In [52]:
latency_df = customers.get_latency(transactions_df)
latency_df.head()

Unnamed: 0,customer_id,frequency,recency_date,recency,avg_latency,min_latency,max_latency,std_latency,cv,days_to_next_order,label
0,12680.0,4,2011-12-09 12:50:00,3907,28,16,73,30.859898,1.102139,-3848.0,Order overdue
1,13113.0,24,2011-12-09 12:49:00,3907,15,0,52,12.060126,0.804008,-3880.0,Order overdue
2,15804.0,13,2011-12-09 12:31:00,3907,15,1,39,11.008261,0.733884,-3881.0,Order overdue
3,13777.0,33,2011-12-09 12:25:00,3907,11,0,48,12.055274,1.095934,-3884.0,Order overdue
4,17581.0,25,2011-12-09 12:21:00,3907,14,0,67,21.974293,1.569592,-3871.0,Order overdue


### 7. Customer ABC segmentation

In [53]:
abc_df = customers.get_abc_segments(customers_df, months=12, abc_class_name='abc_class_12m', abc_rank_name='abc_rank_12m')
abc_df.head()

Unnamed: 0,customer_id,abc_class_12m,abc_rank_12m
0,12346.0,D,1.0
1,12347.0,D,1.0
2,12348.0,D,1.0
3,12349.0,D,1.0
4,12350.0,D,1.0


### 8. Predict customer AOV, CLV, and orders
EcommerceTools allows you to predict the AOV, Customer Lifetime Value (CLV) and expected number of orders via the Gamma-Gamma and BG/NBD models from the excellent Lifetimes package. By passing the dataframe of transactions from get_transactions() to the get_customer_predictions() function, EcommerceTools will fit the BG/NBD and Gamma-Gamma models and predict the AOV, order quantity, and CLV for each customer in the defined number of future days after the end of the observation period.

In [54]:
customer_predictions = customers.get_customer_predictions(transactions_df, 
                                                          observation_period_end='2011-12-09', 
                                                          days=90)
customer_predictions.head(10)

Unnamed: 0,customer_id,predicted_purchases,aov,clv
0,12346.0,0.18883,,
1,12347.0,1.408736,569.978836,836.846896
2,12348.0,0.805907,333.784235,308.247354
3,12349.0,0.855607,,
4,12350.0,0.196304,,
5,12352.0,1.682277,376.175359,647.826169
6,12353.0,0.272541,,
7,12354.0,0.247183,,
8,12355.0,0.262909,,
9,12356.0,0.645368,324.039419,256.855226


# EcommerceTools > Advertising

In [55]:
from ecommercetools import advertising

### 1. Create paid search keywords

In [56]:
product_names = ['fly rods', 'fly reels']
keywords_prepend = ['buy', 'best', 'cheap', 'reduced']
keywords_append = ['for sale', 'price', 'promotion', 'promo', 'coupon', 'voucher', 'shop', 'suppliers']
campaign_name = 'fly_fishing'

keywords = advertising.generate_ad_keywords(product_names, keywords_prepend, keywords_append, campaign_name)
keywords.head()

Unnamed: 0,product,keywords,match_type,campaign_name
0,fly rods,[fly rods],Exact,fly_fishing
1,fly rods,[buy fly rods],Exact,fly_fishing
2,fly rods,[best fly rods],Exact,fly_fishing
3,fly rods,[cheap fly rods],Exact,fly_fishing
4,fly rods,[reduced fly rods],Exact,fly_fishing


### 2. Create paid search ad copy using Spintax

In [57]:
text = "Fly Reels from {Orvis|Loop|Sage|Airflo|Nautilus} for {trout|salmon|grayling|pike}"
spin = advertising.generate_spintax(text, single=False)

spin

['Fly Reels from Orvis for trout',
 'Fly Reels from Orvis for salmon',
 'Fly Reels from Orvis for grayling',
 'Fly Reels from Orvis for pike',
 'Fly Reels from Loop for trout',
 'Fly Reels from Loop for salmon',
 'Fly Reels from Loop for grayling',
 'Fly Reels from Loop for pike',
 'Fly Reels from Sage for trout',
 'Fly Reels from Sage for salmon',
 'Fly Reels from Sage for grayling',
 'Fly Reels from Sage for pike',
 'Fly Reels from Airflo for trout',
 'Fly Reels from Airflo for salmon',
 'Fly Reels from Airflo for grayling',
 'Fly Reels from Airflo for pike',
 'Fly Reels from Nautilus for trout',
 'Fly Reels from Nautilus for salmon',
 'Fly Reels from Nautilus for grayling',
 'Fly Reels from Nautilus for pike']