# Background

- The platform has six years' worth of data.
- Problem: Issues with retention, re-engagement, and campaign targeting, and a lack of a general understanding of the user base.
- Ask: How can users be grouped based on recent behavior, general purchase behavior, and extreme purchase behavior?

# Data Profile

- Business domain: scheduling platform for services
- Snapshot date: 2022-09-03
- Disclaimer: The data is private and has been masked. It will not be shared.

# Setup

In [35]:
import pandas as pd
import numpy as np

from copy import deepcopy

from scheduling_platform_segmentation.constants import QUANTILE_LIST
from scheduling_platform_segmentation.preprocess.rfm import (
    calculate_weighted_recency,
    
)


In [3]:
# Load the raw extract and discard the 'Unnamed: 0' column if the CSV has one;
# every other column is kept as read.
# presumably 'Unnamed: 0' is a row index written out by an earlier to_csv — TODO confirm
pdf_raw_user_by_vendor_by_store = (
    pd.read_csv('user_data_by_vendor_by_store.csv')
    .drop(columns='Unnamed: 0', errors='ignore')
)
pdf_raw_user_by_vendor_by_store  # bare expression: rendered as the cell output

Unnamed: 0,user_id,install_date,vendor_id,store_id,tenure,store_days_since_last_transaction,store_num_transactions,store_total_dollar_spend
0,142488,2022-07-23,195,269,41.737250,12.899776,2,15400.0
1,133793,2022-06-01,195,269,93.859821,20.668556,3,33000.0
2,139901,2022-07-09,195,269,55.806231,55.806223,1,33000.0
3,134030,2022-06-02,195,269,92.660766,63.704679,3,19600.0
4,140978,2022-07-14,195,269,50.750846,23.745670,3,13400.0
...,...,...,...,...,...,...,...,...
92250,110297,2021-11-29,139,219,277.861396,81.845761,2,50300.0
92251,108382,2021-11-15,139,219,291.649244,88.853760,2,59196.0
92252,123325,2022-03-22,139,219,164.875360,95.752685,1,67050.0
92253,17108,2018-08-22,146,226,1472.914944,415.731643,1,500.0


In [4]:
# pdf_raw_user_purchase_behavior = pd.read_csv('user_data_purchase_behavior.csv')
# pdf_raw_user_purchase_behavior = pdf_raw_user_purchase_behavior[[c for c in pdf_raw_user_purchase_behavior.columns if c != 'Unnamed: 0']]
# pdf_raw_user_purchase_behavior

# Recent Purchase Behavior

In [13]:
# Re-display the raw table for reference before it is aggregated per user_id.
pdf_raw_user_by_vendor_by_store



Unnamed: 0,user_id,install_date,vendor_id,store_id,tenure,store_days_since_last_transaction,store_num_transactions,store_total_dollar_spend
0,142488,2022-07-23,195,269,41.737250,12.899776,2,15400.0
1,133793,2022-06-01,195,269,93.859821,20.668556,3,33000.0
2,139901,2022-07-09,195,269,55.806231,55.806223,1,33000.0
3,134030,2022-06-02,195,269,92.660766,63.704679,3,19600.0
4,140978,2022-07-14,195,269,50.750846,23.745670,3,13400.0
...,...,...,...,...,...,...,...,...
92250,110297,2021-11-29,139,219,277.861396,81.845761,2,50300.0
92251,108382,2021-11-15,139,219,291.649244,88.853760,2,59196.0
92252,123325,2022-03-22,139,219,164.875360,95.752685,1,67050.0
92253,17108,2018-08-22,146,226,1472.914944,415.731643,1,500.0


In [14]:
# Collapse the raw table to one row per user_id:
#   install_date -> earliest install_date seen for the user
#   cnt_vendors  -> number of distinct vendor_id values
#   tenure       -> largest tenure value
#   recency      -> smallest store_days_since_last_transaction
#   frequency    -> total of store_num_transactions
#   monetary     -> total of store_total_dollar_spend
pdf_user_level = pdf_raw_user_by_vendor_by_store.groupby('user_id').agg(
    install_date=pd.NamedAgg(column='install_date', aggfunc='min'),
    cnt_vendors=pd.NamedAgg(column='vendor_id', aggfunc='nunique'),
    tenure=pd.NamedAgg(column='tenure', aggfunc='max'),
    recency=pd.NamedAgg(column='store_days_since_last_transaction', aggfunc='min'),
    frequency=pd.NamedAgg(column='store_num_transactions', aggfunc='sum'),
    monetary=pd.NamedAgg(column='store_total_dollar_spend', aggfunc='sum'),
)

# Both helpers are applied row by row (axis=1). The second pass receives rows
# that already carry the 'weighted_recency' value written by the first pass,
# and its result overwrites that column.
# NOTE(review): clean_weighted_recency is not among the names imported in the
# Setup cell — confirm where it comes from so the notebook runs top to bottom.
pdf_user_level['weighted_recency'] = pdf_user_level.apply(calculate_weighted_recency, axis=1)
pdf_user_level['weighted_recency'] = pdf_user_level.apply(clean_weighted_recency, axis=1)
pdf_user_level  # bare expression: rendered as the cell output


Unnamed: 0_level_0,install_date,cnt_vendors,tenure,recency,frequency,monetary,weighted_recency
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5,2016-03-31,1,2346.602576,70.874920,3,1495.0,0.940506
52,2016-07-15,1,2240.230525,1574.828366,1,300.0,0.088223
56,2016-09-01,6,2192.329370,535.607214,14,7480.0,0.571068
57,2016-09-08,4,2185.122283,1742.387525,6,4098.0,0.041052
73,2016-11-11,1,2121.104485,939.375335,2,755.0,0.310393
...,...,...,...,...,...,...,...
148312,2022-09-02,1,0.724430,0.724201,1,550.0,0.999684
148333,2022-09-02,1,0.619158,0.619144,1,3028.0,0.999978
148347,2022-09-02,1,0.544632,0.544619,1,798.0,0.999977
148363,2022-09-02,1,0.499832,0.499819,1,3028.0,0.999974


In [54]:
# Build the quantile lookup from the user-level table, passing QUANTILE_LIST as
# the quantile_list argument. The scoring loop further down iterates this
# object's keys and treats each key as a metric name.
# TODO: save this into database
dict_quantile = generate_quantile_dictionary(pdf_user_level, quantile_list=QUANTILE_LIST)

In [69]:
# Work on an independent copy so pdf_user_level itself gains no score columns.
pdf_rfm = pdf_user_level.copy(deep=True)

# Score every metric found in dict_quantile. Each score column is named after
# the first character of its metric (e.g. 'recency' -> 'r', 'monetary' -> 'm').
for metric in dict_quantile.keys():
    # The two recency metrics are scored with reverse=1, all others with reverse=0.
    reverse = 1 if metric in ('weighted_recency', 'recency') else 0

    conditions, values = generate_conditions(pdf_rfm, dict_quantile, metric, reverse=reverse)
    # Rows matching none of the conditions receive np.select's default of 0.
    pdf_rfm[metric[0]] = np.select(conditions, values)

# NOTE(review): in the output rendered for this cell every visible row has
# w == 1 although weighted_recency ranges from roughly 0.04 to 1.0 — confirm
# that reverse=1 is the right direction for weighted_recency and that its
# quantile cut points are sane.
pdf_rfm  # bare expression: rendered as the cell output

Unnamed: 0_level_0,install_date,cnt_vendors,tenure,recency,frequency,monetary,weighted_recency,r,w,f,m
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
5,2016-03-31,1,2346.602576,70.874920,3,1495.0,0.940506,5,1,5,4
52,2016-07-15,1,2240.230525,1574.828366,1,300.0,0.088223,1,1,1,2
56,2016-09-01,6,2192.329370,535.607214,14,7480.0,0.571068,4,1,5,5
57,2016-09-08,4,2185.122283,1742.387525,6,4098.0,0.041052,1,1,5,5
73,2016-11-11,1,2121.104485,939.375335,2,755.0,0.310393,2,1,4,3
...,...,...,...,...,...,...,...,...,...,...,...
148312,2022-09-02,1,0.724430,0.724201,1,550.0,0.999684,5,1,1,3
148333,2022-09-02,1,0.619158,0.619144,1,3028.0,0.999978,5,1,1,5
148347,2022-09-02,1,0.544632,0.544619,1,798.0,0.999977,5,1,1,4
148363,2022-09-02,1,0.499832,0.499819,1,3028.0,0.999974,5,1,1,5


In [71]:
# Produce two sets of segment labels from the scored table: the first call uses
# weighted=1, the second weighted=0 (call order kept as weighted first).
# presumably weighted=1 makes the helper use the 'w' score in place of 'r' — TODO confirm
weighted_labels, normal_labels = (
    generate_trad_rf_segments(pdf_rfm, weighted=use_weighted)
    for use_weighted in (1, 0)
)

In [74]:
# Attach both label sets to a fresh copy of the scored table (pdf_rfm is left
# unchanged): 'rfm' holds the weighted=0 labels, 'wfm' the weighted=1 labels.
pdf_rfm_labeled = pdf_rfm.assign(rfm=normal_labels, wfm=weighted_labels)

In [75]:
# Display the labeled table: user-level metrics, their scores, and the
# 'rfm' / 'wfm' segment labels side by side.
pdf_rfm_labeled

Unnamed: 0_level_0,install_date,cnt_vendors,tenure,recency,frequency,monetary,weighted_recency,r,w,f,m,rfm,wfm
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
5,2016-03-31,1,2346.602576,70.874920,3,1495.0,0.940506,5,1,5,4,champions,cant_lose
52,2016-07-15,1,2240.230525,1574.828366,1,300.0,0.088223,1,1,1,2,hibernating,hibernating
56,2016-09-01,6,2192.329370,535.607214,14,7480.0,0.571068,4,1,5,5,loyal_customers,cant_lose
57,2016-09-08,4,2185.122283,1742.387525,6,4098.0,0.041052,1,1,5,5,cant_lose,cant_lose
73,2016-11-11,1,2121.104485,939.375335,2,755.0,0.310393,2,1,4,3,at_risk,at_risk
...,...,...,...,...,...,...,...,...,...,...,...,...,...
148312,2022-09-02,1,0.724430,0.724201,1,550.0,0.999684,5,1,1,3,new_customers,hibernating
148333,2022-09-02,1,0.619158,0.619144,1,3028.0,0.999978,5,1,1,5,new_customers,hibernating
148347,2022-09-02,1,0.544632,0.544619,1,798.0,0.999977,5,1,1,4,new_customers,hibernating
148363,2022-09-02,1,0.499832,0.499819,1,3028.0,0.999974,5,1,1,5,new_customers,hibernating
