# Background

- The platform has 6 years worth of data.
- Problem: Issues with retention, reengagement, campaign targeting, and lack of general understanding of the user base
- Ask: How to group players based on recent behavior, general purchase behavior, and extreme purchase behavior

# Data Profile

- Business domain: scheduling platform for services
- Snapshot date: 2022-09-03
- Disclaimer: Data is private and have been masked. It will not be shared.

# Setup

In [1]:
%pip install ../.

Processing c:\users\franc\git-repo\scheduling-platform-segmentation
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: scheduling-platform-segmentation
  Building wheel for scheduling-platform-segmentation (setup.py): started
  Building wheel for scheduling-platform-segmentation (setup.py): finished with status 'done'
  Created wheel for scheduling-platform-segmentation: filename=scheduling_platform_segmentation-0.0.1-py3-none-any.whl size=4439 sha256=2503801bf172ba6bb2d0c8b55045d6ebac87278be6752a9702de4ac005af86ae
  Stored in directory: c:\users\franc\appdata\local\pip\cache\wheels\0f\b0\cc\22072402b1c65edb72b7efff9548c7464e42d8420b35ab478b
Successfully built scheduling-platform-segmentation
Installing collected packages: scheduling-platform-segmentation
  Attempting uninstall: scheduling-platform-segmentation
    Found existing installation: scheduling-platform-segmentation 0.0.1
    Uninstalli

In [2]:
import pandas as pd
import numpy as np

from copy import deepcopy

from scheduling_platform_segmentation.constants import QUANTILE_LIST, TRAD_RF_SEGMENTS_LIST
from scheduling_platform_segmentation.preprocess.rfm import (
    generate_quantile_dictionary,
    generate_conditions,
    generate_trad_rf_segments
)

import warnings
warnings.filterwarnings("ignore")

In [3]:
pdf_raw_user_by_vendor_by_store = pd.read_csv('user_data_by_vendor_by_store.csv')
pdf_raw_user_by_vendor_by_store = pdf_raw_user_by_vendor_by_store[[c for c in pdf_raw_user_by_vendor_by_store.columns if c != 'Unnamed: 0']]
pdf_raw_user_by_vendor_by_store

Unnamed: 0,user_id,install_date,vendor_id,store_id,tenure,store_days_since_last_transaction,store_num_transactions,store_total_dollar_spend
0,142488,2022-07-23,195,269,41.737250,12.899776,2,15400.0
1,133793,2022-06-01,195,269,93.859821,20.668556,3,33000.0
2,139901,2022-07-09,195,269,55.806231,55.806223,1,33000.0
3,134030,2022-06-02,195,269,92.660766,63.704679,3,19600.0
4,140978,2022-07-14,195,269,50.750846,23.745670,3,13400.0
...,...,...,...,...,...,...,...,...
92250,110297,2021-11-29,139,219,277.861396,81.845761,2,50300.0
92251,108382,2021-11-15,139,219,291.649244,88.853760,2,59196.0
92252,123325,2022-03-22,139,219,164.875360,95.752685,1,67050.0
92253,17108,2018-08-22,146,226,1472.914944,415.731643,1,500.0


In [4]:
# pdf_raw_user_purchase_behavior = pd.read_csv('user_data_purchase_behavior.csv')
# pdf_raw_user_purchase_behavior = pdf_raw_user_purchase_behavior[[c for c in pdf_raw_user_purchase_behavior.columns if c != 'Unnamed: 0']]
# pdf_raw_user_purchase_behavior

# Recent Purchase Behavior

In [5]:
pdf_raw_user_by_vendor_by_store

Unnamed: 0,user_id,install_date,vendor_id,store_id,tenure,store_days_since_last_transaction,store_num_transactions,store_total_dollar_spend
0,142488,2022-07-23,195,269,41.737250,12.899776,2,15400.0
1,133793,2022-06-01,195,269,93.859821,20.668556,3,33000.0
2,139901,2022-07-09,195,269,55.806231,55.806223,1,33000.0
3,134030,2022-06-02,195,269,92.660766,63.704679,3,19600.0
4,140978,2022-07-14,195,269,50.750846,23.745670,3,13400.0
...,...,...,...,...,...,...,...,...
92250,110297,2021-11-29,139,219,277.861396,81.845761,2,50300.0
92251,108382,2021-11-15,139,219,291.649244,88.853760,2,59196.0
92252,123325,2022-03-22,139,219,164.875360,95.752685,1,67050.0
92253,17108,2018-08-22,146,226,1472.914944,415.731643,1,500.0


In [6]:
pdf_user_level = (
    pdf_raw_user_by_vendor_by_store
    .groupby('user_id')
    .agg(install_date=('install_date', 'min'),
         cnt_vendors=('vendor_id', 'nunique'),
         tenure=('tenure', 'max'),
         recency=('store_days_since_last_transaction', 'min'),
         frequency=('store_num_transactions', 'sum'),
         monetary=('store_total_dollar_spend', 'sum')
         )
    )

In [7]:
dict_quantile = generate_quantile_dictionary(pdf_user_level, QUANTILE_LIST) # save this into database

pdf_rfm_scored = deepcopy(pdf_user_level)
for metric in dict_quantile.keys():
    if metric in ('recency'):
        reverse = 1
    else:
        reverse = 0

    conditions, values = generate_conditions(pdf_rfm_scored, dict_quantile, metric, reverse=reverse)
    pdf_rfm_scored[metric[0]] = np.select(conditions, values)

In [8]:
pdf_rfm_labeled = generate_trad_rf_segments(pdf_rfm_scored)
pdf_rfm_labeled

Unnamed: 0,install_date,cnt_vendors,tenure,recency,frequency,monetary,t,r,f,m,wtd_rfm
106246,2021-10-30,1,307.799153,121.934572,6,90396.0,1,5,5,5,champions
106247,2021-10-30,1,307.797483,139.782477,1,3826.0,1,5,1,5,new_customers
106251,2021-10-30,1,307.784611,307.784600,1,42800.0,1,4,1,5,promising
106252,2021-10-30,1,307.776173,223.626908,2,1495.0,1,4,4,4,loyal_customers
106254,2021-10-30,1,307.769778,117.760101,2,1496.0,1,5,4,4,champions
...,...,...,...,...,...,...,...,...,...,...,...
34853,2019-03-09,1,1273.808965,985.000000,4,1745.0,5,2,5,4,cant_lose
34854,2019-03-09,1,1273.799046,1080.489532,2,790.0,5,2,4,3,at_risk
34855,2019-03-09,1,1273.781311,1147.726193,1,450.0,5,1,1,2,hibernating
34856,2019-03-09,1,1273.769372,1084.455102,2,845.0,5,2,4,4,at_risk


In [20]:
pdf_summary = (
    pdf_rfm_labeled
    .groupby(['wtd_rfm', 't'])
    .describe()[[('tenure', 'count'), 
                ('recency', 'mean'), #('recency', '50%'), 
                ('frequency', 'mean'), #('frequency', '50%'), 
                ('monetary', 'mean'), #('monetary', '50%')
                ]]
    .reset_index()
    )

pdf_summary.columns = ['segment', 'tenure', 'cnt_users', 
                       'recency_mean', #'recency_median', 
                       'frequency_mean', #'frequency_median', 
                       'monetary_mean', #'monetary_median'
                       ]

pdf_summary = pdf_summary.sort_values(['tenure', 'segment']).set_index(['tenure', 'segment'])
pdf_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,cnt_users,recency_mean,frequency_mean,monetary_mean
tenure,segment,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,champions,2397.0,67.300643,2.7864,7722.003087
1,loyal_customers,209.0,235.982359,2.157895,6870.30622
1,new_customers,11088.0,91.972988,1.0,2221.260732
1,promising,3713.0,251.336164,1.0,2293.652572
2,about_to_sleep,5486.0,653.309957,1.0,254.478163
2,champions,1556.0,77.952291,4.016067,9740.802314
2,loyal_customers,2138.0,487.562068,2.624415,1991.68363
2,new_customers,284.0,96.500293,1.0,3262.56338
2,promising,7943.0,509.049597,1.0,964.61118
3,about_to_sleep,8148.0,730.733086,1.0,249.491163
