RFM_Analysis (Standard Tresholding method)

**Overview**

This notebook performs customer segmentation based on RFM (Recency, Frequency, and Monetary) analysis using manually tresholding of RFM scores. 

**Requirements**

- **Inputs**: Manually specify the reference date, analysis start and end dates, the tresholding for each cluster can be changed in the .py file that this notebook references based on changing needs
- **Outputs**: Cluster analysis report and a DataFrame/CSV output of at-risk customers.


In [None]:
# import 
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from rfm_analysis import *

In [2]:
# set custom analysis dates
ref_date = '2021-01-01'
start_date = '2020-01-01'
end_date = '2020-12-31'

# loading
df = pd.read_csv('data_final.csv')

# cleaning 
df['purchase_date'] = pd.to_datetime(df['purchase_date'])

# seleting required columns 
df = df[['customer_key', 'quantity_purchased', 'revenue', 'purchase_date', 'time_of_purchase']]

df.head(10)

  df = pd.read_csv('data_final.csv')
  df['purchase_date'] = pd.to_datetime(df['purchase_date'])


Unnamed: 0,customer_key,quantity_purchased,revenue,purchase_date,time_of_purchase
0,C001743,4,56,2014-05-25,16:20:00
1,C008827,11,22,2018-12-31,15:03:00
2,C008830,11,143,2015-12-21,12:28:00
3,C004301,5,160,2014-05-25,16:20:00
4,C008848,10,90,2020-12-22,19:51:00
5,C005060,5,19,2015-02-28,18:36:00
6,C008854,9,18,2020-04-16,4:56:00
7,C008874,4,136,2019-06-25,0:31:00
8,C008876,14,70,2016-12-15,11:16:00
9,C008881,11,66,2019-06-22,23:04:00


In [3]:
# creating cumulative columns for each customer based on purchase date
df['cumulative_revenue'] = df.groupby('customer_key')['revenue'].cumsum()
df['cumulative_quantity'] = df.groupby('customer_key')['quantity_purchased'].cumsum()

df = df.sort_values(by=['customer_key', 'purchase_date'])
df.head(100)

Unnamed: 0,customer_key,quantity_purchased,revenue,purchase_date,time_of_purchase,cumulative_revenue,cumulative_quantity
34860,C000001,11,143,2014-02-06,4:56:00,275,22
690838,C000001,7,77,2014-02-21,16:16:00,2281,309
292237,C000001,6,18,2014-02-22,19:43:00,1110,142
437355,C000001,2,10,2014-04-03,20:46:00,1475,191
717631,C000001,7,49,2014-05-18,16:50:00,2330,316
...,...,...,...,...,...,...,...
937055,C000002,9,45,2014-10-17,3:12:00,5606,629
890212,C000002,7,56,2014-10-21,12:10:00,5208,572
890213,C000002,11,77,2014-10-24,14:15:00,5285,583
718195,C000002,3,6,2014-10-25,8:24:00,3919,451


In [4]:
rfm_df = rfm_analysis(df, ref_date, start_date, end_date)
rfm_df.head(10)

Unnamed: 0,customer_key,purchase_date,Recency,Frequency,Monetary,R_Score,F_Score,M_Score,RFM_Segment,RFM_Score
0,C000001,2020-12-30,2,5,285,5,1,1,511,7
1,C000002,2020-11-22,40,12,587,1,1,2,112,4
2,C000003,2020-11-02,60,8,303,1,1,1,111,3
3,C000004,2020-12-21,11,17,389,4,4,1,441,9
4,C000005,2020-11-23,39,12,663,1,1,2,112,4
5,C000006,2020-11-18,44,21,1189,1,5,5,155,11
6,C000007,2020-12-20,12,19,1107,3,4,4,344,11
7,C000008,2020-12-24,8,12,475,4,1,1,411,6
8,C000009,2020-11-11,51,23,1491,1,5,5,155,11
9,C000010,2020-12-03,29,15,538,2,3,1,231,6


In [5]:
rfm_df_complex = label_customer_segments(rfm_df)
rfm_df_complex.head(10)

Unnamed: 0,customer_key,purchase_date,Recency,Frequency,Monetary,R_Score,F_Score,M_Score,RFM_Segment,RFM_Score,Segment
0,C000001,2020-12-30,2,5,285,5,1,1,511,7,New Customers
1,C000002,2020-11-22,40,12,587,1,1,2,112,4,Lost
2,C000003,2020-11-02,60,8,303,1,1,1,111,3,Lost
3,C000004,2020-12-21,11,17,389,4,4,1,441,9,Potential Loyalist
4,C000005,2020-11-23,39,12,663,1,1,2,112,4,Lost
5,C000006,2020-11-18,44,21,1189,1,5,5,155,11,Cannot Lose Them
6,C000007,2020-12-20,12,19,1107,3,4,4,344,11,Loyal
7,C000008,2020-12-24,8,12,475,4,1,1,411,6,New Customers
8,C000009,2020-11-11,51,23,1491,1,5,5,155,11,Cannot Lose Them
9,C000010,2020-12-03,29,15,538,2,3,1,231,6,About To Sleep


In [6]:
# rfm_df = assign_rfm_labels(rfm_df)
print(plot_treemap(rfm_df_complex))

None


In [7]:
analyze_clusters(rfm_df_complex,  rfm_columns=['Recency', 'Frequency', 'Monetary'], cluster_id_col='Segment')

Cluster Analysis Summary:
               Segment  Recency_mean  Recency_median  Recency_count  \
0       About To Sleep     21.978155            20.0            412   
1              At Risk     39.244287            33.0           1269   
2     Cannot Lose Them     54.638596            49.0            285   
3            Champions      4.745808             4.0           1133   
4          Hibernating     34.232355            30.0           1261   
5                 Lost     67.367707            59.0            737   
6                Loyal     13.013274            14.0            904   
7       Need Attention     10.207607            10.0            631   
8        New Customers      8.796569             8.0            816   
9                Other     52.666667            49.0             54   
10  Potential Loyalist      8.916515             8.0           1102   
11           Promising      5.800681             4.0            587   

    Frequency_mean  Frequency_median  Frequency_co

Unnamed: 0,Segment,Recency_mean,Recency_median,Recency_count,Frequency_mean,Frequency_median,Frequency_count,Monetary_mean,Monetary_median,Monetary_count
0,About To Sleep,21.978155,20.0,412,12.980583,13.0,412,580.878641,562.5,412
1,At Risk,39.244287,33.0,1269,17.661939,17.0,1269,1030.571316,988.0,1269
2,Cannot Lose Them,54.638596,49.0,285,15.045614,12.0,285,1046.873684,993.0,285
3,Champions,4.745808,4.0,1133,20.484554,20.0,1133,1276.843778,1230.0,1133
4,Hibernating,34.232355,30.0,1261,13.161776,14.0,1261,645.894528,657.0,1261
5,Lost,67.367707,59.0,737,10.90095,11.0,737,469.860244,480.0,737
6,Loyal,13.013274,14.0,904,18.793142,18.0,904,1163.892699,1124.0,904
7,Need Attention,10.207607,10.0,631,16.160063,16.0,631,985.049128,963.0,631
8,New Customers,8.796569,8.0,816,10.947304,11.0,816,507.691176,514.0,816
9,Other,52.666667,49.0,54,21.166667,21.0,54,1019.148148,1013.0,54


In [8]:
rfm_df_simple = label_customer_segments_simple(rfm_df)
print(rfm_df_simple.head(10))

  customer_key purchase_date  Recency  Frequency  Monetary R_Score F_Score  \
0      C000001    2020-12-30        2          5       285       5       1   
1      C000002    2020-11-22       40         12       587       1       1   
2      C000003    2020-11-02       60          8       303       1       1   
3      C000004    2020-12-21       11         17       389       4       4   
4      C000005    2020-11-23       39         12       663       1       1   
5      C000006    2020-11-18       44         21      1189       1       5   
6      C000007    2020-12-20       12         19      1107       3       4   
7      C000008    2020-12-24        8         12       475       4       1   
8      C000009    2020-11-11       51         23      1491       1       5   
9      C000010    2020-12-03       29         15       538       2       3   

  M_Score RFM_Segment  RFM_Score          Segment  
0       1         511          7        Low Value  
1       2         112          4     

In [9]:
# rfm_df = assign_rfm_labels(rfm_df)
plot_treemap(rfm_df_simple)

In [10]:
analyze_clusters(rfm_df_simple,  rfm_columns=['Recency', 'Frequency', 'Monetary'], cluster_id_col='Segment')

Cluster Analysis Summary:
           Segment  Recency_mean  Recency_median  Recency_count  \
0          At Risk     46.213836            39.0           1590   
1        Champions      5.087163             5.0           1262   
2        Low Value     19.108274            12.0           3251   
3  Loyal Customers     23.335168            19.0           3088   

   Frequency_mean  Frequency_median  Frequency_count  Monetary_mean  \
0       13.467925              13.0             1590     707.980503   
1       20.219493              20.0             1262    1251.185420   
2       12.440787              12.0             3251     615.004922   
3       17.901231              17.0             3088    1040.938472   

   Monetary_median  Monetary_count  
0            674.5            1590  
1           1192.0            1262  
2            598.0            3251  
3            979.0            3088  


Unnamed: 0,Segment,Recency_mean,Recency_median,Recency_count,Frequency_mean,Frequency_median,Frequency_count,Monetary_mean,Monetary_median,Monetary_count
0,At Risk,46.213836,39.0,1590,13.467925,13.0,1590,707.980503,674.5,1590
1,Champions,5.087163,5.0,1262,20.219493,20.0,1262,1251.18542,1192.0,1262
2,Low Value,19.108274,12.0,3251,12.440787,12.0,3251,615.004922,598.0,3251
3,Loyal Customers,23.335168,19.0,3088,17.901231,17.0,3088,1040.938472,979.0,3088


In [None]:
# output df / vsc of low-value customers
low_value_customers = rfm_df_simple[rfm_df_simple['Segment'] == 'At Risk']

#uncomment to save as csv
low_value_customers.to_csv('low_value_customers_simple.csv', index=False)
