<a href="https://colab.research.google.com/github/jeremysb1/predictive-analytics/blob/main/rfm_problem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Directory, Libraries, and Data

In [2]:
# Switch the working directory so the relative CSV path used below resolves from this folder.
# NOTE(review): the path sits under /content/drive/MyDrive, which presumably is a mounted
# Google Drive in Colab; no mount step is visible in this notebook and the execution
# counter starts at 2, so an earlier cell may have been removed. TODO confirm.
%cd /content/drive/MyDrive/Business Analyst Course/Segmentation/RFM

/content/drive/MyDrive/Business Analyst Course/Segmentation/RFM


In [3]:
#import libraries
import pandas as pd

In [4]:
# Load the customer table; the file name is resolved against the current working directory
df = pd.read_csv(filepath_or_buffer = "customer_data.csv")
# Show the first five rows as a quick sanity check
df.head(n = 5)

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days
0,22086,777,5/14/2006,9,232
1,2290,1555,9/8/2006,16,115
2,26377,336,11/19/2006,5,43
3,24650,1189,10/29/2006,12,64
4,12883,1229,12/9/2006,12,23


# Preparing the DataFrame

In [5]:
# Monetary value = average basket: revenue divided by the number of orders, row by row
df['monetary'] = df['revenue'].div(df['number_of_orders'])
df.head(n = 1)

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days,monetary
0,22086,777,5/14/2006,9,232,86.333333


In [6]:
#changing variable names
# Rename the order-count and recency columns to the standard RFM vocabulary.
# The renamed frame is reassigned rather than modified with inplace = True:
# reassignment keeps the step chainable and avoids mutating, behind the reader's
# back, the frame object that earlier cells already displayed.
df = df.rename(columns = {
    'number_of_orders': 'frequency',
    'recency_days': 'recency'
})

df.head(1)

Unnamed: 0,customer_id,revenue,most_recent_visit,frequency,recency,monetary
0,22086,777,5/14/2006,9,232,86.333333


In [7]:
# Discard the two raw columns that the scoring cells below do not use
df = df.drop(['most_recent_visit', 'revenue'], axis = 'columns')
df.head(n = 1)

Unnamed: 0,customer_id,frequency,recency,monetary
0,22086,9,232,86.333333


In [8]:
# Tertile point system: each metric is split into three equal-sized quantile bins.
# For frequency and monetary the lowest bin scores 1 and the highest 3; recency uses
# reversed labels, so the smallest recency values receive the 3.
tertile_labels = {
    'F': ('frequency', range(1, 4)),
    'M': ('monetary', range(1, 4)),
    'R': ('recency', range(3, 0, -1)),
}
for score_col, (value_col, labels) in tertile_labels.items():
    df[score_col] = pd.qcut(df[value_col], q = 3, labels = labels)
df.head()

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R
0,22086,9,232,86.333333,2,1,1
1,2290,16,115,97.1875,3,2,2
2,26377,5,43,67.2,1,1,3
3,24650,12,64,99.083333,3,2,3
4,12883,12,23,102.416667,3,2,3


In [9]:
#Create RFM Score
# R, F and M come from pd.qcut(..., labels = ...) and are therefore categorical columns.
# Categorical data does not define a 'sum' reduction, so adding them directly only works
# when pandas happens to fall back to plain Python objects. Cast the three scores to
# integers first so the row-wise total is ordinary integer arithmetic.
# astype(int) raises if any score is missing instead of silently leaving it out of the total.
df['RFM'] = df[['R', 'F', 'M']].astype(int).sum(axis = 1)
df.head(1)

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R,RFM
0,22086,9,232,86.333333,2,1,1,4


In [10]:
#Create RFM Function
def rfm_segment(df, superstar_min = 8, high_potential_min = 5):
  """Translate one customer's combined RFM score into a segment label.

  Written for row-wise use such as ``frame.apply(rfm_segment, axis = 1)``.

  Parameters
  ----------
  df : row-like
      Despite the name, a single row (anything that can be indexed with
      ``'RFM'``), not a whole DataFrame.
  superstar_min : number, default 8
      Lowest score that is labelled 'SuperStar'.
  high_potential_min : number, default 5
      Lowest score that is labelled 'High Potential'.

  Returns
  -------
  str
      'SuperStar', 'High Potential' or 'Low Relevance'.
  """
  score = df['RFM']
  if score >= superstar_min:
    return 'SuperStar'
  # Reaching this point already means score < superstar_min, so no upper-bound test is needed.
  if score >= high_potential_min:
    return 'High Potential'
  # Any remaining score lands here; a NaN score fails both comparisons and does too.
  return 'Low Relevance'

# RFM

In [11]:
# Label every customer by running the segment function once per row
df['RFM_level'] = df.apply(func = rfm_segment, axis = 'columns')
df.head(n = 5)

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R,RFM,RFM_level
0,22086,9,232,86.333333,2,1,1,4,Low Relevance
1,2290,16,115,97.1875,3,2,2,7,High Potential
2,26377,5,43,67.2,1,1,3,5,High Potential
3,24650,12,64,99.083333,3,2,3,8,SuperStar
4,12883,12,23,102.416667,3,2,3,8,SuperStar


In [12]:
# Per-segment profile: average recency, frequency and monetary value, plus the
# number of customers in each segment (taken as the count of monetary values)
segment_stats = {
    'recency': 'mean',
    'frequency': 'mean',
    'monetary': ['mean', 'count'],
}
df.groupby('RFM_level').aggregate(segment_stats).round(decimals = 1)

Unnamed: 0_level_0,recency,frequency,monetary,monetary
Unnamed: 0_level_1,mean,mean,mean,count
RFM_level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
High Potential,171.8,9.8,97.0,26445
Low Relevance,306.6,7.1,78.5,7179
SuperStar,80.1,12.8,108.3,6375
