### Import Libraries

In [7]:
# Import Libraries

# Data wrangling
import numpy as np
import pandas as pd

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Import warning
import warnings
warnings.filterwarnings("ignore")

In [8]:
### Overview

In [9]:
# Load the raw transaction table used by the cells below.
raw_data_path = "data/rfm_data.csv"
df = pd.read_csv(raw_data_path)

In [10]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   CustomerID          1000 non-null   int64  
 1   PurchaseDate        1000 non-null   object 
 2   TransactionAmount   1000 non-null   float64
 3   ProductInformation  1000 non-null   object 
 4   OrderID             1000 non-null   int64  
 5   Location            1000 non-null   object 
dtypes: float64(1), int64(2), object(3)
memory usage: 47.0+ KB


### Preprocessing

**Get RFM Values**

In [18]:
# Check last date
# Parse the column to datetimes *before* taking the maximum: .max() on the raw
# string (object) column compares lexicographically, which only coincides with
# chronological order when every value is zero-padded ISO "YYYY-MM-DD".
# Parsing first also keeps this cell correct if PurchaseDate has already been
# converted to datetime by a later cell and this one is re-run.
max_date = pd.to_datetime(df["PurchaseDate"]).max()

In [19]:
# Broadcast the dataset-wide latest purchase date (max_date) onto every row so
# it can be subtracted from PurchaseDate column-wise.
df = df.assign(LastPurchase=max_date)

In [20]:
# Replace the PurchaseDate column with its parsed pandas-datetime equivalent.
df["PurchaseDate"] = df["PurchaseDate"].pipe(pd.to_datetime)

In [22]:
# Get Recency
# Whole days elapsed between the reference date (LastPurchase) and each purchase.
# NOTE(review): this is computed per transaction row, so a customer with several
# purchases gets a different Recency on each row — confirm that is intended.
days_since_purchase = (df["LastPurchase"] - df["PurchaseDate"]).dt.days
df["Recency"] = days_since_purchase

In [25]:
# Preview the first rows of the working frame to inspect its current columns.
df.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,LastPurchase,Recency
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,2023-06-10,60
1,2188,2023-04-11,463.7,Product A,176819,London,2023-06-10,60
2,4608,2023-04-11,80.28,Product A,340062,New York,2023-06-10,60
3,2559,2023-04-11,221.29,Product A,239145,London,2023-06-10,60
4,9482,2023-04-11,739.56,Product A,194545,Paris,2023-06-10,60


In [24]:
# Get Frequency
# Count the non-null OrderID entries per customer and expose the result as a
# two-column frame (CustomerID, Frequency) ready to be merged back.
freq = (
    df.groupby("CustomerID")["OrderID"]
    .count()
    .rename("Frequency")
    .reset_index()
)

In [26]:
# Attach each customer's Frequency to every one of their transaction rows.
df_new = df.merge(freq, how="inner", on="CustomerID")

In [32]:
# Get Monetary
# Sum TransactionAmount per customer and expose the result as a two-column
# frame (CustomerID, Monetary) ready to be merged back.
mone = (
    df.groupby("CustomerID")["TransactionAmount"]
    .sum()
    .rename("Monetary")
    .reset_index()
)

In [33]:
# Attach each customer's Monetary total to the frame that already carries
# Frequency, giving the combined RFM table.
df_rfm = df_new.merge(mone, how="inner", on="CustomerID")

In [34]:
# Preview the first rows of the merged frame to inspect the added columns.
df_rfm.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,LastPurchase,Recency,Frequency,Monetary
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,2023-06-10,60,1,943.31
1,2188,2023-04-11,463.7,Product A,176819,London,2023-06-10,60,1,463.7
2,4608,2023-04-11,80.28,Product A,340062,New York,2023-06-10,60,1,80.28
3,2559,2023-04-11,221.29,Product A,239145,London,2023-06-10,60,1,221.29
4,9482,2023-04-11,739.56,Product A,194545,Paris,2023-06-10,60,1,739.56


### Get RFM Score

In [37]:
# Quartile cut points (25th / 50th / 75th percentile) of the three RFM measures.
rfm_columns = ["Recency", "Frequency", "Monetary"]
quartile_levels = [0.25, 0.5, 0.75]
df_rfm[rfm_columns].quantile(quartile_levels)

Unnamed: 0,Recency,Frequency,Monetary
0.25,15.0,1.0,283.925
0.5,32.0,1.0,566.71
0.75,45.0,1.0,805.3725


In [38]:
# Persist the RFM table to the working directory. The row index is written too
# (to_csv default), so it appears as an unnamed first column in the file.
rfm_output_path = "df_rfm_cleaned.csv"
df_rfm.to_csv(rfm_output_path)

![RFM segments table](https://blog.rsquaredacademy.com/img/rfm_segments_table.png)