In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [3]:
# Load the raw order export and parse the order date column into datetimes.
df = pd.read_csv("cust_segmentation.csv").assign(
    orderDate=lambda orders: pd.to_datetime(orders['orderDate'])
)

In [10]:
# Preview the first ten rows of the loaded frame.
df.iloc[:10]

Unnamed: 0,CustomerSK,Cust_Name,Email,Country,Phone,Order_ID,orderDate,Total_Amount
0,3669,Anallise Collum,acollumik@delicious.com,Portugal,164-390-2831,708,2022-11-30,1696.0
1,3161,Elvis Pinnion,epinnion4g@mac.com,Sweden,985-189-0287,547,2023-05-04,21709.8
2,1604,Giustina Guyver,gguyvergr@msn.com,France,790-103-1554,1868,2024-05-13,41035.29
3,1191,Salaidh Kennet,skennet5a@mail.ru,Indonesia,540-692-7755,1805,2023-12-26,38375.05
4,4538,Duffy Conybear,dconybearex@delicious.com,Democratic Republic of the Congo,489-817-1623,1953,2024-03-16,33464.34
5,575,Marj Coupe,mcoupefy@cargocollective.com,France,428-658-7645,517,2024-06-05,16642.5
6,701,Irwin Simmig,isimmigjg@free.fr,Venezuela,891-206-7625,2710,2024-09-30,16293.06
7,2896,Klemens Henzley,khenzleyov@desdev.cn,Uganda,915-128-4476,1291,2022-07-21,7252.96
8,2538,Lyssa Danovich,ldanovichex@hugedomains.com,Indonesia,569-723-8483,2594,2022-11-13,3111.81
9,520,Domenico Franseco,dfransecoef@chron.com,China,484-835-9697,1859,2024-09-27,7075.25


In [4]:
# Reference point for recency: one day after the most recent order in the data,
# so every customer's days-since-last-order comes out as at least 1.
snapshot_date = df['orderDate'].max() + pd.Timedelta(1, unit='D')

In [5]:
# Build one row per customer with the three RFM metrics:
#   Recency   - whole days between snapshot_date and the customer's latest order
#   Frequency - number of distinct Order_ID values
#   Monetary  - sum of Total_Amount over all of the customer's rows
# Named aggregation sets the output column names directly.
rfm = (
    df.groupby('CustomerSK')
    .agg(
        Recency=('orderDate', lambda dates: (snapshot_date - dates.max()).days),
        Frequency=('Order_ID', 'nunique'),
        Monetary=('Total_Amount', 'sum'),
    )
    .reset_index()
)


In [6]:
# Standardise the three RFM columns with StandardScaler before clustering;
# the fitted scaler is kept in `scaler`.
scaler = StandardScaler()
rfm_scaled = scaler.fit_transform(
    rfm.loc[:, ['Recency', 'Frequency', 'Monetary']]
)

In [7]:
# Cluster the scaled RFM features into four groups; the seed is fixed so the
# run is repeatable. The per-row cluster id is stored as rfm['Segment'].
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases - consider pinning it if results must match exactly.
kmeans = KMeans(n_clusters=4, random_state=42)
rfm['Segment'] = kmeans.fit(rfm_scaled).labels_

In [8]:
# Mean Recency / Frequency / Monetary for each cluster id.
cluster_summary = rfm.groupby('Segment').agg(
    {'Recency': 'mean', 'Frequency': 'mean', 'Monetary': 'mean'}
)
print(cluster_summary)

             Recency  Frequency      Monetary
Segment                                      
0        1091.140000   1.008750  4.224179e+05
1         302.238938   3.123894  1.331252e+06
2         481.978431   2.001961  8.459469e+05
3         362.424908   1.000000  4.198163e+05


In [17]:
# Per-cluster profile: mean of each RFM metric plus the number of customers
# in the cluster (reported as 'Count').
segment_summary = rfm.groupby('Segment').agg(
    Recency=('Recency', 'mean'),
    Frequency=('Frequency', 'mean'),
    Monetary=('Monetary', 'mean'),
    Count=('CustomerSK', 'count'),
)
print(segment_summary)

             Recency  Frequency      Monetary  Count
Segment                                             
0        1091.140000   1.008750  4.224179e+05    800
1         302.238938   3.123894  1.331252e+06    113
2         481.978431   2.001961  8.459469e+05    510
3         362.424908   1.000000  4.198163e+05    819


In [24]:
def _name_segments(profile):
    """Assign a business label to each cluster from its mean RFM profile.

    KMeans numbers its clusters arbitrarily, so labels are chosen by rank
    rather than by cluster id. Clusters are claimed in this order, each one
    taken from the clusters that are still unnamed:

      1. highest mean Monetary -> 'VIP Customers'
      2. highest mean Recency  -> 'Inactive Customers'
      3. highest mean Monetary -> 'Loyal Customers'
      4. the one left over     -> 'Potential Returning Customers'

    Parameters
    ----------
    profile : pandas.DataFrame
        Indexed by cluster id, with 'Recency' and 'Monetary' columns holding
        the per-cluster means.

    Returns
    -------
    dict
        Mapping of cluster id to label.

    Raises
    ------
    ValueError
        If ``profile`` does not describe exactly four clusters, since there
        are exactly four labels to hand out.
    """
    if len(profile) != 4:
        raise ValueError(
            f"expected 4 segments to name, found {len(profile)}"
        )

    ranked_labels = [
        ('VIP Customers', 'Monetary'),
        ('Inactive Customers', 'Recency'),
        ('Loyal Customers', 'Monetary'),
    ]
    unnamed = profile
    names = {}
    for label, metric in ranked_labels:
        segment = unnamed[metric].idxmax()
        names[segment] = label
        # Remove the claimed cluster so it cannot receive a second label.
        unnamed = unnamed.drop(index=segment)
    names[unnamed.index[0]] = 'Potential Returning Customers'
    return names


# Label every customer via their cluster's profile instead of a hard-coded
# id -> name dict, which would silently mislabel segments if KMeans returned
# the same clusters under different ids.
rfm['Cluster_Name'] = rfm['Segment'].map(
    _name_segments(rfm.groupby('Segment')[['Recency', 'Monetary']].mean())
)

In [26]:
# Spot-check the first five customers with their cluster id and label.
print(rfm.loc[:, ['CustomerSK', 'Segment', 'Cluster_Name']].head(5))

   CustomerSK  Segment                   Cluster_Name
0           6        0             Inactive Customers
1           7        0             Inactive Customers
2          10        3  Potential Returning Customers
3          11        1                  VIP Customers
4          12        2                Loyal Customers


In [27]:
# Write the labelled RFM table to disk without the positional index.
# NOTE(review): the saved read-back output lists a 'SegmentName' column that
# no cell in this notebook creates - presumably left over from a deleted
# cell; confirm whether downstream consumers need it.
rfm.to_csv(path_or_buf="rfm_segments_named.csv", index=False)

In [30]:
# Read the exported file back to confirm what was written. A separate name is
# used so the raw orders frame `df` is not overwritten: the snapshot-date and
# RFM cells read df['orderDate'] and would fail if re-run after a rebind.
rfm_saved = pd.read_csv("rfm_segments_named.csv")
rfm_saved.head(10)

Unnamed: 0,CustomerSK,Recency,Frequency,Monetary,Segment,SegmentName,Cluster_Name
0,6,788,1,428558.16,0,Inactive Customers,Inactive Customers
1,7,761,1,361370.17,0,Inactive Customers,Inactive Customers
2,10,642,1,408921.9,3,Potential Returning Customers,Potential Returning Customers
3,11,73,3,1439024.82,1,VIP Customers,VIP Customers
4,12,1036,2,819356.55,2,Loyal Customers,Loyal Customers
5,13,1173,1,572156.28,0,Inactive Customers,Inactive Customers
6,14,186,1,377388.67,3,Potential Returning Customers,Potential Returning Customers
7,16,671,2,683643.25,2,Loyal Customers,Loyal Customers
8,17,783,1,361653.54,0,Inactive Customers,Inactive Customers
9,19,1056,1,577390.37,0,Inactive Customers,Inactive Customers
