In [None]:
# Customer Segmentation Dashboard: Phase 4 - Segmenting Customers

**Objective**: Assign RFM-based scores and segments to customers for actionable insights.

**Setup**: Load RFM data from SQLite.

**Steps**:
1. Score RFM metrics using quartiles.
2. Assign customer segments based on RFM scores.
3. Validate and explore segment distribution.
4. Save segmented data for visualization.

In [1]:
import pandas as pd
import sqlite3

# Location of the project database, relative to this notebook
db_path = '../ecommerce_data.db'
# The connection stays open here; the save cell at the end reuses and closes it
conn = sqlite3.connect(db_path)

# Read every row and column of the `rfm` table into a DataFrame
rfm = pd.read_sql_query(sql='SELECT * FROM rfm', con=conn)
print('Loaded RFM data shape:', rfm.shape)
rfm.head()

Loaded RFM data shape: (4372, 4)


Unnamed: 0,CustomerID,Recency,Frequency,Monetary
0,12346.0,326,2,0.0
1,12347.0,2,7,4310.0
2,12348.0,75,4,1797.24
3,12349.0,19,1,1757.55
4,12350.0,310,1,334.4


In [None]:
## Step 1: Score RFM Metrics Using Quartiles

- Recency: Lower is better (score 1-4, 4=most recent).
- Frequency: Higher is better (score 1-4, 4=most frequent).
- Monetary: Higher is better (score 1-4, 4=highest spenders).

In [3]:
# Quartile scoring: each metric is cut into four quantile-based bins labelled 1-4.
# Recency labels run in reverse so the lowest (most recent) quartile receives a 4.
recency_bins = pd.qcut(rfm['Recency'], q=4, labels=[4, 3, 2, 1])
rfm['R_Score'] = recency_bins.astype(int)

# Frequency is ranked with method='first' before binning: tied values get distinct
# ranks, which keeps the quartile edges unique (ties are split by row order).
frequency_rank = rfm['Frequency'].rank(method='first')
rfm['F_Score'] = pd.qcut(frequency_rank, q=4, labels=[1, 2, 3, 4]).astype(int)

monetary_bins = pd.qcut(rfm['Monetary'], q=4, labels=[1, 2, 3, 4])
rfm['M_Score'] = monetary_bins.astype(int)

# Concatenate the three digits into one code such as '444' (order: R, F, M)
score_digits = rfm[['R_Score', 'F_Score', 'M_Score']].astype(str)
rfm['RFM_Score'] = score_digits['R_Score'] + score_digits['F_Score'] + score_digits['M_Score']
rfm.head()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R_Score,F_Score,M_Score,RFM_Score
0,12346.0,326,2,0.0,1,2,1,121
1,12347.0,2,7,4310.0,4,4,4,444
2,12348.0,75,4,1797.24,2,3,4,234
3,12349.0,19,1,1757.55,3,1,4,314
4,12350.0,310,1,334.4,1,1,2,112


In [None]:
## Step 2: Assign Customer Segments

Define segments based on RFM scores. Example mapping:
- 444: 'Champions'
- 441, 442, 443: 'Loyal Customers'
- 411, 412, 413, 414: 'Potential Loyalist'
- 111: 'At Risk'
- Others: 'Others'

(You can customize this mapping as needed for your business logic.)

In [4]:
# Exact RFM codes that map to a named segment; any other code falls back to 'Others'.
_SEGMENT_BY_RFM_SCORE = {
    '444': 'Champions',
    '441': 'Loyal Customers',
    '442': 'Loyal Customers',
    '443': 'Loyal Customers',
    '411': 'Potential Loyalist',
    '412': 'Potential Loyalist',
    '413': 'Potential Loyalist',
    '414': 'Potential Loyalist',
    '111': 'At Risk',
}


def segment_customer(row):
    """Return the segment label for a single customer row.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['RFM_Score']`` and yield the three-digit RFM code
        as a string (for example ``'444'``).

    Returns
    -------
    str
        'Champions', 'Loyal Customers', 'Potential Loyalist' or 'At Risk' for
        the codes listed in the lookup table, otherwise 'Others'.
    """
    return _SEGMENT_BY_RFM_SCORE.get(row['RFM_Score'], 'Others')

# Label every customer by running segment_customer over the rows (axis=1)
segment_labels = rfm.apply(segment_customer, axis=1)
rfm['Segment'] = segment_labels
rfm.head()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R_Score,F_Score,M_Score,RFM_Score,Segment
0,12346.0,326,2,0.0,1,2,1,121,Others
1,12347.0,2,7,4310.0,4,4,4,444,Champions
2,12348.0,75,4,1797.24,2,3,4,234,Others
3,12349.0,19,1,1757.55,3,1,4,314,Others
4,12350.0,310,1,334.4,1,1,2,112,Others


In [None]:
## Step 3: Validate and Explore Segment Distribution

In [5]:
# Number of customers in each segment (value_counts lists the largest first)
segment_counts = rfm['Segment'].value_counts()
print(segment_counts)

# Mean Recency / Frequency / Monetary per segment, as a sanity check on the labels
rfm_metric_cols = ['Recency', 'Frequency', 'Monetary']
print(rfm.groupby('Segment')[rfm_metric_cols].mean())

Segment
Others                3374
Champions              496
At Risk                320
Loyal Customers        110
Potential Loyalist      72
Name: count, dtype: int64
                       Recency  Frequency     Monetary
Segment                                               
At Risk             271.065625   1.000000   127.254312
Champions             6.854839  18.866935  8791.427016
Loyal Customers       7.836364   7.681818  1160.632636
Others               92.103734   3.436574  1104.221572
Potential Loyalist    9.291667   1.000000   332.459306


In [None]:
## Step 4: Save Segmented Data

Save the segmented RFM data to SQLite and as a CSV for visualization.

In [6]:
# Save segmented data for the visualization phase.
# The connection is closed in `finally` so it is released even when one of the
# writes fails (for example if the ../data directory does not exist).
try:
    # Replace any previous `rfm_segmented` table; the DataFrame index is not stored
    rfm.to_sql('rfm_segmented', conn, if_exists='replace', index=False)
    rfm.to_csv('../data/rfm_segmented.csv', index=False)
    print('Segmented RFM data saved to SQLite and CSV.')
finally:
    # Close connection
    conn.close()

Segmented RFM data saved to SQLite and CSV.


In [None]:
## Summary

- RFM scores assigned and segments defined.
- Segment distribution validated.
- Segmented data saved for visualization in the next phase.