In [1]:
import joblib
import pandas as pd
import numpy as np

# Render every float in displayed frames with exactly two decimal places.
pd.options.display.float_format = "{:.2f}".format


In [2]:
# Load the per-customer table (one row per customer, with cluster assignment
# and anomaly flag, as shown in the preview below).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point this at artifacts produced by a trusted pipeline.
customer_artifact_path = "../artifacts/customer_with_anomalies.pkl"
customer_df = joblib.load(customer_artifact_path)

# Preview the first five rows.
customer_df.head()


Unnamed: 0,CustomerID,Recency,Frequency,MonetaryValue,TotalQuantity,Cluster,DistanceToCentroid,IsAnomaly
0,12346.0,325,1,77183.6,74215,3,12.9,True
1,12347.0,1,7,4310.0,2458,0,0.66,False
2,12348.0,74,4,1797.24,2341,0,0.43,False
3,12349.0,18,1,1757.55,631,0,0.51,False
4,12350.0,309,1,334.4,197,2,0.63,False


In [3]:
# Show the column labels of the loaded frame.
customer_df.columns


Index(['CustomerID', 'Recency', 'Frequency', 'MonetaryValue', 'TotalQuantity',
       'Cluster', 'DistanceToCentroid', 'IsAnomaly'],
      dtype='object')

Cluster Size & Contribution

In [4]:
# Per-cluster profile. Each entry names an output column and says which input
# column is reduced and how; dict order fixes the column order of the result.
# "count" tallies non-null CustomerID values in each cluster.
cluster_aggregations = {
    "Customers": pd.NamedAgg(column="CustomerID", aggfunc="count"),
    "Avg_Recency": pd.NamedAgg(column="Recency", aggfunc="mean"),
    "Avg_Frequency": pd.NamedAgg(column="Frequency", aggfunc="mean"),
    "Avg_Monetary": pd.NamedAgg(column="MonetaryValue", aggfunc="mean"),
    "Avg_Quantity": pd.NamedAgg(column="TotalQuantity", aggfunc="mean"),
    "Revenue_Contribution": pd.NamedAgg(column="MonetaryValue", aggfunc="sum"),
}
cluster_summary = customer_df.groupby("Cluster").agg(**cluster_aggregations)

cluster_summary


Unnamed: 0_level_0,Customers,Avg_Recency,Avg_Frequency,Avg_Monetary,Avg_Quantity,Revenue_Contribution
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3204,40.78,4.47,1677.1,991.42,5373414.97
1,7,5.71,65.43,184143.69,89650.43,1289005.83
2,1079,246.14,1.58,520.06,267.63,561139.86
3,48,16.54,42.85,35163.48,22395.52,1687847.24


Revenue Contribution %

In [5]:
# Grand total of MonetaryValue across all customer rows; this is the
# denominator for every percentage share computed from it.
total_revenue = customer_df["MonetaryValue"].sum()

# Each cluster's share of the total, in percent. This adds a column to
# cluster_summary in place.
cluster_summary["Revenue_%"] = (
    cluster_summary["Revenue_Contribution"].div(total_revenue).mul(100)
)

# Display the biggest contributors first; the sorted copy is not stored.
cluster_summary.sort_values(by="Revenue_%", ascending=False)


Unnamed: 0_level_0,Customers,Avg_Recency,Avg_Frequency,Avg_Monetary,Avg_Quantity,Revenue_Contribution,Revenue_%
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,3204,40.78,4.47,1677.1,991.42,5373414.97,60.3
3,48,16.54,42.85,35163.48,22395.52,1687847.24,18.94
1,7,5.71,65.43,184143.69,89650.43,1289005.83,14.46
2,1079,246.14,1.58,520.06,267.63,561139.86,6.3


Assign Business-Friendly Cluster Names

In [6]:
# Business-facing names for the numeric cluster ids.
cluster_labels = {
    0: "Regular Active Customers",
    1: "VIP / Wholesale Customers",
    2: "Inactive / Churn-Risk Customers",
    3: "Loyal High-Value Customers"
}

# Series.map leaves NaN for any id that is not a key of the dict, and groupby
# drops NaN keys by default. An unmapped (or missing) cluster id would therefore
# silently disappear from every label-based summary and the revenue shares would
# no longer add up to 100%. Fail loudly instead, before the column is written.
unmapped_mask = ~customer_df["Cluster"].isin(list(cluster_labels))
if unmapped_mask.any():
    unmapped_ids = customer_df.loc[unmapped_mask, "Cluster"].unique().tolist()
    raise ValueError(
        f"No business label defined for cluster id(s) {unmapped_ids}; "
        "update cluster_labels before building the business view."
    )

# Attach the readable label to every customer row (adds a column in place).
customer_df["Cluster_Label"] = customer_df["Cluster"].map(cluster_labels)


Business View

In [7]:
# Same per-segment profile as the cluster summary, keyed by the readable
# label instead of the numeric cluster id.
segment_groups = customer_df.groupby("Cluster_Label")
business_view = segment_groups.agg(
    Customers=pd.NamedAgg(column="CustomerID", aggfunc="count"),
    Avg_Recency=pd.NamedAgg(column="Recency", aggfunc="mean"),
    Avg_Frequency=pd.NamedAgg(column="Frequency", aggfunc="mean"),
    Avg_Monetary=pd.NamedAgg(column="MonetaryValue", aggfunc="mean"),
    Revenue=pd.NamedAgg(column="MonetaryValue", aggfunc="sum"),
)

# Share of overall revenue per segment, in percent. total_revenue is the
# grand total computed in the "Revenue Contribution %" cell.
business_view["Revenue_%"] = business_view["Revenue"].div(total_revenue).mul(100)

# Display segments from largest to smallest revenue share; the sorted copy
# is not stored.
business_view.sort_values(by="Revenue_%", ascending=False)


Unnamed: 0_level_0,Customers,Avg_Recency,Avg_Frequency,Avg_Monetary,Revenue,Revenue_%
Cluster_Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Regular Active Customers,3204,40.78,4.47,1677.1,5373414.97,60.3
Loyal High-Value Customers,48,16.54,42.85,35163.48,1687847.24,18.94
VIP / Wholesale Customers,7,5.71,65.43,184143.69,1289005.83,14.46
Inactive / Churn-Risk Customers,1079,246.14,1.58,520.06,561139.86,6.3


Actionable Business Recommendations

## 📊 Business Insights & Actions

### 1️⃣ VIP / Wholesale Customers
- Small customer base
- Extremely high revenue contribution
- Actions:
  - Dedicated account managers
  - Priority support
  - Volume-based discounts
  - Long-term contracts

---

### 2️⃣ Loyal High-Value Customers
- Consistent buyers with high engagement
- Strong repeat behavior
- Actions:
  - Loyalty rewards
  - Personalized offers
  - Upsell / cross-sell strategies

---

### 3️⃣ Regular Active Customers
- Largest customer base
- Moderate spending
- Actions:
  - Promotional nudges
  - Bundle offers
  - Engagement campaigns to move them to high-value segment

---

### 4️⃣ Inactive / Churn-Risk Customers
- Low engagement and long recency
- Minimal revenue contribution
- Actions:
  - Reactivation campaigns
  - Discount offers
  - Email reminders
  - Decide whether retention cost is justified


Anomaly Review

In [8]:
# Keep only the rows whose IsAnomaly flag is set.
anomaly_mask = customer_df["IsAnomaly"]
anomalies = customer_df.loc[anomaly_mask]

# Per-segment count of flagged customers with their mean spend and mean
# purchase frequency.
anomaly_aggregations = {
    "Anomaly_Count": pd.NamedAgg(column="CustomerID", aggfunc="count"),
    "Avg_Monetary": pd.NamedAgg(column="MonetaryValue", aggfunc="mean"),
    "Avg_Frequency": pd.NamedAgg(column="Frequency", aggfunc="mean"),
}
anomalies.groupby("Cluster_Label").agg(**anomaly_aggregations)


Unnamed: 0_level_0,Anomaly_Count,Avg_Monetary,Avg_Frequency
Cluster_Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Inactive / Churn-Risk Customers,3,15770.04,12.67
Loyal High-Value Customers,48,35163.48,42.85
Regular Active Customers,159,8028.27,18.72
VIP / Wholesale Customers,7,184143.69,65.43


In [9]:
# Persist customer_df, now carrying the Cluster_Label column, for later use.
business_ready_path = "../artifacts/customer_business_ready.pkl"
joblib.dump(customer_df, business_ready_path)
print("Business-ready dataset saved")


Business-ready dataset saved
