# Complete Customer Analytics: CLTV, RFM Analysis, and KMeans Clustering

This notebook performs comprehensive customer segmentation analysis using:
- **RFM Analysis** (Recency, Frequency, Monetary)
- **Customer Lifetime Value (CLTV)** calculation
- **KMeans Clustering** for data-driven segmentation

Dataset: Canteen Shop Transaction Data

## Setup: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from mpl_toolkits.mplot3d import Axes3D
import warnings
warnings.filterwarnings('ignore')

# Plot appearance shared by every figure in the notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Title banner: a 60-character rule above and below the notebook title.
banner_rule = "=" * 60
print(banner_rule, "CUSTOMER ANALYTICS: CLTV, RFM, AND KMEANS CLUSTERING", banner_rule, sep="\n")

## STEP 1: Load and Explore Data

In [None]:
print("\n[STEP 1] Loading and exploring data...")

# Load dataset. `canteen_sales` keeps the frame exactly as read from disk;
# `df` is the working copy that the cleaning cell filters and extends.
canteen_sales = pd.read_csv('../data/canteen_shop_data.csv')
df = canteen_sales.copy()

print(f"\nDataset Shape: {df.shape}")
print("\nFirst few rows:")
print(df.head())
print("\nColumn Info:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() adds a stray "None" line.
df.info()
print("\nMissing Values:")
print(df.isnull().sum())
print("\nBasic Statistics:")
print(df.describe())

## STEP 2: Clean Data

In [None]:
print("\n[STEP 2] Cleaning data...")

# Keep only genuine sales: positive quantities (drops returns) and positive
# prices. Both conditions go into a single mask and the result is copied, so
# the column assignment below writes to an independent frame instead of to a
# slice of the previous `df`.
is_valid_sale = (df['Quantity'] > 0) & (df['Price'] > 0)
df = df.loc[is_valid_sale].copy()

# Calculate total price per row.
# NOTE(review): the RFM and CLTV cells aggregate a 'Total' column, not this
# 'TotalPrice' column - confirm which of the two is intended.
df['TotalPrice'] = df['Quantity'] * df['Price']

# Parse the dates before taking min/max. The CSV is read without date parsing,
# so on the raw column min()/max() would compare text, which is only
# chronological for ISO-style date strings.
transaction_dates = pd.to_datetime(df['Date'])

print(f"\nClean Dataset Shape: {df.shape}")
print(f"Number of Unique Customers: {df['Customer ID'].nunique()}")
print(f"Date Range: {transaction_dates.min()} to {transaction_dates.max()}")

## STEP 3: Calculate RFM Metrics

In [None]:
print("\n[STEP 3] Calculating RFM metrics...")

# Parse the dates once. Taking max() on the unparsed column and converting
# afterwards compares raw values (text, if the column was read as strings),
# which can pick the wrong "latest" purchase.
purchase_dates = pd.to_datetime(df['Date'])

# Set snapshot date (day after last transaction)
snapshot_date = purchase_dates.max() + pd.Timedelta(days=1)

# Calculate RFM per customer:
#   Recency   - whole days between the customer's latest purchase and the snapshot
#   Frequency - number of non-null 'Item' rows for the customer
#   Monetary  - sum of the 'Total' column
rfm = (
    df.assign(Date=purchase_dates)
      .groupby('Customer ID')
      .agg(
          Recency=('Date', lambda dates: (snapshot_date - dates.max()).days),
          Frequency=('Item', 'count'),
          Monetary=('Total', 'sum'),
      )
      .reset_index()
)

print("\nRFM Summary Statistics:")
print(rfm.describe())

# Create RFM Scores (1-5 for R and F, 1-3 for M).
# Every metric is ranked with method='first' before binning. Ranks are unique,
# so qcut can never hit duplicate bin edges (which raises for R, and for M makes
# duplicates='drop' return fewer bins than labels). When no ties straddle a
# quantile boundary the scores match binning on the raw values; tied customers
# are otherwise split by row order, as F_Score already did.
# Recency labels are reversed: the most recent customers get the highest score.
rfm['R_Score'] = pd.qcut(rfm['Recency'].rank(method='first'), 5, labels=[5, 4, 3, 2, 1])
rfm['F_Score'] = pd.qcut(rfm['Frequency'].rank(method='first'), 5, labels=[1, 2, 3, 4, 5])
rfm['M_Score'] = pd.qcut(rfm['Monetary'].rank(method='first'), 3, labels=[1, 2, 3])

# Create RFM Segment:
#   RFM_Score   - the three scores concatenated as text, e.g. "543"
#   RFM_Segment - the three scores added together
rfm['RFM_Score'] = rfm['R_Score'].astype(str) + rfm['F_Score'].astype(str) + rfm['M_Score'].astype(str)
rfm['RFM_Segment'] = rfm['R_Score'].astype(int) + rfm['F_Score'].astype(int) + rfm['M_Score'].astype(int)
# Create customer segments
def segment_customer(row):
    """Map one customer's RFM scores to a named segment.

    Args:
        row: mapping-like object (e.g. a DataFrame row) providing
            'RFM_Segment', 'R_Score' and 'F_Score'.

    Returns:
        One of 'Champions', 'Loyal Customers', 'Potential Loyalists',
        'Recent Customers', 'At Risk', 'Cant Lose Them', 'Lost' or 'Others'.

    NOTE(review): if RFM_Segment is always R_Score + F_Score + M_Score with
    M_Score >= 1, then F_Score >= 3 and R_Score >= 3 already implies
    RFM_Segment >= 7, so 'Potential Loyalists' may never be returned - confirm.
    """
    combined = row['RFM_Segment']
    recency_score = row['R_Score']

    # Customers who bought fairly recently (R_Score of 3 or more).
    if recency_score >= 3:
        if combined >= 10 and recency_score >= 4:
            return 'Champions'
        if combined >= 7:
            return 'Loyal Customers'
        if row['F_Score'] >= 3:
            return 'Potential Loyalists'
        if recency_score >= 4:
            return 'Recent Customers'
        return 'Others'

    # Customers who have not bought for a while (R_Score of 2 or less).
    if recency_score <= 2:
        if combined >= 6:
            return 'At Risk'
        if row['F_Score'] >= 2:
            return 'Cant Lose Them'
        return 'Lost'

    # Anything that matched neither group (e.g. a missing recency score).
    return 'Others'

# Label every customer row with its rule-based segment, then report how many
# customers landed in each one (largest segment first).
customer_segments = rfm.apply(segment_customer, axis=1)
rfm['Customer_Segment'] = customer_segments

print("\nCustomer Segment Distribution:")
print(rfm['Customer_Segment'].value_counts())

## STEP 4: Visualize RFM Distributions

In [None]:
print("\n[STEP 4] Creating RFM distribution visualizations...")

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('RFM Analysis: Distribution of Key Metrics', fontsize=16, fontweight='bold')

recency_median = rfm['Recency'].median()
frequency_median = rfm['Frequency'].median()
monetary_median = rfm['Monetary'].median()

# One entry per histogram panel:
# (axis, plotted values, bar colour, title, x label, median line position, median legend text).
# Monetary is plotted on a log10 scale, so its median line is log10-transformed
# too while the legend still shows the untransformed amount.
histogram_panels = [
    (axes[0, 0], rfm['Recency'], '#FF7043',
     'Recency Distribution (Days Since Last Purchase)', 'Days',
     recency_median, f'Median: {recency_median:.0f} days'),
    (axes[0, 1], rfm['Frequency'], '#00BFA5',
     'Frequency Distribution (Number of Purchases)', 'Number of Transactions',
     frequency_median, f'Median: {frequency_median:.0f} purchases'),
    (axes[1, 0], np.log10(rfm['Monetary']), '#FFC107',
     'Monetary Distribution (Log Scale)', 'Log10(Total Spend)',
     np.log10(monetary_median), f'Median: £{monetary_median:.2f}'),
]

for panel, values, bar_colour, panel_title, x_label, median_x, median_label in histogram_panels:
    panel.hist(values, bins=50, color=bar_colour, edgecolor='black', alpha=0.7)
    panel.set_title(panel_title, fontweight='bold')
    panel.set_xlabel(x_label)
    panel.set_ylabel('Number of Customers')
    panel.axvline(median_x, color='red', linestyle='--', linewidth=2, label=median_label)
    panel.legend()

# Customer Segments: horizontal bars, one per rule-based segment.
segment_counts = rfm['Customer_Segment'].value_counts()
colors_palette = ['#FF7043', '#00BFA5', '#FFC107', '#42A5F5', '#AB47BC', '#66BB6A', '#FFA726', '#EC407A']
segment_panel = axes[1, 1]
segment_panel.barh(segment_counts.index, segment_counts.values, color=colors_palette[:len(segment_counts)])
segment_panel.set_title('Customer Segments Distribution', fontweight='bold')
segment_panel.set_xlabel('Number of Customers')
segment_panel.set_ylabel('Segment')

# Write each count just past the end of its bar.
for bar_position, customer_count in enumerate(segment_counts.values):
    segment_panel.text(customer_count, bar_position, f' {customer_count}', va='center', fontweight='bold')

plt.tight_layout()
plt.show()

print("RFM distribution visualization complete!")

## STEP 5: Segment Analysis

In [None]:
print("\n[STEP 5] Analyzing segments...")

# Per-segment averages plus head count, highest average spend first.
segment_summary = (
    rfm.groupby('Customer_Segment')
       .agg({
           'Recency': 'mean',
           'Frequency': 'mean',
           'Monetary': 'mean',
           'Customer ID': 'count',
       })
       .round(2)
       .rename(columns={
           'Recency': 'Avg_Recency',
           'Frequency': 'Avg_Frequency',
           'Monetary': 'Avg_Monetary',
           'Customer ID': 'Count',
       })
       .sort_values('Avg_Monetary', ascending=False)
)

print("\nSegment Analysis Summary:")
print(segment_summary)

# Create heatmap and segment value visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
heatmap_ax, revenue_ax = axes

# Heatmap: average metrics by segment, min-max scaled per metric so the three
# metrics share one colour scale.
segment_metrics = rfm.groupby('Customer_Segment')[['Recency', 'Frequency', 'Monetary']].mean()
metric_floor = segment_metrics.min()
metric_range = segment_metrics.max() - segment_metrics.min()
segment_metrics_normalized = (segment_metrics - metric_floor) / metric_range

sns.heatmap(
    segment_metrics_normalized.T,
    annot=True,
    fmt='.2f',
    cmap='RdYlGn_r',
    linewidths=1,
    cbar_kws={'label': 'Normalized Score'},
    ax=heatmap_ax,
)
heatmap_ax.set_title('RFM Metrics by Customer Segment (Normalized)', fontweight='bold', fontsize=14)
heatmap_ax.set_xlabel('Customer Segment', fontweight='bold')
heatmap_ax.set_ylabel('RFM Metric', fontweight='bold')

# Segment value contribution: total spend per segment, largest first.
segment_value = rfm.groupby('Customer_Segment')['Monetary'].sum().sort_values(ascending=False)
colors_palette = ['#FF7043', '#00BFA5', '#FFC107', '#42A5F5', '#AB47BC', '#66BB6A', '#FFA726', '#EC407A']
bar_positions = range(len(segment_value))
revenue_ax.bar(bar_positions, segment_value.values, color=colors_palette[:len(segment_value)])
revenue_ax.set_xticks(bar_positions)
revenue_ax.set_xticklabels(segment_value.index, rotation=45, ha='right')
revenue_ax.set_title('Total Revenue by Customer Segment', fontweight='bold', fontsize=14)
revenue_ax.set_xlabel('Customer Segment', fontweight='bold')
revenue_ax.set_ylabel('Total Revenue (£)', fontweight='bold')

# Label each bar with its total, formatted with thousands separators.
for bar_position, revenue in enumerate(segment_value.values):
    revenue_ax.text(bar_position, revenue, f'£{revenue:,.0f}', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

print("Segment analysis visualization complete!")

## STEP 6: Calculate Customer Lifetime Value (CLTV)

In [None]:
print("\n[STEP 6] Calculating Customer Lifetime Value...")

# Parse the dates before taking per-customer min/max. Calling max()/min() on
# the unparsed column compares raw values (text, if the column was read as
# strings), which can pick the wrong first/last purchase.
purchase_dates = pd.to_datetime(df['Date'])

# Per customer:
#   NumPurchases     - number of non-null 'Item' rows
#   TotalRevenue     - sum of the 'Total' column
#   CustomerLifespan - whole days between first and last purchase
cltv_data = (
    df.assign(Date=purchase_dates)
      .groupby('Customer ID')
      .agg(
          NumPurchases=('Item', 'count'),
          TotalRevenue=('Total', 'sum'),
          CustomerLifespan=('Date', lambda dates: (dates.max() - dates.min()).days),
      )
      .reset_index()
)

# Calculate CLTV components. One day is added to the lifespan so single-day
# customers do not divide by zero.
cltv_data['AvgOrderValue'] = cltv_data['TotalRevenue'] / cltv_data['NumPurchases']
cltv_data['PurchaseFrequency'] = cltv_data['NumPurchases'] / (cltv_data['CustomerLifespan'] + 1) * 365
cltv_data['CustomerLifespanYears'] = (cltv_data['CustomerLifespan'] + 1) / 365
# NOTE(review): with the three definitions above the product cancels to
# TotalRevenue (NumPurchases, lifespan+1 and 365 each appear once on top and
# once underneath), so this CLTV equals historical spend. A forward-looking
# CLTV would need an expected lifespan that is not derived from the same
# window - confirm the intended definition before relying on these figures.
cltv_data['CLTV'] = cltv_data['AvgOrderValue'] * cltv_data['PurchaseFrequency'] * cltv_data['CustomerLifespanYears']

# Remove extreme outliers for visualization (keep customers at or below the
# 99th percentile of CLTV). The summary figures below use this trimmed frame.
cltv_data_clean = cltv_data[cltv_data['CLTV'] <= cltv_data['CLTV'].quantile(0.99)]

print("\nCustomer Lifetime Value Summary:")
print(cltv_data_clean['CLTV'].describe())
print(f"\nTotal Customer Lifetime Value: £{cltv_data_clean['CLTV'].sum():,.2f}")
print(f"Average CLTV per Customer: £{cltv_data_clean['CLTV'].mean():,.2f}")
print(f"Median CLTV per Customer: £{cltv_data_clean['CLTV'].median():,.2f}")

## STEP 7: Visualize CLTV

In [None]:
print("\n[STEP 7] Creating CLTV visualizations...")

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Customer Lifetime Value (CLTV) Analysis', fontsize=16, fontweight='bold')
(distribution_ax, scatter_ax), (segments_ax, pareto_ax) = axes

# CLTV Distribution with mean and median reference lines.
cltv_values = cltv_data_clean['CLTV']
cltv_mean = cltv_values.mean()
cltv_median = cltv_values.median()
distribution_ax.hist(cltv_values, bins=50, color='#00BFA5', edgecolor='black', alpha=0.7)
distribution_ax.set_title('CLTV Distribution', fontweight='bold')
distribution_ax.set_xlabel('Customer Lifetime Value (£)')
distribution_ax.set_ylabel('Number of Customers')
distribution_ax.axvline(cltv_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: £{cltv_mean:.2f}')
distribution_ax.axvline(cltv_median, color='blue', linestyle='--', linewidth=2, label=f'Median: £{cltv_median:.2f}')
distribution_ax.legend()

# Average Order Value vs Purchase Frequency, shaded by CLTV.
scatter = scatter_ax.scatter(
    cltv_data_clean['AvgOrderValue'],
    cltv_data_clean['PurchaseFrequency'],
    c=cltv_values,
    cmap='YlOrRd',
    alpha=0.6,
    s=50,
)
scatter_ax.set_title('Average Order Value vs Purchase Frequency', fontweight='bold')
scatter_ax.set_xlabel('Average Order Value (£)')
scatter_ax.set_ylabel('Purchase Frequency (purchases/year)')
plt.colorbar(scatter, ax=scatter_ax, label='CLTV (£)')

# CLTV Segments: customers split into four quantile bands.
cltv_quartiles = pd.qcut(cltv_values, q=4, labels=['Low Value', 'Medium Value', 'High Value', 'Top Value'])
segment_counts = cltv_quartiles.value_counts().sort_index()
colors = ['#FFC107', '#FF7043', '#42A5F5', '#00BFA5']
segments_ax.bar(segment_counts.index, segment_counts.values, color=colors)
segments_ax.set_title('Customer Value Segments', fontweight='bold')
segments_ax.set_xlabel('CLTV Segment')
segments_ax.set_ylabel('Number of Customers')
for bar_position, customer_count in enumerate(segment_counts.values):
    segments_ax.text(bar_position, customer_count, f'{customer_count}', ha='center', va='bottom', fontweight='bold')

# Cumulative CLTV (Pareto Chart): customers ordered from highest to lowest
# CLTV, with running shares of customers and of total CLTV.
ranked_customers = cltv_data_clean.sort_values('CLTV', ascending=False)
customer_total = len(ranked_customers)
sorted_cltv = ranked_customers.assign(
    CumulativePercent=(ranked_customers['CLTV'].cumsum() / ranked_customers['CLTV'].sum()) * 100,
    CustomerPercent=(np.arange(1, customer_total + 1) / customer_total) * 100,
)

pareto_ax.plot(sorted_cltv['CustomerPercent'], sorted_cltv['CumulativePercent'], color='#00BFA5', linewidth=2)
pareto_ax.plot([0, 100], [0, 100], 'k--', alpha=0.3, label='Perfect Equality')
pareto_ax.set_title('Cumulative CLTV Distribution (Pareto)', fontweight='bold')
pareto_ax.set_xlabel('Cumulative % of Customers')
pareto_ax.set_ylabel('Cumulative % of Total CLTV')
pareto_ax.grid(True, alpha=0.3)
# Dotted guides at 20% of customers / 80% of value.
pareto_ax.axvline(20, color='red', linestyle=':', alpha=0.5)
pareto_ax.axhline(80, color='red', linestyle=':', alpha=0.5)
pareto_ax.legend()

plt.tight_layout()
plt.show()

print("CLTV visualization complete!")

## STEP 8: Elbow Method for Optimal Clusters

In [None]:
print("\n[STEP 8] Finding optimal number of clusters using Elbow Method...")

# Prepare data for clustering: the three RFM metrics, with Monetary compressed
# by log1p before scaling.
X = rfm[['Recency', 'Frequency', 'Monetary']]
X_transformed = X.assign(Monetary=np.log1p(X['Monetary']))

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_transformed)

# Fit one model per candidate K and record its inertia.
K_range = range(2, 11)
inertias = []
for k in K_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10).fit(X_scaled)
    inertias.append(kmeans.inertia_)

# Plot Elbow graph
elbow_fig, elbow_ax = plt.subplots(figsize=(10, 6))
elbow_ax.plot(K_range, inertias, 'bo-', linewidth=2, markersize=8)
elbow_ax.set_title('Elbow Method: Finding Optimal Number of Clusters', fontweight='bold', fontsize=14)
elbow_ax.set_xlabel('Number of Clusters (K)', fontweight='bold')
elbow_ax.set_ylabel('Inertia (Within-Cluster Sum of Squares)', fontweight='bold')
elbow_ax.axvline(x=4, color='red', linestyle='--', alpha=0.5, label='Suggested K=4')
elbow_ax.grid(True, alpha=0.3)
elbow_ax.legend()
plt.show()

print("\nInertia values for K=2 to K=10:")
inertia_by_k = list(zip(K_range, inertias))
for k, inertia in inertia_by_k:
    print(f"K={k}: {inertia:.2f}")

# Relative drop in inertia between each pair of consecutive K values.
print("\nPercentage decrease in inertia:")
for (k_before, inertia_before), (k_after, inertia_after) in zip(inertia_by_k, inertia_by_k[1:]):
    pct_decrease = ((inertia_before - inertia_after) / inertia_before) * 100
    print(f"K={k_before} to K={k_after}: {pct_decrease:.2f}%")

## STEP 9: Apply KMeans Clustering

In [None]:
print("\n[STEP 9] Applying KMeans clustering with K=4...")

# Fit the final model on the scaled features and store each customer's label.
optimal_k = 4
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
cluster_labels = kmeans.fit_predict(X_scaled)
rfm['KMeans_Cluster'] = cluster_labels

# Analyze clusters: mean and median of each RFM metric, plus cluster size.
centre_stats = ['mean', 'median']
customers_by_cluster = rfm.groupby('KMeans_Cluster')
cluster_summary = customers_by_cluster.agg({
    'Recency': centre_stats,
    'Frequency': centre_stats,
    'Monetary': centre_stats,
    'Customer ID': 'count',
}).round(2)

print("\nKMeans Cluster Analysis:")
print(cluster_summary)

# Per-cluster mean of each RFM metric; this table is what the naming step reads.
cluster_profiles = customers_by_cluster[['Recency', 'Frequency', 'Monetary']].mean()

def name_cluster(cluster_id, profiles):
    """Pick a descriptive name for one cluster by comparing it with the others.

    Args:
        cluster_id: index label of the cluster's row in `profiles`.
        profiles: DataFrame indexed by cluster with 'Recency', 'Frequency'
            and 'Monetary' columns (one row per cluster).

    Returns:
        'VIP Champions', 'Recent Big Spenders', 'Low Engagement' or
        'Regular Customers'. Thresholds are quantiles taken across the rows
        of `profiles`, so the result depends on the whole table.
    """
    own = profiles.loc[cluster_id]
    frequency_low, frequency_high = (profiles['Frequency'].quantile(q) for q in (0.25, 0.75))
    monetary_low, monetary_high = (profiles['Monetary'].quantile(q) for q in (0.25, 0.75))
    monetary_mid = profiles['Monetary'].median()
    recency_low = profiles['Recency'].quantile(0.25)

    # Strictly above the upper quartile on both purchase count and spend.
    if own['Frequency'] > frequency_high and own['Monetary'] > monetary_high:
        return 'VIP Champions'

    # Strictly below the lower recency quartile with above-median spend.
    if own['Recency'] < recency_low and own['Monetary'] > monetary_mid:
        return 'Recent Big Spenders'

    # At or below the lower quartile on both purchase count and spend.
    if own['Frequency'] <= frequency_low and own['Monetary'] <= monetary_low:
        return 'Low Engagement'

    return 'Regular Customers'

# Attach the descriptive name to every customer via their cluster id, then
# show how many customers carry each (cluster id, name) pair.
def _name_for(cluster_id):
    """Name a single cluster id using the shared per-cluster profile table."""
    return name_cluster(cluster_id, cluster_profiles)

rfm['Cluster_Name'] = rfm['KMeans_Cluster'].apply(_name_for)

print("\nCluster Names and Distribution:")
cluster_name_sizes = rfm.groupby(['KMeans_Cluster', 'Cluster_Name']).size()
print(cluster_name_sizes)

## STEP 10: Visualize KMeans Clusters

In [None]:
print("\n[STEP 10] Creating KMeans cluster visualizations...")

fig = plt.figure(figsize=(16, 6))
colors = ['#FF7043', '#00BFA5', '#FFC107', '#42A5F5']
cluster_tick_labels = [f'Cluster {i}' for i in range(optimal_k)]


def _annotated_cluster_bars(ax, per_cluster, title, y_label, format_value):
    """Draw one bar per cluster on `ax` and write the formatted value above each bar."""
    positions = range(len(per_cluster))
    ax.bar(positions, per_cluster.values, color=colors)
    ax.set_xticks(positions)
    ax.set_xticklabels(cluster_tick_labels)
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.set_ylabel(y_label, fontweight='bold')
    for position, value in enumerate(per_cluster.values):
        ax.text(position, value, format_value(value), ha='center', va='bottom', fontweight='bold')


# 3D scatter plot: one point cloud per cluster, Monetary shown as log1p.
ax1 = fig.add_subplot(131, projection='3d')
for cluster_id in range(optimal_k):
    members = rfm.loc[rfm['KMeans_Cluster'] == cluster_id]
    ax1.scatter(
        members['Recency'],
        members['Frequency'],
        np.log1p(members['Monetary']),
        c=colors[cluster_id],
        label=f'Cluster {cluster_id}',
        alpha=0.6,
        s=30,
    )

ax1.set_xlabel('Recency (days)', fontweight='bold')
ax1.set_ylabel('Frequency', fontweight='bold')
ax1.set_zlabel('Log(Monetary)', fontweight='bold')
ax1.set_title('KMeans Clustering (3D View)', fontweight='bold', fontsize=12)
ax1.legend()

# Cluster distribution: number of customers per cluster, ordered by cluster id.
ax2 = fig.add_subplot(132)
cluster_counts = rfm['KMeans_Cluster'].value_counts().sort_index()
_annotated_cluster_bars(
    ax2, cluster_counts,
    'Customer Distribution by Cluster', 'Number of Customers',
    lambda count: f'{count}',
)

# Cluster value contribution: summed Monetary per cluster, labelled in thousands.
ax3 = fig.add_subplot(133)
cluster_revenue = rfm.groupby('KMeans_Cluster')['Monetary'].sum().sort_index()
_annotated_cluster_bars(
    ax3, cluster_revenue,
    'Total Revenue by Cluster', 'Total Revenue (£)',
    lambda revenue: f'£{revenue/1000:.0f}K',
)

plt.tight_layout()
plt.show()

print("KMeans clustering visualization complete!")

## STEP 11: Comparison Analysis - RFM vs KMeans

In [None]:
print("\n[STEP 11] Creating RFM vs KMeans comparison...")

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('RFM Segments vs KMeans Clusters: Comparison Analysis', fontsize=16, fontweight='bold')

# Cross-tabulation heatmap: how many customers fall in each
# (rule-based segment, KMeans cluster) combination.
crosstab = pd.crosstab(rfm['Customer_Segment'], rfm['KMeans_Cluster'])
sns.heatmap(crosstab, annot=True, fmt='d', cmap='YlOrRd', ax=axes[0, 0], cbar_kws={'label': 'Customer Count'})
axes[0, 0].set_title('RFM Segments vs KMeans Clusters', fontweight='bold', fontsize=12)
axes[0, 0].set_xlabel('KMeans Cluster', fontweight='bold')
axes[0, 0].set_ylabel('RFM Segment', fontweight='bold')

# Average monetary value comparison, highest-spending segment first.
segment_monetary = rfm.groupby('Customer_Segment')['Monetary'].mean().sort_values(ascending=False)
x1 = np.arange(len(segment_monetary))
axes[0, 1].barh(x1, segment_monetary.values, color='#00BFA5', alpha=0.7)
axes[0, 1].set_yticks(x1)
axes[0, 1].set_yticklabels(segment_monetary.index)
axes[0, 1].set_title('Average Monetary Value by RFM Segment', fontweight='bold', fontsize=12)
axes[0, 1].set_xlabel('Average Monetary Value (£)', fontweight='bold')

# Recency vs Monetary colored by cluster.
# The loop follows the cluster ids actually present in the data instead of a
# fixed range(4), so this panel stays correct if the number of clusters
# changes; colours repeat once the palette is exhausted.
colors_cluster = ['#FF7043', '#00BFA5', '#FFC107', '#42A5F5']
cluster_ids = sorted(rfm['KMeans_Cluster'].unique())
for position, cluster_id in enumerate(cluster_ids):
    cluster_data = rfm[rfm['KMeans_Cluster'] == cluster_id]
    axes[1, 0].scatter(cluster_data['Recency'],
                       np.log1p(cluster_data['Monetary']),
                       c=colors_cluster[position % len(colors_cluster)],
                       label=f'Cluster {cluster_id}',
                       alpha=0.6,
                       s=30)

axes[1, 0].set_title('Recency vs Monetary (KMeans Clusters)', fontweight='bold', fontsize=12)
axes[1, 0].set_xlabel('Recency (days)', fontweight='bold')
axes[1, 0].set_ylabel('Log(Monetary)', fontweight='bold')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Distribution comparison: overlaid log1p(Monetary) histograms for the three
# largest rule-based segments.
segment_top3 = rfm['Customer_Segment'].value_counts().head(3).index
colors_segments = ['#FF7043', '#00BFA5', '#FFC107']
for idx, segment in enumerate(segment_top3):
    segment_data = rfm[rfm['Customer_Segment'] == segment]['Monetary']
    axes[1, 1].hist(np.log1p(segment_data), bins=30, alpha=0.5,
                    color=colors_segments[idx], label=segment)

axes[1, 1].set_title('Monetary Distribution: Top 3 RFM Segments', fontweight='bold', fontsize=12)
axes[1, 1].set_xlabel('Log(Monetary)', fontweight='bold')
axes[1, 1].set_ylabel('Frequency', fontweight='bold')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Comparison visualization complete!")

## Final Summary and Business Insights

In [None]:
print("\n" + "="*60)
print("KEY INSIGHTS SUMMARY")
print("="*60)

# The same summary is produced for both groupings: mean Recency/Frequency,
# mean and total Monetary, and the number of customers.
summary_spec = {
    'Recency': 'mean',
    'Frequency': 'mean',
    'Monetary': ['mean', 'sum'],
    'Customer ID': 'count'
}

print("\nRFM SEGMENT INSIGHTS:")
rfm_segment_summary = rfm.groupby('Customer_Segment').agg(summary_spec).round(2)
print(rfm_segment_summary)

print("\n\nKMEANS CLUSTER INSIGHTS:")
kmeans_cluster_summary = rfm.groupby('KMeans_Cluster').agg(summary_spec).round(2)
print(kmeans_cluster_summary)


def _cluster_ids_named(cluster_name):
    """Return the KMeans cluster ids carrying `cluster_name` as 'a, b', or 'none'.

    KMeans label numbers are arbitrary, so the ids quoted in the
    recommendations are looked up from the 'Cluster_Name' column rather than
    written into the text.
    """
    matching = rfm.loc[rfm['Cluster_Name'] == cluster_name, 'KMeans_Cluster']
    ids = sorted(matching.unique())
    return ', '.join(str(cluster_id) for cluster_id in ids) if ids else 'none'


print("\n\nBUSINESS RECOMMENDATIONS:")
print(f"\n1. CHAMPIONS (RFM) / VIP CHAMPIONS (Cluster {_cluster_ids_named('VIP Champions')}):")
print("   - These are your highest-value customers")
print("   - Action: Provide VIP treatment, exclusive offers, loyalty rewards")
print("   - Predicted Value: High lifetime value, focus on retention")

print(f"\n2. AT RISK / LOW ENGAGEMENT (Cluster {_cluster_ids_named('Low Engagement')}):")
print("   - Haven't purchased recently, risk of churn")
print("   - Action: Win-back campaigns, special discounts, re-engagement emails")
print("   - Predicted Value: Medium, worth recovery effort")

print(f"\n3. LOYAL CUSTOMERS / REGULAR CUSTOMERS (Cluster {_cluster_ids_named('Regular Customers')}):")
print("   - Consistent purchasers, moderate spend")
print("   - Action: Upsell/cross-sell campaigns, increase purchase frequency")
print("   - Predicted Value: Stable revenue base")

print("\n4. LOST / CANT LOSE THEM:")
print("   - Previously valuable but now inactive")
print("   - Action: Aggressive win-back, understand why they left")
print("   - Predicted Value: High potential if recovered")

print("\n" + "="*60)
print("ANALYSIS COMPLETE!")
print("="*60)