In [None]:
# Import libraries
import sys
sys.path.append('./src')

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

from clustering import CustomerSegmentation
from rfm_analysis import RFMAnalyzer

# Confirmation banner: only reached if every import above succeeded.
# The check-mark is written as a real Unicode character (it was previously
# stored as mis-decoded UTF-8 bytes and printed as garbage).
print("✅ All modules imported successfully!")

## Step 1: Generate Sample Customer Transaction Data

In [None]:
# Build a synthetic per-customer purchase summary used by the later steps.
# The global NumPy RNG is seeded first, so every draw below is repeatable
# as long as the three sampling calls keep their current order.
np.random.seed(42)
n_customers = 500

# Anchor date: "days since last purchase" is measured back from here
reference_date = datetime(2024, 1, 1)

# Zero-padded identifiers: CUST0000, CUST0001, ...
customer_ids = ["CUST{:04d}".format(idx) for idx in range(n_customers)]

# Purchase behaviour, one value per customer:
#   - whole days since last purchase (exponential draw, truncated to int)
#   - purchase count (Poisson draw shifted by one, so never zero)
#   - total spend (gamma draw)
days_since_purchase = np.random.exponential(scale=30, size=n_customers).astype(int)
purchase_frequency = 1 + np.random.poisson(lam=5, size=n_customers)
total_spend = np.random.gamma(shape=2, scale=500, size=n_customers)

# Assemble the table; the last-purchase date is the anchor date minus the
# sampled day gap for each customer
df = pd.DataFrame(
    {
        'customer_id': customer_ids,
        'last_purchase_date': [
            reference_date - timedelta(days=gap)
            for gap in days_since_purchase.tolist()
        ],
        'purchase_frequency': purchase_frequency,
        'total_spend': total_spend,
    }
)

# Quick summary of what was generated
print(f"Dataset: {len(df)} customers")
print(f"Avg purchases per customer: {df['purchase_frequency'].mean():.1f}")
print(f"Avg customer value: ${df['total_spend'].mean():.2f}")

## Step 2: RFM Analysis

In [None]:
# Perform RFM analysis
# Scores each customer with the project's RFMAnalyzer, then prints the size
# of every resulting segment and the recommended strategy for "Champions".
# NOTE(review): n_bins=5 presumably means quintile scoring per dimension;
# confirm against src/rfm_analysis.py.
rfm_analyzer = RFMAnalyzer(n_bins=5)

# Map the sample-data columns onto the analyzer's expected roles; the same
# reference_date used to generate the data is passed so recency is measured
# from a consistent anchor.
rfm_results = rfm_analyzer.calculate_rfm(
    df,
    customer_id_col='customer_id',
    recency_col='last_purchase_date',
    frequency_col='purchase_frequency',
    monetary_col='total_spend',
    reference_date=reference_date
)

# Show segment distribution (count and share of all scored customers).
# Header emoji is written as a real Unicode character; it was previously
# mis-decoded UTF-8 and printed as garbage.
segment_counts = rfm_results['segment'].value_counts()
print("\n📊 Customer Segments:")
for segment, count in segment_counts.items():
    print(f"{segment:20s}: {count:3d} customers ({count/len(rfm_results)*100:.1f}%)")

# Get strategies
# The lookup below requires a 'Champions' entry with 'strategy' and
# 'expected_roi' keys; a missing entry raises KeyError.
strategies = rfm_analyzer.get_segment_strategies()
print("\n💡 Champions Strategy:")
print(f"  {strategies['Champions']['strategy']}")
print(f"  Expected ROI: {strategies['Champions']['expected_roi']}")

## Step 3: K-Means Clustering

In [None]:
# Prepare features for clustering
# Select the three RFM columns once and reuse the same frame for both the
# model input and the segment profiling, so the two can never drift apart.
# NOTE(review): the columns are on very different scales (days, counts,
# currency); confirm that CustomerSegmentation standardizes internally,
# otherwise distance-based clustering will be dominated by 'monetary'.
rfm_features = rfm_results[['recency', 'frequency', 'monetary']]
X = rfm_features.values

# Find optimal clusters
# The search instance is seeded with the same random_state as the final
# model below. Previously only the final fit was seeded, so the selected k
# could differ between runs (assuming CustomerSegmentation forwards
# random_state to its estimator — confirm in src/clustering.py).
segmenter = CustomerSegmentation(method='kmeans', random_state=42)
optimal_k = segmenter.find_optimal_clusters(X, method='silhouette', max_clusters=8)

# Emoji below are real Unicode characters; they were previously stored as
# mis-decoded UTF-8 and printed as garbage.
print(f"\n🎯 Optimal number of clusters: {optimal_k}")

# Fit with optimal clusters
segmenter = CustomerSegmentation(method='kmeans', n_clusters=optimal_k, random_state=42)
clusters = segmenter.fit_predict(X)

# Profile segments
profiles = segmenter.profile_segments(rfm_features)
print("\n📊 Cluster Profiles:")
print(profiles)