In [1]:
# train_model.py
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import joblib

# Train and persist a toy customer-segmentation pipeline:
# synthetic blobs -> StandardScaler -> KMeans -> PCA, bundled into one pickle.

# Shared settings, named once so the data generator and the clusterer cannot
# silently drift apart.
N_SEGMENTS = 4
RANDOM_STATE = 42
FEATURE_COLUMNS = ["Annual Income (k$)", "Spending Score"]
MODEL_PATH = "customer_segmentation_model.pkl"

# Step 1: Generate synthetic customer data
# make_blobs draws 200 two-feature points around N_SEGMENTS centers; the
# ground-truth blob labels are discarded because clustering is unsupervised.
# NOTE: the columns are only *labelled* as income/spending -- the values are
# raw blob coordinates, not realistic dollar amounts or scores.
X, _ = make_blobs(
    n_samples=200,
    centers=N_SEGMENTS,
    cluster_std=1.0,
    random_state=RANDOM_STATE,
)
df = pd.DataFrame(X, columns=FEATURE_COLUMNS)

# Step 2: Standardize the features
# Fit on the two feature columns only (df has no other columns yet).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# Step 3: Apply KMeans clustering
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in 1.4 (and warns about it in 1.2/1.3), so leaving it implicit makes
# the fitted model depend on the installed version.
kmeans = KMeans(n_clusters=N_SEGMENTS, n_init=10, random_state=RANDOM_STATE)
kmeans.fit(X_scaled)

# Step 4: PCA projection (for visualization)
# With two input features and n_components=2 this keeps every component, so
# it re-orients the scaled data rather than reducing its dimensionality.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Attach the projection and cluster labels to the in-memory DataFrame.
# These columns are NOT written to disk by this script.
df["PCA1"] = X_pca[:, 0]
df["PCA2"] = X_pca[:, 1]
df["Segment"] = kmeans.labels_

# Step 5: Save model components
# All three fitted objects go into one file so they are loaded together.
# The file is a pickle: only joblib.load() it from a trusted source.
joblib.dump(
    {
        "scaler": scaler,
        "kmeans": kmeans,
        "pca": pca,
    },
    MODEL_PATH,
)

print(f"✅ Model saved as '{MODEL_PATH}'")


✅ Model saved as 'customer_segmentation_model.pkl'
