In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

In [None]:
# Reproducibility setup for the synthetic customer data: fix the sample size
# and seed NumPy's legacy global random generator so that every np.random draw
# made afterwards is repeatable from a fresh kernel.
n_samples = 300
np.random.seed(42)

In [None]:
# Draw one independent Gaussian sample per customer attribute. The draw order
# (age, income, spending score) is kept fixed because all three calls consume
# the same global random stream, so reordering them would change the data.
# NOTE: normal draws are unbounded, so implausible values (for example a
# negative age) can occur and are not clipped here.
age = np.random.normal(loc=40, scale=15, size=n_samples)
income = np.random.normal(loc=60000, scale=20000, size=n_samples)
spending_score = np.random.normal(loc=50, scale=20, size=n_samples)

In [None]:
# Assemble the simulated attributes into a single table: one row per customer
# and one named column per feature.
customers = pd.DataFrame(
    np.column_stack((age, income, spending_score)),
    columns=['Age', 'Annual_Income', 'Spending_Score'],
)

In [None]:
# Preprocessing: standardize every feature to zero mean and unit variance so
# that features measured on very different scales (income vs. age) contribute
# comparably to the distance-based steps that follow.
scaler = StandardScaler().fit(customers)
X_scaled = scaler.transform(customers)

In [None]:
# Dimensionality reduction for visualization: project the standardized
# features onto their first two principal components. The 2-D coordinates in
# X_pca are what the cluster scatter plot draws; the fitted `pca` object is
# kept so its explained-variance ratios can be reported at the end.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Multiple Clustering Techniques
def apply_kmeans(X, n_clusters=3, random_state=42, n_init=10):
    """Fit K-Means on ``X`` and return the cluster assignments and the model.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster; it is passed unchanged to ``KMeans.fit_predict``.
    n_clusters : int, default=3
        Number of clusters to form.
    random_state : int or None, default=42
        Seed for centroid initialisation. The default keeps repeated runs
        reproducible.
    n_init : int or 'auto', default=10
        Number of centroid initialisations to try. It is passed explicitly
        because the library default for this argument has changed between
        scikit-learn releases; pinning it makes the result independent of the
        installed version and avoids the related deprecation warning.

    Returns
    -------
    tuple
        ``(labels, model)`` where ``labels`` holds one cluster index per row
        of ``X`` and ``model`` is the fitted ``KMeans`` estimator.
    """
    model = KMeans(
        n_clusters=n_clusters,
        n_init=n_init,
        random_state=random_state,
    )
    labels = model.fit_predict(X)
    return labels, model

In [None]:
def apply_dbscan(X, eps=0.5, min_samples=5):
    """Run DBSCAN on ``X`` and return the labels together with the estimator.

    Parameters
    ----------
    X : array-like
        Data to cluster; passed unchanged to ``DBSCAN.fit_predict``.
    eps : float, default=0.5
        Forwarded to ``DBSCAN`` as its ``eps`` argument.
    min_samples : int, default=5
        Forwarded to ``DBSCAN`` as its ``min_samples`` argument.

    Returns
    -------
    tuple
        ``(labels, model)``: the array returned by ``fit_predict`` and the
        fitted ``DBSCAN`` instance.
    """
    model = DBSCAN(eps=eps, min_samples=min_samples)
    return model.fit_predict(X), model

In [None]:
# Visualization Function
def plot_clusters(X, labels, title):
    """Show a 2-D scatter plot of points colored by their cluster label.

    Parameters
    ----------
    X : 2-D array
        Point coordinates; column 0 is drawn on the x-axis and column 1 on
        the y-axis. Any further columns are ignored.
    labels : array-like
        One value per row of ``X``, mapped to color through the 'viridis'
        colormap.
    title : str
        Title placed above the plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    points = ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
    ax.set_title(title)
    fig.colorbar(points, ax=ax)
    ax.set_xlabel('First Principal Component')
    ax.set_ylabel('Second Principal Component')
    plt.show()

In [None]:
# Cluster in the standardized feature space, then display the assignments on
# the 2-D PCA projection (the projection is used for plotting only, not for
# fitting the model).
kmeans_labels, kmeans_model = apply_kmeans(X_scaled, n_clusters=3)
plot_clusters(X=X_pca, labels=kmeans_labels, title='K-Means Clustering')

In [None]:
# Evaluate the K-Means assignment with the silhouette score, computed on the
# same standardized features the model was fitted on. The score lies between
# -1 and 1; higher values indicate tighter, better-separated clusters.
kmeans_silhouette = silhouette_score(X=X_scaled, labels=kmeans_labels)
print(f"K-Means Silhouette Score: {kmeans_silhouette:.4f}")

In [None]:
# Cluster Statistics
def cluster_statistics(data, labels, cluster_names=None):
    """Print ``DataFrame.describe()`` for the rows of each cluster.

    Rows are selected with a positional mask built from ``labels``; ``data``
    is never modified or extended. As a result a pre-existing ``'Cluster'``
    column in ``data`` is left intact, and the printed tables contain only the
    columns of ``data`` (no constant cluster-label column).

    Parameters
    ----------
    data : pandas.DataFrame
        Observations to summarize, one row per sample.
    labels : array-like
        One cluster label per row of ``data``, matched to rows by position.
    cluster_names : iterable, optional
        Cluster labels to report, in the order given. When omitted, the sorted
        distinct values found in ``labels`` are used.

    Raises
    ------
    ValueError
        If ``labels`` does not contain exactly one entry per row of ``data``.

    Returns
    -------
    None
        The summaries are written to standard output.
    """
    label_array = np.asarray(labels).ravel()
    if len(label_array) != len(data):
        raise ValueError(
            f"labels has {len(label_array)} entries but data has "
            f"{len(data)} rows"
        )
    if cluster_names is None:
        cluster_names = np.unique(label_array)

    # A Series comparison yields an all-False mask (rather than a scalar) when
    # a requested name has a type that never matches the labels.
    label_series = pd.Series(label_array)

    print("\nCluster Statistics:")
    for cluster in cluster_names:
        members = data[(label_series == cluster).to_numpy()]
        print(f"\nCluster {cluster} Summary:")
        if members.empty:
            # Avoid printing a table of NaNs for a cluster with no members.
            print("No samples assigned to this cluster.")
            continue
        print(members.describe())

In [None]:
# Summarize every cluster of the fitted K-Means model. The cluster ids are
# read from the model instead of being hard-coded, so this cell stays correct
# if the number of clusters requested above is changed.
cluster_statistics(customers, kmeans_labels, range(kmeans_model.n_clusters))

In [None]:
# Report the share of the standardized data's variance captured by each of
# the two retained principal components, followed by their combined share.
explained = pca.explained_variance_ratio_
print("\nVariance Explained:")
print(f"First Component: {explained[0]*100:.2f}%")
print(f"Second Component: {explained[1]*100:.2f}%")
print(f"Total: {sum(explained)*100:.2f}%")