In [None]:
# Import libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the three source tables (customers, transactions, products) from
# CSV files given as relative paths; they are read in the order listed.
customers, transactions, products = [
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Transactions.csv', 'Products.csv')
]

In [None]:
# Merge datasets to create customer profiles with transaction history.
# Each transaction row is enriched with the matching customer row and the
# matching product row. Both joins are inner joins (the pandas default), so
# transactions whose CustomerID or ProductID has no match are dropped.
# validate="many_to_one" makes pandas raise a MergeError if a key is
# duplicated in the lookup table, instead of silently duplicating transaction
# rows and inflating the per-customer sums and counts derived from this frame.
customer_transactions = (
    transactions
    .merge(customers, on="CustomerID", validate="many_to_one")
    .merge(products, on="ProductID", validate="many_to_one")
)

In [None]:
# Collapse the transaction-level rows into one row per customer.
# Output columns keep their source column names, so after this step
# 'TransactionID' holds a row count and 'ProductID' holds a distinct count.
customer_profiles = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),          # total revenue per customer
        Quantity=('Quantity', 'sum'),              # total units purchased
        TransactionID=('TransactionID', 'count'),  # number of transactions
        ProductID=('ProductID', 'nunique'),        # number of distinct products
    )
    .reset_index()
)

In [None]:
# Prepare data for clustering: keep only the numeric profile columns.
# CustomerID is an identifier, not a feature, so it is always removed.
# 'Cluster' is also removed when present: a later cell writes the cluster
# labels back into customer_profiles, so re-running this cell without
# restarting the kernel would otherwise feed the previous labels into the
# clustering as an extra feature. On a fresh run the column does not exist
# yet and errors='ignore' makes the second drop a no-op.
features = (
    customer_profiles
    .drop(columns=['CustomerID'])
    .drop(columns=['Cluster'], errors='ignore')
)

In [None]:
# Standardize the feature columns with StandardScaler so that no single
# feature dominates the distance computations because of its scale.
# The scaler is fitted first and then applied to the same data.
scaler = StandardScaler().fit(features)
normalized_features = scaler.transform(features)

In [None]:
# Apply KMeans clustering
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (a single run for the default k-means++ init), and intermediate
# releases emit a FutureWarning when it is left unset. Fixing it to 10 keeps
# the best-of-10 restarts and makes the result independent of the installed
# scikit-learn version; random_state makes those restarts reproducible.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)  # Change n_clusters if needed
# fit_predict fits the model and returns one cluster label per input row.
clusters = kmeans.fit_predict(normalized_features)

In [None]:
# Attach each customer's cluster label to their profile row.
# `clusters` has one entry per row of customer_profiles, in the same order.
customer_profiles = customer_profiles.assign(Cluster=clusters)

In [None]:
# Score the clustering with the Davies-Bouldin index, computed on the same
# standardized features that were clustered, and report it.
db_index = davies_bouldin_score(X=normalized_features, labels=clusters)
print(f"Davies-Bouldin Index: {db_index}")

In [None]:
# Project the standardized features onto two principal components so the
# clusters can be drawn on a 2-D scatter plot. PCA is used for visualization
# only; the clustering itself was run on the full standardized feature set.
pca = PCA(n_components=2)
pca_features = pca.fit_transform(X=normalized_features)

In [None]:
# Assemble the plotting frame: the two PCA coordinates plus the cluster label.
# Both frames carry a default 0..n-1 index here, so the labels line up row
# for row with the PCA coordinates.
viz_data = pd.DataFrame(pca_features, columns=['PCA1', 'PCA2']).assign(
    Cluster=customer_profiles['Cluster']
)

In [None]:
# Set plot style: apply seaborn's "whitegrid" style to plots drawn after
# this cell runs.
# NOTE(review): seaborn documents `sns.set` as an alias of `sns.set_theme`,
# which is the preferred name — consider switching once the minimum seaborn
# version used with this notebook is confirmed.
sns.set(style="whitegrid")

In [None]:
# Plot the clusters
# The figure is created, drawn, annotated and shown in ONE cell. With the
# notebook inline backend a figure is rendered and closed when its cell
# finishes, so setting the title/labels/legend in a later cell would act on a
# new, empty figure and leave the scatter plot unlabeled.
fig, ax = plt.subplots(figsize=(10, 6))

# One point per customer in PCA space, coloured by assigned cluster.
sns.scatterplot(
    data=viz_data,
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    palette='tab10',
    s=100,
    alpha=0.8,
    ax=ax,  # draw on the explicit Axes rather than the implicit current one
)

# Add titles and labels
ax.set_title('Customer Segmentation (PCA Visualization)', fontsize=16)
ax.set_xlabel('Principal Component 1', fontsize=12)
ax.set_ylabel('Principal Component 2', fontsize=12)
ax.legend(title='Cluster', loc='best', fontsize=10)
plt.show()