In [None]:

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load datasets
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Aggregate the transactions down to one row per customer.
customer_transactions = (
    transactions.groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',  # Total spend per customer
        'TransactionID': 'count',  # Number of transactions per customer
    })
    .rename(columns={'TransactionID': 'NumTransactions'})
    .reset_index()
)

# Merge with customer profiles. The left join keeps customers that have no
# transactions at all; their aggregated columns come back as NaN.
customer_profiles = pd.merge(
    customers, customer_transactions, on='CustomerID', how='left'
)

# Zero-fill ONLY the aggregated columns: a customer without transactions has
# spent 0 across 0 transactions. A frame-wide fillna(0) would also overwrite
# genuinely missing values in the other customer columns with a meaningless 0.
numerical_features = ['TotalValue', 'NumTransactions']
customer_profiles[numerical_features] = (
    customer_profiles[numerical_features].fillna(0)
)

# Normalize numerical features so that neither one dominates the Euclidean
# distance used by k-means. NOTE: this overwrites the raw values in place, so
# from here on these two columns hold standardized scores.
scaler = StandardScaler()
customer_profiles[numerical_features] = scaler.fit_transform(
    customer_profiles[numerical_features]
)

# Clustering into 5 clusters. n_init is pinned explicitly because its default
# differs between scikit-learn releases; together with random_state this keeps
# the result reproducible across versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
customer_profiles['Cluster'] = kmeans.fit_predict(
    customer_profiles[numerical_features]
)

# Visualize clusters using TotalValue and NumTransactions
plt.figure(figsize=(10, 7))
sns.scatterplot(
    data=customer_profiles,
    x='TotalValue',
    y='NumTransactions',
    hue='Cluster',
    palette='viridis',
    s=100,
)
plt.title('Customer Clusters (5 Clusters)')
plt.xlabel('TotalValue (Standardized)')
plt.ylabel('NumTransactions (Standardized)')
plt.legend(title='Cluster')
plt.grid(True)
plt.show()

# Display clustering results
print("Cluster Sizes:")
print(customer_profiles['Cluster'].value_counts())

# Display cluster centers (in standardized feature space, column order as in
# numerical_features)
print("Cluster Centers:")
print(kmeans.cluster_centers_)
