In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [None]:
# Read the Global Superstore CSV (decoded as latin1) and show its first rows.
csv_path = "Global_Superstore2.csv"
df = pd.read_csv(csv_path, encoding="latin1")
print(df.head(n=5))


In [None]:
# Narrow the frame to the three columns the RFM computation reads:
# the order date, the customer key, and the sales amount.
rfm_source_columns = ['Order Date', 'Customer ID', 'Sales']
df = df.loc[:, rfm_source_columns]

In [None]:
# Convert the 'Order Date' column to pandas datetimes so the date arithmetic
# in the later cells works.
# NOTE(review): no explicit format/dayfirst is passed, so parsing relies on
# pandas' inference — confirm the CSV's date layout is interpreted as intended.
parsed_order_dates = pd.to_datetime(df['Order Date'])
df['Order Date'] = parsed_order_dates

In [None]:
# Reference ("snapshot") date: one day after the most recent order in the data.
most_recent_order = df['Order Date'].max()
latest_date = most_recent_order + pd.Timedelta(days=1)

In [None]:
# Build the per-customer RFM table, indexed by 'Customer ID'.
# Columns keep their source names here ('Order Date', 'Customer ID', 'Sales');
# the next cell renames them to Recency / Frequency / Monetary.
# Vectorized groupby reductions are used instead of a per-group Python lambda,
# and rows are counted with size() rather than by aggregating the grouping key.
orders_by_customer = df.groupby('Customer ID')
RFM = pd.DataFrame({
    # Recency: whole days between latest_date and the customer's last order.
    'Order Date': (latest_date - orders_by_customer['Order Date'].max()).dt.days,
    # Frequency: number of rows belonging to the customer.
    # NOTE(review): if the CSV holds one row per order line, this counts line
    # items rather than distinct orders — confirm that is the intended measure.
    'Customer ID': orders_by_customer.size(),
    # Monetary: total of 'Sales' for the customer.
    'Sales': orders_by_customer['Sales'].sum(),
})

In [None]:
# Replace the source column names with the RFM role each one plays.
rfm_column_names = {
    'Order Date': 'Recency',
    'Customer ID': 'Frequency',
    'Sales': 'Monetary',
}
RFM = RFM.rename(columns=rfm_column_names)

In [None]:
# Preview the first five rows of the RFM table.
print(RFM.head(n=5))

In [None]:
# 5. Standardize Data
# Scale only the three RFM features, selected by name. Passing the whole frame
# would also scale the 'Cluster' label column if this cell is re-run after the
# labels have been attached to RFM further down the notebook.
rfm_features = ['Recency', 'Frequency', 'Monetary']
scaler = StandardScaler()
rfm_scaled = scaler.fit_transform(RFM[rfm_features])

In [None]:
# 6. Find Optimal Number of Clusters (Elbow Method)
# Fit K-Means for k = 1..10 on the scaled features and collect each model's
# inertia_ into wcss for the elbow plot.
candidate_ks = range(1, 11)
wcss = []
for k in candidate_ks:
    kmeans = KMeans(n_clusters=k, random_state=42).fit(rfm_scaled)
    wcss += [kmeans.inertia_]

In [None]:
# Plot WCSS against the number of clusters; the bend ("elbow") in the curve
# is used to pick k.
fig, ax = plt.subplots()
ax.plot(range(1, 11), wcss, marker='o')
ax.set_title("Elbow Method")
ax.set_xlabel("Number of clusters")
ax.set_ylabel("WCSS")
plt.show()

In [None]:
# 7. Fit the final K-Means model
# k is hard-coded to 4 here; choose it based on the elbow graph.
kmeans = KMeans(n_clusters=4, random_state=42)
# One cluster label per row of rfm_scaled.
clusters = kmeans.fit(rfm_scaled).labels_

In [None]:

# Attach each customer's cluster label to the RFM table as a 'Cluster' column.
RFM = RFM.assign(Cluster=clusters)

In [None]:
# 8. Analyze Clusters
# Per-cluster mean of every other column in the RFM table.
cluster_summary = RFM.groupby('Cluster').agg('mean')
print(cluster_summary)

In [None]:
# 9. Visualization
# Scatter of Recency (x) against Monetary (y), one point per customer,
# coloured by the assigned cluster.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=RFM,
    x='Recency',
    y='Monetary',
    hue='Cluster',
    palette='Set2',
    ax=ax,
)
ax.set_title("Customer Segments")
plt.show()