In [None]:
# K-means Algorithm

# --------------------------------------------------
# K-Means Clustering on Mall Customers Dataset
# --------------------------------------------------

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

# Step 1: Load the Dataset
# Opens Colab's upload widget; the first key of the returned mapping is
# handed to pandas as the CSV path below.
from google.colab import files
uploaded = files.upload()

# Nothing uploaded (e.g. the dialog was dismissed): stop with a clear message
# instead of the bare StopIteration that next() would raise on an empty mapping.
if not uploaded:
    raise ValueError("No file was uploaded; please select the dataset CSV file.")

# Only the first uploaded entry is read; any further uploads are ignored.
df = pd.read_csv(next(iter(uploaded)))
print("Dataset Loaded Successfully\n")
print(df.head())

# Step 2: Handle Missing Values (if any)
# Discard every row that has a missing value in at least one column.
df = df.dropna(axis=0, how="any")
print("\nMissing values removed (if present).")

# Step 3: Select Required Features
# The two columns the clustering is performed on.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
data = df[feature_columns]
print("\nSelected Relevant Features:\n", data.head())

# Step 4: Apply MinMax Scaling
# Rescale both features to a common range (MinMaxScaler's default is [0, 1])
# so that neither one dominates the distance computation in K-Means.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

# Step 5: Apply K-Means (k = 5 is common for this dataset)
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (and 1.2/1.3 emit a FutureWarning when it is omitted), so
# leaving it out makes the clustering depend on the installed version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(scaled_data)

# The model was fitted on scaled_data, so these centers are in the scaled
# feature space, not in the original income/score units.
print("\nCluster Centers:")
print(kmeans.cluster_centers_)

print("\nDataset with Cluster Labels:")
print(df.head())

# Step 6: Visualize Clusters
# One figure with a single axes; all drawing goes through the axes object.
fig, ax = plt.subplots(figsize=(8, 6))

# Customers in their original units, coloured by assigned cluster label.
income = data['Annual Income (k$)']
spending = data['Spending Score (1-100)']
ax.scatter(income, spending, c=df['Cluster'], cmap='rainbow', s=100)

# Plot Centroids
# The model was fitted on scaled data, so the centers are mapped back to the
# original units before being drawn on the same axes.
centers = scaler.inverse_transform(kmeans.cluster_centers_)
centroid_x, centroid_y = centers[:, 0], centers[:, 1]
ax.scatter(centroid_x, centroid_y, c='black', s=200, marker='X', label="Centroids")

ax.set_title("Customer Segmentation using K-Means")
ax.set_xlabel("Annual Income (k$)")
ax.set_ylabel("Spending Score (1-100)")
ax.legend()
ax.grid(True)
plt.show()