# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Cluster cities into three groups by literacy rate and population with
# K-Means, plot the clusters in scaled feature space, and print a preview of
# the labelled table.

# Step 1: Load dataset (path is relative to the current working directory)
data = pd.read_csv("Cities_r2.csv")

# Step 2: Handle non-numeric data (if needed)
# NOTE(review): 'name_of_city' is not one of the clustering features below;
# encoding it in place replaces the city names with integer codes in the
# preview printed at the end. Kept unchanged in case later cells rely on the
# encoded column or on `le` -- confirm whether it is still needed.
if 'name_of_city' in data.columns:
    le = LabelEncoder()
    data['name_of_city'] = le.fit_transform(data['name_of_city'])

# Step 3: Select features
feature_columns = ['effective_literacy_rate_total', 'population_total']
# K-Means rejects NaN input, so rows lacking either feature are removed up
# front. Rebinding `data` keeps the cluster labels and the plot aligned with
# the rows that were actually clustered; reset_index also hands back a fresh
# frame so the column assignment below does not act on a slice of the original.
data = data.dropna(subset=feature_columns).reset_index(drop=True)
X = data[feature_columns]

# Step 4: Scale data (important for K-Means)
# Population and literacy rate live on very different scales; without
# standardisation the Euclidean distance would be dominated by population.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 5: Apply K-Means
# n_init is passed explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4 (and warns about it in 1.2-1.3), so leaving it
# implicit makes the result depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data['Cluster'] = kmeans.fit_predict(X_scaled)

# Step 6: Visualize
# Axes show the standardised features (column 0 = literacy, column 1 =
# population), coloured by assigned cluster.
plt.figure(figsize=(8,5))
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=data['Cluster'], cmap='viridis')
plt.xlabel('Effective Literacy Rate (Scaled)')
plt.ylabel('Population (Scaled)')
plt.title('K-Means Clustering of Cities based on Literacy and Population')
plt.show()

# Preview the first rows, now including the 'Cluster' column.
print(data.head())
