<a href="https://colab.research.google.com/github/chahatgarg884/Clustering_ChahatGarg_102203557/blob/main/Clustering_ChahatGarg_102203557.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import pandas as pd
import numpy as np

# Fetch the iris bunch and wrap its measurements in a DataFrame whose
# columns are labelled with the feature names shipped in the bunch.
iris = load_iris()
X_original = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Preprocessing functions
def no_processing(X):
    """Baseline variant: hand back a copy of X, produced by X.copy()."""
    untouched = X.copy()
    return untouched
def normalize(X):
    """Fit a default-configured MinMaxScaler on X and return the scaled data."""
    scaler = MinMaxScaler()
    return scaler.fit_transform(X)
def transform(X):
    """Return the element-wise square root of the absolute values in X."""
    magnitudes = np.abs(X)
    return np.sqrt(magnitudes)
def apply_pca(X):
    """Fit a two-component PCA on X and return X projected onto it."""
    reducer = PCA(n_components=2)
    return reducer.fit_transform(X)
def transform_then_normalize(X):
    """Apply transform() to X, then pass the result through normalize()."""
    transformed = transform(X)
    return normalize(transformed)
def t_n_pca(X):
    """Run transform_then_normalize() on X, then reduce it with apply_pca()."""
    scaled = transform_then_normalize(X)
    return apply_pca(scaled)

# Maps each report label to the callable that prepares the feature matrix.
# Every value takes the feature matrix as its single argument.
preprocessing_methods = {
    "No Processing": no_processing,
    "Normalization": normalize,
    "Transform": transform,
    # apply_pca already has the one-argument signature, so no wrapper is needed.
    "PCA": apply_pca,
    "T+N": transform_then_normalize,
    "T+N+PCA": t_n_pca,
}

# Evaluation grid: every preprocessing variant is clustered once per k below.
cluster_range = [3, 4, 5]
results_kmeans = []

for method_name, preprocess in preprocessing_methods.items():
    # A variant whose preprocessing raises is reported and skipped entirely,
    # so it contributes no rows to the results.
    try:
        X_proc = preprocess(X_original)
    except Exception as e:
        print(f"[ERROR] Preprocessing failed for {method_name}: {e}")
        continue

    for n_clusters in cluster_range:
        # Identifying columns first; the three metric columns are added below.
        row = {"Method": method_name, "Clusters": n_clusters}
        try:
            model = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto').fit(X_proc)
            labels = model.labels_
            sil = silhouette_score(X_proc, labels)
            cal = calinski_harabasz_score(X_proc, labels)
            db = davies_bouldin_score(X_proc, labels)
            scores = (round(sil, 2), round(cal), round(db, 2))
        except Exception as e:
            # Any failure while fitting or scoring yields a placeholder row
            # instead of aborting the remaining combinations.
            print(f"[ERROR] KMeans failed for {method_name} with k={n_clusters}: {e}")
            scores = ("NA", "NA", "NA")
        row.update(zip(("Silhouette", "Calinski-Harabasz", "Davies-Bouldin"), scores))
        results_kmeans.append(row)

# Tabulate the collected rows; the bare name renders the table in the notebook.
results_df = pd.DataFrame(results_kmeans)
results_df

Unnamed: 0,Method,Clusters,Silhouette,Calinski-Harabasz,Davies-Bouldin
0,No Processing,3,0.55,562,0.67
1,No Processing,4,0.5,530,0.75
2,No Processing,5,0.49,495,0.82
3,Normalization,3,0.48,351,0.79
4,Normalization,4,0.44,314,0.91
5,Normalization,5,0.42,263,0.99
6,Transform,3,0.56,434,0.92
7,Transform,4,0.39,604,1.03
8,Transform,5,0.33,614,1.07
9,PCA,3,0.6,694,0.56
