<a href="https://colab.research.google.com/github/chahatgarg884/Clustering_ChahatGarg_102203557/blob/main/Clustering_ChahatGarg_102203557.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering, MeanShift
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import pandas as pd
import numpy as np
from warnings import filterwarnings
filterwarnings("ignore")

# Fetch the Iris dataset and wrap its feature matrix in a DataFrame whose
# columns carry the dataset's own feature names.
iris = load_iris()
X_original = pd.DataFrame(data=iris["data"], columns=iris["feature_names"])

# Preprocessing functions
def no_processing(X):
    """Return ``X.copy()``: the baseline that leaves the features untouched."""
    duplicate = X.copy()
    return duplicate
def normalize(X):
    """Fit a fresh ``MinMaxScaler`` (default settings) on ``X`` and return the scaled data."""
    scaler = MinMaxScaler()
    return scaler.fit_transform(X)
def transform(X):
    """Return the element-wise square root of the absolute values of ``X``."""
    magnitudes = np.abs(X)
    return np.sqrt(magnitudes)
def apply_pca(X, n_components=2):
    """Project ``X`` onto its leading principal components.

    Args:
        X: Feature matrix accepted by ``PCA.fit_transform``.
        n_components: Number of components to keep, forwarded to ``PCA``.
            Defaults to 2, so calls that pass only ``X`` get a 2-component
            projection.

    Returns:
        The result of ``PCA(n_components=n_components).fit_transform(X)``.
    """
    return PCA(n_components=n_components).fit_transform(X)
def transform_then_normalize(X):
    """Apply ``transform`` to ``X`` and then ``normalize`` the result."""
    transformed = transform(X)
    return normalize(transformed)
def t_n_pca(X):
    """Run ``transform_then_normalize`` on ``X`` and reduce the result with ``apply_pca``."""
    scaled = transform_then_normalize(X)
    return apply_pca(scaled)

# Maps the label stored in each result's "Method" field to the callable that
# prepares the feature matrix for that run. Every value takes the raw data
# as its single positional argument.
preprocessing_methods = {
    "No Processing": no_processing,
    "Normalization": normalize,
    "Transform": transform,
    "PCA": apply_pca,
    "T+N": transform_then_normalize,
    "T+N+PCA": t_n_pca,
}

# Accumulates one dict per successfully scored clustering run.
results = []
# Cluster counts tried for the algorithms that take k explicitly: 3, 4, 5.
cluster_range = list(range(3, 6))

# ================================
# Evaluate each algorithm
# ================================
def _score_row(algorithm, method_name, n_clusters, X, labels):
    """Build one results row holding the three validity scores of a labelling.

    Args:
        algorithm: Value stored under "Algorithm".
        method_name: Value stored under "Method" (the preprocessing label).
        n_clusters: Value stored under "Clusters".
        X: Data the labels were fitted on; passed to each scoring function.
        labels: Cluster labels to score against ``X``.

    Returns:
        A dict with the identifying fields plus the silhouette and
        Davies-Bouldin scores rounded to 2 decimals and the
        Calinski-Harabasz score rounded to the nearest integer.

    Raises:
        Whatever the scoring functions raise; the callers below catch it
        and report the failed run.
    """
    sil = silhouette_score(X, labels)
    cal = calinski_harabasz_score(X, labels)
    db = davies_bouldin_score(X, labels)
    return {
        "Algorithm": algorithm,
        "Method": method_name,
        "Clusters": n_clusters,
        "Silhouette": round(sil, 2),
        "Calinski-Harabasz": round(cal),
        "Davies-Bouldin": round(db, 2),
    }


for method_name, preprocess in preprocessing_methods.items():
    # A preprocessing failure skips every algorithm for that method.
    try:
        X_proc = preprocess(X_original)
    except Exception as e:
        print(f"[ERROR] Preprocessing failed for {method_name}: {e}")
        continue

    # Each fit/score below is best-effort: a failure is reported and only
    # that single run is left out of the results.

    # --- KMeans ---
    for k in cluster_range:
        try:
            model = KMeans(n_clusters=k, random_state=42, n_init='auto').fit(X_proc)
            labels = model.labels_
            results.append(_score_row("KMeans", method_name, k, X_proc, labels))
        except Exception as e:
            print(f"KMeans failed for {method_name} with k={k}: {e}")

    # --- Hierarchical (Agglomerative) ---
    for k in cluster_range:
        try:
            model = AgglomerativeClustering(n_clusters=k).fit(X_proc)
            labels = model.labels_
            results.append(_score_row("Hierarchical", method_name, k, X_proc, labels))
        except Exception as e:
            print(f"Hierarchical failed for {method_name} with k={k}: {e}")

    # --- Mean Shift (no need to specify k) ---
    try:
        model = MeanShift().fit(X_proc)
        labels = model.labels_
        # The cluster count is whatever the fit produced, so it is read
        # back from the distinct labels instead of being supplied.
        found = len(np.unique(labels))
        results.append(_score_row("MeanShift", method_name, found, X_proc, labels))
    except Exception as e:
        print(f"MeanShift failed for {method_name}: {e}")

# Collect every scored run into one table (one row per entry of `results`);
# the bare name on the last line makes the notebook render it.
results_df = pd.DataFrame(data=results)
results_df

Unnamed: 0,Algorithm,Method,Clusters,Silhouette,Calinski-Harabasz,Davies-Bouldin
0,KMeans,No Processing,3,0.55,562,0.67
1,KMeans,No Processing,4,0.5,530,0.75
2,KMeans,No Processing,5,0.49,495,0.82
3,Hierarchical,No Processing,3,0.55,558,0.66
4,Hierarchical,No Processing,4,0.49,515,0.8
5,Hierarchical,No Processing,5,0.48,488,0.82
6,MeanShift,No Processing,2,0.69,510,0.39
7,KMeans,Normalization,3,0.48,351,0.79
8,KMeans,Normalization,4,0.44,314,0.91
9,KMeans,Normalization,5,0.42,263,0.99
