<a href="https://colab.research.google.com/github/chirag21120/Clustering/blob/main/Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering, MeanShift
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score

In [3]:
# Load the wine dataset that ships with scikit-learn and keep its feature
# matrix; only the features are used by the cells visible in this notebook.
wine = load_wine()
X = wine["data"]

In [35]:
def _normalize_then_pca(x):
    """Min-max scale ``x`` and project the result onto 2 principal components."""
    normalized = MinMaxScaler().fit_transform(x)
    return PCA(n_components=2).fit_transform(normalized)


def _standardize_then_pca(x):
    """Standardize ``x`` and project the result onto 2 principal components."""
    standardized = StandardScaler().fit_transform(x)
    return PCA(n_components=2).fit_transform(standardized)


def _normalize_standardize_then_pca(x):
    """Min-max scale, then standardize, then project onto 2 principal components.

    NOTE(review): standardizing after min-max scaling should give the same
    values as standardizing alone (both are per-feature affine rescalings), so
    this variant likely duplicates 'Standardization + PCA' -- confirm intent.
    """
    normalized = MinMaxScaler().fit_transform(x)
    standardized = StandardScaler().fit_transform(normalized)
    return PCA(n_components=2).fit_transform(standardized)


# Maps a human-readable label to a callable that takes the raw feature matrix
# and returns the matrix handed to the clustering algorithms. The labels end up
# as column names in the saved result tables.
preprocessing_techniques = {
    'No Preprocessing': lambda x: x,
    'Normalization': MinMaxScaler().fit_transform,
    'Standardization': StandardScaler().fit_transform,
    'PCA': PCA(n_components=2).fit_transform,
    'Normalization + PCA': _normalize_then_pca,
    'Standardization + PCA': _standardize_then_pca,
    'Normalization + Standardization + PCA': _normalize_standardize_then_pca
}


In [16]:
# Clustering estimators to compare, keyed by the name used for the result
# files.
#
# KMeans starts from randomly seeded centroids, so its seed is pinned: without
# it every Restart & Run All can produce different labels and scores. n_init is
# written out explicitly because its default is not the same across
# scikit-learn releases (10 restarts was the long-standing default).
clustering_algorithms = {
    'KMeans': KMeans(n_clusters=3, n_init=10, random_state=42),
    'Hierarchical': AgglomerativeClustering(n_clusters=3),
    # MeanShift is not given a cluster count; it determines one itself.
    'MeanShift': MeanShift()
}


In [17]:
# Clustering quality scores, keyed by the row label used in the result tables.
# Each one is called as metric(features, predicted_labels), so no ground-truth
# classes are needed. Per the scikit-learn docs, higher is better for the
# Silhouette and Calinski-Harabasz scores and lower is better for
# Davies-Bouldin.
evaluation_metrics = dict([
    ('Silhouette Score', silhouette_score),
    ('Davies-Bouldin Index', davies_bouldin_score),
    ('Calinski-Harabasz Index', calinski_harabasz_score),
])

In [21]:
# Output folder for the per-algorithm CSV files. exist_ok=True makes this cell
# safe to re-run and replaces the separate exists()-then-create check.
os.makedirs('results', exist_ok=True)

In [36]:
# Score every (clustering algorithm, preprocessing) combination and write one
# CSV per algorithm: rows are metrics, columns are preprocessing techniques.
for algo_name, algorithm in clustering_algorithms.items():
    algo_results = {}
    for preproc_name, preproc_func in preprocessing_techniques.items():
        X_preprocessed = preproc_func(X)
        algorithm.fit(X_preprocessed)
        labels = algorithm.labels_

        # The three metrics are only defined when there are between 2 and
        # n_samples - 1 clusters; outside that range they raise ValueError.
        # A -1 label is presumably a noise/unassigned marker, so such results
        # are excluded as well.
        unique_labels = set(labels)
        n_samples = len(X_preprocessed)
        has_valid_cluster_count = 1 < len(unique_labels) < n_samples
        if not has_valid_cluster_count or -1 in unique_labels:
            # Say so instead of silently leaving a column out of the CSV.
            print(
                f"Skipping {algo_name} with {preproc_name}: "
                f"{len(unique_labels)} distinct label(s) cannot be scored"
            )
            continue

        # Compute evaluation metrics
        metrics = {
            metric_name: metric_func(X_preprocessed, labels)
            for metric_name, metric_func in evaluation_metrics.items()
        }
        algo_results[preproc_name] = metrics

    # A dict of {preprocessing: {metric: score}} already becomes a table with
    # metrics as rows and preprocessing techniques as columns; the former
    # stack().unstack(level=1) round trip returned this same table.
    results_df = pd.DataFrame(algo_results)

    # Save results to a CSV file
    results_df.to_csv(f"results/{algo_name}_results.csv")


