In [91]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import SpectralClustering
from tabulate import tabulate
import warnings
warnings.filterwarnings("ignore")

In [92]:
# Load the scikit-learn wine dataset and keep only its feature matrix;
# no other field of the dataset object is read in this cell.
wine = load_wine()
X = wine["data"]

## KMeans

#### N = 3

In [93]:
# KMeans with 3 clusters, evaluated on the feature matrix X under several
# preprocessing pipelines. Each pipeline's output is clustered independently and
# scored with three internal validity indices computed from the data and the
# predicted labels only (no ground-truth classes are used).
results_data = []

# KMeans clustering with different preprocessing techniques.
# What each label actually computes:
#   "Transform + Normalization"       -> 2-component PCA, then min-max scaling
#   "Transform + Normalization + PCA" -> min-max scaling, then 2-component PCA
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
}

for technique, data in preprocessing_techniques.items():
    # Perform KMeans clustering.
    # random_state is fixed so the centroid initialisation, and therefore the
    # table below, is identical on every Restart & Run All.
    clustering = KMeans(n_clusters=3, n_init='auto', random_state=42)
    labels = clustering.fit_predict(data)

    # Calculate evaluation metrics on the same representation that was clustered.
    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower is better.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    # Append results to the list (one table row per preprocessing technique)
    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.571138 ┃                  561.816  ┃               0.534243 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.299708 ┃                   83.2643 ┃               1.32105  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.572255 ┃                  562.519  ┃               0.532969 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 4

In [94]:
# KMeans with 4 clusters, evaluated on the feature matrix X under several
# preprocessing pipelines. Each pipeline's output is clustered independently and
# scored with three internal validity indices computed from the data and the
# predicted labels only (no ground-truth classes are used).
results_data = []

# KMeans clustering with different preprocessing techniques.
# What each label actually computes:
#   "Transform + Normalization"       -> 2-component PCA, then min-max scaling
#   "Transform + Normalization + PCA" -> min-max scaling, then 2-component PCA
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
}

for technique, data in preprocessing_techniques.items():
    # Perform KMeans clustering.
    # random_state is fixed so the centroid initialisation, and therefore the
    # table below, is identical on every Restart & Run All.
    clustering = KMeans(n_clusters=4, n_init='auto', random_state=42)
    labels = clustering.fit_predict(data)

    # Calculate evaluation metrics on the same representation that was clustered.
    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower is better.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    # Append results to the list (one table row per preprocessing technique)
    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.563108 ┃                  702.973  ┃               0.54676  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.244663 ┃                   65.5891 ┃               1.67557  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.560205 ┃                  704.218  ┃               0.544438 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 5

In [95]:
# KMeans with 5 clusters, evaluated on the feature matrix X under several
# preprocessing pipelines. Each pipeline's output is clustered independently and
# scored with three internal validity indices computed from the data and the
# predicted labels only (no ground-truth classes are used).
results_data = []

# KMeans clustering with different preprocessing techniques.
# What each label actually computes:
#   "Transform + Normalization"       -> 2-component PCA, then min-max scaling
#   "Transform + Normalization + PCA" -> min-max scaling, then 2-component PCA
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
}

for technique, data in preprocessing_techniques.items():
    # Perform KMeans clustering.
    # random_state is fixed so the centroid initialisation, and therefore the
    # table below, is identical on every Restart & Run All.
    clustering = KMeans(n_clusters=5, n_init='auto', random_state=42)
    labels = clustering.fit_predict(data)

    # Calculate evaluation metrics on the same representation that was clustered.
    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower is better.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    # Append results to the list (one table row per preprocessing technique)
    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.548999 ┃                  787.047  ┃               0.545047 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.25716  ┃                   53.2933 ┃               1.77284  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.48879  ┃                  632.915  ┃               0.54718  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

## Hierarchical Clustering

#### N = 3

In [96]:
# Agglomerative (hierarchical) clustering into 3 groups: one table row of
# internal validity scores per preprocessing variant of X.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Candidate input representations, keyed by the label shown in the table.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(
        MinMaxScaler().fit_transform(X)
    ),
}

# Scoring functions, in the same left-to-right order as the metric columns.
metric_fns = (silhouette_score, calinski_harabasz_score, davies_bouldin_score)

results_data = []
for technique, data in preprocessing_techniques.items():
    # Cluster this representation, then score the labels against the same data.
    labels = AgglomerativeClustering(n_clusters=3).fit_predict(data)
    results_data.append([technique, *(metric(data, labels) for metric in metric_fns)])

# Render the collected rows as a heavy-grid text table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.56448  ┃                  552.852  ┃               0.535734 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.294829 ┃                   81.3276 ┃               1.31831  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.565581 ┃                  553.532  ┃               0.534284 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 4

In [97]:
# Agglomerative (hierarchical) clustering into 4 groups: one table row of
# internal validity scores per preprocessing variant of X.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Candidate input representations, keyed by the label shown in the table.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(
        MinMaxScaler().fit_transform(X)
    ),
}

# Scoring functions, in the same left-to-right order as the metric columns.
metric_fns = (silhouette_score, calinski_harabasz_score, davies_bouldin_score)

results_data = []
for technique, data in preprocessing_techniques.items():
    # Cluster this representation, then score the labels against the same data.
    labels = AgglomerativeClustering(n_clusters=4).fit_predict(data)
    results_data.append([technique, *(metric(data, labels) for metric in metric_fns)])

# Render the collected rows as a heavy-grid text table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.560673 ┃                  670.626  ┃               0.553574 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.248703 ┃                   63.5149 ┃               1.75287  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.56192  ┃                  672.023  ┃               0.55233  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 5

In [98]:
# Agglomerative (hierarchical) clustering into 5 groups: one table row of
# internal validity scores per preprocessing variant of X.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Candidate input representations, keyed by the label shown in the table.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(
        MinMaxScaler().fit_transform(X)
    ),
}

# Scoring functions, in the same left-to-right order as the metric columns.
metric_fns = (silhouette_score, calinski_harabasz_score, davies_bouldin_score)

results_data = []
for technique, data in preprocessing_techniques.items():
    # Cluster this representation, then score the labels against the same data.
    labels = AgglomerativeClustering(n_clusters=5).fit_predict(data)
    results_data.append([technique, *(metric(data, labels) for metric in metric_fns)])

# Render the collected rows as a heavy-grid text table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.507484 ┃                   684.223 ┃               0.551315 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.204154 ┃                    52.277 ┃               1.93678  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.500913 ┃                   685.285 ┃               0.556345 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

## Spectral Clustering

#### N = 3

In [99]:
# Spectral clustering with 3 clusters, evaluated on the feature matrix X under
# several preprocessing pipelines. Each pipeline's output is clustered
# independently and scored with three internal validity indices computed from
# the data and the predicted labels only.
results_data = []

# Spectral clustering with different preprocessing techniques.
# What each label actually computes:
#   "Transform + Normalization"       -> 2-component PCA, then min-max scaling
#   "Transform + Normalization + PCA" -> min-max scaling, then 2-component PCA
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
}

for technique, data in preprocessing_techniques.items():
    # Perform Spectral clustering with the estimator's default affinity settings.
    # random_state is fixed so repeated runs produce the same labels and scores.
    # NOTE(review): in the recorded outputs of this notebook the "No Preprocessing"
    # and "PCA" rows have identical scores for n_clusters = 3, 4 and 5, which
    # suggests the clustering is degenerate on the unscaled representations
    # (presumably the default affinity kernel collapses at this feature scale,
    # and any related warning is hidden by the global warnings filter) - TODO
    # confirm and consider scaling the data or tuning the affinity.
    clustering = SpectralClustering(n_clusters=3, random_state=42)
    labels = clustering.fit_predict(data)

    # Calculate evaluation metrics on the same representation that was clustered.
    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower is better.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    # Append results to the list (one table row per preprocessing technique)
    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.280348 ┃                   3.3691  ┃               0.447723 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.299876 ┃                  82.8284  ┃               1.31162  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃          -0.067969 ┃                   1.98697 ┃               0.84763  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 4

In [100]:
# Spectral clustering with 4 clusters, evaluated on the feature matrix X under
# several preprocessing pipelines. Each pipeline's output is clustered
# independently and scored with three internal validity indices computed from
# the data and the predicted labels only.
results_data = []

# Spectral clustering with different preprocessing techniques.
# What each label actually computes:
#   "Transform + Normalization"       -> 2-component PCA, then min-max scaling
#   "Transform + Normalization + PCA" -> min-max scaling, then 2-component PCA
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
}

for technique, data in preprocessing_techniques.items():
    # Perform Spectral clustering with the estimator's default affinity settings.
    # random_state is fixed so repeated runs produce the same labels and scores.
    # NOTE(review): in the recorded outputs of this notebook the "No Preprocessing"
    # and "PCA" rows have identical scores for n_clusters = 3, 4 and 5, which
    # suggests the clustering is degenerate on the unscaled representations
    # (presumably the default affinity kernel collapses at this feature scale,
    # and any related warning is hidden by the global warnings filter) - TODO
    # confirm and consider scaling the data or tuning the affinity.
    clustering = SpectralClustering(n_clusters=4, random_state=42)
    labels = clustering.fit_predict(data)

    # Calculate evaluation metrics on the same representation that was clustered.
    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower is better.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    # Append results to the list (one table row per preprocessing technique)
    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.280348 ┃                   3.3691  ┃               0.447723 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.24971  ┃                  63.8306  ┃               1.71285  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃          -0.067969 ┃                   1.98697 ┃               0.84763  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 5

In [101]:
# Spectral clustering with 5 clusters, evaluated on the feature matrix X under
# several preprocessing pipelines. Each pipeline's output is clustered
# independently and scored with three internal validity indices computed from
# the data and the predicted labels only.
results_data = []

# Spectral clustering with different preprocessing techniques.
# What each label actually computes:
#   "Transform + Normalization"       -> 2-component PCA, then min-max scaling
#   "Transform + Normalization + PCA" -> min-max scaling, then 2-component PCA
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
    "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
}

for technique, data in preprocessing_techniques.items():
    # Perform Spectral clustering with the estimator's default affinity settings.
    # random_state is fixed so repeated runs produce the same labels and scores.
    # NOTE(review): in the recorded outputs of this notebook the "No Preprocessing"
    # and "PCA" rows have identical scores for n_clusters = 3, 4 and 5, which
    # suggests the clustering is degenerate on the unscaled representations
    # (presumably the default affinity kernel collapses at this feature scale,
    # and any related warning is hidden by the global warnings filter) - TODO
    # confirm and consider scaling the data or tuning the affinity.
    clustering = SpectralClustering(n_clusters=5, random_state=42)
    labels = clustering.fit_predict(data)

    # Calculate evaluation metrics on the same representation that was clustered.
    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower is better.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    # Append results to the list (one table row per preprocessing technique)
    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.280348 ┃                   3.3691  ┃               0.447723 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.226397 ┃                  52.4825  ┃               1.69517  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃          -0.067969 ┃                   1.98697 ┃               0.84763  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor