In [None]:
import pandas as pd
# Read the CSV and discard the 'Unnamed: 0' column in one chained expression
# (presumably a leftover index column from an earlier export - TODO confirm).
dataset = pd.read_csv('./Dataset.csv').drop(columns=['Unnamed: 0'])
# Bare last expression so the notebook renders the frame.
dataset

In [None]:
import warnings
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.model_selection import KFold
import numpy as np

# Grid search over three clustering algorithms, scored by the mean silhouette
# coefficient across the training portions of a 5-fold split of `dataset`.
# Results are left in `best_scores` / `best_models`, keyed by method name.

# Candidate hyper-parameters for each clustering method.
param_grid = {
    'KMeans': [
        {'n_clusters': 2, 'random_state': 42},
        {'n_clusters': 3, 'random_state': 42},
        {'n_clusters': 4, 'random_state': 42},
        {'n_clusters': 5, 'random_state': 42},
        {'n_clusters': 6, 'random_state': 42},
    ],
    'AgglomerativeClustering': [
        {'n_clusters': 2, 'linkage': 'ward'},
        {'n_clusters': 3, 'linkage': 'ward'},
        {'n_clusters': 4, 'linkage': 'ward'},
        {'n_clusters': 6, 'linkage': 'ward'},
        {'n_clusters': 2, 'linkage': 'complete'},
        {'n_clusters': 4, 'linkage': 'complete'},
        {'n_clusters': 6, 'linkage': 'complete'},
        {'n_clusters': 2, 'linkage': 'average'},
        {'n_clusters': 4, 'linkage': 'average'},
        {'n_clusters': 6, 'linkage': 'average'},
        {'n_clusters': 2, 'linkage': 'single'},
        {'n_clusters': 4, 'linkage': 'single'},
        {'n_clusters': 6, 'linkage': 'single'},
    ],
    # NOTE(review): the second entry of each eps pair had no min_samples value
    # in the original cell; 5 (scikit-learn's DBSCAN default) is assumed here.
    # Confirm the intended value.
    'DBSCAN': [
        {'eps': 0.1, 'min_samples': 2}, {'eps': 0.1, 'min_samples': 5},
        {'eps': 0.5, 'min_samples': 2}, {'eps': 0.5, 'min_samples': 5},
        {'eps': 1.0, 'min_samples': 2}, {'eps': 1.0, 'min_samples': 5},
    ],
}

# Maps each method name in `param_grid` to the estimator class that builds it.
estimator_classes = {
    'KMeans': KMeans,
    'AgglomerativeClustering': AgglomerativeClustering,
    'DBSCAN': DBSCAN,
}

best_scores = {}
best_models = {}
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for method, param_values in param_grid.items():
    print(f"Running {method}...")
    best_score = float('-inf')
    for params in param_values:
        fold_silhouette_scores = []
        # Only the training indices are used: these estimators are fitted and
        # scored on the same subset, the held-out part is ignored.
        for train_index, _ in kf.split(dataset):
            X_train = dataset.iloc[train_index]
            model = estimator_classes[method](**params)
            model.fit(X_train)
            train_clusters = model.labels_
            # silhouette_score needs 2 <= n_labels <= n_samples - 1; skip folds
            # where everything collapsed into one label or every sample got
            # its own label instead of letting the metric raise.
            n_labels = len(np.unique(train_clusters))
            if not 1 < n_labels < len(X_train):
                continue
            silhouette_avg = silhouette_score(X_train, train_clusters)
            fold_silhouette_scores.append(silhouette_avg)
        # No fold produced a usable clustering for this configuration.
        if not fold_silhouette_scores:
            continue
        avg_silhouette = sum(fold_silhouette_scores) / len(fold_silhouette_scores)
        if avg_silhouette > best_score:
            best_score = avg_silhouette
            # The stored estimator is the one fitted on the last fold; its
            # get_params() reflects the winning configuration.
            best_models[method] = model
            best_scores[method] = avg_silhouette

# A method is absent from best_scores if none of its configurations produced
# a scorable clustering.
for method, score in best_scores.items():
    print(f"Best {method} score: {score}")
    print(f"Best {method} parameters: {best_models[method].get_params()}")

In [None]:
import pandas as pd
from sklearn.cluster import MeanShift, SpectralClustering
from sklearn.metrics import davies_bouldin_score

# Grid search over MeanShift and SpectralClustering on the training portions
# of a 5-fold split of `dataset`. This cell rebinds `param_grid`, `kf`,
# `best_scores` and `best_models`, replacing any values left by earlier cells.
# `KFold` is not imported in this cell and must already be in scope.

# Candidate hyper-parameters for each clustering method.
param_grid = {
    'MeanShift': [{'bandwidth': 0.1}, {'bandwidth': 0.5}, {'bandwidth': 1.0}],
    'SpectralClustering': [
        {'n_clusters': 2, 'random_state': 42},
        {'n_clusters': 3, 'random_state': 42},
        {'n_clusters': 4, 'random_state': 42},
        {'n_clusters': 5, 'random_state': 42},
        {'n_clusters': 6, 'random_state': 42},
    ],
}

# Direction of each method's selection criterion.
# MeanShift is ranked by the number of clusters found (more wins), as in the
# original cell. NOTE(review): this favours the smallest bandwidth - confirm
# that is the intended criterion.
# SpectralClustering is ranked by the Davies-Bouldin index, where LOWER values
# mean better separation, so it must be minimised.
higher_is_better = {'MeanShift': True, 'SpectralClustering': False}

kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_scores = {}
best_models = {}

for method, param_values in param_grid.items():
    print(f"Running {method}...")
    maximize = higher_is_better[method]
    best_score = None
    best_model = None
    for params in param_values:
        fold_scores = []
        # Only the training indices are used; the held-out part is ignored.
        for train_index, _ in kf.split(dataset):
            X_train = dataset.iloc[train_index]
            if method == 'MeanShift':
                model = MeanShift(**params)
            elif method == 'SpectralClustering':
                model = SpectralClustering(**params)
            model.fit(X_train)
            train_clusters = model.labels_
            n_labels = len(set(train_clusters))
            if method == 'MeanShift':
                score = n_labels
            elif method == 'SpectralClustering':
                # davies_bouldin_score needs 2 <= n_labels <= n_samples - 1;
                # skip degenerate folds instead of letting the metric raise.
                if not 1 < n_labels < len(X_train):
                    continue
                score = davies_bouldin_score(X_train, train_clusters)
            fold_scores.append(score)
        # Every fold was degenerate for this configuration.
        if not fold_scores:
            continue
        avg_score = sum(fold_scores) / len(fold_scores)
        if best_score is None:
            is_better = True
        elif maximize:
            is_better = avg_score > best_score
        else:
            is_better = avg_score < best_score
        if is_better:
            best_score = avg_score
            # The stored estimator is the one fitted on the last fold; its
            # get_params() reflects the winning configuration.
            best_model = model
    # Record a result only when at least one configuration was scorable, so
    # the reporting loop never calls get_params() on None.
    if best_model is not None:
        best_scores[method] = best_score
        best_models[method] = best_model

for method, score in best_scores.items():
    print(f"Best {method} score: {score}")
    print(f"Best {method} parameters: {best_models[method].get_params()}")