# In [None]:
# Clustering pipeline: K-Means, DBSCAN, DENCLUE, KNN
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from umap import UMAP
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, davies_bouldin_score, normalized_mutual_info_score
from sklearn.metrics.pairwise import cosine_distances
from sklearn.neighbors import KNeighborsClassifier
import math

# Load the dataset and standardise every column with StandardScaler.
df = pd.read_csv('/content/drive/MyDrive/project_dm/Cardiovascular_Disease_Dataset.csv')  # Adjust the path if needed
scaler = StandardScaler().fit(df)
data = scaler.transform(df)

# Reduce the standardised data to 5 UMAP components; all clustering below
# runs on this embedding rather than on the raw features.
umap = UMAP(n_components=5, random_state=42)
data_umap = umap.fit_transform(data)

# K-Means with a fixed number of clusters (no search over `k` happens here).
best_k = 5
kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=10).fit(data_umap)
kmeans_labels = kmeans.labels_

# Pick the DBSCAN `eps` that maximises the silhouette score on the UMAP
# embedding. The score is computed on the raw DBSCAN labels, so noise points
# (label -1) are scored as if they formed one group of their own.
best_eps = 0.5  # fallback kept when no candidate produces a valid score
best_silhouette = -1
for eps in [0.2, 0.5, 1.0, 1.5]:
    dbscan = DBSCAN(eps=eps, min_samples=5).fit(data_umap)
    labels = dbscan.labels_
    # silhouette_score raises ValueError unless
    # 2 <= n_labels <= n_samples - 1 (e.g. every point flagged as noise, or
    # one all-encompassing cluster). Skip such candidates instead of letting
    # a single bad `eps` abort the whole search.
    n_labels = len(np.unique(labels))
    if not 2 <= n_labels <= len(labels) - 1:
        print(f"⚠️ DBSCAN avec eps={eps} → Silhouette Score indéfini ({n_labels} label(s))")
        continue
    score = silhouette_score(data_umap, labels)
    print(f"📌 DBSCAN avec eps={eps} → Silhouette Score: {score:.4f}")
    if score > best_silhouette:
        best_silhouette = score
        best_eps = eps

print(f"✅ Meilleur `eps` pour DBSCAN : {best_eps}")

# Refit DBSCAN with the selected `eps` to obtain the labels used downstream.
dbscan = DBSCAN(eps=best_eps, min_samples=5)
dbscan_labels = dbscan.fit_predict(data_umap)

# Merge K-Means and DBSCAN: keep the DBSCAN cluster wherever DBSCAN assigned
# one, and fall back to the K-Means cluster for DBSCAN noise points (-1).
# Both algorithms number their clusters from 0, so the K-Means fallback ids
# are shifted past the DBSCAN ids; without the shift an outlier in K-Means
# cluster 0 would silently be merged with the unrelated DBSCAN cluster 0.
n_dbscan_clusters = dbscan_labels.max(initial=-1) + 1  # 0 when every point is noise
final_labels = np.where(
    dbscan_labels != -1,
    dbscan_labels,
    kmeans_labels + n_dbscan_clusters,
)

df['Cluster_KMeans_DBSCAN'] = final_labels

# DENCLUE stand-in, plus a Lévy-flight step helper of the kind used by
# Cuckoo Search (the helper is not called by `denclue()` itself).
class DenClue:
    """Simplified stand-in for DENCLUE clustering.

    NOTE: no density estimation or hill climbing is performed here.
    ``denclue()`` only draws random data points and returns them as
    centers; ``epsilon``, ``min_pts`` and ``max_iter`` are stored on the
    instance but are not read by any method of this class.

    Args:
        data: Array-like of samples, one row per sample.
        epsilon: Stored as ``self.epsilon``; currently unused.
        min_pts: Stored as ``self.min_pts``; currently unused.
        max_iter: Stored as ``self.max_iter``; currently unused.
        n_centers: Number of centers drawn by ``denclue()``. Defaults to 5,
            the value that used to be hard-coded.
        random_state: Optional seed for reproducible draws. When ``None``
            the global ``np.random`` generator is used, as before.
    """

    def __init__(self, data, epsilon, min_pts, max_iter, n_centers=5,
                 random_state=None):
        # Coerce so that row selection by an index array works for lists
        # and other array-likes, not only for ndarrays.
        self.data = np.asarray(data)
        self.epsilon = epsilon
        self.min_pts = min_pts
        self.max_iter = max_iter
        self.n_centers = n_centers
        self.centers = []
        # Both `np.random` and `RandomState` expose `randn` and `choice`,
        # so the methods below can use either interchangeably.
        if random_state is None:
            self._rng = np.random
        else:
            self._rng = np.random.RandomState(random_state)

    def levy_flight(self, step_size, dim):
        """Return a ``dim``-dimensional Lévy-flight step scaled by ``step_size``.

        Uses Mantegna's method with ``beta = 1.5``: ``u ~ N(0, sigma^2)``,
        ``v ~ N(0, 1)`` and ``step = u / |v| ** (1 / beta)``.
        """
        beta = 1.5
        numerator = math.gamma(1 + beta) * np.sin(np.pi * beta / 2)
        denominator = math.gamma((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
        sigma = (numerator / denominator) ** (1 / beta)
        u = self._rng.randn(dim) * sigma
        v = self._rng.randn(dim)
        step = u / np.abs(v) ** (1 / beta)
        return step_size * step

    def denclue(self):
        """Draw distinct random samples as centers, store and return them.

        At most ``n_centers`` rows are drawn; when the data holds fewer
        samples than that, every sample is returned (in random order)
        instead of raising.
        """
        n_samples = len(self.data)
        n_drawn = min(self.n_centers, n_samples)
        chosen = self._rng.choice(n_samples, n_drawn, replace=False)
        self.centers = self.data[chosen]
        return self.centers

# Draw the DENCLUE centers on the UMAP embedding.
# NOTE(review): the centers are drawn without a seed here, unlike the other
# stochastic steps of this script (random_state=42), so `Cluster_Final` can
# differ from run to run.
denclue = DenClue(data_umap, epsilon=0.5, min_pts=5, max_iter=100)
centers = denclue.denclue()

# Weighted fusion of the DENCLUE assignment into the K-Means/DBSCAN labels.
weight = 0.3  # share given to the index of the nearest DENCLUE center

# One vectorised distance computation (n_samples x n_centers) instead of one
# scikit-learn call per sample inside a Python loop.
distances = cosine_distances(data_umap, centers)
nearest_center = distances.argmin(axis=1)
min_dist = distances.min(axis=1)

# Only samples within cosine distance 0.5 of some center are updated; the
# others keep their merged K-Means/DBSCAN label.
# NOTE(review): the blend averages two cluster *ids* numerically and
# truncates the result, i.e. it treats nominal labels as numbers. The
# resulting id may name a cluster unrelated to either input. Behaviour is
# kept as-is; the intended fusion rule should be confirmed.
blended = (weight * nearest_center + (1 - weight) * final_labels).astype(int)
final_clusters = np.where(min_dist < 0.5, blended, final_labels)

df['Cluster_Final'] = final_clusters

# Refine the clusters with KNN: train a 5-neighbour classifier on the
# embedding with `Cluster_Final` as target, then re-predict the same points.
cluster_final = df['Cluster_Final']
knn = KNeighborsClassifier(n_neighbors=5).fit(data_umap, cluster_final)
df['Cluster_KNN'] = knn.predict(data_umap)

# Title shared by both figures.
plot_title = 'Clustering Final (K-Means + DBSCAN + DENCLUE + KNN)'

# 2-D view after KNN: project the 5-component embedding down to 2 components.
umap_2d = UMAP(n_components=2, random_state=42)
data_umap_2d = umap_2d.fit_transform(data_umap)

fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(
    data_umap_2d[:, 0],
    data_umap_2d[:, 1],
    c=df['Cluster_KNN'],
    cmap='plasma',
    alpha=0.6,
)
ax.set_xlabel('UMAP1')
ax.set_ylabel('UMAP2')
ax.set_title(plot_title)
fig.colorbar(scatter, ax=ax, label='Cluster')
plt.show()

# 3-D view after KNN: same idea with 3 components on a 3-D axes.
umap_3d = UMAP(n_components=3, random_state=42)
data_umap_3d = umap_3d.fit_transform(data_umap)

fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={'projection': '3d'})
ax.scatter(
    data_umap_3d[:, 0],
    data_umap_3d[:, 1],
    data_umap_3d[:, 2],
    c=df['Cluster_KNN'],
    cmap='plasma',
    alpha=0.6,
)
ax.set_xlabel('UMAP1')
ax.set_ylabel('UMAP2')
ax.set_zlabel('UMAP3')
ax.set_title(plot_title)
plt.show()

# Final evaluation: compare the labels before KNN (`Cluster_Final`) with the
# labels after KNN (`Cluster_KNN`). Silhouette and Davies-Bouldin are computed
# on the UMAP embedding; NMI is measured against the merged K-Means/DBSCAN
# labels, not against a ground truth.
labels_before = df['Cluster_Final']
labels_after = df['Cluster_KNN']
reference_labels = df['Cluster_KMeans_DBSCAN']

silhouette_before, silhouette_after = [
    silhouette_score(data_umap, lab) for lab in (labels_before, labels_after)
]
dbi_before, dbi_after = [
    davies_bouldin_score(data_umap, lab) for lab in (labels_before, labels_after)
]
nmi_before, nmi_after = [
    normalized_mutual_info_score(reference_labels, lab)
    for lab in (labels_before, labels_after)
]

print(f"📌 Silhouette Score (Avant KNN): {silhouette_before:.4f} | Après KNN: {silhouette_after:.4f}")
print(f"📌 Davies-Bouldin Index (Avant KNN): {dbi_before:.4f} | Après KNN: {dbi_after:.4f}")
print(f"📌 NMI (Avant KNN): {nmi_before:.4f} | Après KNN: {nmi_after:.4f}")
