In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN


# Build a synthetic 2-D dataset out of three groups of points.
n_samples = 1500

# Two Gaussian blobs with different spreads; `dataset` is the (points, labels)
# pair returned by make_blobs.
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)

# A third blob centred at (-4, -3); its own labels are discarded.
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    centers=[[-4, -3]],
    cluster_std=[1.9],
    random_state=170,
)

# Multiply the third blob by a fixed 2x2 matrix to stretch and skew it.
transformation = [[1.2, -0.8], [-0.4, 1.7]]
X_2 = np.dot(X_2, transformation)

# Join the points, and label every point of the third blob with 2.
X = np.concatenate((dataset[0], X_2))
y = np.concatenate((dataset[1], np.full(len(X_2), 2)))


In [2]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# First obtain the predicted clusters with a clustering method.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
kmeans_pred = kmeans.labels_

# Now compute the silhouette coefficient (shown as the cell output).
silhouette_score(X, kmeans_pred, metric='euclidean')

0.5131209788437305

In [8]:
# Fit a 3-component Gaussian mixture and score its cluster assignments.
gm = GaussianMixture(n_components=3, random_state=42)

# fit_predict fits the model and returns one label per sample; this is the
# same call the other GaussianMixture cells in this notebook use.
y_pred = gm.fit_predict(X)

# Silhouette coefficient of those labels; as the last expression of the cell
# it is what gets displayed.
silhouette_score(X=X, labels=y_pred, metric='euclidean')

0.49893287606943293

In [16]:
# Compare four clustering algorithms on the same data by silhouette coefficient.

# Fit every model first ...
k_means = KMeans(n_clusters=3, random_state=42).fit(X)

gm = GaussianMixture(n_components=3, random_state=42)
gm_labels = gm.fit_predict(X)

agg = AgglomerativeClustering(n_clusters=3)
agg_labels = agg.fit_predict(X)

dbs = DBSCAN(eps=0.9, min_samples=35)
dbs_labels = dbs.fit_predict(X)

# ... then print one score per line, in the order:
# KMeans, GaussianMixture, AgglomerativeClustering, DBSCAN.
# NOTE(review): DBSCAN marks noise points with the label -1, and they are
# scored here as if they formed a cluster of their own - confirm that this
# is the intended comparison.
for labels in (k_means.labels_, gm_labels, agg_labels, dbs_labels):
    print(silhouette_score(X, labels, metric="euclidean"))



0.5131209788437305
0.49893287606943293
0.4811992210663849
0.4454335539277996


In [17]:
# Candidate numbers of clusters to try: 2 through 10 inclusive.
clusters = [*range(2, 11)]

In [20]:
# Sweep the candidate cluster counts with KMeans and print
# "<n_clusters><TAB><silhouette coefficient>" for each one.
for n_clusters in clusters:
    k_means = KMeans(random_state=42, n_clusters=n_clusters).fit(X)
    sil = silhouette_score(X, k_means.labels_, metric="euclidean")
    print(f"{n_clusters}\t{sil}")


2	0.4553109501667062
3	0.5131209788437305
4	0.5114911234090096
5	0.48124147837648434
6	0.4617161738600555
7	0.39702814018472976
8	0.33335049057121036
9	0.3295856272928711
10	0.3325217295314097


In [22]:
# Sweep the candidate component counts with a Gaussian mixture and print
# "<n_clusters><TAB><silhouette coefficient>" for each one.
for n_clusters in clusters:
    gm = GaussianMixture(random_state=42, n_components=n_clusters)
    gm_labels = gm.fit_predict(X)
    sil = silhouette_score(X, gm_labels, metric="euclidean")
    print(f"{n_clusters}\t{sil}")

2	0.45506572865005773
3	0.49893287606943293
4	0.510682306926087
5	0.481377079675046
6	0.462912077774331
7	0.44345594385628
8	0.32796642598251413
9	0.3096981122180304
10	0.30558600896830485


In [23]:
# Sweep the candidate cluster counts with agglomerative clustering and print
# "<n_clusters><TAB><silhouette coefficient>" for each one.
for n_clusters in clusters:
    agg = AgglomerativeClustering(n_clusters=n_clusters)
    agg_labels = agg.fit_predict(X)
    sil = silhouette_score(X, agg_labels, metric="euclidean")
    print(f"{n_clusters}\t{sil}")

2	0.40560374862968174
3	0.4811992210663849
4	0.48470679039805054
5	0.4732978942735451
6	0.4360547281175275
7	0.3610107002721555
8	0.3470279907723165
9	0.27831000812401474
10	0.281314682459535
