In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# Two Gaussian blobs with different spreads, generated with a fixed seed.
n_samples = 1500
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)

# A third blob centred at (-4, -3), passed through a fixed linear map so that
# it ends up stretched/skewed relative to the first two.
transformation = [[1.2, -0.8], [-0.4, 1.7]]
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    centers=[[-4, -3]],
    cluster_std=[1.9],
    random_state=170,
)
X_2 = np.dot(X_2, transformation)

# Stack all points into one feature matrix; every point of the transformed
# blob receives the true label 2.
X = np.concatenate((dataset[0], X_2))
y = np.concatenate((dataset[1], np.full(len(X_2), 2)))


In [2]:
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import completeness_score

# First obtain the predicted clusters by fitting the clustering method.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
kmeans_pred = kmeans.labels_

# Now compute completeness against the true labels; as the last expression
# of the cell, the score is shown as the cell output.
completeness_score(y, kmeans_pred)


0.7842373379017128

In [5]:
# 4.13.1
from sklearn.mixture import GaussianMixture


# GaussianMixture
def calc_gaussianmixture_completeness_score(components=3):
    """Return the completeness score of a Gaussian mixture fitted on ``X``.

    The mixture is fitted on the notebook-level feature matrix ``X`` with a
    fixed ``random_state=42``; its predictions for ``X`` are scored against
    the notebook-level true labels ``y``.

    Args:
        components: Number of mixture components to fit.

    Returns:
        The value returned by ``completeness_score`` for the predicted labels.
    """
    mixture = GaussianMixture(n_components=components, random_state=42).fit(X)
    return completeness_score(y, mixture.predict(X))


print(calc_gaussianmixture_completeness_score())

0.9325740421656737


In [6]:
# 4.13.2
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN


# K-means
def calc_kmeans_completeness_score(clusters=3):
    """Return the completeness score of K-means clustering on ``X``.

    K-means is fitted on the notebook-level feature matrix ``X`` with a fixed
    ``random_state=42``; the labels assigned during fitting are scored against
    the notebook-level true labels ``y``.

    Args:
        clusters: Number of clusters to form.

    Returns:
        The value returned by ``completeness_score`` for the fitted labels.
    """
    model = KMeans(n_clusters=clusters, random_state=42).fit(X)
    return completeness_score(y, model.labels_)


# Agglomerative clustering
def calc_agglomerative_clustering_completeness_score(clusters=3):
    """Return the completeness score of agglomerative clustering on ``X``.

    The model is fitted on the notebook-level feature matrix ``X`` and the
    labels assigned during fitting are scored against the notebook-level
    true labels ``y``.

    Args:
        clusters: Number of clusters to find.

    Returns:
        The value returned by ``completeness_score`` for the fitted labels.
    """
    ac = AgglomerativeClustering(n_clusters=clusters)
    ac.fit(X)
    # ``np.int`` was only an alias for the built-in ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the built-in is used directly.
    y_pred = ac.labels_.astype(int)
    return completeness_score(labels_true=y, labels_pred=y_pred)


# DBSCAN
def calc_dbscan_completeness_score(epsilon=0.9, samples=35):
    """Return the completeness score of DBSCAN clustering on ``X``.

    DBSCAN is fitted on the notebook-level feature matrix ``X`` and the labels
    assigned during fitting are scored against the notebook-level true labels
    ``y``. No filtering is applied to the labels, so whatever label DBSCAN
    gives to noise points (``-1`` per the scikit-learn documentation) is
    scored like any other cluster.

    Args:
        epsilon: Neighbourhood radius, passed to DBSCAN as ``eps``.
        samples: Passed to DBSCAN as ``min_samples``.

    Returns:
        The value returned by ``completeness_score`` for the fitted labels.
    """
    dbscan = DBSCAN(eps=epsilon, min_samples=samples)
    dbscan.fit(X)
    # ``np.int`` was only an alias for the built-in ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the built-in is used directly.
    y_pred = dbscan.labels_.astype(int)
    return completeness_score(labels_true=y, labels_pred=y_pred)


# Report every method's completeness score, one line per method, evaluating
# the scorers in the listed order with their default parameters.
score_reports = (
    ('K-means completeness_score: ', calc_kmeans_completeness_score),
    ('GaussianMixture completeness_score: ', calc_gaussianmixture_completeness_score),
    ('AgglomerativeClustering completeness_score: ', calc_agglomerative_clustering_completeness_score),
    ('DBSCAN completeness_score: ', calc_dbscan_completeness_score),
)
for caption, scorer in score_reports:
    print(caption, scorer())


K-means completeness_score:  0.7842373379017128
GaussianMixture completeness_score:  0.9325740421656737


AgglomerativeClustering completeness_score:  0.6901277117253529
DBSCAN completeness_score:  0.6965554058940557
