In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# Sample count passed to each of the two make_blobs calls below.
n_samples = 1500

# First part of the data: two blobs with different spreads.
# `dataset` is the (points, labels) pair returned by make_blobs.
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)

# Second part: a single blob centred at (-4, -3), then pushed through a
# 2x2 linear map; its generated labels are discarded.
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    centers=[[-4, -3]],
    cluster_std=[1.9],
    random_state=170,
)
transformation = [[1.2, -0.8], [-0.4, 1.7]]
X_2 = np.dot(X_2, transformation)

# Stack both parts; every point of the transformed blob gets label 2.
X = np.concatenate((dataset[0], X_2))
y = np.concatenate((dataset[1], np.full(len(X_2), 2)))


In [2]:
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import homogeneity_score

# First, obtain the predicted cluster labels from the clustering method.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
kmeans_pred = kmeans.labels_

# Then compute homogeneity against the ground-truth labels; being the last
# expression of the cell, the score is what the cell displays.
homogeneity_score(labels_true=y, labels_pred=kmeans_pred)


0.807281738408743

In [6]:
# 4.12.1
from sklearn.cluster import AgglomerativeClustering
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN


# K-means
def calc_kmeans_homogeneity_score(clusters=3):
    """Cluster the module-level ``X`` with K-means and score the result.

    Args:
        clusters: Value passed to ``KMeans`` as ``n_clusters``.

    Returns:
        The homogeneity score of the fitted labels measured against the
        module-level ground-truth labels ``y``.
    """
    # random_state is pinned so repeated calls give the same partition.
    model = KMeans(n_clusters=clusters, random_state=42).fit(X)
    return homogeneity_score(labels_true=y, labels_pred=model.labels_)


# GaussianMixture
def calc_gaussianmixture_homogeneity_score(components=3):
    """Fit a Gaussian mixture on the module-level ``X`` and score it.

    Args:
        components: Value passed to ``GaussianMixture`` as ``n_components``.

    Returns:
        The homogeneity score of the predicted component assignments
        measured against the module-level ground-truth labels ``y``.
    """
    mixture = GaussianMixture(n_components=components, random_state=42).fit(X)
    # The mixture model exposes assignments through predict() on the same data.
    assignments = mixture.predict(X)
    return homogeneity_score(labels_true=y, labels_pred=assignments)


# Agglomerative clustering
def calc_agglomerative_clustering_homogeneity_score(clusters=3):
    """Run agglomerative clustering on the module-level ``X`` and score it.

    Args:
        clusters: Value passed to ``AgglomerativeClustering`` as
            ``n_clusters``.

    Returns:
        The homogeneity score of the fitted labels measured against the
        module-level ground-truth labels ``y``.
    """
    ac = AgglomerativeClustering(n_clusters=clusters)
    ac.fit(X)
    # Cast with the builtin ``int``: ``np.int`` was only an alias for it,
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
    y_pred = ac.labels_.astype(int)
    return homogeneity_score(labels_true=y, labels_pred=y_pred)


# DBSCAN
def calc_dbscan_homogeneity_score(epsilon=0.9, samples=35):
    """Run DBSCAN on the module-level ``X`` and score the resulting labels.

    Args:
        epsilon: Value passed to ``DBSCAN`` as ``eps``.
        samples: Value passed to ``DBSCAN`` as ``min_samples``.

    Returns:
        The homogeneity score of the fitted labels measured against the
        module-level ground-truth labels ``y``.
    """
    dbscan = DBSCAN(eps=epsilon, min_samples=samples)
    dbscan.fit(X)
    # Cast with the builtin ``int``: ``np.int`` was only an alias for it,
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
    y_pred = dbscan.labels_.astype(int)
    return homogeneity_score(labels_true=y, labels_pred=y_pred)


# Print one line per clustering method, each evaluated with its default
# parameters; every scorer is called right before its own line is printed.
for label, scorer in (
    ('K-means homogeneity_score: ', calc_kmeans_homogeneity_score),
    ('GaussianMixture homogeneity_score: ', calc_gaussianmixture_homogeneity_score),
    ('AgglomerativeClustering homogeneity_score: ', calc_agglomerative_clustering_homogeneity_score),
    ('DBSCAN homogeneity_score: ', calc_dbscan_homogeneity_score),
):
    print(label, scorer())


K-means homogeneity_score:  0.807281738408743
GaussianMixture homogeneity_score:  0.9339791347507893


AgglomerativeClustering homogeneity_score:  0.7179146987805298
DBSCAN homogeneity_score:  0.8689356473867522
