In [None]:
from sklearn.datasets import load_iris
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

# Load the iris dataset bundled with scikit-learn.
iris = load_iris()

# Snake-case column names for the four measurement features.
feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# Build the feature frame and attach the class label as a 'target' column.
irisDF = pd.DataFrame(iris.data, columns=feature_names).assign(target=iris.target)

In [None]:
# n_components: number of Gaussian components in the mixture model.
gmm = GaussianMixture(n_components=3, random_state=0)
gmm.fit(iris.data)
gmm_cluster_labels = gmm.predict(iris.data)

# Attach the clustering result to the frame.
irisDF['gmm_cluster'] = gmm_cluster_labels

# Count, per true class, how many samples fell into each GMM cluster.
iris_result = (
    irisDF
    .groupby(['target'])['gmm_cluster']
    .value_counts()
)
print(iris_result)

In [None]:
def visualize_cluster_plot(clusterobj, dataframe, label_name, iscenter=True):
    if iscenter : # K-means의 클러스터링 중심점
        centers = clusterobj.cluster_centers_
        
    unique_labels = np.unique(dataframe[label_name].values)
    markers=['o', 's', '^', 'x', '*']
    isNoise=False

    for label in unique_labels: 
        label_cluster = dataframe[dataframe[label_name]==label]
        if label == -1: # DBSCAN을 위한
            cluster_legend = 'Noise'
            isNoise=True
        else :
            cluster_legend = 'Cluster '+str(label)
        
        plt.scatter(x=label_cluster['ftr1'], y=label_cluster['ftr2'], s=70, edgecolor='k', marker=markers[label], label=cluster_legend)
        
        if iscenter: # K-means을 위한
            center_x_y = centers[label]
            plt.scatter(x=center_x_y[0], y=center_x_y[1], s=250, color='white', alpha=0.9, edgecolor='k', marker=markers[label])
            plt.scatter(x=center_x_y[0], y=center_x_y[1], s=70, color='k', edgecolor='k', marker='$%d$' % label)
    if isNoise:
        legend_loc='upper center'
    else: legend_loc='upper right'
    
    plt.legend(loc=legend_loc)
    plt.show()

In [None]:
from sklearn.datasets import make_blobs

# Draw 300 two-dimensional samples around 3 blob centres.
X, y = make_blobs(
    n_samples=300,
    n_features=2,
    centers=3,
    cluster_std=0.5,
    random_state=0,
)

# Linear map used to stretch the blobs into elongated, ellipse-shaped clusters.
transformation = [
    [0.60834549, -0.63667341],
    [-0.40887718, 0.85253229],
]
X_aniso = X.dot(transformation)

# Frame holding the transformed coordinates together with the true blob label.
clusterDF = pd.DataFrame(X_aniso, columns=['ftr1', 'ftr2']).assign(target=y)
visualize_cluster_plot(None, clusterDF, 'target', iscenter=False)

**1. Mean Shift**

In [None]:
from sklearn.cluster import MeanShift
from sklearn.cluster import estimate_bandwidth

# Estimate the kernel bandwidth from the transformed (anisotropic) data.
best_bandwidth = estimate_bandwidth(X_aniso, quantile=0.25)

# `bandwidth` is keyword-only in current scikit-learn; passing it positionally
# raises TypeError.
meanshift = MeanShift(bandwidth=best_bandwidth)

# Cluster X_aniso, not the raw X: the bandwidth above was estimated on X_aniso
# and clusterDF's ftr1/ftr2 columns hold X_aniso, so labels fitted on X would
# describe a different dataset than the one being plotted.
cluster_labels = meanshift.fit_predict(X_aniso)
clusterDF['Meanshift_label'] = cluster_labels
print('cluster labels 유형:', np.unique(cluster_labels))
visualize_cluster_plot(meanshift, clusterDF, 'Meanshift_label', iscenter=False)

In [None]:
# Per true blob label, count how many samples landed in each Mean Shift cluster.
print(
    clusterDF
    .groupby('target')['Meanshift_label']
    .value_counts()
)

**2. K-means**

In [None]:
from sklearn.cluster import KMeans

# K-means with 3 clusters on the stretched data.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans_label = kmeans.fit_predict(X_aniso)
clusterDF['kmeans_label'] = kmeans_label

# Plot the assignment together with the fitted cluster centres.
visualize_cluster_plot(
    kmeans,
    clusterDF,
    'kmeans_label',
    iscenter=True,
)

In [None]:
# Per true blob label, count how many samples landed in each K-means cluster.
print(
    clusterDF
    .groupby('target')['kmeans_label']
    .value_counts()
)

**3. GMM**

In [None]:
# Gaussian mixture with 3 components on the stretched data.
gmm = GaussianMixture(n_components=3, random_state=0)
gmm.fit(X_aniso)
gmm_label = gmm.predict(X_aniso)
clusterDF['gmm_label'] = gmm_label

# No centres are drawn for the mixture model.
visualize_cluster_plot(
    gmm,
    clusterDF,
    'gmm_label',
    iscenter=False,
)

In [None]:
# Per true blob label, count how many samples landed in each GMM cluster.
print(
    clusterDF
    .groupby('target')['gmm_label']
    .value_counts()
)