# K-means 觀察 : 使用輪廓分析

# [作業目標]
- 試著模仿範例寫法, 利用隨機生成的 5 群高斯分布資料, 以輪廓分析來觀察 K-means 分群時不同 K 值的比較

# [作業重點]
- 使用輪廓分析的圖表, 以及實際的分群散佈圖, 觀察 K-means 分群法在 K 有所不同時, 分群的效果如何變化 (In[3], Out[3])

# 作業
* 試著模擬出 5 群高斯分布的資料, 並以此觀察 K-means 與輪廓分析的結果

In [1]:
# 載入套件
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.metrics import silhouette_samples, silhouette_score

# Seed NumPy's global random number generator so that any code drawing from
# np.random produces the same sequence on every run.
np.random.seed(5)

# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output.
%matplotlib inline

In [2]:
# Candidate numbers of clusters (K) to evaluate: 2 through 8 inclusive.
range_n_clusters = list(range(2, 9))

# Generate 500 two-dimensional samples drawn from 5 Gaussian blobs.
# X holds the sample coordinates and y the index of the blob each sample
# came from; the fixed random_state makes the data set reproducible.
X, y = make_blobs(
    n_samples=500, n_features=2, centers=5,
    cluster_std=1, center_box=(-10.0, 10.0),
    shuffle=True, random_state=123,
)

In [10]:
# Compute the mean silhouette score of a K-means clustering for every
# candidate K in range_n_clusters. This cell only prints the scores; it does
# not draw any figure.
for i in range_n_clusters:
    # n_init is pinned explicitly: the library default changed from 10 to
    # 'auto' in scikit-learn 1.4 (and emits a FutureWarning in 1.2/1.3), so
    # relying on the default makes the scores depend on the installed version.
    estimators = KMeans(n_clusters=i, n_init=10, random_state=0)
    # Fit on X and get the cluster index assigned to each sample.
    output = estimators.fit_predict(X)
    # Mean silhouette coefficient over all samples (higher is better).
    silhouette = silhouette_score(X, output)
    print(f'分{i}群： silhouette score : {silhouette}')

分2群： silhouette score : 0.5027144446956527
分3群： silhouette score : 0.6105565451092732
分4群： silhouette score : 0.6270122040179333
分5群： silhouette score : 0.6115749260799671
分6群： silhouette score : 0.5494466732541075
分7群： silhouette score : 0.468332257811922
分8群： silhouette score : 0.45252651622165796


In [11]:
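# 由輪廓分析得出：分 4 群的平均輪廓係數最高 (約 0.627)，分 3、5 群次之 (約 0.611)，分 6 群以上分數明顯下降
# 資料實際上是以 5 個中心生成的，可見只看平均輪廓係數未必能還原真實群數，仍需搭配輪廓圖與散佈圖一起判斷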

In [12]:
# Fit K-means with K=5 and show the cluster index assigned to each sample.
# n_init is pinned explicitly because the library default changed from 10 to
# 'auto' in scikit-learn 1.4, which would make the result version-dependent.
estimators = KMeans(n_clusters=5, n_init=10, random_state=0)
output = estimators.fit_predict(X)
# Bare expression: the notebook displays the label array as the cell output.
output

array([3, 2, 3, 0, 3, 0, 2, 3, 4, 4, 4, 3, 3, 1, 3, 2, 3, 1, 2, 3, 2, 3,
       2, 3, 0, 4, 1, 3, 3, 1, 2, 1, 2, 3, 1, 2, 2, 4, 0, 3, 1, 3, 4, 2,
       3, 4, 4, 2, 3, 0, 4, 2, 3, 0, 4, 4, 1, 2, 1, 2, 2, 1, 1, 1, 2, 4,
       2, 0, 4, 0, 0, 1, 1, 3, 0, 1, 0, 1, 2, 0, 2, 0, 3, 0, 1, 2, 0, 1,
       4, 2, 1, 4, 2, 2, 2, 0, 2, 2, 1, 4, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2,
       0, 0, 1, 0, 0, 0, 1, 2, 0, 1, 1, 3, 3, 0, 3, 2, 2, 4, 3, 0, 0, 3,
       0, 2, 2, 1, 1, 4, 3, 4, 0, 1, 4, 2, 4, 2, 1, 3, 0, 4, 2, 3, 2, 3,
       3, 1, 4, 0, 4, 3, 0, 2, 4, 3, 1, 4, 2, 2, 3, 3, 4, 1, 0, 0, 0, 2,
       1, 0, 3, 4, 3, 3, 2, 0, 0, 2, 4, 1, 1, 3, 1, 1, 4, 1, 2, 2, 3, 0,
       1, 3, 3, 0, 1, 2, 4, 3, 2, 2, 1, 4, 4, 4, 1, 3, 4, 2, 4, 4, 0, 3,
       1, 3, 4, 4, 0, 1, 3, 1, 3, 1, 3, 3, 4, 1, 0, 0, 1, 4, 2, 0, 1, 3,
       2, 0, 4, 1, 4, 2, 3, 3, 3, 2, 4, 0, 0, 1, 1, 2, 4, 2, 1, 2, 1, 1,
       1, 2, 2, 1, 4, 4, 0, 3, 0, 3, 0, 3, 0, 2, 3, 0, 3, 3, 1, 1, 4, 2,
       4, 3, 4, 4, 0, 2, 0, 0, 1, 2, 3, 4, 4, 0, 2,

In [13]:
# Display the blob labels returned by make_blobs, for comparison with the
# K-means assignments. NOTE(review): cluster indices produced by K-means are
# arbitrary, so the two arrays should be compared up to a relabelling.
y

array([3, 4, 3, 2, 3, 2, 4, 3, 0, 0, 0, 3, 3, 1, 3, 4, 3, 1, 4, 3, 4, 3,
       4, 3, 2, 0, 1, 3, 3, 1, 4, 1, 4, 3, 1, 4, 4, 0, 2, 3, 1, 3, 0, 4,
       3, 0, 0, 4, 3, 2, 0, 4, 3, 2, 0, 0, 1, 4, 1, 4, 0, 1, 1, 1, 4, 0,
       4, 2, 0, 2, 2, 1, 1, 3, 2, 1, 2, 1, 4, 2, 4, 2, 3, 2, 1, 4, 2, 1,
       0, 4, 1, 0, 4, 4, 4, 2, 4, 4, 1, 2, 4, 4, 1, 1, 4, 4, 1, 1, 4, 4,
       2, 2, 1, 2, 2, 2, 1, 4, 2, 1, 1, 3, 3, 2, 3, 4, 4, 0, 3, 0, 2, 3,
       2, 4, 4, 1, 1, 0, 3, 0, 2, 1, 0, 4, 0, 4, 1, 3, 2, 0, 4, 3, 4, 3,
       3, 1, 0, 0, 0, 3, 2, 4, 0, 3, 1, 0, 4, 4, 3, 3, 0, 1, 2, 2, 2, 4,
       1, 2, 3, 2, 3, 3, 4, 2, 2, 4, 0, 1, 1, 3, 1, 1, 0, 1, 4, 4, 3, 2,
       1, 3, 3, 2, 1, 4, 0, 3, 4, 4, 1, 0, 0, 0, 1, 3, 0, 4, 2, 2, 2, 3,
       1, 3, 0, 0, 2, 1, 3, 1, 3, 1, 3, 3, 0, 1, 2, 2, 1, 0, 4, 0, 1, 3,
       4, 2, 0, 1, 0, 4, 3, 3, 3, 4, 0, 0, 4, 1, 1, 4, 0, 4, 1, 4, 1, 1,
       1, 4, 4, 1, 0, 0, 2, 3, 2, 3, 2, 3, 2, 4, 3, 2, 3, 3, 1, 1, 0, 4,
       0, 3, 0, 0, 0, 4, 2, 2, 1, 4, 3, 0, 0, 2, 2,