In [2]:
import numpy as np

# Load the three pre-computed arrays stored next to this notebook.
compressed, feature, X_scaled = (
    np.load(npy_path)
    for npy_path in ('./compressed3.npy', './feature3.npy', './X_scaled3.npy')
)

In [3]:
# Sanity-check the loaded arrays: one shape per line.
print(X_scaled.shape, compressed.shape, sep="\n")

(1140, 28, 28, 1)
(1140, 7, 7, 2)


In [4]:
# Clustering Algorithm
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering, AgglomerativeClustering

# Normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Needed Library!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [5]:
def getAccuracy(data_y, pred_y):
    """Return the fraction of positions at which ``pred_y`` equals ``data_y``.

    Parameters
    ----------
    data_y : array-like
        Reference (ground-truth) labels.
    pred_y : array-like
        Predicted labels; must hold as many elements as ``data_y``.

    Returns
    -------
    float
        Number of matching positions divided by the number of predictions.

    Raises
    ------
    ValueError
        If the two inputs hold a different number of elements, or are empty.
    """
    # Flatten both inputs so plain lists and ndarrays are handled the same way.
    expected = np.asarray(data_y).ravel()
    predicted = np.asarray(pred_y).ravel()

    # A length mismatch would otherwise surface as an obscure comparison or
    # iteration error; fail early with a message that names the real problem.
    if expected.size != predicted.size:
        raise ValueError(
            "data_y and pred_y must have the same number of elements "
            "(got {} and {})".format(expected.size, predicted.size)
        )
    if predicted.size == 0:
        raise ValueError("cannot compute accuracy of empty label arrays")

    # Vectorised count of matches instead of a Python-level loop.
    matches = int(np.count_nonzero(expected == predicted))
    return matches / predicted.size

In [6]:
# Build ground-truth labels.
## Pass the list of class ids that labels should be generated for.
## e.g. list = [1, 4, 5] -> labels for class 1, class 4 and class 5
def getClassLabelFor(list, batch_size=190):
    """Return a 1-D array with every class id repeated ``batch_size`` times.

    The ids are emitted in the order given, each as one contiguous run, so
    ``[1, 4]`` with ``batch_size=2`` yields ``[1, 1, 4, 4]``.

    Parameters
    ----------
    list : iterable
        Class ids to generate labels for (the name shadows the builtin but is
        part of the existing signature).
    batch_size : int, default 190
        Number of consecutive labels emitted per class id.

    Returns
    -------
    numpy.ndarray
        Concatenated labels. The accumulator starts as an empty float array,
        so integer class ids come back as floats.
    """
    labels = np.array([])
    for class_id in list:
        class_run = np.full(batch_size, class_id)
        labels = np.hstack((labels, class_run))
    return labels

In [7]:
# Clustering helpers
def _scale_dataset(dataset, normalization):
    """Scale ``dataset`` with the scaler selected by ``normalization``.

    ``'standard'`` uses ``StandardScaler`` and ``'minmax'`` uses
    ``MinMaxScaler``. Any other value prints a notice and returns ``dataset``
    unchanged.
    """
    if normalization == 'standard':
        return StandardScaler().fit(dataset).transform(dataset)
    if normalization == 'minmax':
        return MinMaxScaler().fit(dataset).transform(dataset)
    print("정규화 진행 안함")
    return dataset


# K-Means
def kmeans(dataset, n_clusters, normalization='standard', random_state=None):
    """Scale ``dataset`` and fit ``KMeans`` on the result.

    Parameters
    ----------
    dataset : array-like
        Samples to cluster; passed as-is to the scaler / estimator.
    n_clusters : int
        Number of clusters requested from ``KMeans``.
    normalization : str, default 'standard'
        ``'standard'``, ``'minmax'``, or any other value to skip scaling.
    random_state : int or None, default None
        Forwarded to ``KMeans``. Set it to make the cluster numbering
        repeatable between runs; ``None`` keeps the estimator's default.

    Returns
    -------
    tuple
        ``(fitted KMeans estimator, scaled dataset)``.
    """
    scaled_dataset = _scale_dataset(dataset, normalization)
    # Prints the data actually handed to the estimator (kept from the original cell).
    print("Scaled_dataset: \n{}".format(scaled_dataset))
    cluster_data = KMeans(n_clusters=n_clusters, random_state=random_state).fit(scaled_dataset)
    return cluster_data, scaled_dataset


# DBSCAN
def dbscan(dataset, eps=0.5, min_samples=5, normalization='standard'):
    """Scale ``dataset`` and fit ``DBSCAN`` on the result.

    Parameters
    ----------
    dataset : array-like
        Samples to cluster.
    eps : float, default 0.5
        Forwarded to ``DBSCAN(eps=...)``.
    min_samples : int, default 5
        Forwarded to ``DBSCAN(min_samples=...)``.
    normalization : str, default 'standard'
        ``'standard'``, ``'minmax'``, or any other value to skip scaling.

    Returns
    -------
    tuple
        ``(fitted DBSCAN estimator, scaled dataset)``.
    """
    scaled_dataset = _scale_dataset(dataset, normalization)
    # Named ``model`` so the local does not shadow this function's own name.
    model = DBSCAN(eps=eps, min_samples=min_samples)
    cluster_data = model.fit(scaled_dataset)
    return cluster_data, scaled_dataset


# Spectral Clustering
def spectralClustering(dataset, n_clusters, n_init=10, normalization='standard', random_state=None):
    """Scale ``dataset`` and fit ``SpectralClustering`` on the result.

    Parameters
    ----------
    dataset : array-like
        Samples to cluster.
    n_clusters : int
        Number of clusters requested.
    n_init : int, default 10
        Forwarded to ``SpectralClustering(n_init=...)``.
    normalization : str, default 'standard'
        ``'standard'``, ``'minmax'``, or any other value to skip scaling.
    random_state : int or None, default None
        Forwarded to ``SpectralClustering``; set it for repeatable results.

    Returns
    -------
    tuple
        ``(fitted SpectralClustering estimator, scaled dataset)``.
    """
    scaled_dataset = _scale_dataset(dataset, normalization)
    cluster_data = SpectralClustering(
        n_clusters=n_clusters, n_init=n_init, random_state=random_state
    ).fit(scaled_dataset)
    return cluster_data, scaled_dataset


# Hierarchical Clustering
def hierarchicalClustering(dataset, n_clusters, n_init=10, linkage='ward', normalization='standard'):
    """Scale ``dataset`` and fit ``AgglomerativeClustering`` on the result.

    Parameters
    ----------
    dataset : array-like
        Samples to cluster.
    n_clusters : int
        Number of clusters requested.
    n_init : int, default 10
        Not used by this function; kept so existing positional calls keep working.
    linkage : str, default 'ward'
        Forwarded to ``AgglomerativeClustering(linkage=...)``.
    normalization : str, default 'standard'
        ``'standard'``, ``'minmax'``, or any other value to skip scaling.

    Returns
    -------
    tuple
        ``(fitted AgglomerativeClustering estimator, scaled dataset)``.
    """
    scaled_dataset = _scale_dataset(dataset, normalization)
    cluster_data = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage).fit(scaled_dataset)
    return cluster_data, scaled_dataset

In [8]:
# Cluster the feature matrix into 6 groups with K-Means after min-max scaling.
result, scaled_x = kmeans(dataset=feature, n_clusters=6, normalization='minmax')

Scaled_dataset: 
[[0.28699148 0.         0.07158716 ... 0.21458949 0.6689296  1.        ]
 [0.04903603 0.         0.03027538 ... 0.8099792  0.40901053 0.06793353]
 [0.15434486 0.         0.27478167 ... 0.29570422 0.11035186 0.44604456]
 ...
 [0.04903603 0.         0.02123009 ... 0.8099792  0.40901053 0.06793353]
 [0.24784172 0.         0.88789433 ... 0.23404753 0.3687706  0.06793353]
 [0.15434486 0.         0.26415908 ... 0.21656537 0.8586761  0.5890475 ]]


In [9]:
import sys
import numpy

# Raise NumPy's summarisation threshold so arrays are printed in full instead
# of being elided with "...". set_printoptions changes interpreter-wide state,
# so every array printed later in this kernel is shown in full as well.
numpy.set_printoptions(threshold=sys.maxsize)
# Cluster id that K-Means assigned to each sample.
print(result.labels_)

[3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3
 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 5 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2
 1 0 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1
 2 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 2
 4 5 3 2 1 2 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1 0 4
 5 3 2 1 0 4 5 3 0 1 2 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1 2 4 5
 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3
 2 1 0 4 5 3 2 1 2 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2
 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1
 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0
 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4
 5 3 2 1 0 4 5 3 0 1 0 4 5 3 2 1 2 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5
 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 2 1 0 4 5 3 0 1 2 4 5 3 0 1 0 4 5 3
 2 1 0 4 5 3 2 1 2 4 5 3 

In [10]:
# Expected cluster id per sample: a six-id pattern repeated 190 times
# (6 * 190 = 1140 entries).
# NOTE(review): the pattern looks hand-matched to the cluster ids of one
# particular K-Means fit; cluster numbering is arbitrary, so re-check it
# whenever the model is re-fitted.
y_label = 190 * [3, 2, 1, 0, 4, 5]

In [11]:
# Score the K-Means assignment against the expected label pattern.
class_ = np.array(result.labels_)
accuracy = getAccuracy(data_y=y_label, pred_y=class_)
print(accuracy)

0.9359649122807018


In [42]:
# Cluster the feature matrix with DBSCAN after min-max scaling.
result2, scaled_x2 = dbscan(feature, eps=1.192, min_samples=6, normalization='minmax')

In [43]:
# Cluster id DBSCAN assigned to each sample; -1 marks samples it left
# unassigned (noise).
print(result2.labels_)

[ 0  1 -1  2 -1 -1  0  1 -1  2 -1  3  0  1 -1  1 -1 -1  0  1 -1  2 -1  3
  0  1 -1  2  5 -1  0  1  4  2 -1 -1  0  1 -1  2 -1 -1  0  1 -1  2  5 -1
  0  1  4  2  6 -1  0  1 -1  2 -1 -1  0  1 -1  2 -1  3  0  1 -1  2 -1  3
  0  1 -1  2 -1 -1  0  1 -1  1 -1  3  0  1 -1  2  5 -1  0  1 -1  2  9  3
  0  1 -1  2 -1  3  0  1  4  2  9 -1  0  1 -1  1 -1  7  0  1  4  2 -1  7
  0  1 -1  1 -1  3  0  1  4  1 -1  3  0  1  4  2 -1  3  0  1 -1  2 -1  3
  0  1 -1  1 -1 -1  0  1  4  1 -1 -1  0  1 -1  1 -1  3  0  1  4  2 -1  3
  0  1  4  2  6  3  0  1 -1  1 -1  8  0  1  4  2 -1 -1  0  1 -1  2 -1  8
  0  2  4  1 -1  3  0  1  4  1 -1  8  0  1 -1  2 -1  3  0  1  4  1 -1  8
  0  1 -1  1 -1  3  0  1 -1  2  9  3  0  1 -1  2 -1 -1  0  1 -1  2 -1  3
  0  1 -1  2  6  3  0  1 -1  2 -1  3  0  1 -1  2 -1 -1  0  1 -1  2 -1  3
  0  1 -1  1 -1 -1  0  1 -1  1 -1  3  0  1 -1  2 -1 -1  0  1 -1  2  9  3
  0  1  4  2 -1 -1  0  1  4  2 -1  3  0  1 -1  2 -1 -1  0  1  4  2  9 -1
  0  1 -1  2 -1  3  0  1 -1  2 -1 -1  0  1 -1  1 -1

In [44]:
# Expected DBSCAN cluster id per sample: a six-id pattern repeated 190 times.
# NOTE(review): the pattern looks hand-matched to one particular DBSCAN
# result; re-check it if eps / min_samples or the data change.
y_label2 = 190 * [0, 1, 4, 2, 5, 3]

In [45]:
# Score the DBSCAN assignment against the expected label pattern.
class2_ = np.array(result2.labels_)
accuracy2 = getAccuracy(data_y=y_label2, pred_y=class2_)
print(accuracy2)

0.5719298245614035


In [46]:
# Cluster the feature matrix into 6 groups with agglomerative clustering,
# using the function's default scaling and linkage.
result3, scaled_x3 = hierarchicalClustering(feature, n_clusters=6)

In [47]:
# Cluster id the agglomerative model assigned to each sample.
print(result3.labels_)

[5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5
 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 1 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0
 2 4 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2
 0 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 0
 3 1 5 0 2 0 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2 4 3
 1 5 0 2 4 3 1 5 4 2 0 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2 0 3 1
 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5
 0 2 4 3 1 5 0 2 0 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0
 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2
 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4
 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3
 1 5 0 2 4 3 1 5 4 2 4 3 1 5 0 2 0 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1
 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 0 2 4 3 1 5 4 1 0 3 1 5 4 1 4 3 1 5
 0 2 4 3 1 5 0 2 0 3 1 5 

In [50]:
# Expected hierarchical-clustering id per sample: a six-id pattern repeated
# 190 times.
# NOTE(review): the pattern looks hand-matched to the ids of one particular
# fit; re-check it whenever the model is re-fitted.
y_label3 = 190 * [5, 0, 2, 4, 3, 1]

In [51]:
# Score the hierarchical assignment against the expected label pattern.
class3_ = np.array(result3.labels_)
accuracy3 = getAccuracy(data_y=y_label3, pred_y=class3_)
print(accuracy3)

0.9342105263157895


In [53]:
# Summary of the three accuracies; sep="" reproduces plain concatenation.
print("CMAMP_RP1_96의 KMeans Accuray : ", accuracy, sep="")
print("CMAMP_RP1_96의 DBSCAN Accuray : ", accuracy2, sep="")
print("CMAMP_RP1_96의 계층적 군집화 Accuray : ", accuracy3, sep="")

CMAMP_RP1_96의 KMeans Accuray : 0.9359649122807018
CMAMP_RP1_96의 DBSCAN Accuray : 0.5719298245614035
CMAMP_RP1_96의 계층적 군집화 Accuray : 0.9342105263157895
