In [1]:
import numpy as np

# Load three precomputed arrays from .npy files in the notebook's directory.
# NOTE(review): the names suggest encoder outputs (compressed codes, feature
# vectors) and the scaled input images — confirm against the notebook that
# produced these files.
compressed = np.load('./compressed4.npy')
feature = np.load('./feature4.npy')
X_scaled = np.load('./X_scaled4.npy')

In [2]:
# Sanity check: show the shapes of two of the loaded arrays.
print(X_scaled.shape)
print(compressed.shape)

(1330, 28, 28, 1)
(1330, 7, 7, 2)


In [3]:
# Clustering Algorithm
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering, AgglomerativeClustering

# Normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Needed Library!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
def getAccuracy(data_y, pred_y):
    """Return the fraction of positions where ``pred_y`` equals ``data_y``.

    Parameters
    ----------
    data_y : array-like
        Reference (ground-truth) labels.
    pred_y : array-like
        Predicted labels. Must have the same shape as ``data_y``.

    Returns
    -------
    float
        Number of matching positions divided by the number of predictions.

    Raises
    ------
    ValueError
        If ``data_y`` and ``pred_y`` do not have the same shape.
    ZeroDivisionError
        If there are no predictions at all.
    """
    # Coerce both sides so plain lists work as well as ndarrays.
    truth = np.asarray(data_y)
    pred = np.asarray(pred_y)

    # A shape mismatch would otherwise broadcast or fail in a confusing way.
    if truth.shape != pred.shape:
        raise ValueError(
            "data_y and pred_y must have the same shape, got {} and {}".format(
                truth.shape, pred.shape
            )
        )

    # Count matches in one vectorised pass instead of a Python loop.
    matches = int(np.count_nonzero(truth == pred))
    return matches / pred.size

In [5]:
def getClassLabelFor(list, batch_size=190):
    """Build a flat label vector with each class id repeated ``batch_size`` times.

    Example: ``getClassLabelFor([1, 4, 5], batch_size=2)`` yields
    ``[1, 1, 4, 4, 5, 5]``.

    Parameters
    ----------
    list : iterable
        Class ids, in the order their runs should appear in the result.
        (The parameter name shadows the builtin; it is kept so existing
        keyword callers continue to work.)
    batch_size : int, default 190
        Number of consecutive copies emitted for each class id.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(list) * batch_size``. The dtype follows the
        class ids, so integer ids stay integers.
    """
    # tuple(...) materialises any iterable; the builtin list() is shadowed here.
    class_ids = np.asarray(tuple(list))
    # np.repeat emits each id batch_size times in order, in a single allocation.
    return np.repeat(class_ids, batch_size)

In [6]:
# K-Means
def kmeans(dataset, n_clusters, normalization='standard', random_state=None):
    """Optionally scale ``dataset`` and fit a K-Means model on it.

    Parameters
    ----------
    dataset : array-like
        Data handed to the scaler and to ``KMeans.fit``.
        NOTE(review): assumed to be 2-D (n_samples, n_features) — confirm.
    n_clusters : int
        Number of clusters to form.
    normalization : {'standard', 'minmax'} or anything else
        'standard' applies ``StandardScaler``, 'minmax' applies
        ``MinMaxScaler``. Any other value skips scaling and prints a notice.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``KMeans``. Pass a fixed value to make the cluster ids
        repeatable between runs; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(fitted KMeans estimator, dataset as it was passed to fit)``.
    """
    if normalization == 'standard':
        scaled_dataset = StandardScaler().fit(dataset).transform(dataset)
    elif normalization == 'minmax':
        scaled_dataset = MinMaxScaler().fit(dataset).transform(dataset)
    else:
        # No scaling requested: use the data unchanged and say so.
        scaled_dataset = dataset
        print("정규화 진행 안함")

    # Echo the data that is about to be clustered.
    print("Scaled_dataset: \n{}".format(scaled_dataset))

    cluster_data = KMeans(n_clusters=n_clusters, random_state=random_state).fit(scaled_dataset)
    return cluster_data, scaled_dataset

# DBSCAN
def dbscan(dataset, eps=0.5, min_samples=5, normalization='standard'):
    """Optionally scale ``dataset`` and fit a DBSCAN model on it.

    ``normalization`` selects the scaler: 'standard' uses ``StandardScaler``,
    'minmax' uses ``MinMaxScaler``; any other value skips scaling and prints
    a notice. ``eps`` and ``min_samples`` are forwarded to ``DBSCAN``.

    Returns ``(fitted DBSCAN estimator, dataset as it was passed to fit)``.
    """
    # Pick the scaler first; None means "leave the data as is".
    if normalization == 'standard':
        scaler = StandardScaler()
    elif normalization == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = None

    if scaler is None:
        scaled_dataset = dataset
        print("정규화 진행 안함")
    else:
        scaled_dataset = scaler.fit(dataset).transform(dataset)

    estimator = DBSCAN(eps=eps, min_samples=min_samples)
    return estimator.fit(scaled_dataset), scaled_dataset

# Spectral Clustering
def spectralClustering(dataset, n_clusters, n_init = 10, normalization='standard'):
    """Optionally scale ``dataset`` and fit a spectral-clustering model on it.

    ``normalization`` selects the scaler: 'standard' uses ``StandardScaler``,
    'minmax' uses ``MinMaxScaler``; any other value skips scaling and prints
    a notice. ``n_clusters`` and ``n_init`` are forwarded to
    ``SpectralClustering``.

    Returns ``(fitted SpectralClustering estimator, dataset as passed to fit)``.
    """
    if normalization in ('standard', 'minmax'):
        scaler_cls = StandardScaler if normalization == 'standard' else MinMaxScaler
        scaled_dataset = scaler_cls().fit(dataset).transform(dataset)
    else:
        # Unrecognised option: cluster the raw data and report that.
        scaled_dataset = dataset
        print("정규화 진행 안함")

    model = SpectralClustering(n_clusters=n_clusters, n_init=n_init)
    return model.fit(scaled_dataset), scaled_dataset

# Hierarchical Clustering
def hierarchicalClustering(dataset, n_clusters, n_init = 10, linkage = 'ward', normalization='standard'):
    """Optionally scale ``dataset`` and fit an agglomerative clustering model.

    ``normalization`` selects the scaler: 'standard' uses ``StandardScaler``,
    'minmax' uses ``MinMaxScaler``; any other value skips scaling and prints
    a notice. ``n_clusters`` and ``linkage`` are forwarded to
    ``AgglomerativeClustering``.

    ``n_init`` is accepted but not used anywhere in this function.

    Returns ``(fitted AgglomerativeClustering estimator, dataset as passed to fit)``.
    """
    scaler_by_name = {'standard': StandardScaler, 'minmax': MinMaxScaler}
    scaler_cls = scaler_by_name.get(normalization)

    if scaler_cls is not None:
        scaled_dataset = scaler_cls().fit(dataset).transform(dataset)
    else:
        # Unrecognised option: cluster the raw data and report that.
        scaled_dataset = dataset
        print("정규화 진행 안함")

    model = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
    return model.fit(scaled_dataset), scaled_dataset

In [7]:
# Cluster the feature array into 7 groups with K-Means after min-max scaling.
# kmeans() also prints the scaled data it clusters.
result, scaled_x = kmeans(feature, 7, normalization='minmax')

Scaled_dataset: 
[[0.5650824  0.3389668  0.48680142 ... 0.2651287  0.15985456 0.09067261]
 [0.14653423 0.37081832 0.23992121 ... 0.70367575 0.         0.40230024]
 [0.7119544  0.92514485 0.59025264 ... 0.581699   0.01537091 0.6982856 ]
 ...
 [0.2368486  0.6325552  0.6907741  ... 0.10357791 0.01596511 0.09261549]
 [0.1760668  0.36391813 0.20140629 ... 0.14144441 0.47990534 0.41146702]
 [0.06501773 0.29593056 0.48553616 ... 0.47705004 0.57893443 0.38249332]]


In [8]:
import sys
import numpy

# Raise NumPy's print threshold so arrays are printed in full rather than
# summarised with "...". set_printoptions changes a global setting, so it
# also affects every array printed by later cells.
numpy.set_printoptions(threshold=sys.maxsize)
# Cluster id assigned to each sample by the K-Means fit above.
print(result.labels_)

[0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5
 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 0 3 4 0 5 6 1 2 3 4 0 1 0 1
 2 3 4 0 5 6 1 2 3 4 0 5 0 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3
 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0
 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 0
 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 5 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2
 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4
 0 1 6 1 2 3 4 0 5 6 1 2 4 4 0 5 6 5 2 3 4 0 5 6 5 0 4 4 0 5 6 5 2 4 4 4 5
 6 1 2 4 4 0 5 6 5 2 4 4 0 5 6 1 2 3 4 0 5 6 5 2 4 4 0 5 6 5 0 4 4 0 5 6 5
 2 4 0 0 1 6 1 2 4 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 3 4 0 5 6 1 2 4
 0 0 5 6 5 2 4 4 0 5 6 1 2 3 4 0 5 6 5 2 3 4 0 5 6 5 2 3 4 0 1 6 1 2 3 4 0
 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 1 6
 1 2 3 4 0 1 0 1 2 3 4 0 1 6 1 2 3 4 0 1 6 1 2 3 4 0 5 6 1 2 3 4 0 1 6 1 2
 3 4 0 1 6 1 2 3 4 0 1 6 

In [9]:
# Expected cluster id per sample: a 7-id pattern repeated 190 times
# (7 * 190 = 1330 entries).
# NOTE(review): the pattern equals the first seven labels printed by the
# K-Means run above, so it presumably assumes the samples cycle through the
# seven classes in a fixed order AND that K-Means assigns these exact ids.
# K-Means ids are arbitrary and may differ on re-run — confirm / regenerate.
y_label = [0, 5, 6, 1, 2, 3, 4] * 190

In [10]:
# Fraction of samples whose K-Means cluster id matches the expected pattern.
class_ = np.array(result.labels_)
accuracy = getAccuracy(y_label, class_)
print(accuracy)

0.8481203007518797


In [109]:
# Run DBSCAN on the min-max-scaled features with eps=0.9713975, min_samples=7.
# NOTE(review): how this eps value was chosen is not recorded in the notebook.
result2, scaled_x2 = dbscan(feature, 0.9713975, 7, normalization='minmax')

In [110]:
# Per-sample DBSCAN labels; scikit-learn marks noise samples with -1.
print(result2.labels_)

[-1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1  5  2 -1 -1  0 -1
  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2
 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  1
 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1
  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1
  0 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1
 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1
 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1
  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  1 -1  1 -1  2
 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1
 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1  2 -1 -1  1 -1  1 -1
  2 -1 -1  0 -1  1 -1  3 -1 -1  0 -1 -1 -1  2 -1 -1  0 -1 -1 -1  3 -1 -1
  0 -1 -1 -1  3 -1 -1  0 -1  1 -1  3 -1 -1  0 -1 -1 -1  3 -1 -1  0 -1  1
 -1  2 -1 -1  0 -1 -1 -1  3 -1 -1  0 -1 -1 -1  3 -1

In [111]:
# Agglomerative clustering into 7 groups, using the function's defaults
# (ward linkage, StandardScaler normalisation).
result3, scaled_x3 = hierarchicalClustering(feature, 7)

In [112]:
# Cluster id assigned to each sample by the agglomerative fit above.
print(result3.labels_)

[2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3
 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 4 2 4
 0 1 6 2 3 5 4 0 1 6 2 3 2 4 0 1 6 2 3 5 4 0 1 6 2 3 2 4 0 1 2 2 3 5 4 0 1
 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2
 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 6 4 5 4 0 1 6 2 4 2
 4 0 1 6 2 4 5 4 0 1 6 6 4 5 4 0 1 6 2 3 5 4 0 1 6 2 4 2 4 0 1 6 2 4 2 4 0
 1 6 2 4 5 4 0 1 6 2 4 2 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6
 2 4 5 4 0 1 6 2 3 5 4 0 2 6 2 3 5 3 0 1 6 2 3 5 3 0 2 6 2 3 5 3 0 2 6 2 3
 5 4 0 2 6 2 3 5 3 0 2 6 2 3 5 4 0 1 6 2 3 5 3 0 2 6 2 3 5 3 0 2 6 2 3 5 3
 0 2 6 2 4 5 4 0 2 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 1 6 2 3 5 4 0 2
 6 2 3 5 3 0 2 6 2 3 5 4 0 1 2 2 3 5 3 0 1 6 2 3 5 3 0 1 6 2 4 5 4 0 1 6 2
 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5
 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 4 5 4 0 1 6 2 3 5 4 0 1 6 2 4 5 4 0
 1 6 2 4 5 4 0 1 6 2 4 5 

In [113]:
# Expected cluster id per sample for the agglomerative result: a 7-id pattern
# repeated 190 times (1330 entries).
# NOTE(review): the pattern equals the first seven labels printed above, so it
# presumably assumes the samples cycle through the seven classes in a fixed
# order and is tied to the ids this particular fit produced — confirm.
y_label3 = [2, 3, 5, 4, 0, 1, 6] * 190

In [114]:
# Fraction of samples whose agglomerative cluster id matches the expected pattern.
class3_ = np.array(result3.labels_)
accuracy3 = getAccuracy(y_label3, class3_)
print(accuracy3)

0.8541353383458646


In [116]:
# Summary of the accuracies computed above for K-Means and agglomerative
# clustering. No accuracy is computed for DBSCAN; its line prints a fixed
# message (roughly: "not measurable - no pattern").
print("ASISX_RP1_28의 KMeans Accuray : " + str(accuracy))
print("ASISX_RP1_28의 DBSCAN Accuray : 측정 불가능 - 패턴이 없음")
print("ASISX_RP1_28의 계층적 군집화 Accuray : " + str(accuracy3))

ASISX_RP1_28의 KMeans Accuray : 0.8481203007518797
ASISX_RP1_28의 DBSCAN Accuray : 측정 불가능 - 패턴이 없음
ASISX_RP1_28의 계층적 군집화 Accuray : 0.8541353383458646
