In [1]:
import numpy as np

# Load the three pre-computed arrays from the current working directory.
# `feature` is the array handed to the clustering helpers further down;
# presumably it is 2-D (samples x features) since scikit-learn scalers are fit on it — TODO confirm.
compressed = np.load('./compressed2.npy')
feature = np.load('./feature2.npy')
X_scaled = np.load('./X_scaled2.npy')

In [2]:
# Sanity check: show the shapes of the loaded arrays.
print(X_scaled.shape)
print(compressed.shape)

(1330, 96, 96, 1)
(1330, 12, 12, 2)


In [3]:
# Clustering Algorithm
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering, AgglomerativeClustering

# Normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Needed Library!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Compute the accuracy of a prediction.
# data_y: ground-truth labels
# pred_y: predicted labels
def getAccuracy(data_y, pred_y):
    """Return the fraction of positions where ``pred_y`` equals ``data_y``.

    Parameters
    ----------
    data_y : array-like
        Ground-truth labels. A plain Python list is accepted.
    pred_y : array-like
        Predicted labels. A plain Python list is accepted.

    Returns
    -------
    float
        Number of matching positions divided by the number of predictions.

    Raises
    ------
    ValueError
        If the element-wise comparison does not line up with ``pred_y``
        (e.g. different lengths) or if ``pred_y`` is empty.
    """
    truth = np.asarray(data_y)
    pred = np.asarray(pred_y)

    if pred.size == 0:
        raise ValueError("pred_y is empty; accuracy is undefined")

    # Depending on the NumPy version, comparing arrays of incompatible shapes
    # either raises ValueError by itself or yields a scalar False; the shape
    # check below turns the second case into an explicit error as well.
    matches = np.asarray(truth == pred)
    if matches.shape != pred.shape:
        raise ValueError(
            "data_y and pred_y do not line up: shapes {} vs {}".format(
                truth.shape, pred.shape
            )
        )

    # Vectorised count instead of a Python-level loop over every element.
    return np.count_nonzero(matches) / pred.size

In [5]:
# Build the ground-truth label array.
## Pass the list of class labels you want.
## e.g. list = [1, 4, 5] -> labels are generated for classes 1, 4 and 5
def getClassLabelFor(list, batch_size=190):
    """Return a 1-D array with every label repeated ``batch_size`` times.

    Labels appear in the order given, each as one contiguous run.

    Parameters
    ----------
    list : iterable
        Class labels to expand (the name shadows the builtin but is part of
        the public signature, so it is kept).
    batch_size : int, default 190
        Number of consecutive copies of each label.

    Returns
    -------
    numpy.ndarray
        Concatenation of the per-label runs. The concatenation is seeded with
        an empty float array, so numeric labels come back as floats and an
        empty input yields an empty array.
    """
    # Seed with the empty float array so dtype promotion is unchanged.
    pieces = [np.array([])]
    pieces.extend(np.full(batch_size, label) for label in list)
    return np.hstack(pieces)

In [6]:
# Cluster Algorithm
def kmeans(dataset, n_clusters, normalization='standard', random_state=None):
    """Optionally scale ``dataset`` and fit a KMeans model on the result.

    Parameters
    ----------
    dataset : array-like
        Data passed to the scaler and then to ``KMeans.fit``.
    n_clusters : int
        Number of clusters requested from KMeans.
    normalization : str, default 'standard'
        'standard' applies ``StandardScaler``, 'minmax' applies
        ``MinMaxScaler``; any other value skips scaling and prints a notice.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``KMeans``. Pass a fixed integer to get the same cluster
        ids on every run; ``None`` keeps the previous unseeded behaviour, in
        which the numbering of clusters may change between runs.

    Returns
    -------
    tuple
        ``(fitted KMeans estimator, data that was clustered)``.
    """
    if normalization == 'standard':
        scaled_dataset = StandardScaler().fit(dataset).transform(dataset)
    elif normalization == 'minmax':
        scaled_dataset = MinMaxScaler().fit(dataset).transform(dataset)
    else:
        # No scaling requested: cluster the data as given.
        # The message below reads "normalization not performed".
        scaled_dataset = dataset
        print("정규화 진행 안함")

    # Kept from the original implementation: echo the data that gets clustered.
    print("Scaled_dataset: \n{}".format(scaled_dataset))

    cluster_data = KMeans(
        n_clusters=n_clusters, random_state=random_state
    ).fit(scaled_dataset)
    return cluster_data, scaled_dataset

# DBSCAN
def dbscan(dataset, eps=0.5, min_samples=5, normalization='standard'):
    """Optionally scale ``dataset`` and fit a DBSCAN model on the result.

    Parameters
    ----------
    dataset : array-like
        Data passed to the scaler and then to ``DBSCAN.fit``.
    eps : float, default 0.5
        Forwarded to ``DBSCAN`` as ``eps``.
    min_samples : int, default 5
        Forwarded to ``DBSCAN`` as ``min_samples``.
    normalization : str, default 'standard'
        'standard' applies ``StandardScaler``, 'minmax' applies
        ``MinMaxScaler``; any other value skips scaling and prints a notice.

    Returns
    -------
    tuple
        ``(fitted DBSCAN estimator, data that was clustered)``.
    """
    if normalization in ('standard', 'minmax'):
        scaler_cls = StandardScaler if normalization == 'standard' else MinMaxScaler
        fitted_scaler = scaler_cls().fit(dataset)
        scaled_dataset = fitted_scaler.transform(dataset)
    else:
        # The message below reads "normalization not performed".
        print("정규화 진행 안함")
        scaled_dataset = dataset

    # Local name chosen so it does not shadow this function's own name.
    estimator = DBSCAN(eps=eps, min_samples=min_samples)
    return estimator.fit(scaled_dataset), scaled_dataset

# Spectral Clustering
def spectralClustering(dataset, n_clusters, n_init = 10, normalization='standard', random_state=None):
    """Optionally scale ``dataset`` and fit a SpectralClustering model on it.

    Parameters
    ----------
    dataset : array-like
        Data passed to the scaler and then to ``SpectralClustering.fit``.
    n_clusters : int
        Number of clusters requested.
    n_init : int, default 10
        Forwarded to ``SpectralClustering`` as ``n_init``.
    normalization : str, default 'standard'
        'standard' applies ``StandardScaler``, 'minmax' applies
        ``MinMaxScaler``; any other value skips scaling and prints a notice.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``SpectralClustering``. Pass a fixed integer for
        repeatable cluster ids; ``None`` keeps the previous unseeded
        behaviour.

    Returns
    -------
    tuple
        ``(fitted SpectralClustering estimator, data that was clustered)``.
    """
    if normalization == 'standard':
        scaled_dataset = StandardScaler().fit(dataset).transform(dataset)
    elif normalization == 'minmax':
        scaled_dataset = MinMaxScaler().fit(dataset).transform(dataset)
    else:
        # No scaling requested: cluster the data as given.
        # The message below reads "normalization not performed".
        scaled_dataset = dataset
        print("정규화 진행 안함")

    cluster_data = SpectralClustering(
        n_clusters=n_clusters, n_init=n_init, random_state=random_state
    ).fit(scaled_dataset)
    return cluster_data, scaled_dataset

# Hierarchical Clustering
def hierarchicalClustering(dataset, n_clusters, n_init = 10, linkage = 'ward', normalization='standard'):
    """Optionally scale ``dataset`` and fit an AgglomerativeClustering model.

    Parameters
    ----------
    dataset : array-like
        Data passed to the scaler and then to ``AgglomerativeClustering.fit``.
    n_clusters : int
        Number of clusters requested.
    n_init : int, default 10
        Accepted for signature compatibility only; it is not used anywhere
        in this function.
    linkage : str, default 'ward'
        Forwarded to ``AgglomerativeClustering`` as ``linkage``.
    normalization : str, default 'standard'
        'standard' applies ``StandardScaler``, 'minmax' applies
        ``MinMaxScaler``; any other value skips scaling and prints a notice.

    Returns
    -------
    tuple
        ``(fitted AgglomerativeClustering estimator, data that was clustered)``.
    """
    def _fit_and_apply(scaler):
        # Fit the scaler on the data and return the transformed data.
        return scaler.fit(dataset).transform(dataset)

    if normalization == 'standard':
        scaled_dataset = _fit_and_apply(StandardScaler())
    elif normalization == 'minmax':
        scaled_dataset = _fit_and_apply(MinMaxScaler())
    else:
        # The message below reads "normalization not performed".
        print("정규화 진행 안함")
        scaled_dataset = dataset

    model = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
    return model.fit(scaled_dataset), scaled_dataset

In [7]:
# Cluster `feature` into 7 groups with k-means after min-max scaling.
# `result` is the fitted estimator, `scaled_x` the scaled data it was fit on.
# NOTE(review): no seed is passed, so cluster ids may be numbered differently on a re-run.
result, scaled_x = kmeans(feature, 7, normalization='minmax')

Scaled_dataset: 
[[0.12869266 0.1944374  0.31041503 ... 0.10582252 0.22042952 0.2885632 ]
 [0.36149484 0.         0.5506828  ... 0.15745087 0.06279364 0.        ]
 [0.7109965  0.8570393  0.60048974 ... 0.         0.5659062  0.        ]
 ...
 [0.59670997 0.10539619 0.7351928  ... 0.         0.33955365 0.        ]
 [0.2766949  0.3882143  0.7213162  ... 0.36514887 0.02570792 0.56257683]
 [0.3375677  0.47331065 0.6663536  ... 0.         0.60508734 0.        ]]


In [8]:
import sys
import numpy

# Raise NumPy's summarisation threshold so the label array prints in full.
# This is a process-wide setting: every array printed later in this kernel
# is printed untruncated as well.
numpy.set_printoptions(threshold=sys.maxsize)
# Cluster id assigned to each sample by k-means.
print(result.labels_)

[3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5
 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 1 0 1
 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4
 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3
 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0
 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 5 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2
 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3
 3 1 0 1 2 4 3 3 5 0 1 2 3 3 3 5 0 5 6 4 3 3 5 0 5 2 3 3 3 5 0 5 2 3 3 3 5
 0 1 2 3 3 3 5 0 5 2 3 3 3 5 0 1 6 4 3 3 5 0 5 2 3 3 3 5 0 5 2 3 3 3 5 0 5
 2 3 3 3 1 0 1 2 3 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 6 4 3 3 5 0 1 2 3
 3 3 5 0 5 2 3 3 3 5 0 1 6 4 3 3 5 0 5 6 4 3 3 5 0 3 6 4 3 3 1 0 1 2 4 3 3
 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0
 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 1 0 1 2 4 3 3 5 0 1 2 4 3 3 1 0 1 2
 4 3 3 1 0 1 2 4 3 3 1 0 

In [9]:
# Hand-written reference labels: a 7-entry pattern repeated 190 times
# (7 * 190 = 1330 entries, matching the 1330 rows printed for the loaded arrays).
# The pattern equals the first seven k-means labels printed above.
# NOTE(review): these ids come from one specific run; since k-means is not
# seeded here they presumably stop matching if the numbering changes — confirm.
y_label = [3, 5, 0, 1, 6, 4, 3] * 190

In [10]:
# Compare the k-means cluster ids with the hand-written reference pattern.
class_ = np.array(result.labels_)
# Fraction of samples whose cluster id equals the reference label.
accuracy = getAccuracy(y_label, class_)
print(accuracy)

0.7684210526315789


In [64]:
# Run DBSCAN on min-max scaled `feature` with eps=0.5107 and min_samples=6
# (positional arguments, in the order of the dbscan() signature).
# NOTE(review): the two values are unexplained magic numbers — presumably hand-tuned; confirm.
result2, scaled_x2 = dbscan(feature, 0.5107, 6, normalization='minmax')

In [65]:
# Cluster id per sample from DBSCAN; scikit-learn marks noise samples with -1.
print(result2.labels_)

[-1  0 -1  1 -1  2 -1 -1  0 -1 -1 -1  2 -1 -1  0 -1 -1 -1 -1 -1 -1  0 -1
  1 -1  2 -1 -1  0 -1  1 -1  2 -1 -1  0 -1 -1 -1 -1 -1 -1  0 -1  3 -1 -1
 -1 -1  0 -1  1 -1  2 -1 -1  0 -1 -1 -1  2 -1 -1  0 -1 -1 -1  2 -1 -1  4
 -1  3 -1  2 -1 -1  0 -1 -1 -1  2 -1 -1  0 -1  3 -1  2 -1 -1  0 -1  3 -1
  2 -1 -1  0 -1 -1 -1  2 -1 -1  0 -1  3 -1 -1 -1 -1  0 -1  5 -1  2 -1 -1
  0 -1 -1 -1  2 -1 -1  0 -1 -1 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3
 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1 -1 -1
 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1
  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  0 -1  3 -1  2 -1 -1  4 -1  3 -1  2
 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4
 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1  2 -1 -1  4 -1  3 -1
  2 -1 -1  0 -1 -1 -1 -1 -1 -1  0 -1 -1 -1  2 -1 -1  0 -1 -1 -1 -1 -1 -1
  0 -1 -1 -1 -1 -1 -1  0 -1  1 -1 -1 -1 -1  0 -1 -1 -1 -1 -1 -1  0 -1 -1
 -1  2 -1 -1  0 -1 -1 -1 -1 -1 -1  0 -1 -1 -1 -1 -1

In [13]:
# Agglomerative clustering into 7 groups using the function defaults
# (ward linkage, standard scaling).
result3, scaled_x3 = hierarchicalClustering(feature, 7)

In [14]:
# Cluster id assigned to each sample by the agglomerative model.
print(result3.labels_)

[0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4
 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 2 5 2
 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3
 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0
 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5
 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 4 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1
 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0
 0 2 5 2 1 3 0 0 4 5 2 1 0 0 0 4 5 4 6 3 0 0 4 5 4 1 0 0 0 4 5 4 1 0 0 0 4
 5 2 1 0 0 0 4 5 4 1 0 0 0 4 5 2 6 3 0 0 4 5 4 1 0 0 0 4 5 4 1 0 0 0 4 5 4
 1 0 0 0 2 5 2 1 0 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 6 3 0 0 4 5 2 1 0
 0 0 4 5 4 1 0 0 0 4 5 2 6 3 0 0 4 5 4 6 3 0 0 4 5 4 6 3 0 0 2 5 2 1 3 0 0
 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5
 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 2 5 2 1 3 0 0 4 5 2 1 3 0 0 2 5 2 1
 3 0 0 2 5 2 1 3 0 0 2 5 

In [15]:
# Hand-written reference labels for the hierarchical result: a 7-entry pattern
# repeated 190 times (1330 entries).
# NOTE(review): the first seven labels printed above are 0 4 5 2 6 3 0, but this
# pattern ends in 1 rather than 0 — confirm whether the last entry is intended.
y_label3 = [0, 4, 5, 2, 6, 3, 1] * 190

In [16]:
# Compare the hierarchical cluster ids with the hand-written reference pattern.
class3_ = np.array(result3.labels_)
# Fraction of samples whose cluster id equals the reference label.
accuracy3 = getAccuracy(y_label3, class3_)
print(accuracy3)

0.6255639097744361


In [66]:
# Summary of the three runs. The DBSCAN line is a fixed message
# ("not measurable - no pattern"); no accuracy is computed for it.
# NOTE(review): "Accuray" in the printed text looks like a typo for "Accuracy";
# the strings are left unchanged here.
print("ASISX_RP2_96의 KMeans Accuray : " + str(accuracy))
print("ASISX_RP2_96의 DBSCAN Accuray : 측정 불가능 - 패턴이 없음")
print("ASISX_RP2_96의 계층적 군집화 Accuray : " + str(accuracy3))

ASISX_RP2_96의 KMeans Accuray : 0.7684210526315789
ASISX_RP2_96의 DBSCAN Accuray : 측정 불가능 - 패턴이 없음
ASISX_RP2_96의 계층적 군집화 Accuray : 0.6255639097744361
