In [2]:
import numpy as np

compressed = np.load('./compressed.npy')
feature = np.load('./feature.npy')
X_scaled = np.load('./X_scaled.npy')

In [3]:
print(X_scaled.shape)
print(compressed.shape)

(1330, 96, 96, 1)
(1330, 12, 12, 2)


In [4]:
# Clustering Algorithm
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering, AgglomerativeClustering

# Normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Needed Library!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [5]:
# Accuracy를 가져오는 함수
# data_y: 정답 데이터
# pred_y: 예측 데이터
def getAccuracy(data_y, pred_y):
    count = 0
    bool_array = (data_y == pred_y)
    for correct in bool_array:
        if(correct):
            count += 1
    return count / pred_y.size

In [6]:
# 정답 레이블 만들기
## 원하는 클래스 레이블의 리스트를 넘긴다.
## ex) list = [1, 4, 5] -> 클래스 1번, 4번, 5번 에 대하여 레이블 생성
def getClassLabelFor(list, batch_size=190):
    y=np.array([])
    for i in list:
        y_=np.full((1, batch_size), i)[0]
        y=np.hstack([y, y_])

    return y

In [7]:
# Cluster Algorithm
def kmeans(dataset, n_clusters, normalization='standard'):

    scaled_dataset = []
    if normalization == 'standard':
        scaler = StandardScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    elif normalization == 'minmax':
        scaler = MinMaxScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    else:
        scaled_dataset = dataset
        print("정규화 진행 안함")
    print("Scaled_dataset: \n{}".format(scaled_dataset))
    cluster_data = KMeans(n_clusters=n_clusters).fit(scaled_dataset)
    return cluster_data, scaled_dataset

# DBSCAN
def dbscan(dataset, eps=0.5, min_samples=5, normalization='standard'):
   
    scaled_dataset = []
    if normalization == 'standard':
        scaler = StandardScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    elif normalization == 'minmax':
        scaler = MinMaxScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    else:
        scaled_dataset = dataset
        print("정규화 진행 안함")

    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    cluster_data = dbscan.fit(scaled_dataset)
    return cluster_data, scaled_dataset

# Spectral Clustering
def spectralClustering(dataset, n_clusters, n_init = 10, normalization='standard'):
    
    #scaled_dataset = norm(dataset, normalization=normalization)
    scaled_dataset = []
    if normalization == 'standard':
        scaler = StandardScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    elif normalization == 'minmax':
        scaler = MinMaxScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    else:
        scaled_dataset = dataset
        print("정규화 진행 안함")
    cluster_data = SpectralClustering(n_clusters=n_clusters, n_init=n_init).fit(scaled_dataset)
    return cluster_data, scaled_dataset

# Hierarchical Clustering
def hierarchicalClustering(dataset, n_clusters, n_init = 10, linkage = 'ward', normalization='standard'):

    #scaled_dataset = norm(dataset, normalization=normalization)
    scaled_dataset = []
    if normalization == 'standard':
        scaler = StandardScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    elif normalization == 'minmax':
        scaler = MinMaxScaler().fit(dataset)
        scaled_dataset = scaler.transform(dataset)
    else:
        scaled_dataset = dataset
        print("정규화 진행 안함")
    cluster_data = AgglomerativeClustering(n_clusters = n_clusters, linkage = linkage ).fit(scaled_dataset)
    return cluster_data, scaled_dataset

In [11]:
result, scaled_x = kmeans(feature, 7, normalization='minmax')

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [12]:
import sys
import numpy

numpy.set_printoptions(threshold=sys.maxsize)
print(result.labels_)

[4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2
 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 2 2 5 2 1 3 6 4 2 2 2
 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3
 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4
 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0
 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1
 3 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6
 4 2 0 2 1 3 6 4 2 5 2 1 5 6 4 2 5 2 1 3 6 4 2 5 2 1 5 6 4 2 5 2 1 5 6 4 2
 5 2 1 5 6 2 2 5 2 1 5 6 4 2 0 2 1 3 6 4 2 0 2 1 5 6 4 2 5 2 1 5 6 4 2 0 2
 1 5 6 4 2 5 2 1 5 6 2 2 5 2 1 3 6 4 2 5 2 1 3 6 2 2 5 2 1 3 6 2 2 5 2 1 5
 6 2 2 0 2 1 5 6 4 2 5 2 1 3 6 4 2 5 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4
 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 4 3 6 4 2 0 2 1 3 6 4 2 0
 2 4 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 0 2 1 3 6 4 2 5 2 1 3 6 4 2 5 2 1
 3 6 4 2 5 2 1 3 6 4 2 5 

In [13]:
y_label = [4, 2, 5, 2, 1, 3, 6] * 190

In [14]:
class_ = np.array(result.labels_)
accuracy = getAccuracy(y_label, class_)
print(accuracy)

0.9037593984962407


In [76]:
result2, scaled_x2 = dbscan(feature, 1.175, 11, normalization='minmax')

In [77]:
print(result2.labels_)

[ 0  0 -1  0 -1  1 -1 -1  0 -1  0 -1  1 -1  0  0 -1  0 -1  1  2  0  0 -1
  0 -1  1  2  0  0 -1  0 -1  1 -1  0  0 -1  0 -1  1 -1  0  0 -1  0 -1  1
 -1  0  0 -1  0 -1  1 -1 -1  0 -1  0 -1  1 -1  0  0 -1  0 -1  1  2 -1  0
 -1  0 -1  1 -1 -1  0 -1  0 -1  1 -1 -1  0 -1  0 -1  1 -1  0  0 -1  0 -1
  1 -1 -1  0 -1  0 -1  1 -1  0  0 -1  0 -1  1 -1 -1  0 -1  0 -1  1 -1 -1
  0 -1  0 -1  1 -1  0  0 -1  0 -1  1 -1  4  0 -1  0  3  1  2  4  0  7  0
  3  1  2  4  0 -1  0  3  1  2  4  0  6  0  3  1  2  4  0  6  0  3  1  2
  4  0 -1  0  3  1  2  4  0 -1  0  3  1  2  4  0 -1  0  3  1  2  4  0  6
  0  3  1  2  4  0  6  0 -1  1  2 -1  0 -1  0  3  1  2 -1  0 -1  0  3  1
  2  4  0 -1  0  3  1  2  4  0 -1  0  3  1  2  4  0 -1  0  3  1  2  4  0
 -1  0  3  1  2  4  0 -1  0  3  1  2 -1  0  6  0  3  1  2  4  0 -1  0  3
  1  2 -1  0 -1  0 -1 -1  0 -1  0 -1  0 -1  1  5 -1  0 -1  0 -1 -1  5 -1
  0 -1  0 -1 -1  5 -1  0 -1  0 -1 -1  0 -1  0 -1  0 -1 -1  0 -1  0 -1  0
 -1  1 -1 -1  0 -1  0 -1 -1  0 -1  0 -1  0 -1 -1 -1

In [78]:
y_label2 = [3, 1, 2, 4, 0, 6, 5] * 190

In [79]:
class2_ = np.array(result2.labels_)
accuracy2 = getAccuracy(y_label2, class2_)
print(accuracy2)

0.009774436090225564


In [82]:
result3, scaled_x3 = hierarchicalClustering(feature, 7)

In [83]:
print(result3.labels_)

[6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 2 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1
 3 1 0 5 4 6 1 3 1 0 5 4 6 1 0 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1
 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5
 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6
 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3
 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 1 1 3 1 0 5 4 6 1 3 1 0
 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4
 6 1 3 1 0 5 4 6 1 3 1 0 2 4 6 1 3 1 0 5 4 6 1 3 1 0 2 4 6 1 3 1 0 2 4 6 1
 3 1 0 2 4 6 1 3 1 0 2 4 6 1 3 1 0 5 4 6 1 3 1 0 2 4 6 1 3 1 0 2 4 6 1 3 1
 0 2 4 6 1 3 1 0 2 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 2
 4 6 1 3 1 0 2 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 2 0 5 4 6 1 3 1 0 5 4 6
 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3
 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0 5 4 6 1 3 1 0
 5 4 6 1 3 1 0 5 4 6 1 3 

In [84]:
y_label3 = [6, 1, 3, 2, 0, 5, 4] * 190

In [85]:
class3_ = np.array(result3.labels_)
accuracy3 = getAccuracy(y_label3, class3_)
print(accuracy3)

0.8330827067669173


In [87]:
print("ASISX_RP1_96의 KMeans Accuray : " + str(accuracy))
print("ASISX_RP1_96의 DBSCAN Accuray : " + str(accuracy2))
print("ASISX_RP1_96의 계층적 군집화 Accuray : " + str(accuracy3))

ASISX_RP1_96의 KMeans Accuray : 0.9037593984962407
ASISX_RP1_96의 DBSCAN Accuray : 0.009774436090225564
ASISX_RP1_96의 계층적 군집화 Accuray : 0.8330827067669173
