## 라이브러리 import

In [1]:
import pandas as pd
import numpy  as np
import matplotlib.pyplot as plt
import seaborn as sb
import time

from sklearn.preprocessing import StandardScaler
from sklearn.manifold      import TSNE
from sklearn.cluster       import KMeans

%matplotlib

Using matplotlib backend: Qt5Agg


## 차원축소 대상 데이터 불러오기

In [2]:
# Load the grip-test measurements and discard the deflection/angle columns.
GripTestData = pd.read_csv('./GripTestData.csv', sep=',').drop(
    columns=[
        'center deflection max(mm)',
        'center deflection min(mm)',
        'max deg L',
        'min deg L',
        'max deg R',
        'min deg R',
    ]
)

# Positional split after the drop: columns 1-7 are used as the X factors and
# columns 8 onward as the Y factors (column 0 is left out of both).
Data_Y = GripTestData.iloc[:, 8:]
Data_X = GripTestData.iloc[:, 1:8]

In [3]:
# Load the not-gripped fabrics and keep the columns at positions 1-7
# (the same positional range used for Data_X).
# Left disabled, as in the original: Data_X_NotGripped.index = [12, 13, 15, 16]
Data_X_NotGripped = pd.read_csv('./GripTestData_NotGripped.csv', sep=',').iloc[:, 1:8]

In [4]:
# Stack the gripped fabrics' X factors on top of the not-gripped ones.
# The gripped part is re-sliced from GripTestData instead of reusing Data_X,
# so re-running this cell cannot append the not-gripped rows a second time.
# Later cells rely on the not-gripped rows being the LAST rows of Data_X.
Data_X = pd.concat([GripTestData.iloc[:, 1:8], Data_X_NotGripped], axis=0)
Data_X

Unnamed: 0,Thickness(mm),Weight(g/sqm),SurfaceRoughness(SMD),TensileStrength,Extension,BendingRigidity,Density
0,1.51,516.7,2.29,333.0,49.4,1.32,84
1,0.33,167.8,2.17,176.0,22.25,0.07,173
2,0.96,304.0,1.97,225.0,205.0,0.06,102
3,0.45,248.5,2.31,373.0,36.35,0.12,175
4,0.38,169.6,7.02,173.0,34.35,0.05,137
5,0.93,309.7,4.61,437.0,37.9,0.54,149
6,0.45,144.7,4.81,137.0,274.0,0.01,135
7,1.46,428.1,1.68,530.5,155.0,2.34,93
8,0.9,417.2,1.06,302.5,257.5,0.54,194
9,0.66,232.0,11.38,172.5,22.3,0.13,211


In [5]:
# Keep the three X factors ranked highest by Pearson coefficient.
#   kept    : 'Thickness(mm)', 'Weight(g/sqm)', 'BendingRigidity'
#   dropped : 'SurfaceRoughness(SMD)', 'TensileStrength', 'Extension', 'Density'
# drop() returns a new frame, so Data_X itself is left untouched.
Data_X_Selected = Data_X.drop(
    columns=['SurfaceRoughness(SMD)', 'TensileStrength', 'Extension', 'Density']
)
Data_X_Selected

Unnamed: 0,Thickness(mm),Weight(g/sqm),BendingRigidity
0,1.51,516.7,1.32
1,0.33,167.8,0.07
2,0.96,304.0,0.06
3,0.45,248.5,0.12
4,0.38,169.6,0.05
5,0.93,309.7,0.54
6,0.45,144.7,0.01
7,1.46,428.1,2.34
8,0.9,417.2,0.54
9,0.66,232.0,0.13


In [6]:
# Load the previously exported Pearson coefficient tables
# (X-X pairs, X-Y pairs and Y-Y pairs, in that order).
XXPearsonSorted, XYPearsonSorted, YYPearsonSorted = (
    pd.read_csv(csv_path)
    for csv_path in (
        './PearsonCoeffResult/XXPearsonSorted.csv',
        './PearsonCoeffResult/XYPearsonSorted_Deleted.csv',
        './PearsonCoeffResult/YYPearsonSorted_Deleted.csv',
    )
)

## Pearson 상관계수 결과 확인

In [7]:
# Display the X-X Pearson coefficient table (notebook last-expression output).
XXPearsonSorted

Unnamed: 0,X factor 1,X factor 2,PearsonCoeff
0,Thickness(mm),Weight(g/sqm),0.911197
1,Weight(g/sqm),BendingRigidity,0.662136
2,Thickness(mm),BendingRigidity,0.648507
3,SurfaceRoughness(SMD),Extension,-0.579931
4,TensileStrength,BendingRigidity,0.528562
5,SurfaceRoughness(SMD),Density,-0.474038
6,Thickness(mm),Density,-0.406743
7,BendingRigidity,Density,-0.375172
8,Weight(g/sqm),TensileStrength,0.371949
9,Weight(g/sqm),Density,-0.295556


# 차원축소, 데이터시각화

## X 인자 t-SNE 계산 및 결과표시

In [8]:
# t-SNE on all seven X factors (no feature selection), plotted in 2D.

time_start = time.perf_counter()  # monotonic clock, suited to elapsed-time measurement
tsne_X = TSNE(n_components=2, verbose=1, perplexity=10, n_iter=10000, random_state=1)
tsne_X_results = tsne_X.fit_transform(Data_X)

print('\n\n t-SNE done! Time elapsed: {} seconds'.format(time.perf_counter() - time_start))

# The not-gripped fabrics were appended after the gripped ones, so split the
# embedding at that boundary instead of assuming exactly four such rows.
n_gripped = len(tsne_X_results) - len(Data_X_NotGripped)

plt.figure(figsize=(8, 8))

# Gripped fabrics
plt.scatter(tsne_X_results[:n_gripped, 0], tsne_X_results[:n_gripped, 1],
            marker='o', label='X Factor Gripped', color='b')
# Not-gripped fabrics
plt.scatter(tsne_X_results[n_gripped:, 0], tsne_X_results[n_gripped:, 1],
            marker='o', label='X Factor Not Gripped', color='r')

plt.title('X factors t-SNE Result(2D)', fontsize=15)
plt.grid(alpha=0.5)
plt.legend(fontsize=12)
plt.xlabel('t-SNE_1')
plt.ylabel('t-SNE_2')
plt.show()

[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 40 samples in 0.000s...
[t-SNE] Computed neighbors for 40 samples in 0.010s...
[t-SNE] Computed conditional probabilities for sample 40 / 40
[t-SNE] Mean sigma: 24.685926
[t-SNE] KL divergence after 250 iterations with early exaggeration: 56.703617
[t-SNE] KL divergence after 1150 iterations: 0.156411


 t-SNE done! Time elapsed: 0.3068082332611084 seconds


In [9]:
# t-SNE on the three selected X factors, plotted in 2D.

time_start = time.perf_counter()  # monotonic clock, suited to elapsed-time measurement
tsne_X = TSNE(n_components=2, verbose=1, perplexity=10, n_iter=10000, random_state=1)
tsne_X_Selected_results = tsne_X.fit_transform(Data_X_Selected)

print('\n\n t-SNE done! Time elapsed: {} seconds'.format(time.perf_counter() - time_start))

# Data_X_Selected keeps the row order of Data_X, where the not-gripped fabrics
# come last; split at that boundary instead of assuming exactly four such rows.
n_gripped = len(tsne_X_Selected_results) - len(Data_X_NotGripped)

plt.figure(figsize=(8, 8))

# Gripped fabrics
plt.scatter(tsne_X_Selected_results[:n_gripped, 0], tsne_X_Selected_results[:n_gripped, 1],
            marker='o', label='X Factor Gripped', color='b')
# Not-gripped fabrics
plt.scatter(tsne_X_Selected_results[n_gripped:, 0], tsne_X_Selected_results[n_gripped:, 1],
            marker='o', label='X Factor Not Gripped', color='r')

plt.title('Selected X factors t-SNE Result(2D)', fontsize=15)
plt.grid(alpha=0.5)
plt.legend(fontsize=12)
plt.xlabel('t-SNE_1')
plt.ylabel('t-SNE_2')
plt.show()

[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 40 samples in 0.000s...
[t-SNE] Computed neighbors for 40 samples in 0.003s...
[t-SNE] Computed conditional probabilities for sample 40 / 40
[t-SNE] Mean sigma: 30.968420
[t-SNE] KL divergence after 250 iterations with early exaggeration: 53.401714
[t-SNE] KL divergence after 2000 iterations: 0.117265


 t-SNE done! Time elapsed: 0.5006895065307617 seconds


## Y 인자 t-SNE 계산 및 결과표시

In [10]:
# t-SNE on the Y factors (Data_Y), plotted in 2D.

time_start = time.time()
tsne_Y = TSNE(n_components=2, verbose=1, perplexity=10, n_iter=10000, random_state=1)
tsne_Y_Selected_results = tsne_Y.fit_transform(Data_Y)

print('\n\n t-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start))

# Same figure as before, drawn through the Axes object rather than pyplot state.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(
    tsne_Y_Selected_results[:, 0],
    tsne_Y_Selected_results[:, 1],
    marker='o',
    label='Y 2D Points',
    color='r',
)
ax.set_title('Y factors t-SNE Result(2D)', fontsize=15)
ax.grid(alpha=0.5)
ax.legend(fontsize=12)
ax.set_xlabel('t-SNE_1')
ax.set_ylabel('t-SNE_2')
plt.show()

[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 36 samples in 0.001s...
[t-SNE] Computed neighbors for 36 samples in 0.002s...
[t-SNE] Computed conditional probabilities for sample 36 / 36
[t-SNE] Mean sigma: 12.789586
[t-SNE] KL divergence after 250 iterations with early exaggeration: 56.856762
[t-SNE] KL divergence after 2550 iterations: 0.146743


 t-SNE done! Time elapsed: 0.6456000804901123 seconds


# K-means Clustering

In [26]:
# Number of clusters passed to every KMeans(n_clusters=Cluster) call below
Cluster = 3

## X 인자 값 군집화, t-SNE 이후 플로팅

In [61]:
# K-means on all seven X factors, then a 2D t-SNE embedding in which every
# fabric is coloured by its cluster and labelled with its cloth number.

# A fixed seed (and explicit n_init) keeps the cluster numbering, and therefore
# the plot colours, identical on every run, in line with the seeded t-SNE.
kmeans_X_Raw = KMeans(n_clusters=Cluster, n_init=10, random_state=1).fit(Data_X)
ClusterResult = kmeans_X_Raw.labels_

# Re-index the rows by cloth number
X_ClothNo = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
             11, 14, 17, 18, 19, 20,
             21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
             31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  # gripped
             12, 13, 15, 16]                          # not gripped
Data_X.index = X_ClothNo

# Reduce to 2D with t-SNE. Fit with this cell's own tsne_X estimator (not the
# tsne_Y object of the Y-factor cells) so the result never depends on whatever
# settings another cell last assigned.
tsne_X = TSNE(n_components=2, verbose=1, perplexity=10, n_iter=10000, random_state=1)
tsne_X_results = pd.DataFrame(tsne_X.fit_transform(Data_X), index=X_ClothNo)

# Attach the cluster labels (same row order as Data_X) and sort by cluster
tsne_X_results['Cluster'] = ClusterResult
tsne_X_results = tsne_X_results.sort_values(by=['Cluster'], ascending=True)

# Cluster 0 -> red, cluster 1 -> blue, any other label -> green
cluster_colors = {0: 'r', 1: 'b'}

plt.figure(figsize=(8, 8))

for cloth_no, (x, y, cluster) in zip(tsne_X_results.index,
                                     tsne_X_results.itertuples(index=False, name=None)):
    plt.scatter(x, y, marker='o', color=cluster_colors.get(int(cluster), 'g'))
    plt.text(x, y, str(cloth_no), fontsize=10)  # cloth number next to the point

plt.title('X factor K-means Clustring & t-SNE')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.grid(alpha=0.5)
plt.show()

[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 40 samples in 0.000s...
[t-SNE] Computed neighbors for 40 samples in 0.002s...
[t-SNE] Computed conditional probabilities for sample 40 / 40
[t-SNE] Mean sigma: 24.685926
[t-SNE] KL divergence after 250 iterations with early exaggeration: 56.703617
[t-SNE] KL divergence after 1150 iterations: 0.156411


파지 안 된 원단은 모두 동일한 군집, 파지된 원단은 다양한 군집 분포

## 선택한 3개 X 인자 값 군집화, t-SNE 이후 플로팅

In [63]:
# K-means on the three selected X factors (thickness, weight, bending
# rigidity), then a 2D t-SNE embedding in which every fabric is coloured by
# its cluster and labelled with its cloth number.

# A fixed seed (and explicit n_init) keeps the cluster numbering, and therefore
# the plot colours, identical on every run, in line with the seeded t-SNE.
kmeans_X_Raw = KMeans(n_clusters=Cluster, n_init=10, random_state=1).fit(Data_X_Selected)
ClusterResult = kmeans_X_Raw.labels_

# Re-index the rows by cloth number
X_ClothNo = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
             11, 14, 17, 18, 19, 20,
             21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
             31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  # gripped
             12, 13, 15, 16]                          # not gripped
Data_X_Selected.index = X_ClothNo

# Reduce to 2D with t-SNE
tsne_X = TSNE(n_components=2, verbose=1, perplexity=10, n_iter=10000, random_state=1)
tsne_X_results = pd.DataFrame(tsne_X.fit_transform(Data_X_Selected), index=X_ClothNo)

# Attach the cluster labels (same row order as Data_X_Selected) and sort by cluster
tsne_X_results['Cluster'] = ClusterResult
tsne_X_results = tsne_X_results.sort_values(by=['Cluster'], ascending=True)

# Cluster 0 -> red, cluster 1 -> blue, any other label -> green
cluster_colors = {0: 'r', 1: 'b'}

plt.figure(figsize=(8, 8))

for cloth_no, (x, y, cluster) in zip(tsne_X_results.index,
                                     tsne_X_results.itertuples(index=False, name=None)):
    plt.scatter(x, y, marker='o', color=cluster_colors.get(int(cluster), 'g'))
    plt.text(x, y, str(cloth_no), fontsize=10)  # cloth number next to the point

plt.title('X factor(3) K-means Clustring & t-SNE')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.grid(alpha=0.5)
plt.show()

[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 40 samples in 0.000s...
[t-SNE] Computed neighbors for 40 samples in 0.001s...
[t-SNE] Computed conditional probabilities for sample 40 / 40
[t-SNE] Mean sigma: 30.968420
[t-SNE] KL divergence after 250 iterations with early exaggeration: 53.401714
[t-SNE] KL divergence after 2000 iterations: 0.117265


파지 안 된 원단은 모두 동일한 군집, 파지된 원단은 다양한 군집 분포

## Y 인자 값 군집화, t-SNE 이후 플로팅

In [98]:
# K-means on the Y factors (Data_Y), then a 2D t-SNE embedding in which every
# fabric is coloured by its cluster and labelled with its cloth number.

# A fixed seed (and explicit n_init) keeps the cluster numbering, and therefore
# the plot colours, identical on every run, in line with the seeded t-SNE.
kmeans_Y_Raw = KMeans(n_clusters=Cluster, n_init=10, random_state=1).fit(Data_Y)
ClusterResult = kmeans_Y_Raw.labels_

# Re-index the rows by cloth number (Data_Y only holds the gripped fabrics)
Y_ClothNo = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
             11, 14, 17, 18, 19, 20,
             21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
             31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
Data_Y.index = Y_ClothNo

# Reduce to 2D with t-SNE. The fit_transform call must happen here: without it
# tsne_Y_results is either undefined or a stale, already-sorted frame left over
# from a previous run of this cell.
tsne_Y = TSNE(n_components=2, verbose=1, perplexity=9, n_iter=10000, random_state=1)
tsne_Y_results = pd.DataFrame(tsne_Y.fit_transform(Data_Y), index=Y_ClothNo)

# Attach the cluster labels (same row order as Data_Y) and sort by cluster
tsne_Y_results['Cluster'] = ClusterResult
tsne_Y_results = tsne_Y_results.sort_values(by=['Cluster'], ascending=True)

# Cluster 0 -> red, cluster 1 -> blue, any other label -> green
cluster_colors = {0: 'r', 1: 'b'}

plt.figure(figsize=(8, 8))

for cloth_no, (x, y, cluster) in zip(tsne_Y_results.index,
                                     tsne_Y_results.itertuples(index=False, name=None)):
    plt.scatter(x, y, marker='o', color=cluster_colors.get(int(cluster), 'g'))
    plt.text(x, y, str(cloth_no), fontsize=10)  # cloth number next to the point

plt.title('Y factor K-means Clustring & t-SNE')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.grid(alpha=0.5)
plt.show()

[t-SNE] Computing 28 nearest neighbors...
[t-SNE] Indexed 36 samples in 0.001s...
[t-SNE] Computed neighbors for 36 samples in 0.001s...
[t-SNE] Computed conditional probabilities for sample 36 / 36
[t-SNE] Mean sigma: 11.924221
[t-SNE] KL divergence after 250 iterations with early exaggeration: 54.286858
[t-SNE] KL divergence after 1400 iterations: 0.142687


## 전체 데이터 또는 선택 데이터 (X, Y) 군집화, t-SNE 이후 플로팅

In [104]:
# K-means on the combined X and Y factors, then a 2D t-SNE embedding in which
# every fabric is coloured by its cluster and labelled with its cloth number.

# Option 1 (active): cluster on every column except column 0.
# .copy() makes the frame independent of GripTestData before its index is
# replaced below.
GripTestDataSelected = GripTestData.iloc[:, 1:].copy()

# Option 2 (disabled): cluster on a hand-picked subset of columns instead.
# Use a list here; pandas does not accept a set as a column indexer.
# GripTestDataSelected = GripTestData[['Thickness(mm)', 'Weight(g/sqm)', 'BendingRigidity', 't_max (mm)', 't_min (mm)', '(t_max - t_min) percentage for Thickness']].copy()

# A fixed seed (and explicit n_init) keeps the cluster numbering, and therefore
# the plot colours, identical on every run, in line with the seeded t-SNE.
kmeans_XY_Raw = KMeans(n_clusters=Cluster, n_init=10, random_state=1).fit(GripTestDataSelected)
ClusterResult = kmeans_XY_Raw.labels_

# Re-index the rows by cloth number
XY_ClothNo = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
              11, 14, 17, 18, 19, 20,
              21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
              31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
GripTestDataSelected.index = XY_ClothNo

# Reduce to 2D with t-SNE
tsne_XY = TSNE(n_components=2, verbose=1, perplexity=9, n_iter=10000, random_state=1)
tsne_XY_results = pd.DataFrame(tsne_XY.fit_transform(GripTestDataSelected), index=XY_ClothNo)

# Attach the cluster labels (same row order as GripTestDataSelected) and sort by cluster
tsne_XY_results['Cluster'] = ClusterResult
tsne_XY_results = tsne_XY_results.sort_values(by=['Cluster'], ascending=True)

# Cluster 0 -> red, cluster 1 -> blue, any other label -> green
cluster_colors = {0: 'r', 1: 'b'}

plt.figure(figsize=(8, 8))

for cloth_no, (x, y, cluster) in zip(tsne_XY_results.index,
                                     tsne_XY_results.itertuples(index=False, name=None)):
    plt.scatter(x, y, marker='o', color=cluster_colors.get(int(cluster), 'g'))
    plt.text(x, y, str(cloth_no), fontsize=10)  # cloth number next to the point

plt.title('X, Y factor K-means Clustring & t-SNE')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.grid(alpha=0.5)
plt.show()

[t-SNE] Computing 28 nearest neighbors...
[t-SNE] Indexed 36 samples in 0.000s...
[t-SNE] Computed neighbors for 36 samples in 0.001s...
[t-SNE] Computed conditional probabilities for sample 36 / 36
[t-SNE] Mean sigma: 44.390663
[t-SNE] KL divergence after 250 iterations with early exaggeration: 58.671394
[t-SNE] KL divergence after 1500 iterations: 0.145702


In [100]:
# Display the X-factor embedding table: t-SNE columns 0 and 1 plus 'Cluster',
# indexed by cloth number (notebook last-expression output).
tsne_X_results

Unnamed: 0,0,1,Cluster
24,-29.649847,88.495041,0
23,-70.188675,111.346123,0
27,-21.581863,26.586491,0
28,4.715986,9.970119,0
20,-43.758804,153.049911,0
31,30.994375,-79.090248,0
18,31.236324,-53.281509,0
14,-29.902142,60.463398,0
26,-4.024683,44.905922,0
11,-7.546263,81.449112,0
