## 라이브러리 import

In [1]:
import os

import numpy as np
import pandas as pd
import pywt
import scipy.stats as sp

## 데이터 크기 정의하기

In [2]:
# Number of recordings per class: 180 normal and 180 faulty.
NoOfData = 180
# Sensor channels per recording: acceleration, voltage, current.
NoOfSensor = 3
# Features extracted per channel, in this order: Max, Min, Mean, RMS, Variance,
# Skewness, Kurtosis, Crest factor, Shape factor, Impulse factor.
NoOfFeature = 10

# Echo the configured sizes as the cell output.
(NoOfData, NoOfSensor, NoOfFeature)

(180, 3, 10)

## Time Domain 특징값 추출 (10 features * 3 sensors = 30개씩)

In [3]:
def rms(x):
    """Return the root mean square of *x*.

    Parameters
    ----------
    x : array-like
        Signal samples: a NumPy array, a pandas Series, or any sequence that
        ``np.asarray`` can convert (e.g. a plain list).

    Returns
    -------
    The square root of the mean of the squared samples.

    Notes
    -----
    The samples are cast to float64 before squaring. Squaring in the input's
    own dtype silently overflows for narrow integer types (e.g. int16), which
    turns the result into NaN.
    """
    if hasattr(x, "astype"):
        # NumPy arrays and pandas objects: cast in place so the container type
        # (and therefore how np.mean reduces it) stays the same as before.
        values = x.astype(np.float64)
    else:
        # Plain Python sequences and scalars do not support ``** 2`` directly.
        values = np.asarray(x, dtype=np.float64)
    return np.sqrt(np.mean(values**2))

In [4]:
# Pre-allocate the feature matrices: one row per recording, and
# NoOfSensor*NoOfFeature columns grouped by sensor (sensor j occupies columns
# NoOfFeature*j .. NoOfFeature*j + NoOfFeature - 1).
TimeFeature_Normal   = np.zeros((NoOfData, NoOfSensor*NoOfFeature))
TimeFeature_Abnormal = np.zeros((NoOfData, NoOfSensor*NoOfFeature))


def _time_domain_features(signal):
    """Return the 10 time-domain features of one sensor signal.

    Order: Max, Min, Mean, RMS, Variance, Skewness, Kurtosis, Crest factor,
    Shape factor, Impulse factor.
    """
    # NOTE: the peak is the signed maximum, not max(|x|); kept as-is so the
    # crest and impulse factors match previously extracted feature files.
    peak = np.max(signal)
    signal_rms = rms(signal)
    mean_abs = np.mean(np.abs(signal))
    return [
        peak,                   # Max
        np.min(signal),         # Min
        np.mean(signal),        # Mean
        signal_rms,             # RMS
        np.var(signal),         # Variance
        sp.skew(signal),        # Skewness
        sp.kurtosis(signal),    # Kurtosis
        peak / signal_rms,      # Crest factor
        signal_rms / mean_abs,  # Shape factor
        peak / mean_abs,        # Impulse factor
    ]


for i in range(NoOfData):

    # Load the (i+1)-th recording of each class (file names are 1-based)
    temp_path1 = './SpotWeldingData/Normal_%d.csv'%(i+1)   # Normal data file path
    temp_path2 = './SpotWeldingData/Abnormal_%d.csv'%(i+1) # Abnormal data file path
    temp_data1 = pd.read_csv(temp_path1 , sep=',')         # temporary Normal data
    temp_data2 = pd.read_csv(temp_path2 , sep=',')         # temporary Abnormal data

    # Extract the time-domain features of every sensor column
    for j in range(NoOfSensor):
        # Use NoOfFeature (not a literal 10) so the offsets agree with the
        # array width above. The slice assignment raises if the helper's
        # feature count and NoOfFeature ever get out of sync.
        start = NoOfFeature*j
        stop  = start + NoOfFeature
        TimeFeature_Normal[i, start:stop]   = _time_domain_features(temp_data1.iloc[:,j])
        TimeFeature_Abnormal[i, start:stop] = _time_domain_features(temp_data2.iloc[:,j])

print(TimeFeature_Normal.shape)
print(TimeFeature_Abnormal.shape)

(180, 30)
(180, 30)


Normal, Abnormal 데이터 합치기  

In [5]:
# Stack the two classes row-wise: Normal rows first, Abnormal rows after them.
TimeFeature = np.vstack((TimeFeature_Normal, TimeFeature_Abnormal))
TimeFeature.shape

(360, 30)

.

.

.



## Frequency Domain 특징값 추출 (10 features \* 8 wavelet levels \* 3 sensors = 240개씩)

In [6]:
# Wavelet decomposition settings

# Decomposition depth passed to the wavelet transform.
Level = 8
# How many bands to extract features from, counted from the high-frequency
# end (d1 onward).
select = 8
# Mother wavelet used for the decomposition.
MotherWavelet = pywt.Wavelet('haar')

In [7]:
# Frequency-domain feature extraction (wavelet-transform based).
# One row per recording; columns are grouped by sensor, then by wavelet band,
# then by feature.
FreqFeature_Normal   = np.zeros(shape=(NoOfData, NoOfSensor*NoOfFeature*select))
FreqFeature_Abnormal = np.zeros(shape=(NoOfData, NoOfSensor*NoOfFeature*select))

for i in range(NoOfData):

    # Load the (i+1)-th recording of each class (file names are 1-based)
    temp_path1 = './SpotWeldingData/Normal_%d.csv'%(i+1)
    temp_path2 = './SpotWeldingData/Abnormal_%d.csv'%(i+1)
    temp_data1 = np.array(pd.read_csv(temp_path1 , sep=','))
    temp_data2 = np.array(pd.read_csv(temp_path2 , sep=','))

    # Decompose all sensor columns at once (axis=0 is the sample axis)
    Coef1 = pywt.wavedec(temp_data1, MotherWavelet, level=Level, axis=0)
    Coef2 = pywt.wavedec(temp_data2, MotherWavelet, level=Level, axis=0)

    for j in range(NoOfSensor):
        for k in range(select):
            # First column of the feature group for sensor j, band k
            col = NoOfFeature*(j*select + k)

            # Same ten features for both classes, written to matching columns
            for target, coefs in ((FreqFeature_Normal, Coef1),
                                  (FreqFeature_Abnormal, Coef2)):
                # wavedec returns [cA_Level, cD_Level, ..., cD_1], so index
                # Level-k walks the detail bands starting from d1.
                band = coefs[Level-k][:,j]

                peak     = np.max(band)
                band_rms = rms(band)
                mean_abs = np.mean(np.abs(band))

                values = (
                    peak,                 # Max
                    np.min(band),         # Min
                    np.mean(band),        # Mean
                    band_rms,             # RMS
                    np.var(band),         # Variance
                    sp.skew(band),        # Skewness
                    sp.kurtosis(band),    # Kurtosis
                    peak / band_rms,      # Crest factor
                    band_rms / mean_abs,  # Shape factor
                    peak / mean_abs,      # Impulse factor
                )
                for offset, value in enumerate(values):
                    target[i, col+offset] = value

print(FreqFeature_Normal.shape)
print(FreqFeature_Abnormal.shape)

(180, 240)
(180, 240)


Normal, Abnormal 특징값 합치기      

In [8]:
# Stack the two classes row-wise: Normal rows first, Abnormal rows after them.
FreqFeature = np.vstack((FreqFeature_Normal, FreqFeature_Abnormal))
FreqFeature.shape

(360, 240)

.

.

.



## Feature 통합 (Time domain 30개 + Freq. domain 240개 = 270개)

In [9]:
# Join the time-domain and frequency-domain features column-wise, so each row
# holds every feature of one recording.
Features = np.hstack((TimeFeature, FreqFeature))

# Report the matrix size; the per-class count is half the rows because the
# Normal and Abnormal sets are the same size.
n_samples, n_features = Features.shape
print("Feature Data Size :", Features.shape)
print("= 데이터 %d개(정상/고장 각 %d개씩)가 각각 %d개의 특징값으로 구성됨"%(n_samples, n_samples/2, n_features))

Feature Data Size : (360, 270)
= 데이터 360개(정상/고장 각 180개씩)가 각각 270개의 특징값으로 구성됨


저장에 용이한 Data frame 형식으로 변환

In [10]:
# Wrap the feature matrix in a DataFrame, which is convenient for saving.
FeatureData = pd.DataFrame(data=Features)
# Show the table as the cell output.
FeatureData

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,260,261,262,263,264,265,266,267,268,269
0,1.35100,-1.37200,0.011083,0.426105,0.181443,-0.190394,0.075070,3.170579,1.308556,4.148881,...,27.413055,-26.156209,1.339847,19.972107,397.089869,-0.069441,-1.500603,1.372567,1.136464,1.559874
1,31.66100,-22.78600,0.023339,2.312749,5.348262,0.733160,56.416904,13.689771,2.988680,40.914350,...,27.780724,-25.610594,1.594287,19.976598,396.522728,-0.095775,-1.513689,1.390663,1.123389,1.562256
2,31.83200,-23.61300,0.020506,2.313820,5.353342,0.809841,59.990727,13.757337,3.048930,41.945157,...,27.199963,-25.957053,1.403277,19.978553,397.173388,-0.094919,-1.520788,1.361458,1.128936,1.536999
3,1.41830,-1.08560,0.027215,0.396240,0.156266,-0.222429,-0.090202,3.579394,1.306388,4.676077,...,27.310880,-25.913666,1.279570,19.968877,397.118765,-0.098230,-1.502072,1.367672,1.125794,1.539718
4,1.05340,-1.05750,0.016574,0.388252,0.150465,-0.279663,-0.094792,2.713186,1.322550,3.588324,...,27.444745,-25.652639,1.124162,20.116104,403.393882,-0.091625,-1.514240,1.364317,1.126631,1.537082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,0.74749,-0.78229,0.037731,0.338766,0.113339,-0.331700,-0.224237,2.206506,1.300623,2.869832,...,27.606268,-26.712706,0.781096,20.229283,408.613777,-0.115580,-1.518307,1.364669,1.125013,1.535270
356,0.71832,-0.51380,0.036942,0.265980,0.069381,0.015054,-0.190223,2.700652,1.310460,3.539097,...,27.480376,-26.910771,0.740953,20.348248,413.502184,-0.106127,-1.515108,1.350503,1.128016,1.523390
357,0.84546,-0.65627,0.035927,0.321351,0.101976,0.066285,-0.316172,2.630953,1.307231,3.439263,...,26.793667,-27.107096,0.580973,20.351617,413.850805,-0.117642,-1.526302,1.316537,1.127961,1.485003
358,0.84845,-0.74789,0.035836,0.323284,0.103229,0.035263,-0.277617,2.624470,1.306921,3.429974,...,27.405049,-24.952029,1.216872,19.921322,395.378279,-0.080779,-1.508744,1.375664,1.127325,1.550821


추출된 특징 데이터 저장 (.csv 파일)

In [11]:
# Save the extracted features as a CSV file with no header row and no index
# column.
path = './ProcessedData/FeatureData.csv'   # path = '<directory>/<file name>'
# Create the output directory first: to_csv fails if it does not exist yet.
os.makedirs(os.path.dirname(path), exist_ok=True)
# header/index take booleans; False states the intent explicitly instead of
# relying on None being falsy.
FeatureData.to_csv(path, sep=',', header=False, index=False)