# 2020년도 2학기 '기계공학도를 위한 인공지능입문' 데이터 챌린지!

#### - 데이터 챌린지 목표 
: 많은 양의 데이터(빅데이터)에서 필요한 정보만 추출(인덱싱)하고, 원하는 형태로 가공할 수 있는 데이터 핸들링 수행능력 평가


#### - 데이터 출처 : PHM DATA CHALLENGE 2018 (일부 데이터 편집)
#### - 데이터 설명자료 :
(https://www.phmsociety.org/sites/phmsociety.org/files/PHM%20Data%20Challenge%202018%20vFinal%20v2_0.pdf)


#### - 가이드에 따라 총 4단계의 데이터 챌린지를 수행하며, 각 단계의 결과값(.csv 데이터 파일)과 최종 코드파일(.ipynb)을 1개 압축파일(.zip)로 제출


### 라이브러리(패키지) import

In [3]:
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import scipy.stats       as sp
import seaborn           as sb

# 데이터 불러오기

### '공정 데이터'와 '고장기록 데이터' 불러오기

In [4]:
# Load the five process-data files and the five fault-record files of each
# machine (M01, M02) and bind them to the variable names used by the later
# cells: M0x_ProcessData_1..5 and M0x_FaultRecord_1..5.
# The names are assigned through globals() instead of exec()-ing generated
# source text; the resulting variables are the same.
for file_no in range(1, 6):
    for machine in ("M01", "M02"):
        # Paths assume the 'DC_Data' folder is in the same directory as this code.
        globals()["%s_ProcessData_%d" % (machine, file_no)] = pd.read_csv(
            "DC_Data/%s_DC_train_%d.csv" % (machine, file_no)
        )
        globals()["%s_FaultRecord_%d" % (machine, file_no)] = pd.read_csv(
            "DC_Data/Fault_ReferenceData/%s_train_fault_data_%d.csv" % (machine, file_no)
        )

### 공정 데이터 살펴보기


In [5]:
# Display the process data loaded from the first M01 file (cell output follows).
M01_ProcessData_1

Unnamed: 0,time,IONGAUGEPRESSURE,ETCHBEAMVOLTAGE,ETCHBEAMCURRENT,ETCHSUPPRESSORVOLTAGE,ETCHSUPPRESSORCURRENT,FLOWCOOLFLOWRATE,FLOWCOOLPRESSURE,ETCHGASCHANNEL1READBACK,ETCHPBNGASREADBACK,ACTUALROTATIONANGLE,ACTUALSTEPDURATION
0,3283106,0.745235,1.085043,-1.151842,0.496681,-1.086037,-1.444695,-0.219349,0.744638,0.727147,-0.152495,-0.200587
1,3283110,-0.037024,-0.932286,-1.152112,-0.928112,-1.145790,-1.444695,-0.547157,-1.312143,-1.405385,-0.152495,-0.200587
2,3283114,-0.507875,-1.106610,-1.151909,-1.124562,-1.145790,-1.444695,-0.638781,-1.312143,-1.405385,-0.152495,-0.200587
3,3283118,-0.793893,-1.128402,-1.151977,-1.174503,-1.145790,-1.444695,-0.682351,-1.312143,-1.405385,-0.152495,-0.200587
4,3283122,-0.977717,-1.128402,-1.151706,-1.184062,-1.145790,-1.444695,-0.737326,-1.312143,-1.405385,-0.152495,-0.200587
...,...,...,...,...,...,...,...,...,...,...,...,...
3124699,43037320,-1.339439,-1.128402,-1.152112,-1.188503,-1.145790,-1.434879,-0.869670,-1.296143,-1.269834,-0.152501,-0.200587
3124700,43037324,-1.313654,-1.128402,-1.151706,-1.188503,-1.145790,-1.434879,-1.093230,-1.296143,-1.269834,-0.152495,-0.200587
3124701,43037328,-1.337850,-1.128402,-1.151909,-1.188503,-1.145790,-1.434879,-1.144132,-1.296143,-1.269834,-0.152495,-0.200587
3124702,43037332,-1.341035,-1.128402,-1.151977,-1.188503,-1.145790,-1.434879,-1.181188,-1.296143,-1.269834,-0.152495,-0.200587


### 고장기록 데이터 살펴보기


In [6]:
# Display the fault record loaded from the fifth M02 file (cell output follows).
M02_FaultRecord_5

Unnamed: 0,time,fault_name,Tool
0,3574572,Flowcool Pressure Too High Check Flowcool Pump,05M02
1,5145048,FlowCool Pressure Dropped Below Limit,05M02
2,5145048,FlowCool Pressure Dropped Below Limit,05M02
3,9741392,Flowcool leak,05M02
4,14328602,Flowcool Pressure Too High Check Flowcool Pump,05M02
5,14666470,FlowCool Pressure Dropped Below Limit,05M02
6,16746750,Flowcool leak,05M02
7,16748980,Flowcool leak,05M02
8,16750714,Flowcool leak,05M02
9,16756186,Flowcool leak,05M02


.

.

.



# [1단계] 공정 데이터 & 고장기록 데이터 인덱싱 (4점)

### * Tip : 데이터 내에서 특정 인덱스(Index)에 해당하는 데이터만 추출하는 방법

예시데이터 (Data frame) 생성

In [227]:
# Small example table used by the indexing tips below: one row per person.
example_rows = [
    ('Lee', 2013, 1.5),
    ('Lee', 2014, 1.7),
    ('Lee', 2015, 3.6),
    ('Kim', 2016, 2.4),
    ('Park', 2015, 2.9),
]
ExampleData = pd.DataFrame(example_rows, columns=["First Name", "Birth Year", "Score"])
ExampleData

Unnamed: 0,First Name,Birth Year,Score
0,Lee,2013,1.5
1,Lee,2014,1.7
2,Lee,2015,3.6
3,Kim,2016,2.4
4,Park,2015,2.9


First Name이 'Lee'에 해당하는 데이터만 추출 (행 인덱싱)

In [228]:
# Row indexing: keep only the rows whose 'First Name' equals 'Lee'.
is_lee = ExampleData['First Name'] == 'Lee'
ExampleData_Lee = ExampleData.loc[is_lee]
ExampleData_Lee

Unnamed: 0,First Name,Birth Year,Score
0,Lee,2013,1.5
1,Lee,2014,1.7
2,Lee,2015,3.6


First Name과 Score 열의 데이터만 추출 (열 인덱싱)

In [229]:
# Column indexing: keep every row, but only the 'First Name' and 'Score' columns.
ExampleData_NameAndScore = ExampleData.loc[:, ['First Name', 'Score']]
ExampleData_NameAndScore

Unnamed: 0,First Name,Score
0,Lee,1.5
1,Lee,1.7
2,Lee,3.6
3,Kim,2.4
4,Park,2.9


.

.

.



## 공정 데이터 인덱싱

첫번째 열 (time) + 상단 데이터 설명자료 링크 (PDF파일) 내 데이터 설명표 참조하여 Type이 'Numeric(Sensor)'인 열만 추출 (총 6열)

#### [필수] 인덱싱 후 데이터  이름(변수명) : 
- M01_SensorData_1 ~ M01_SensorData_5

- M02_SensorData_1 ~ M02_SensorData_5

In [7]:
# Build M01_SensorData_1..5 and M02_SensorData_1..5 by keeping only the 'time'
# column plus the five sensor columns listed below from each process-data table.
# Variables are bound through globals() instead of exec()-ing generated code.
sensor_columns = ['time', 'IONGAUGEPRESSURE', 'ETCHSUPPRESSORCURRENT', 'FLOWCOOLPRESSURE', 'ACTUALROTATIONANGLE', 'ACTUALSTEPDURATION']

for file_no in range(1, 6):
    for machine in ("M01", "M02"):
        process_data = globals()["%s_ProcessData_%d" % (machine, file_no)]
        globals()["%s_SensorData_%d" % (machine, file_no)] = process_data[sensor_columns]

M01_SensorData_5

Unnamed: 0,time,IONGAUGEPRESSURE,ETCHSUPPRESSORCURRENT,FLOWCOOLPRESSURE,ACTUALROTATIONANGLE,ACTUALSTEPDURATION
0,3280856,-0.567390,-1.514782,-0.003223,-0.254317,-0.546321
1,3280860,-1.012916,-1.514782,-0.005451,-0.254317,-0.546321
2,3280864,-1.222574,-1.514782,-0.007677,-0.254317,-0.546321
3,3280868,-1.380129,-1.514782,-0.009906,-0.254317,-0.546321
4,3280872,-1.476403,-1.514782,-0.012134,-0.254317,-0.546321
...,...,...,...,...,...,...
4779549,42635566,0.602250,0.994923,0.337608,-0.254317,1.481146
4779550,42635570,0.602250,0.989938,0.337608,-0.254317,1.481146
4779551,42635574,0.602250,0.984204,0.337608,-0.254317,1.481146
4779552,42635578,0.602250,0.990688,0.339852,-0.254317,1.481146


.

.

.



## 고장기록 데이터 인덱싱

고장모드 1,2 (Fault1, 2)에 해당하는 행만 추출
* Fault 1 : FlowCool Pressure Dropped Below Limit
* Fault 2 : Flowcool Pressure Too High Check Flowcool Pump

#### [필수] 인덱싱 후 데이터 이름(변수명) : 
- M01_Fault1Record_1 ~ M01_Fault1Record_5

- M01_Fault2Record_1 ~ M01_Fault2Record_5

- M02_Fault1Record_1 ~ M02_Fault1Record_5

- M02_Fault2Record_1 ~ M02_Fault2Record_5

In [8]:
# Build M0x_Fault1Record_1..5 and M0x_Fault2Record_1..5: the rows of each fault
# record whose 'fault_name' matches fault mode 1 or fault mode 2 exactly
# (the comparison is case-sensitive, so the strings must stay as written).
# Variables are bound through globals() instead of exec()-ing generated code.
fault_modes = (
    (1, 'FlowCool Pressure Dropped Below Limit'),
    (2, 'Flowcool Pressure Too High Check Flowcool Pump'),
)

for file_no in range(1, 6):
    for machine in ("M01", "M02"):
        fault_record = globals()["%s_FaultRecord_%d" % (machine, file_no)]
        for fault_no, fault_name in fault_modes:
            globals()["%s_Fault%dRecord_%d" % (machine, fault_no, file_no)] = fault_record[
                fault_record['fault_name'] == fault_name
            ]


.

.

.



### 1단계 결과물 제출용 데이터 파일로 저장 (수강생 번호 외 코드수정X)

- 위에서 데이터 이름(변수명) 가이드에 맞게 지정됐는지 재확인 요망

In [9]:
# Stage-1 submission: write three of the indexed tables to ./Result.
# Per the notebook guide, only the student number is meant to be edited here.
StudentNo = 51   # enter student number

# Output paths; the student number is embedded in each file name.
Path1 = './Result/ST%d_DC1_1'%StudentNo
Path2 = './Result/ST%d_DC1_2'%StudentNo
Path3 = './Result/ST%d_DC1_3'%StudentNo

# header=None / index=None are passed to to_csv for every file
# (presumably to omit the header row and the index column - confirm in the pandas docs).
M01_SensorData_3.to_csv(  Path1 , sep=',' , header=None , index=None)
M01_Fault1Record_1.to_csv(Path2 , sep=',' , header=None , index=None)
M02_Fault2Record_5.to_csv(Path3 , sep=',' , header=None , index=None)

.

.

.



# [2단계] 고장모드 별 데이터 인덱싱 (10점)

### * Tip : 고장기록 데이터의 시간정보를 이용해 공정 데이터 인덱싱하는 방법

예시데이터 이용해 고장기록 데이터의 첫번째 고장확인 시점의 직전에 해당하는 센서데이터의 행 정보(EndPoint) 탐색

In [10]:
# Read the two header-less example files used by the stage-2 tip.
example_paths = ('./ExampleData/Example_SensorData', './ExampleData/Example_FaultRecord')
Example_SensorData, Example_FaultRecord = (
    pd.read_csv(example_path, header=None) for example_path in example_paths
)

In [11]:
# Display the example sensor data (cell output follows).
Example_SensorData

Unnamed: 0,0,1,2,3,4,5
0,3286308,-1.456413,-1.279979,-1.953704,-0.207056,2.520568
1,3286312,-1.456232,-1.276657,-1.953704,-0.207056,2.520568
2,3286316,-1.456138,-1.277026,-1.953704,-0.207056,2.520568
3,3286320,-1.456166,-1.277026,-1.953704,-0.207056,2.520568
4,3286324,-1.456554,-1.276287,-1.953704,-0.207056,2.520568
...,...,...,...,...,...,...
1907097,43254146,0.122507,0.005141,0.617337,3.883366,2.520568
1907098,43254148,0.122507,0.007360,0.617337,3.883366,2.520568
1907099,43254154,0.122507,0.008832,0.617337,3.883366,2.520568
1907100,43254158,0.122507,0.010681,0.617337,3.883366,2.520568


In [12]:
# Display the example fault record (cell output follows).
Example_FaultRecord

Unnamed: 0,0,1,2
0,5145048,FlowCool Pressure Dropped Below Limit,05M02
1,5145048,FlowCool Pressure Dropped Below Limit,05M02
2,14666470,FlowCool Pressure Dropped Below Limit,05M02
3,26050762,FlowCool Pressure Dropped Below Limit,05M02
4,26067786,FlowCool Pressure Dropped Below Limit,05M02
5,26080144,FlowCool Pressure Dropped Below Limit,05M02


In [13]:
# Position of the last sensor row whose time (column 0) is not later than the
# time of the first fault record.
first_fault_time = Example_FaultRecord.iloc[0, 0]
sensor_times = Example_SensorData.iloc[:, 0].values
EndPoint = np.flatnonzero(sensor_times <= first_fault_time)[-1]
EndPoint

112939

In [14]:
# Display the rows from EndPoint-999 through EndPoint (the slice end EndPoint+1 is exclusive).
Example_SensorData.iloc[EndPoint-999:EndPoint+1,:]

Unnamed: 0,0,1,2,3,4,5
111940,5137836,0.427373,1.348597,0.498501,-0.207056,-1.110815
111941,5137840,0.427373,1.327549,0.494628,-0.207056,-1.110815
111942,5137844,0.427373,1.353027,0.498501,-0.207056,-1.110815
111943,5137848,0.427373,1.326810,0.502354,-0.207056,-1.110815
111944,5137852,0.427373,1.327549,0.500417,-0.207056,-1.110815
...,...,...,...,...,...,...
112935,5142706,-1.423075,-1.280717,-1.951134,-0.207056,-0.671480
112936,5142710,-1.426623,-1.277026,-1.952419,-0.207056,-0.671480
112937,5142714,-1.428260,-1.277395,-1.953062,-0.207056,-0.671480
112938,5142718,-1.428408,-1.277395,-1.953704,-0.207056,-0.671480


.

.

.



### M01의 Fault 1에 해당하는 구간 (EndPoint-999 ~ EndPoint, 총 1000행) 데이터 추출
- 아래 가이드에 따르지 않고 더 좋은 방법으로 코드 작성해도 결과만 맞으면 무관

#### [필수] 추출 후 데이터 이름(변수명) : 
- (M01_Fault1 센서데이터 추출) M01_Fault1_1 ~ M01_Fault1_n
- (M01_Fault2 센서데이터 추출) M01_Fault2_1 ~ M01_Fault2_n
- (M02_Fault1 센서데이터 추출) M02_Fault1_1 ~ M02_Fault1_n
- (M02_Fault2 센서데이터 추출) M02_Fault2_1 ~ M02_Fault2_n

In [27]:
# Display the Fault 1 rows of the first M01 fault record (cell output follows).
M01_Fault1Record_1

Unnamed: 0,time,fault_name,Tool
0,6533184,FlowCool Pressure Dropped Below Limit,01M01
1,8286500,FlowCool Pressure Dropped Below Limit,01M01
2,8627086,FlowCool Pressure Dropped Below Limit,01M01
3,8697340,FlowCool Pressure Dropped Below Limit,01M01
4,8971484,FlowCool Pressure Dropped Below Limit,01M01
6,12828064,FlowCool Pressure Dropped Below Limit,01M01
7,14026692,FlowCool Pressure Dropped Below Limit,01M01
8,15827488,FlowCool Pressure Dropped Below Limit,01M01
12,31212736,FlowCool Pressure Dropped Below Limit,01M01
13,31229684,FlowCool Pressure Dropped Below Limit,01M01


In [29]:
# Single worked example: window for the first Fault 1 record of M01 file 1.
first_fault_time = M01_Fault1Record_1.iloc[0, 0]
sensor_times = M01_SensorData_1.iloc[:, 0].values

# Last sensor row whose time is <= the fault time, then the rows
# EndPoint-999 .. EndPoint taken from the sensor table.
M01_EndPoint_1 = np.flatnonzero(sensor_times <= first_fault_time)[-1]
M01_Fault1_1 = M01_SensorData_1.iloc[M01_EndPoint_1 - 999:M01_EndPoint_1 + 1, :]

# Show the sensor time found at the end point.
M01_SensorData_1.iloc[M01_EndPoint_1, 0]

6533184

M1 f1

In [20]:
# Extract one sensor window per Fault 1 record of M01 and store the windows as
# M01_Fault1_1 .. M01_Fault1_n, numbered consecutively across the five files.
# NoOfData_M01_Fault1 ends up holding n, the total number of windows.
# Variables are bound through globals() instead of exec()-ing generated code.
NoOfData_M01_Fault1 = 0

for file_no in range(1, 6):
    fault_record = globals()["M01_Fault1Record_%d" % file_no]
    sensor_data = globals()["M01_SensorData_%d" % file_no]
    # The sensor time column does not change per fault, so read it once per file.
    sensor_time = sensor_data.iloc[:, 0].values

    for fault_time in fault_record.iloc[:, 0].values:
        # Last sensor row whose time is <= the fault time.
        # NOTE(review): raises IndexError if no sensor row precedes the fault,
        # and the slice start goes negative when end_point < 999 - same as before.
        end_point = np.where(sensor_time <= fault_time)[0][-1]
        NoOfData_M01_Fault1 += 1
        globals()["M01_Fault1_%d" % NoOfData_M01_Fault1] = sensor_data.iloc[end_point - 999:end_point + 1, :]

M01_Fault1_1

Unnamed: 0,time,IONGAUGEPRESSURE,ETCHSUPPRESSORCURRENT,FLOWCOOLPRESSURE,ACTUALROTATIONANGLE,ACTUALSTEPDURATION
172175,6525172,0.764834,1.275832,0.343420,-0.152484,-0.739683
172176,6525176,0.764834,1.267625,0.343420,-0.152484,-0.739683
172177,6525180,0.764834,1.259088,0.343420,-0.152484,-0.739683
172178,6525184,0.764834,1.259746,0.343420,-0.152484,-0.739683
172179,6525188,0.764834,1.265329,0.343420,-0.152484,-0.739683
...,...,...,...,...,...,...
173170,6533168,0.910440,1.293234,0.350345,-0.152495,-0.254497
173171,6533172,0.910440,1.350688,0.350345,-0.152495,-0.254497
173172,6533176,0.910440,1.368419,0.350345,-0.152495,-0.254497
173173,6533180,0.910440,1.373996,0.350345,-0.152495,-0.254497


M1 f2

In [260]:
# Extract one sensor window per Fault 2 record of M01 and store the windows as
# M01_Fault2_1 .. M01_Fault2_n, numbered consecutively across the five files.
# NoOfData_M01_Fault2 ends up holding n, the total number of windows.
# Variables are bound through globals() instead of exec()-ing generated code.
NoOfData_M01_Fault2 = 0

for file_no in range(1, 6):
    fault_record = globals()["M01_Fault2Record_%d" % file_no]
    sensor_data = globals()["M01_SensorData_%d" % file_no]
    # The sensor time column does not change per fault, so read it once per file.
    sensor_time = sensor_data.iloc[:, 0].values

    for fault_time in fault_record.iloc[:, 0].values:
        # Last sensor row whose time is <= the fault time.
        # NOTE(review): raises IndexError if no sensor row precedes the fault,
        # and the slice start goes negative when end_point < 999 - same as before.
        end_point = np.where(sensor_time <= fault_time)[0][-1]
        NoOfData_M01_Fault2 += 1
        globals()["M01_Fault2_%d" % NoOfData_M01_Fault2] = sensor_data.iloc[end_point - 999:end_point + 1, :]

NoOfData_M01_Fault2

73

M2 f1

In [261]:
# Extract one sensor window per Fault 1 record of M02 and store the windows as
# M02_Fault1_1 .. M02_Fault1_n, numbered consecutively across the five files.
# NoOfData_M02_Fault1 ends up holding n, the total number of windows.
# Variables are bound through globals() instead of exec()-ing generated code.
NoOfData_M02_Fault1 = 0

for file_no in range(1, 6):
    fault_record = globals()["M02_Fault1Record_%d" % file_no]
    sensor_data = globals()["M02_SensorData_%d" % file_no]
    # The sensor time column does not change per fault, so read it once per file.
    sensor_time = sensor_data.iloc[:, 0].values

    for fault_time in fault_record.iloc[:, 0].values:
        # Last sensor row whose time is <= the fault time.
        # NOTE(review): raises IndexError if no sensor row precedes the fault,
        # and the slice start goes negative when end_point < 999 - same as before.
        end_point = np.where(sensor_time <= fault_time)[0][-1]
        NoOfData_M02_Fault1 += 1
        globals()["M02_Fault1_%d" % NoOfData_M02_Fault1] = sensor_data.iloc[end_point - 999:end_point + 1, :]

NoOfData_M02_Fault1

393

M2 f2

In [262]:
# Extract one sensor window per Fault 2 record of M02 and store the windows as
# M02_Fault2_1 .. M02_Fault2_n, numbered consecutively across the five files.
# NoOfData_M02_Fault2 ends up holding n, the total number of windows.
# Variables are bound through globals() instead of exec()-ing generated code.
NoOfData_M02_Fault2 = 0

for file_no in range(1, 6):
    fault_record = globals()["M02_Fault2Record_%d" % file_no]
    sensor_data = globals()["M02_SensorData_%d" % file_no]
    # The sensor time column does not change per fault, so read it once per file.
    sensor_time = sensor_data.iloc[:, 0].values

    for fault_time in fault_record.iloc[:, 0].values:
        # Last sensor row whose time is <= the fault time.
        # NOTE(review): raises IndexError if no sensor row precedes the fault,
        # and the slice start goes negative when end_point < 999 - same as before.
        end_point = np.where(sensor_time <= fault_time)[0][-1]
        NoOfData_M02_Fault2 += 1
        globals()["M02_Fault2_%d" % NoOfData_M02_Fault2] = sensor_data.iloc[end_point - 999:end_point + 1, :]

NoOfData_M02_Fault2

140

M01의 Fault2, M02의 Fault1, M02의 Fault2에 대해서도 동일하게 수행

.

.

.



### 2단계 결과물 제출용 데이터 파일로 저장 (수강생 번호 외 코드수정X)

- 위에서 데이터 이름(변수명) 가이드에 맞게 지정됐는지 재확인 요망

In [242]:
# Stage-2 submission: write one extracted window per machine/fault group to ./Result.
# StudentNo = 0   # enter student number (left commented out; StudentNo is assigned in the stage-1 save cell)

# Output paths; the student number is embedded in each file name.
Path1 = './Result/ST%d_DC2_1'%StudentNo
Path2 = './Result/ST%d_DC2_2'%StudentNo
Path3 = './Result/ST%d_DC2_3'%StudentNo
Path4 = './Result/ST%d_DC2_4'%StudentNo

# The window numbers 73, 393 and 140 equal the NoOfData_* counts printed by the
# extraction cells; 88 is presumably the M01 Fault 1 count (not printed there).
M01_Fault1_88.to_csv(  Path1 , sep=',' , header=None , index=None)
M01_Fault2_73.to_csv(  Path2 , sep=',' , header=None , index=None)
M02_Fault1_393.to_csv( Path3 , sep=',' , header=None , index=None)
M02_Fault2_140.to_csv( Path4 , sep=',' , header=None , index=None)

.

.

.



# [3단계] 데이터 특징 추출 (4점)

2단계에서 추출한 데이터들에 대하여 각 센서(열) 별로 실습에 사용된 Time Domain 10개 특징값 추출 (특징 순서 동일하게!)

In [243]:
def rms(x): # RMS function definition
    """Return the root-mean-square of ``x``: sqrt(mean(x**2)).

    ``x`` may be a pandas Series, a NumPy array, or a plain sequence of numbers.
    The values are converted to float before squaring, so integer input cannot
    overflow in ``x**2``. Objects that provide ``astype`` (pandas Series/DataFrame,
    NumPy arrays) keep their own type, so the same mean reduction as before is
    applied to them; anything else is first wrapped in a NumPy array.
    """
    if not hasattr(x, "astype"):
        # Plain lists/tuples/scalars do not support ``** 2`` element-wise.
        x = np.asarray(x)
    x = x.astype(float)
    return np.sqrt(np.mean(x**2))


#### [필수] 추출 후 특징데이터 (총 4개) 이름(변수명) : 
- (M01_Fault1 특징데이터) FeatureData_M01_Fault1
- (M01_Fault2 특징데이터) FeatureData_M01_Fault2
- (M02_Fault1 특징데이터) FeatureData_M02_Fault1
- (M02_Fault2 특징데이터) FeatureData_M02_Fault2

M1 f1

In [270]:
NoOfSensor  = 5
NoOfFeature = 10
NoOfData    = NoOfData_M01_Fault1

# Feature matrix: one row per (sensor, feature) pair, one column per window.
FeatureData_M01_Fault1   = np.zeros((NoOfSensor*NoOfFeature , NoOfData))

for i in range(NoOfData):

    # Window i+1 is stored in the variable M01_Fault1_<i+1>; it is looked up
    # through globals() instead of exec()-ing generated code.
    window = globals()["M01_Fault1_%d" % (i + 1)]

    # Time-domain feature extraction, sensor by sensor.
    for j in range(NoOfSensor):

        signal = window.iloc[:, j + 1]   # column 0 is 'time'; sensor columns start at 1

        # Values shared by several features are computed once.
        peak      = np.max(signal)
        average   = np.mean(signal)
        rms_value = rms(signal)

        # Rows 10*j+0 .. 10*j+9, in the original feature order.
        # NOTE(review): the last two ratios divide by the plain mean, exactly as the
        # original cell did; confirm against the practice code whether mean(|x|) was intended.
        FeatureData_M01_Fault1[NoOfFeature*j:NoOfFeature*(j + 1), i] = [
            peak,                  # maximum
            np.min(signal),        # minimum
            average,               # mean
            rms_value,             # RMS
            np.var(signal),        # variance
            sp.skew(signal),       # skewness
            sp.kurtosis(signal),   # kurtosis
            peak / rms_value,      # max / RMS
            rms_value / average,   # RMS / mean
            peak / average,        # max / mean
        ]

M1 f2

In [271]:
NoOfSensor  = 5
NoOfFeature = 10
NoOfData    = NoOfData_M01_Fault2

# Feature matrix: one row per (sensor, feature) pair, one column per window.
FeatureData_M01_Fault2   = np.zeros((NoOfSensor*NoOfFeature , NoOfData))

for i in range(NoOfData):

    # Window i+1 is stored in the variable M01_Fault2_<i+1>; it is looked up
    # through globals() instead of exec()-ing generated code.
    window = globals()["M01_Fault2_%d" % (i + 1)]

    # Time-domain feature extraction, sensor by sensor.
    for j in range(NoOfSensor):

        signal = window.iloc[:, j + 1]   # column 0 is 'time'; sensor columns start at 1

        # Values shared by several features are computed once.
        peak      = np.max(signal)
        average   = np.mean(signal)
        rms_value = rms(signal)

        # Rows 10*j+0 .. 10*j+9, in the original feature order.
        # NOTE(review): the last two ratios divide by the plain mean, exactly as the
        # original cell did; confirm against the practice code whether mean(|x|) was intended.
        FeatureData_M01_Fault2[NoOfFeature*j:NoOfFeature*(j + 1), i] = [
            peak,                  # maximum
            np.min(signal),        # minimum
            average,               # mean
            rms_value,             # RMS
            np.var(signal),        # variance
            sp.skew(signal),       # skewness
            sp.kurtosis(signal),   # kurtosis
            peak / rms_value,      # max / RMS
            rms_value / average,   # RMS / mean
            peak / average,        # max / mean
        ]

M2 f1

In [272]:
NoOfSensor  = 5
NoOfFeature = 10
NoOfData    = NoOfData_M02_Fault1

# Feature matrix: one row per (sensor, feature) pair, one column per window.
FeatureData_M02_Fault1   = np.zeros((NoOfSensor*NoOfFeature , NoOfData))

for i in range(NoOfData):

    # Window i+1 is stored in the variable M02_Fault1_<i+1>; it is looked up
    # through globals() instead of exec()-ing generated code.
    window = globals()["M02_Fault1_%d" % (i + 1)]

    # Time-domain feature extraction, sensor by sensor.
    for j in range(NoOfSensor):

        signal = window.iloc[:, j + 1]   # column 0 is 'time'; sensor columns start at 1

        # Values shared by several features are computed once.
        peak      = np.max(signal)
        average   = np.mean(signal)
        rms_value = rms(signal)

        # Rows 10*j+0 .. 10*j+9, in the original feature order.
        # NOTE(review): the last two ratios divide by the plain mean, exactly as the
        # original cell did; confirm against the practice code whether mean(|x|) was intended.
        FeatureData_M02_Fault1[NoOfFeature*j:NoOfFeature*(j + 1), i] = [
            peak,                  # maximum
            np.min(signal),        # minimum
            average,               # mean
            rms_value,             # RMS
            np.var(signal),        # variance
            sp.skew(signal),       # skewness
            sp.kurtosis(signal),   # kurtosis
            peak / rms_value,      # max / RMS
            rms_value / average,   # RMS / mean
            peak / average,        # max / mean
        ]

M2 f2

In [273]:
NoOfSensor  = 5
NoOfFeature = 10
NoOfData    = NoOfData_M02_Fault2

# Feature matrix: one row per (sensor, feature) pair, one column per window.
FeatureData_M02_Fault2   = np.zeros((NoOfSensor*NoOfFeature , NoOfData))

for i in range(NoOfData):

    # Window i+1 is stored in the variable M02_Fault2_<i+1>; it is looked up
    # through globals() instead of exec()-ing generated code.
    window = globals()["M02_Fault2_%d" % (i + 1)]

    # Time-domain feature extraction, sensor by sensor.
    for j in range(NoOfSensor):

        signal = window.iloc[:, j + 1]   # column 0 is 'time'; sensor columns start at 1

        # Values shared by several features are computed once.
        peak      = np.max(signal)
        average   = np.mean(signal)
        rms_value = rms(signal)

        # Rows 10*j+0 .. 10*j+9, in the original feature order.
        # NOTE(review): the last two ratios divide by the plain mean, exactly as the
        # original cell did; confirm against the practice code whether mean(|x|) was intended.
        FeatureData_M02_Fault2[NoOfFeature*j:NoOfFeature*(j + 1), i] = [
            peak,                  # maximum
            np.min(signal),        # minimum
            average,               # mean
            rms_value,             # RMS
            np.var(signal),        # variance
            sp.skew(signal),       # skewness
            sp.kurtosis(signal),   # kurtosis
            peak / rms_value,      # max / RMS
            rms_value / average,   # RMS / mean
            peak / average,        # max / mean
        ]

.

.

.



### Data frame 형식으로 변환 및 데이터 확인

In [274]:
# Wrap each of the four feature matrices in a DataFrame (default integer labels).
(
    FeatureData_M01_Fault1_df,
    FeatureData_M01_Fault2_df,
    FeatureData_M02_Fault1_df,
    FeatureData_M02_Fault2_df,
) = (
    pd.DataFrame(feature_matrix)
    for feature_matrix in (
        FeatureData_M01_Fault1,
        FeatureData_M01_Fault2,
        FeatureData_M02_Fault1,
        FeatureData_M02_Fault2,
    )
)

### 3단계 결과물 제출용 데이터 파일로 저장 (수강생 번호 외 코드수정X)

- 위에서 데이터 이름(변수명) 가이드에 맞게 지정됐는지 재확인 요망

In [276]:
# Stage-3 submission: write the four feature tables to ./Result.
# StudentNo = 0   # enter student number (left commented out; StudentNo is assigned in the stage-1 save cell)

# Output paths; the student number is embedded in each file name.
Path1 = './Result/ST%d_DC3_1'%StudentNo
Path2 = './Result/ST%d_DC3_2'%StudentNo
Path3 = './Result/ST%d_DC3_3'%StudentNo
Path4 = './Result/ST%d_DC3_4'%StudentNo

# One file per machine/fault combination, in the order M01 F1, M01 F2, M02 F1, M02 F2.
FeatureData_M01_Fault1_df.to_csv( Path1 , sep=',' , header=None , index=None)
FeatureData_M01_Fault2_df.to_csv( Path2 , sep=',' , header=None , index=None)
FeatureData_M02_Fault1_df.to_csv( Path3 , sep=',' , header=None , index=None)
FeatureData_M02_Fault2_df.to_csv( Path4 , sep=',' , header=None , index=None)

.

.

.



# [4단계]  M01 & M02 공통 상위 P-value 해당하는 핵심 특징 선정 (7점)

- 3단계의 데이터를 활용하여 특징별 P-value 계산 (M01 / M02 각각 수행)

### M01 기준 Fault1 / Fault2 데이터 t-Test

In [280]:
NoOfFeature_total = 50  # number of features: 10 features x 5 sensors

# Run a t-test for every feature row, comparing the M01 Fault 1 values with the
# M01 Fault 2 values, and keep the p-value (second element of the test result).
p_of_feature = [
    sp.ttest_ind(
        FeatureData_M01_Fault1_df.iloc[feature_row, :],
        FeatureData_M01_Fault2_df.iloc[feature_row, :],
    )[1]
    for feature_row in range(NoOfFeature_total)
]

# Two-column table: column 0 = feature index, column 1 = p-value.
P_value = pd.DataFrame(
    np.column_stack((np.arange(NoOfFeature_total, dtype=float), p_of_feature))
)

### M02 기준 Fault1 / Fault2 데이터 t-Test

In [281]:
NoOfFeature_total2 = 50  # number of features: 10 features x 5 sensors

# Run a t-test for every feature row, comparing the M02 Fault 1 values with the
# M02 Fault 2 values, and keep the p-value (second element of the test result).
p_of_feature2 = [
    sp.ttest_ind(
        FeatureData_M02_Fault1_df.iloc[feature_row, :],
        FeatureData_M02_Fault2_df.iloc[feature_row, :],
    )[1]
    for feature_row in range(NoOfFeature_total2)
]

# Two-column table: column 0 = feature index, column 1 = p-value.
P_value2 = pd.DataFrame(
    np.column_stack((np.arange(NoOfFeature_total2, dtype=float), p_of_feature2))
)

.

.

.



### P-value 오름차순으로 재정렬 및 핵심 특징값 확인
- P-value 작은 상위 10개 중 M01 / M02 중복되는 '핵심 특징값' 확인

In [282]:
# Re-order the M01 p-value table by its p-value column (label 1), smallest first.
P_value_Rank = P_value.sort_values(by=1)

P_value_Rank

Unnamed: 0,0,1
47,47.0,0.040401
16,16.0,0.041461
10,10.0,0.087723
3,3.0,0.096193
15,15.0,0.103848
6,6.0,0.113171
17,17.0,0.12029
13,13.0,0.124289
7,7.0,0.158696
2,2.0,0.164984


In [283]:
# Re-order the M02 p-value table by its p-value column (label 1), smallest first.
P_value_Rank2 = P_value2.sort_values(by=1)

P_value_Rank2

Unnamed: 0,0,1
2,2.0,2.697175e-09
21,21.0,2.311669e-07
13,13.0,3.656906e-07
23,23.0,1.089895e-06
27,27.0,6.448794e-06
12,12.0,7.827788e-06
20,20.0,1.244603e-05
22,22.0,3.832323e-05
15,15.0,6.139508e-05
16,16.0,0.0004446589


.

.

.



### 최종 선택된 특징데이터 인덱싱

- 핵심 특징값의 특징번호(index) 오름차순으로 특징데이터(FeatureData)에서 추출

#### [필수] 추출 후 특징데이터 (총 4개) 이름(변수명) : 
- (M01_Fault1 특징선택 데이터) FeatureSelected_M01_Fault1
- (M01_Fault2 특징선택 데이터) FeatureSelected_M01_Fault2
- (M02_Fault1 특징선택 데이터) FeatureSelected_M02_Fault1
- (M02_Fault2 특징선택 데이터) FeatureSelected_M02_Fault2

M1 f1

In [284]:
# Extract the final feature data from the indices of the key features.
# Feature rows 2, 13, 15 and 16 are the ones hard-coded for selection;
# the transposed table is still created under its original name.
FeatureData_M01_Fault1_df_T = FeatureData_M01_Fault1_df.T
FeatureSelected_M01_Fault1 = FeatureData_M01_Fault1_df.loc[[2, 13, 15, 16], :]

FeatureSelected_M01_Fault1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,78,79,80,81,82,83,84,85,86,87
2,-0.047738,0.054994,-0.180056,0.140949,-0.073556,-0.675509,0.002022,0.111641,-0.843472,-0.843472,...,-1.427293,-1.427293,0.330837,0.097859,0.055996,-0.205316,-0.205316,0.112375,0.169398,-0.022881
13,1.21423,1.007261,1.238821,0.999586,1.19054,1.104077,0.968543,0.981108,1.191018,1.191018,...,1.443814,1.443814,0.975113,0.93435,0.852867,1.026406,1.026406,0.917912,0.893934,0.977728
15,-0.095048,-0.302779,0.23488,-0.409839,0.083538,0.990469,0.057118,-0.121161,1.459769,1.459769,...,2.18755,2.18755,-1.127801,-0.906738,-0.569696,-0.32922,-0.32922,-1.03373,-1.23762,-0.895346
16,-1.9519,-1.396186,-1.787407,-1.531637,-1.794629,-1.000398,-1.725827,-1.719468,0.150501,0.150501,...,2.805837,2.805837,-0.70543,-1.113578,-1.127711,-1.780402,-1.780402,-0.882024,-0.426053,-1.116898


M1 f2

In [286]:
# Keep feature rows 2, 13, 15 and 16 of the M01 Fault 2 feature table;
# the transposed table is still created under its original name.
FeatureData_M01_Fault2_df_T = FeatureData_M01_Fault2_df.T
FeatureSelected_M01_Fault2 = FeatureData_M01_Fault2_df.loc[[2, 13, 15, 16], :]

M2 f1

In [287]:
# Keep feature rows 2, 13, 15 and 16 of the M02 Fault 1 feature table;
# the transposed table is still created under its original name.
FeatureData_M02_Fault1_df_T = FeatureData_M02_Fault1_df.T
FeatureSelected_M02_Fault1 = FeatureData_M02_Fault1_df.loc[[2, 13, 15, 16], :]

M2 f2

In [288]:
# Keep feature rows 2, 13, 15 and 16 of the M02 Fault 2 feature table;
# the transposed table is still created under its original name.
FeatureData_M02_Fault2_df_T = FeatureData_M02_Fault2_df.T
FeatureSelected_M02_Fault2 = FeatureData_M02_Fault2_df.loc[[2, 13, 15, 16], :]

.

.

.



### 4단계 결과물 제출용 데이터 파일로 저장 (수강생 번호 외 코드수정X)

- 위에서 데이터 이름(변수명) 가이드에 맞게 지정됐는지 재확인 요망

In [289]:
# Stage-4 submission: write the four selected-feature tables to ./Result.
# StudentNo = 0   # enter student number (left commented out; StudentNo is assigned in the stage-1 save cell)

# Output paths; the student number is embedded in each file name.
Path1 = './Result/ST%d_DC4_1'%StudentNo
Path2 = './Result/ST%d_DC4_2'%StudentNo
Path3 = './Result/ST%d_DC4_3'%StudentNo
Path4 = './Result/ST%d_DC4_4'%StudentNo

# One file per machine/fault combination, in the order M01 F1, M01 F2, M02 F1, M02 F2.
FeatureSelected_M01_Fault1.to_csv( Path1 , sep=',' , header=None , index=None)
FeatureSelected_M01_Fault2.to_csv( Path2 , sep=',' , header=None , index=None)
FeatureSelected_M02_Fault1.to_csv( Path3 , sep=',' , header=None , index=None)
FeatureSelected_M02_Fault2.to_csv( Path4 , sep=',' , header=None , index=None)

.

.

.



# * 결과 데이터(ST0_DC1_1 ~ DC4_4) 및 본 코드파일 함께 zip파일로 압축하여 제출

### >>> 압축파일 이름 ST(수강생 번호)_DC (예시 : 'ST00_DC'  //  'ST0_DC')