In [466]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report,confusion_matrix

In [469]:
# 대회 데이터셋 : 경진대회용 용해탱크 데이터셋.csv
# 전처리 데이터셋 : 로트,중량전처리조.csv

# data = pd.read_csv('경진대회용 용해탱크 데이터셋.csv')
data = pd.read_csv('로트,중량전처리조.csv')[['STD_DT','NUM','MELT_TEMP','MOTORSPEED','MELT_WEIGHT_PRE','INSP','TAG']]
data.columns = ['STD_DT','NUM','MELT_TEMP','MOTORSPEED','MELT_WEIGHT','INSP','TAG']
data.head()

Unnamed: 0,STD_DT,NUM,MELT_TEMP,MOTORSPEED,MELT_WEIGHT,INSP,TAG
0,2020-03-04 0:00,0,489,116,631.0,3.19,OK
1,2020-03-04 0:00,1,433,78,609.0,3.19,OK
2,2020-03-04 0:00,2,464,154,608.0,3.19,OK
3,2020-03-04 0:00,3,379,212,606.0,3.19,OK
4,2020-03-04 0:00,4,798,1736,604.0,3.21,OK


In [470]:
# 사용할 feature 및 tag 숫자로 변환

data = data[['STD_DT','MELT_TEMP','MOTORSPEED','MELT_WEIGHT','INSP','TAG']]
data = data.set_index('STD_DT')
data.index = pd.to_datetime(data.index)
data['TAG_num'] = [0 if i =='OK' else 1 for i in data.TAG]

In [471]:
data.shape

(835200, 6)

In [476]:
# 데이터 구간 나누는 곳 : 구간별로 모델 학습 
# 1번 구간 [:188400]
# 2번 구간 [188400:222000]
# 3번 구간 [222000:312000]
# 4번 구간 [312000:392400]
# 5번 구간 [392400:471600]
# 6번 구간 [471600:598200]
# 7번 구간 [598200:]

# 1개 구간에 대한 학습 시
# data = data.iloc[471600:598200,:]
# data

# 2개 이상 구간 concat 후 학습 시(risk level 단위 학습)
data1 = data.iloc[:222000,:]
data2 = data.iloc[471600:,:]
data = pd.concat([data1,data2],axis=0)
data

Unnamed: 0_level_0,MELT_TEMP,MOTORSPEED,MELT_WEIGHT,INSP,TAG,TAG_num
STD_DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-04 00:00:00,489,116,631.0,3.19,OK,0
2020-03-04 00:00:00,433,78,609.0,3.19,OK,0
2020-03-04 00:00:00,464,154,608.0,3.19,OK,0
2020-03-04 00:00:00,379,212,606.0,3.19,OK,0
2020-03-04 00:00:00,798,1736,604.0,3.21,OK,0
...,...,...,...,...,...,...
2020-04-30 23:59:00,755,1743,318.0,3.21,OK,0
2020-04-30 23:59:00,385,206,317.0,3.19,OK,0
2020-04-30 23:59:00,465,148,316.0,3.20,OK,0
2020-04-30 23:59:00,467,0,314.0,3.19,OK,0


In [477]:
# train/test 데이터 분리

train = pd.concat([data.iloc[:int(len(data)*0.5),:],data.iloc[int(len(data)*0.8):,:]])
test = data.iloc[int(len(data)*0.5):int(len(data)*0.8),:]
X_train = train.iloc[:,:4]
y_train = train.iloc[:,5:]
X_test = test.iloc[:,:4]
y_test = test.iloc[:,5:]

In [481]:
# 학습을 위한 window data 생성

scaler = MinMaxScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train))
X_test = pd.DataFrame(scaler.transform(X_test))



def timeseries_data(dataset, target, start_index, end_index, window_size, target_size) :
    data = []
    labels = []

    y_start_index = start_index + window_size 
    y_end_index = end_index - target_size  

    for i in range(y_start_index, y_end_index) :
        data.append(dataset.iloc[i-window_size:i,:].values)
        labels.append(target.iloc[i:i+target_size,:].values)
    data = np.array(data)
    labels = np.array(labels)
    labels = labels.reshape(-1,target_size)  
    return data, labels

window = 10  
X_train, y_train = timeseries_data(X_train,y_train,0,len(X_train),window,1)
X_test, y_test = timeseries_data(X_test,y_test,0,len(X_test),window,1)

In [484]:
# LSTM model 생성

model = keras.models.Sequential()
model.add(keras.layers.LSTM(50, input_shape = (X_train.shape[1], X_train.shape[2]), activation = 'tanh', return_sequences = False))
model.add(keras.layers.Dense(1, activation = 'sigmoid'))

In [485]:
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [486]:
# 모델 학습 

early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 10)
history = model.fit(X_train, y_train, epochs = 100, validation_split=0.2, batch_size = 50, callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


In [487]:
y_pred = model.predict(X_test)
y_pred = [1 if i>=0.5 else 0 for i in y_pred]

In [488]:
# 모델 성능 평가

result = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
print(result)
print(confusion)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    175669

    accuracy                           1.00    175669
   macro avg       1.00      1.00      1.00    175669
weighted avg       1.00      1.00      1.00    175669

[[175669]]
