In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import *
from sklearn.model_selection import train_test_split

In [2]:
from keras.models import Sequential,load_model
from keras.layers import Dense, LSTM, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam 
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.regularizers import l2
from time import time
from sklearn.preprocessing import MinMaxScaler  # MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from keras.callbacks import Callback,ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
import keras.backend as K

In [3]:
# 2. Load Data
#
# Read the melting-tank log, parse STD_DT as timestamps and use it as the
# index, then drop the NUM column.
# `infer_datetime_format` is intentionally not passed: it is deprecated in
# pandas 2.x and the parsed result is the same without it.
# The drop is chained (no `inplace=True`) so `df` is built in one expression.

df = (
    pd.read_csv('./melting_tank.csv',
                parse_dates=['STD_DT'],
                index_col='STD_DT')
    .drop(columns=['NUM'])  # drop the NUM column
)

In [4]:
# Turn the categorical TAG column into integer class ids.
# LabelEncoder assigns ids in sorted order of the distinct labels.
encoder = preprocessing.LabelEncoder()
df['TAG'] = encoder.fit_transform(df['TAG']).astype(int)

Unnamed: 0,STD_DT,NUM,MELT_TEMP,MOTORSPEED,MELT_WEIGHT,INSP,TAG
0,2020-03-04 0:00,0,489,116,631,3.19,OK
1,2020-03-04 0:00,1,433,78,609,3.19,OK
2,2020-03-04 0:00,2,464,154,608,3.19,OK
3,2020-03-04 0:00,3,379,212,606,3.19,OK
4,2020-03-04 0:00,4,798,1736,604,3.21,OK
...,...,...,...,...,...,...,...
835195,2020-04-30 23:59,835195,755,1743,318,3.21,OK
835196,2020-04-30 23:59,835196,385,206,317,3.19,OK
835197,2020-04-30 23:59,835197,465,148,316,3.20,OK
835198,2020-04-30 23:59,835198,467,0,314,3.19,OK


In [None]:
# Report the frame's (rows, columns) shape, then preview the first 20 rows.
print(f'data shape = {df.shape}')
df.head(20)

In [None]:
# Class balance of the encoded target (1 = normal, 0 = abnormal).
# Uses pandas' own counter so the cell does not depend on
# `collections.Counter`, which this notebook never imports.
df['TAG'].value_counts()

In [None]:
# Separate the predictors (every column except TAG) from the target and
# expose both as plain NumPy arrays.
input_x = df.drop(columns='TAG').to_numpy()
input_y = df['TAG'].to_numpy()

# Number of predictor columns.
n_features = input_x.shape[-1]

In [None]:
def make_dataset(data, label, window_size):
    """Cut a sequence into overlapping windows, each paired with the next label.

    For every start offset ``s`` in ``range(len(data) - window_size)`` the
    feature is ``data[s:s + window_size]`` and the target is
    ``label[s + window_size]`` (the label one step past the window's end).

    Returns:
        ``(features, labels)`` as NumPy arrays built with ``np.array``.
    """
    starts = range(len(data) - window_size)
    windows = [data[s:s + window_size] for s in starts]
    targets = [label[s + window_size] for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
def scale(X, scaler):
    """Run ``scaler.transform`` over each 2-D slice along the first axis of ``X``.

    Every slice ``X[k, :, :]`` is overwritten with its transformed values, so
    ``X`` is modified in place; the same array object is returned.
    """
    for k in range(X.shape[0]):
        window = X[k, :, :]
        X[k, :, :] = scaler.transform(window)

    return X

In [None]:
# Min-max scale every predictor column; the fitted scaler is kept in `scaler`.
# NOTE(review): the scaler is fitted on all rows, i.e. before any
# train/validation/test split exists -- confirm this is acceptable.
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(input_x)

In [None]:
from imblearn.combine import SMOTETomek

# Combined over-/under-sampler. The seed is fixed so the synthetic samples
# (and everything trained on them) are identical on every run.
smtt = SMOTETomek(random_state=42)

In [None]:
# Rebalance the classes with the SMOTETomek sampler; both the feature rows and
# the labels are replaced by their resampled versions.
# NOTE(review): this operates on individual rows *before* they are cut into
# windows and before the train/test split, so (a) the resampled rows are
# presumably no longer a contiguous time series and (b) synthetic samples can
# end up in the validation/test sets. Consider windowing and splitting first
# and resampling only the training portion -- TODO confirm intent.
# NOTE(review): the inputs are overwritten, so re-running this cell resamples
# already-resampled data.
x_scaled, input_y = smtt.fit_resample(x_scaled, input_y)

In [None]:
# Inspect the array shape after resampling.
x_scaled.shape

In [None]:
# Cut the rows into 10-step windows; each window's target is the label that
# immediately follows it. `x_scaled` becomes a stack of windows from here on.
x_scaled, y_sequence = make_dataset(data=x_scaled, label=input_y, window_size=10)

In [None]:
# Spectral features: for feature columns 0, 1 and 2 take the DFT of each
# window along the time axis and keep its magnitude, normalised by the window
# length.
# - np.fft.fft returns complex numbers; the magnitude is taken so the feature
#   tensor stays real-valued for the downstream network.
# - The normaliser is read from the data instead of hard-coding 10.
# - The FFT is applied to the whole stack at once (axis=1) rather than via a
#   Python-level map over windows.
# Presumably columns 0/1/2 are temperature, motor speed and weight (matching
# the variable names) -- verify against the DataFrame column order.
window_len = x_scaled.shape[1]
temp_fft = np.abs(np.fft.fft(x_scaled[:, :, 0], axis=1)) / window_len
speed_fft = np.abs(np.fft.fft(x_scaled[:, :, 1], axis=1)) / window_len
weight_fft = np.abs(np.fft.fft(x_scaled[:, :, 2], axis=1)) / window_len

In [None]:
# Append a trailing feature axis to each spectral block so it can be
# concatenated with the windowed features along axis 2.
temp_fft = np.expand_dims(temp_fft, axis=2)
speed_fft = np.expand_dims(speed_fft, axis=2)
weight_fft = np.expand_dims(weight_fft, axis=2)

In [None]:
# Final model input: the windowed features followed by the three spectral
# blocks, joined along the feature axis.
new_x = np.concatenate(
    (x_scaled, temp_fft, speed_fft, weight_fft),
    axis=2,
)

In [None]:
# Split into train, valid, and test.
# 30% is held out for test, then 30% of the remainder for validation
# (roughly 49% / 21% / 30% overall).
# The split is seeded and stratified on the label so the partition -- and
# every metric computed from it -- is reproducible and each subset keeps the
# same class ratio.
# NOTE(review): the windows overlap, so a random split places near-duplicate
# windows on both sides of the boundary; consider a chronological split.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    new_x, y_sequence,
    test_size=0.3, random_state=SPLIT_SEED, stratify=y_sequence)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train,
    test_size=0.3, random_state=SPLIT_SEED, stratify=y_train)

print(len(x_train))
print(len(x_valid))
print(len(x_test))

# lstm

In [23]:
from keras.models import Sequential,load_model
from keras.layers import Dense, LSTM, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam 
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.regularizers import l2
from time import time
from sklearn.preprocessing import MinMaxScaler  # MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from keras.callbacks import Callback,ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
import keras.backend as K
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
from datetime import datetime
from sklearn import preprocessing
%matplotlib inline

In [None]:
# Let's try an LSTM as well.

def get_f1(y_true, y_pred):  # taken from old keras source code
    """F1 score built from Keras backend ops, usable as a compile-time metric.

    Inputs are clipped to [0, 1] and rounded to hard 0/1 before counting, and
    ``K.epsilon()`` is added to every denominator to avoid division by zero.
    The value is computed over whatever tensors are passed in.
    """
    eps = K.epsilon()

    def _count_ones(t):
        # Clip to [0, 1], round to 0/1, and count the ones.
        return K.sum(K.round(K.clip(t, 0, 1)))

    hits = _count_ones(y_true * y_pred)   # true positives
    actual = _count_ones(y_true)          # all real positives
    flagged = _count_ones(y_pred)         # all predicted positives

    precision = hits / (flagged + eps)
    recall = hits / (actual + eps)
    return 2 * (precision * recall) / (precision + recall + eps)

# Single-layer LSTM binary classifier: 50 LSTM units feeding one sigmoid unit.
model_basic = Sequential()
# (timesteps, features) is read from the training tensor instead of being
# hard-coded, so the model follows any change to the window length or to the
# number of engineered features.
model_basic.add(LSTM(50, input_shape=x_train.shape[1:],
                    activation='tanh', return_sequences=False))
model_basic.add(Dense(1, activation='sigmoid'))
model_basic.compile(loss='binary_crossentropy',
              metrics=[get_f1],
              optimizer='adam')
# Stop after 5 epochs without val_loss improvement and roll back to the best
# epoch's weights; without restore_best_weights the model would keep the
# weights from the last (worse) epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=5,
                           restore_best_weights=True)

model_basic.summary()

history = model_basic.fit(x_train, y_train, 
                          epochs = 30, 
                          batch_size = 50, 
                          validation_data=(x_valid, y_valid), 
                          callbacks=[early_stop])

In [None]:
from sklearn.metrics import *

In [None]:
# Score the held-out test set.
pred = model_basic.predict(x_test)

# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions.
pred_df = pd.DataFrame(pred, columns=['TAG'])
pred_df['TAG'] = (pred_df['TAG'] >= 0.5).astype('int64')

# NOTE(review): precision/recall/F1 are computed with scikit-learn's default
# positive label (1); if class 0 is the class of interest, pass pos_label=0.
p = precision_score(y_test, pred_df)
r = recall_score(y_test, pred_df)
f1 = f1_score(y_test, pred_df)
acc = accuracy_score(y_test, pred_df)

for template, score in (("Precision: %0.4f", p),
                        ("Recall: %0.4f", r),
                        ("F1-score: %0.4f", f1),
                        ("Accuracy: %0.4f", acc)):
    print(template % score)