# Load the pre-trained 30-cycle Classifier
Binary classification: predict whether an asset will fail within a certain time frame (e.g. a given number of days or operating cycles).

In [1]:
import pickle

In [2]:
# Load the scaler object from its pickle file.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# files that come from a trusted source.
# A forward slash keeps this relative path valid on Windows, Linux and macOS.
scaler_pkl_path = "deploy/scaler.pkl"
# The context manager closes the file even if unpickling raises.
with open(scaler_pkl_path, 'rb') as scaler_pkl:
    scaler = pickle.load(scaler_pkl)

In [3]:
# Load the pickled collection of column labels that have to be normalized.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# files that come from a trusted source.
# A forward slash keeps this relative path valid on Windows, Linux and macOS.
cols_normalize_pkl_path = "deploy/cols_normalize.pkl"
# The context manager closes the file even if unpickling raises.
with open(cols_normalize_pkl_path, 'rb') as cols_normalize_pkl:
    cols_normalize = pickle.load(cols_normalize_pkl)

In [4]:
# Load the pre-trained classification model from its HDF5 file.
from keras.models import load_model
# A forward slash keeps this relative path valid on Windows, Linux and macOS
# (the file name itself, including its spelling, is unchanged).
model_hdf5_path = "deploy/model_predictve_maintenance_LSTM_30days_classifier.h5"
model = load_model(model_hdf5_path)

Using TensorFlow backend.


In [5]:
# Time windows, expressed in cycles.
w1 = 30  # 30-cycle window: used further down to derive the true label
w0 = 15  # 15-cycle window: not referenced by the cells visible in this notebook

# Number of consecutive cycles sliced out per engine and passed to the model.
sequence_length = 50

# Load test data

In [6]:
import pandas as pd
import numpy as np

In [7]:
# Read the raw test data: space-separated values without a header row.
# A forward slash keeps this relative path valid on Windows, Linux and macOS.
test_df = pd.read_csv("data/PM_test.txt", sep=" ", header=None)
# Drop the two columns at positions 26 and 27, which are not used.
# Rebinding the result (instead of inplace=True) leaves the frame returned by
# read_csv untouched and keeps the step easy to chain or re-run.
test_df = test_df.drop(test_df.columns[[26, 27]], axis=1)
# Name the 26 remaining columns: engine id, cycle counter, three operational
# settings and the sensor readings s1 .. s21.
test_df.columns = (
    ['id', 'cycle', 'setting1', 'setting2', 'setting3']
    + ['s{}'.format(sensor_no) for sensor_no in range(1, 22)]
)

In [8]:
# preview the first rows of the test data to check the column layout
test_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,521.97,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,521.38,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413


In [9]:
# Highest cycle number recorded for every engine id, kept as a one-column
# frame ('cycle') indexed by 'id'.
max_cycle_per_engine = test_df.groupby('id')['cycle'].max()
last_cycle_test_df = max_cycle_per_engine.to_frame()
last_cycle_test_df.head()

Unnamed: 0_level_0,cycle
id,Unnamed: 1_level_1
1,31
2,49
3,126
4,106
5,98


In [10]:
# preview the last rows of the per-engine last-cycle table
last_cycle_test_df.tail()

Unnamed: 0_level_0,cycle
id,Unnamed: 1_level_1
96,97
97,134
98,121
99,97
100,198


In [11]:
# Pick out an engine id and a cycle number for running the model.
engine_id_run = 67
cycle_run = 62

# The engine id must exist in the test data. Stop here with a clear message
# rather than failing on the .loc lookup below with a bare KeyError.
if engine_id_run not in test_df['id'].unique():
    raise ValueError(
        'engine id does not exist: {}'.format(engine_id_run)
    )
print('engine id is OK')

# The cycle number must be >= sequence_length and <= the last recorded cycle
# of the selected engine. Raising (instead of only printing) prevents the
# following cells from running on an invalid window.
last_cycle = last_cycle_test_df.loc[engine_id_run, 'cycle']
if not (sequence_length <= cycle_run <= last_cycle):
    raise ValueError(
        'cycle run is NG: cycle_run={} must be between {} and {}'.format(
            cycle_run, sequence_length, last_cycle
        )
    )
print('cycle run is OK')

engine id is OK
cycle run is OK


In [12]:
# Slice out the rows of the selected engine whose cycle lies in the window
# cycle_run - sequence_length < cycle <= cycle_run.
in_window = (
    (test_df['id'] == engine_id_run)
    & (test_df['cycle'] <= cycle_run)
    & (test_df['cycle'] > cycle_run - sequence_length)
)
# .copy() makes run_df an independent frame, so columns added to it later
# belong to run_df alone and never refer back to test_df.
run_df = test_df[in_window].copy()
run_df

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21
8727,67,13,0.0038,-0.0001,100.0,518.67,641.99,1588.27,1397.56,14.62,...,522.02,2388.03,8141.33,8.4102,0.03,392,2388,100.0,38.95,23.3125
8728,67,14,-0.0027,-0.0002,100.0,518.67,641.68,1583.66,1407.82,14.62,...,522.17,2388.04,8141.35,8.4013,0.03,391,2388,100.0,39.06,23.4504
8729,67,15,0.005,-0.0002,100.0,518.67,641.83,1586.29,1396.5,14.62,...,521.65,2388.03,8136.19,8.3976,0.03,393,2388,100.0,38.89,23.2719
8730,67,16,-0.0043,-0.0003,100.0,518.67,642.48,1591.35,1400.68,14.62,...,522.02,2388.0,8135.84,8.3971,0.03,391,2388,100.0,38.85,23.3899
8731,67,17,0.0019,-0.0005,100.0,518.67,641.89,1585.01,1395.9,14.62,...,522.27,2388.02,8136.01,8.4135,0.03,392,2388,100.0,39.12,23.2776
8732,67,18,0.0004,-0.0002,100.0,518.67,642.53,1595.96,1400.88,14.62,...,522.23,2388.07,8140.81,8.4416,0.03,393,2388,100.0,38.91,23.306
8733,67,19,0.0002,-0.0005,100.0,518.67,642.09,1583.39,1402.92,14.62,...,522.23,2388.03,8144.88,8.3965,0.03,393,2388,100.0,38.99,23.2938
8734,67,20,0.0052,0.0001,100.0,518.67,642.37,1584.69,1404.77,14.62,...,521.85,2388.04,8139.29,8.447,0.03,393,2388,100.0,38.68,23.3861
8735,67,21,-0.0006,0.0003,100.0,518.67,642.36,1583.55,1399.21,14.62,...,522.0,2388.07,8141.25,8.3926,0.03,392,2388,100.0,38.95,23.3767
8736,67,22,-0.0026,-0.0004,100.0,518.67,642.14,1592.39,1397.49,14.62,...,522.13,2388.11,8139.4,8.4327,0.03,393,2388,100.0,39.1,23.4316


# Data Preprocessing & Feature Engineering

In [13]:
# The "cycle" column could itself be used as a feature: duplicate it under the
# name "cycle_norm" so that the duplicate can be normalized
# (presumably "cycle_norm" is one of the labels in cols_normalize - verify).
run_df = run_df.assign(cycle_norm=run_df['cycle'])

# MinMax normalization: transform the selected columns with the loaded scaler
# and wrap the result in a frame that keeps run_df's index and column labels.
scaled_values = scaler.transform(run_df[cols_normalize])
norm_run_df = pd.DataFrame(scaled_values, index=run_df.index, columns=cols_normalize)

In [14]:
# Convert the normalized frame to a float32 numpy array, then add a leading
# axis of size 1 so the result has shape (1, rows, columns).
run_values = norm_run_df.values.astype(np.float32)
seq_array_run = np.expand_dims(run_values, axis=0)
seq_array_run.shape

(1, 50, 25)

# Make prediction & check against ground truth

In [15]:
# Make predictions.
# The model expects a 3-dimensional numpy array (samples, time steps, features):
#   samples    - number of sequences to score (a single one here),
#   time steps - the look-back window, i.e. the sequence length,
#   features   - number of features of each sequence at each time step.
#
# Sequential.predict_classes is deprecated and was removed in TensorFlow 2.6,
# so the class is derived from predict() with the same rule predict_classes
# applied: threshold at 0.5 for a single output unit, argmax otherwise.
y_proba_run = model.predict(seq_array_run)
if y_proba_run.shape[-1] > 1:
    y_pred_run = y_proba_run.argmax(axis=-1)
else:
    y_pred_run = (y_proba_run > 0.5).astype('int32')



In [16]:
# predicted class of the single sequence: first row, first column
y_pred = y_pred_run[0, 0]
y_pred

0

In [17]:
# Read the ground-truth data: space-separated values without a header row.
# A forward slash keeps this relative path valid on Windows, Linux and macOS.
truth_df = pd.read_csv("data/PM_truth.txt", sep=" ", header=None)
# Drop the column at position 1, which is not needed. Rebinding the result
# (instead of inplace=True) leaves the frame returned by read_csv untouched.
truth_df = truth_df.drop(truth_df.columns[[1]], axis=1)
# The single remaining column holds the remaining working cycles: name it 'more'.
truth_df.columns = ['more']
# Add the engine id as row position + 1
# (assumes the rows are ordered by engine id starting at 1 - TODO confirm).
truth_df['id'] = truth_df.index + 1
truth_df.head()

Unnamed: 0,more,id
0,112,1
1,98,2
2,69,3
3,82,4
4,91,5


In [18]:
# Max cycle of the selected engine: its last recorded cycle in the test data
# plus its remaining working cycles ('more') from the ground truth.
remaining_cycles = truth_df.loc[truth_df['id'] == engine_id_run, 'more'].iloc[0]
cycle_max = remaining_cycles + last_cycle
cycle_max

148

In [19]:
# True label: 1 when the max cycle is reached within w1 cycles of cycle_run,
# otherwise 0.
y_true = 1 if cycle_run + w1 >= cycle_max else 0
y_true

0

In [20]:
# compare the predicted label with the true label and report the outcome
verdict = "Correct prediction!!!" if y_true == y_pred else "Incorrect prediction!!!"
print(verdict)

Correct prediction!!!
