In [15]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Activation, Masking, Dropout,BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler #to normalize data
from os.path import exists

In [16]:
#see https://github.com/archd3sai/Predictive-Maintenance-of-Aircraft-Engine/blob/master/RUL%20Prediction%20Regression/LSTM%20RUL%20Prediction.ipynb
#dependent_var = ['RUL']
index_columns_names =  ["UnitNumber","Cycle"]
operational_settings_columns_names = ["OpSet"+str(i) for i in range(1,4)]
sensor_measure_columns_names =["SensorMeasure"+str(i) for i in range(1,22)]
input_file_column_names = index_columns_names + operational_settings_columns_names + sensor_measure_columns_names

cols_to_drop = ['OpSet3', 'SensorMeasure1', 'SensorMeasure5', 'SensorMeasure6', 'SensorMeasure10', 'SensorMeasure14',
     'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19']
    
df_train = pd.read_csv('train_FD002.txt',delim_whitespace=True,names=input_file_column_names)
    
df_test = pd.read_csv('test_FD004.txt',delim_whitespace=True,names=input_file_column_names)

df_train = df_train.drop(cols_to_drop, axis = 1)
df_test = df_test.drop(cols_to_drop, axis = 1)

In [17]:
print("Predictive Maintenance techniques are used to determine the condition of an equipment to plan the maintenance/failure ahead of its time\n")
print("Classification: Predicting the failure of machine in upcoming n days\n")
print("The lengths of the run varied with a minimum run length of 128 cycles and the maximum length of 356 cycles\n")
print("RUL - remaining useful life\n\n")
print(df_train.head(1))
print(df_test.head(1))

Predictive Maintenance techniques are used to determine the condition of an equipment to plan the maintenance/failure ahead of its time

Classification: Predicting the failure of machine in upcoming n days

The lengths of the run varied with a minimum run length of 128 cycles and the maximum length of 356 cycles

RUL - remaining useful life


   UnitNumber  Cycle   OpSet1  OpSet2  SensorMeasure2  SensorMeasure3  \
0           1      1  34.9983    0.84          555.32         1358.61   

   SensorMeasure4  SensorMeasure7  SensorMeasure8  SensorMeasure9  \
0         1137.23          194.64         2222.65         8341.91   

   SensorMeasure11  SensorMeasure12  SensorMeasure13  SensorMeasure15  \
0            42.02           183.06          2387.72           9.3461   

   SensorMeasure17  SensorMeasure20  SensorMeasure21  
0              334            14.73           8.8071  
   UnitNumber  Cycle   OpSet1  OpSet2  SensorMeasure2  SensorMeasure3  \
0           1      1  20.0072     0.7  

In [18]:
print("Find the max cycle count for each unit\n"
     )
rul = pd.DataFrame(df_train.groupby('UnitNumber')['Cycle'].max()).reset_index()
rul.columns = ['UnitNumber', 'max']
print(rul.head(3))

df_train = df_train.merge(rul, on=['UnitNumber'], how='left')
df_train['RUL'] = df_train['max'] - df_train['Cycle']
df_train.drop('max', axis=1, inplace=True)
print(df_train.head(3))


rul2 = pd.DataFrame(df_test.groupby('UnitNumber')['Cycle'].max()).reset_index()
rul2.columns = ['UnitNumber', 'max']


df_test = df_test.merge(rul2, on=['UnitNumber'], how='left')
df_test['RUL'] = df_test['max'] - df_test['Cycle']
df_test.drop('max', axis=1, inplace=True)


Find the max cycle count for each unit

   UnitNumber  max
0           1  149
1           2  269
2           3  206
   UnitNumber  Cycle   OpSet1  OpSet2  SensorMeasure2  SensorMeasure3  \
0           1      1  34.9983  0.8400          555.32         1358.61   
1           1      2  41.9982  0.8408          549.90         1353.22   
2           1      3  24.9988  0.6218          537.31         1256.76   

   SensorMeasure4  SensorMeasure7  SensorMeasure8  SensorMeasure9  \
0         1137.23          194.64         2222.65         8341.91   
1         1125.78          138.51         2211.57         8303.96   
2         1047.45          175.71         1915.11         8001.42   

   SensorMeasure11  SensorMeasure12  SensorMeasure13  SensorMeasure15  \
0            42.02           183.06          2387.72           9.3461   
1            42.20           130.42          2387.66           9.3774   
2            36.69           164.22          2028.03          10.8941   

   SensorMeasure17  S

In [19]:
print(rul2.head(3))
print(df_test.head(3))
#y_true = pd.read_csv('RUL_FD001.txt', delim_whitespace=True,names=["RUL"])
#y_true["UnitNumber"] = y_true.index

#print(y_true)

   UnitNumber  max
0           1  230
1           2  153
2           3  141
   UnitNumber  Cycle   OpSet1  OpSet2  SensorMeasure2  SensorMeasure3  \
0           1      1  20.0072   0.700          606.67         1481.04   
1           1      2  24.9984   0.620          536.22         1256.17   
2           1      3  42.0000   0.842          549.23         1340.13   

   SensorMeasure4  SensorMeasure7  SensorMeasure8  SensorMeasure9  \
0         1227.81          332.52         2323.67         8704.98   
1         1031.48          174.46         1915.21         7999.94   
2         1105.88          137.34         2211.93         8305.38   

   SensorMeasure11  SensorMeasure12  SensorMeasure13  SensorMeasure15  \
0            43.83           313.03          2387.78           9.2229   
1            36.11           163.61          2028.09          10.8632   
2            41.52           129.98          2387.95           9.3960   

   SensorMeasure17  SensorMeasure20  SensorMeasure21  RUL  
0

In [20]:
def gen_train(id_df, seq_length, seq_cols):
    """
        function to prepare train data into (samples, time steps, features)
        id_df = train dataframe
        seq_length = look back period
        seq_cols = feature columns
    """
        
    data_array = id_df[seq_cols].values
    num_elements = data_array.shape[0]
    lstm_array=[]
    
    for start, stop in zip(range(0, num_elements-seq_length+1), range(seq_length, num_elements+1)):
        lstm_array.append(data_array[start:stop, :])
    
    return np.array(lstm_array)

def gen_target(id_df, seq_length, label):
    data_array = id_df[label].values
    num_elements = data_array.shape[0]
    return data_array[seq_length-1:num_elements+1]

X_Columns = [column for column in df_train.columns if column !="RUL"]


X_ColumnsTest = [column for column in df_test.columns if column !="RUL"]

min_max_scaler = MinMaxScaler(feature_range=(-1,1))

df_train[X_Columns] = min_max_scaler.fit_transform(df_train[X_Columns])
df_test[X_ColumnsTest] = min_max_scaler.fit_transform(df_test[X_ColumnsTest])
sequence_length=50

X_train=np.concatenate(list(list(gen_train(df_train[df_train['UnitNumber']==unit], sequence_length, X_Columns)) for unit in df_train['UnitNumber'].unique()))
print("X_train",X_train.shape)
y_train = np.concatenate(list(list(gen_target(df_train[df_train['UnitNumber']==unit], sequence_length, "RUL")) for unit in df_train['UnitNumber'].unique()))
print("y_train",y_train.shape)



X_train (41019, 50, 17)
y_train (41019,)


In [21]:
X_test=np.concatenate(list(list(gen_train(df_test[df_test['UnitNumber']==unit], sequence_length, X_ColumnsTest)) for unit in df_test['UnitNumber'].unique()))
print("X_test",X_test.shape)
y_test = np.concatenate(list(list(gen_target(df_test[df_test['UnitNumber']==unit], sequence_length, "RUL")) for unit in df_test['UnitNumber'].unique()))
print("y_test",y_test.shape)

nb_features = X_train.shape[2]
nb_out = 1



model = Sequential()
model.add(LSTM(
         units=512,
         return_sequences=True,
         input_shape=(sequence_length, nb_features)))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(LSTM(
          units=128,
          return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='relu'))
model.add(Activation("relu"))
model.compile(loss="mse", optimizer="rmsprop", metrics=['mse'])

model.summary()

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 3 dimension(s) and the array at index 9 has 1 dimension(s)

In [None]:

print ("Train the model")
length=len(X_train)
path_to_file="D:\\RulNasa\\lstm_aircraft_model4.h5"

file_exists = exists(path_to_file)
if(file_exists):
    model.load_weights(path_to_file)
    
history=model.fit(X_train, y_train, epochs=3, batch_size=32, validation_split=0.1, verbose=1,
          callbacks = [EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')])

model.save_weights(path_to_file)


Train the model
Epoch 1/3

In [None]:
plt.plot(history.history['mse'])
#plt.plot(history.history['val_mean_squared_error'])
plt.title('MSE')
plt.ylabel('Mean Squared Error')
plt.xlabel('# Epoch')
plt.legend(['train'], loc='upper left')
plt.show()

In [None]:
scores = model.evaluate(X_train, y_train, verbose=1, batch_size=200)
print('MSE: {}'.format(scores[1]))

In [None]:
y_pred_test = model.predict(X_test, verbose=0)

plt.figure(figsize=(20,5))
plt.plot(y_pred_test, color='orange', label='Prediction')
plt.plot(y_test, color='green', label='Ground Truth')
plt.ylabel("RUL")
plt.xlabel("Unit Number")
plt.legend(loc='upper left')
plt.show()

In [None]:
print("done!")