Load the training data (normal operation) and the data recorded under faulty operation.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from scipy.fft import fft, fftfreq
import scipy.io as sio

# --- Configuration -----------------------------------------------------------
RAW_FILES = 'raw/'  # directory holding the raw .mat recordings
# Keys looked up in each loaded .mat dictionary; only matching keys are read.
SENSOR_NAMES = ['X098_FE_time', 'X123_FE_time', 'X190_FE_time', 'X227_FE_time', 
                'X110_FE_time', 'X175_FE_time', 'X214_FE_time', 'X136_FE_time', 
                'X202_FE_time', 'X239_FE_time']
TARGET = 'Fault type'
NORMAL_CONDITION = 'Time_Normal'  # column label of the normal-operation signal

files = []         # every file name found in RAW_FILES
frames = []        # one single-column DataFrame per extracted time series
column_names = []  # column label for each entry of `frames`, in the same order

# os.listdir returns entries in arbitrary order; sorting makes the column order
# of the combined frame (and of the saved CSV) reproducible across runs.
for file in sorted(os.listdir(RAW_FILES)):
    files.append(file)
    raw_dict = sio.loadmat(os.path.join(RAW_FILES, file))  # load raw data file in .mat format
    condition = file[:-10]  # column label: file name without its last 10 characters
    for sensor in SENSOR_NAMES:
        if sensor not in raw_dict:
            continue
        # Extract the time series stored under this key and convert 2D array to 1D array.
        time_series = raw_dict[sensor].flatten()
        sample_points = len(time_series)
        print('Reading file: ' + file)
        print('Number of sample points in file: ' + str(sample_points))

        time_series_df = pd.DataFrame(time_series, columns=[condition])
        frames.append(time_series_df)
        column_names.append(condition)

# Column-wise concat aligns on the row index, so shorter series are padded with NaN.
all_conditions_df = pd.concat(frames, axis=1, ignore_index=True)
# Assign the labels BEFORE saving; otherwise the CSV header would only contain
# the integer positions produced by ignore_index=True.
all_conditions_df.columns = column_names
all_conditions_df.to_csv('CWRU_raw_time_series.csv', index=False)

# Training data: time signal during normal operation. dropna() removes NaN values,
# such as the padding introduced by the concat above when this series is not the
# longest one; it is a no-op if the series contains no NaN.
train_df = all_conditions_df[NORMAL_CONDITION].dropna()
train_df.to_csv('AE_train.csv', index=False)

Visualize raw time series for training (normal condition) and all faulty conditions. 

In [2]:
import plotly.express as px

# Interactive line plot with one trace per column of the combined frame.
# NOTE(review): both axes are logarithmic — confirm this is intended for a
# signed acceleration signal.
axis_labels = {'value': 'Acceleration'}
all_conditions_fig = px.line(
    all_conditions_df,
    labels=axis_labels,
    log_x=True,
    log_y=True,
)
all_conditions_fig.show()

Plot the first 500 data points of the raw training time series (normal condition) with matplotlib for comparison, i.e. to check whether any data is missing.

In [None]:
# Keep only the first 500 samples of the training signal and draw them on the
# current matplotlib axes.
train_df_slice = train_df.head(500)
plt.gca().plot(train_df_slice)
plt.show()

Create data sequences.

In [15]:
TIME_STEPS = 2000  # default number of consecutive samples per window


def create_sequences(values, time_steps=TIME_STEPS, stride=1):
    """Cut a signal into overlapping windows of ``time_steps`` samples.

    Window ``k`` covers ``values[k * stride : k * stride + time_steps]`` along
    the first axis; only complete windows are produced.

    Parameters
    ----------
    values : array-like (e.g. list, numpy array, pandas Series/DataFrame)
        Input signal. It is converted to a numpy array once, so every window
        is a cheap array slice instead of a pandas slice.
    time_steps : int, optional
        Length of every window. Defaults to ``TIME_STEPS``.
    stride : int, optional
        Step between the starts of consecutive windows. The default of 1
        yields every possible window; larger values reduce the number of
        windows (and therefore memory use).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, time_steps, ...)`` where the trailing
        dimensions are those of ``values`` after its first axis.

    Raises
    ------
    ValueError
        If ``stride`` is smaller than 1 or ``values`` is too short to hold a
        single complete window.
    """
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride!r}")

    data = np.asarray(values)
    starts = range(0, len(data) - time_steps + 1, stride)
    if len(starts) == 0:
        # np.stack would fail with an unhelpful "need at least one array" error.
        raise ValueError(
            f"need at least {time_steps} samples to build one sequence, "
            f"got {len(data)}"
        )
    return np.stack([data[start:start + time_steps] for start in starts])

# Build the training windows from the normal-condition signal.
x_train = create_sequences(train_df, time_steps=TIME_STEPS)

Reshape input into [samples, timesteps, features].

In [None]:
NFEATURES = 1  # size of the trailing "features" axis added below

# Append a features axis: (samples, timesteps) -> (samples, timesteps, NFEATURES).
x_input = x_train.reshape(x_train.shape[0], x_train.shape[1], NFEATURES)
# Report the shape of the reshaped array (x_input), not of the original x_train.
print("Training input shape after reshaping:", x_input.shape)

Define model parameters and inputs.

In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, RepeatVector, TimeDistributed


# File path the trained model is saved to.
MODEL_PATH = './models/autoencoder.h5'
# Hyperparameters read by the model-building and training cells.
MODEL_PARAMETERS = {'LSTM_units_1': 16,  # units of the first and last LSTM layer
                    'LSTM_units_2': 8,  # units of the two inner LSTM layers
                    # NOTE(review): batch size is tied to the window length TIME_STEPS here;
                    # the two are independent quantities — confirm the coupling is intended.
                    'batch_size': TIME_STEPS,
                    'n_epochs': 20,
                    'optimizer': tf.keras.optimizers.Adam(learning_rate=0.01),
                    'loss': tf.keras.losses.MeanAbsoluteError(),
                    'validation_split': 0.1}

Define model architecture and build model.

In [None]:
# LSTM autoencoder: two stacked LSTM layers reduce each window to a single vector,
# which is repeated along the time axis and expanded again by two LSTM layers.
model = Sequential()
model.add(LSTM(units=MODEL_PARAMETERS['LSTM_units_1'], activation='tanh', return_sequences=True, input_shape=(None, NFEATURES)))
# return_sequences=False: only the final state is passed on.
model.add(LSTM(units=MODEL_PARAMETERS['LSTM_units_2'], activation='tanh', return_sequences=False))
# The repeat count is the length of the reconstructed sequence, so it must be the
# window length TIME_STEPS — not the batch size, which only happens to be
# configured with the same value.
model.add(RepeatVector(TIME_STEPS))
model.add(LSTM(units=MODEL_PARAMETERS['LSTM_units_2'], activation='tanh', return_sequences=True))
model.add(LSTM(units=MODEL_PARAMETERS['LSTM_units_1'], activation='tanh', return_sequences=True))
# Linear Dense layer applied to every time step, giving NFEATURES values per step.
model.add(TimeDistributed(Dense(NFEATURES, activation='linear')))

model.compile(optimizer=MODEL_PARAMETERS['optimizer'], loss=MODEL_PARAMETERS['loss'])  # Compile model
model.summary()

Train model.

In [None]:
# Autoencoder training: the input windows are also the reconstruction targets.
history = model.fit(
    x=x_input,
    y=x_input,
    batch_size=MODEL_PARAMETERS['batch_size'],
    epochs=MODEL_PARAMETERS['n_epochs'],
    validation_split=MODEL_PARAMETERS['validation_split'],
)

# Persist the trained model to MODEL_PATH.
model.save(filepath=MODEL_PATH)