# Predictive maintenance for turbofan engine example

## Part 2: Linear Regression


Based on open dataset provided by NASA at:
https://data.nasa.gov/widgets/vrks-gjie

The dataset can be downloaded at: http://ti.arc.nasa.gov/c/6/

In [None]:
import os, time
import datetime

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

import tensorflow as tf
from tensorflow import keras

# Load the TensorBoard notebook extension (optional, can be started from the command line)
#%load_ext tensorboard

# Select a plotting style
#plt.style.use('dark_background')
# The bundled seaborn styles were renamed to 'seaborn-v0_8' in matplotlib 3.6
# and the old 'seaborn' alias was removed afterwards; use whichever name the
# installed matplotlib provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
#plt.style.available

# Fraction of the training rows kept when the training frame is sampled/shuffled
SCALE = 1
# random_state used for that sampling
SEED = 1
# Number of epochs passed to model.fit
EPOCHS = 20

### Data Preparation

In [None]:
# Read the pre-split train / test CSV files from the dataset folder

data_root = 'data/'
original_dir = f'{data_root}original/'
dataset_dir = f'{data_root}dataset/'

train_data = pd.read_csv(f'{dataset_dir}train_data.csv')
test_data = pd.read_csv(f'{dataset_dir}test_data.csv')
# Last expression of the cell: the notebook renders the training frame
train_data

### Quick EDA

In [None]:
# Plot the lifecycles: one curve per run of consecutive rows ending at RUL == 0.
# The RUL column is iterated directly; iterrows() would build a full Series
# for every row just to read this single value.
one_engine = []
for rul in train_data['RUL']:
    one_engine.append(rul)
    if rul == 0:
        # A RUL of 0 closes the current run: draw it and start a new one
        plt.plot(one_engine)
        one_engine = []

#plt.grid()
plt.xlabel('Cycles')
plt.ylabel('RUL')

## Machine Learning Application

We will split the data in 4 parts: x_train, y_train, x_test, y_test.

(actually the dataset is already split)
- x is for the sensor data
- y is for the known Remaining Useful Life
- train is for data we will use to train the model (we will use the known RUL in the training)
- test is for the data used for validation: we will run predictions on it and compute the model's performance metrics using the known RUL

In [None]:
# Shuffle train data frame and apply scaling factor
# (SCALE is the fraction of rows kept, SEED makes the shuffle reproducible)
train_data = train_data.sample(frac=SCALE, random_state=SEED).reset_index(drop=True)


# prepare a x frame with useful data and a y frame with RUL value
x_train = train_data.drop(columns=['Unit', 'Cycle', 'RUL'])
y_train = train_data['RUL']

# Drop the same non-feature columns as for the training frame so both frames
# expose the same features. 'Unit' is removed only when the test file has it;
# 'Cycle' and 'RUL' are still required, as before.
x_test = test_data.drop(columns=['Cycle', 'RUL']).drop(columns=['Unit'], errors='ignore')

y_test = test_data['RUL']

In [None]:
# data normalization

# Statistics are computed on the training frame only and reused for the test frame
mean = x_train.mean()
std = x_train.std()

x_train = (x_train - mean) / std
x_test = (x_test - mean) / std

# Drop the training columns that contain NaN after scaling (e.g. a constant
# column: std is 0, so the division is 0/0).
x_train = x_train.dropna(axis=1, how='any')

# Keep exactly the training feature columns in the test frame. Running dropna
# independently on x_test could keep or drop a different set of columns (a
# column that is constant in training but not in test yields inf, not NaN),
# leaving the two frames with different feature sets.
x_test = x_test[x_train.columns]
if x_test.isna().any().any():
    raise ValueError('x_test contains NaN values in the feature columns used for training')

#x_test = np.asarray(x_test).astype('float32')


# what's the shape now we dropped some columns? create a variable to use in 
# get_model_v1 function call
# (`lines` is the number of training rows, `shape` the number of feature columns)
(lines,shape) = x_train.shape

In [None]:
# Build a ML model

def get_model_v1(shape, hidden_layers=9, units=128):
    """
    Build and compile a fully connected regression network.

    args:
        shape: input shape of one sample, passed to keras.layers.Input
               (the notebook passes a 1-tuple holding the feature count)
        hidden_layers: number of hidden Dense/relu layers (default 9)
        units: number of units in each hidden layer (default 128)
    returns:
        the compiled keras Sequential model (adam optimizer, mse loss,
        mae and mse metrics) ending in a single-unit linear output layer
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Input(shape, name='input_layer'))

    # Hidden stack; layer names keep the dense_n1 ... dense_nN numbering
    for idx in range(1, hidden_layers + 1):
        model.add(keras.layers.Dense(units, activation='relu', name=f'dense_n{idx}'))

    # Single output without activation: the network predicts one scalar
    model.add(keras.layers.Dense(1, name='output'))

    model.compile(optimizer = 'adam',
                  loss      = 'mse',
                  metrics   = ['mae', 'mse'],
                 )

    return model

# Instantiate the model
# `shape` is the number of feature columns read from x_train.shape; it is
# wrapped in a 1-tuple to form the per-sample input shape.

model = get_model_v1((shape,))
model.summary()

In [None]:
# Train the model

# Configure callback for visualization of the training data in tensorboard.
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: one call, and it
# does not fail if the folder appears between the check and the creation.
os.makedirs('logs', exist_ok=True)

# One log folder per run, tagged with the scale, epoch count and start time
log_dir = 'logs/fit/' + f'S{SCALE}_E{EPOCHS}_' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

#%tensorboard --logdir ./logs

start_time = time.perf_counter()

# NOTE: the test set is used as validation data here, so the val_* metrics are
# computed on the same rows as the later model.evaluate call.
history = model.fit(x_train,
                    y_train,
                    epochs          = EPOCHS,
                    batch_size      = 20,
                    verbose         = 1,
                    validation_data = (x_test, y_test),
                    callbacks = [tensorboard_callback],)

end_time = time.perf_counter()

# perf_counter() differences are in seconds
print(f"\n\nTraining time: {end_time-start_time}")

In [None]:
# Evaluate the model on the test set.
# Given the compile() settings (loss 'mse', metrics ['mae', 'mse']), `score`
# is expected to hold the loss followed by those metrics — see the Keras
# Model.evaluate documentation to confirm the exact layout.
score = model.evaluate(x_test, y_test, verbose=1)

## Training History

In [None]:
# Tabulate the values recorded during training (history.history), one column
# per recorded metric.
df = pd.DataFrame(data=history.history)
# display() is not imported in this file; it is expected to come from the
# IPython / Jupyter environment.
display(df)

In [None]:
# Lowest validation MAE recorded during training
print(f"min(val_mae) : {min(history.history['val_mae']):.4f}")

In [None]:
def plot_history(history, figsize=(8,6),
                 plot=None,
                 save_as='auto'):
    """
    Show the training history, one figure per entry of `plot`.
    args:
        history: object exposing a `history` mapping {<metric>: [values...]}
                 (e.g. the value returned by model.fit)
        figsize: fig size
        plot: data to plot : {<title>:[<metrics>,...], ...}
              None (default) plots accuracy/val_accuracy and loss/val_loss
        save_as: accepted for compatibility with existing calls; currently
                 not used, no file is written
    returns:
        None (figures are shown with plt.show())
    """
    # Build the default here instead of in the signature: a dict default is
    # created once and shared between all calls.
    if plot is None:
        plot = {"Accuracy":['accuracy','val_accuracy'], 'Loss':['loss', 'val_loss']}

    for title,curves in plot.items():
        plt.figure(figsize=figsize)
        plt.title(title)
        plt.ylabel(title)
        plt.xlabel('Epoch')
        for c in curves:
            plt.plot(history.history[c])
        # Legend entries follow the order in which the curves were drawn
        plt.legend(curves, loc='upper left')
        plt.show()


# One figure each for MSE, MAE and loss, with the training and validation curves.
# NOTE: save_as is passed but plot_history does not read it, so no file is written.
plot_history(history, plot={'MSE' :['mse', 'val_mse'],
                            'MAE' :['mae', 'val_mae'],
                            'LOSS':['loss','val_loss']}, save_as='01-history')

## Make a prediction

In [None]:
# Make a prediction

# Position (row number) of the test sample to inspect
selection = 56

engine = x_test.iloc[selection]
engine_rul = y_test.iat[selection]

# Undo the normalization for display. mean/std still carry the columns that
# were dropped after scaling; multiplying by them directly aligns on the union
# of labels and adds a NaN row for each dropped feature, so restrict both to
# the features present in this row.
row = engine.dropna(axis=0, how='any')
print('Data (denormalized):\n\n', row * std.reindex(row.index) + mean.reindex(row.index), '\n\n')
print('RUL = ', engine_rul)

# The model expects a 2-D batch: one row of `shape` features
engine = np.array(engine).reshape(1, shape)

print('\n\n---\n\n')

predictions = model.predict(engine)
print('Prediction  : {:.0f} Cycles'.format(predictions[0][0]))
print('Real RUL    : {:.0f} Cycles'.format(engine_rul))

In [None]:
# TODO confusion matrix
# Predict the whole test set with a single batched call instead of one
# model.predict() call per row, which pays the per-call overhead for every row.
# The model has a single output unit, so column 0 holds the predicted RUL of
# each row; the result is kept as a list, one value per test row.
predictions = list(model.predict(np.asarray(x_test))[:, 0])

In [None]:
# Scatter the predicted RUL (x axis) against the known RUL (y axis)
plt.figure(figsize=(12,12))
plt.scatter(predictions, y_test);

# Add a line
# y = x reference: a point on this line is a prediction equal to the known RUL.
# The [0, 150] extent is hard-coded.
x = [0, 150]
y = x
plt.plot(x,y, color='lightgreen');

# Layout
plt.xlabel('Predictions');
plt.ylabel('Reality');

In [None]:
# Obviously the ML algo doesn't do much... but this was for benchmarking the DOKS infrastructures anyway :)