In [None]:
import numpy as np
import pandas as pd
from pickle import dump

from sklearn.preprocessing import MinMaxScaler
from keras_tuner import BayesianOptimization, Objective
from tensorflow.keras import callbacks, models

from progtools.preprocessing import RawFlightData, DataStructure
from progtools.modelling import gpu_check, RemainingUsefulLifeHyperModel
from progtools.scoring import rul_scoring
from progtools.visuals import unit_plots

In [None]:
# Sanity check: confirm that TensorFlow is able to detect the GPU before any
# data preparation or training is started.
gpu_check()

# Import, split and scale the data

In [None]:
# Open the raw flight data file.
filename = "data/N-CMAPSS_DS05.h5"
raw_data_read = RawFlightData(filename=filename)

# Pull the four development dataframes out of the reader in one statement
# (the calls are evaluated top to bottom).
df_W_dev, df_X_s_dev, df_A_dev, df_Y_dev = (
    raw_data_read.dev_flight_data(),   # flight data
    raw_data_read.dev_sensor_data(),   # sensor data
    raw_data_read.dev_aux_data(),      # auxiliary data
    raw_data_read.dev_RUL_data(),      # RUL data
)

In [None]:
# Downsample for faster model iteration: keep every `sample_step`-th index label.
# NOTE(review): labels are generated from 1 up to (but excluding) the largest
# label, so this assumes an integer index that contains every generated label
# -- confirm against RawFlightData.
# NOTE(review): the frames are overwritten in place, so re-running this cell on
# the already downsampled frames selects a different (smaller) set of rows.
sample_step = 10

# Index.max() is vectorised; the builtin max() would walk the index element by
# element in Python, which is slow on a large frame.
sample_index = np.arange(1, df_A_dev.index.max(), sample_step)

# Apply the same label selection to every development dataframe so that the
# four frames stay row-aligned with each other.
df_A_dev = df_A_dev.loc[sample_index]
df_W_dev = df_W_dev.loc[sample_index]
df_X_s_dev = df_X_s_dev.loc[sample_index]
df_Y_dev = df_Y_dev.loc[sample_index]

In [None]:
# Physical sensor channels to be included in the model.
sensor_columns = ["Wf", "T24", "T30", "T48", "T50", "P24", "Ps30", "P40", "P50"]

# Place the selected sensors and the scenario descriptors side by side
# (column-wise) to form the model input frame.
df_X = pd.concat([df_X_s_dev[sensor_columns], df_W_dev], axis=1)

In [None]:
# Row masks for the split: rows with unit <= 5 go to training, rows with
# unit > 5 go to validation.
train_rows = df_A_dev["unit"] <= 5
val_rows = df_A_dev["unit"] > 5

# model inputs
df_X_train, df_X_val = df_X[train_rows], df_X[val_rows]

# targets
df_Y_train, df_Y_val = df_Y_dev[train_rows], df_Y_dev[val_rows]

# auxiliary data
df_A_train, df_A_val = df_A_dev[train_rows], df_A_dev[val_rows]

In [None]:
# Fit the min-max scaler on the training inputs only; the validation inputs
# are transformed with the ranges learned from the training data.
X_train_values = np.asarray(df_X_train)
X_scaler = MinMaxScaler().fit(X_train_values)

# Store the fitted scaler. The context manager guarantees the file handle is
# flushed and closed, even if pickling raises.
with open("prognostic_models/9_Parameter_Prognostic_Scaler.pkl", "wb") as scaler_file:
    dump(X_scaler, scaler_file)

# Scale the training and validation inputs.
X_train_scaled = X_scaler.transform(X_train_values)
X_val_scaled = X_scaler.transform(np.asarray(df_X_val))

In [None]:
# Rebuild labelled dataframes from the scaled arrays for the structuring step,
# reusing the index and column labels of the unscaled frames they replace.
df_X_train = pd.DataFrame(X_train_scaled, index=df_X_train.index, columns=df_X_train.columns)
df_X_val = pd.DataFrame(X_val_scaled, index=df_X_val.index, columns=df_X_val.columns)

# Structure the Data for Training

In [None]:
# Wrap the (scaled inputs, targets, auxiliary data) triplets of each split.
df_training = DataStructure(df_X_train, df_Y_train, df_A_train)
df_validation = DataStructure(df_X_val, df_Y_val, df_A_val)

# Training arrays: structured inputs plus the piece-wise "RUL" target.
# NOTE(review): the meaning of the 2000 passed to create_X is defined by
# DataStructure, not visible here -- confirm before changing it.
X_train = df_training.create_X(2000)
train_targets = df_training.create_y(piece_wise=True)
y_train = np.asarray(train_targets["RUL"])

# Validation arrays, built the same way.
X_val = df_validation.create_X(2000)
val_targets = df_validation.create_y(piece_wise=True)
y_val = np.asarray(val_targets["RUL"])

In [None]:
# The hold-out set is handed to the search as an (inputs, targets) tuple.
validation_data = (X_val, y_val)

# Print the shape of every array so that a mismatch between predictors and
# targets is spotted before the (long-running) search is started.
print(f"Training Predictor Data Shape:{np.shape(X_train)}")
print(f"Training Target Data Shape: {np.shape(y_train)}")
print(f"Validation Predictor Data Shape: {np.shape(X_val)}")
print(f"Validation Target Data Shape: {np.shape(y_val)}")

# Model Development

In [None]:
# Hypermodel to be tuned.
# NOTE(review): features=13 presumably equals the number of columns in df_X
# (9 sensors plus the scenario descriptors) -- confirm it matches the inputs.
rul_hypermodel = RemainingUsefulLifeHyperModel(features=13)

# The tuner ranks trials by validation MSE; lower is better.
search_objective = Objective("val_mse", direction="min")

# Configure a Bayesian optimisation tuner. The directory and project name are
# kept exactly as spelled because, with overwrite=False, they identify the
# stored search on disk.
hyper_model_search = BayesianOptimization(
    rul_hypermodel,
    objective=search_objective,
    max_trials=400,
    seed=42,
    directory="pronostics_searches",
    project_name="9_parameter_ver0",
    overwrite=False,
)

In [None]:
# Early stopping: end a fit once "val_loss" has gone 10 epochs without improving.
# NOTE(review): the tuner objective is "val_mse" while this monitors
# "val_loss" -- confirm the two are meant to differ.
callback = callbacks.EarlyStopping(monitor="val_loss", patience=10)

In [None]:
# Run the hyper-parameter optimisation, using the validation tuple as the
# hold-out set and the early stopping callback on every trial.
search_fit_kwargs = {
    "validation_data": validation_data,
    "callbacks": [callback],
}
hyper_model_search.search(X_train, y_train, **search_fit_kwargs)

In [None]:
# Ask the tuner for its single best model and keep it as "best_model".
top_models = hyper_model_search.get_best_models(num_models=1)
best_model = top_models[0]

# Save the best model so later sessions can load it without re-running the search.
best_model.save("prognostic_models/9_Parameter_Prognostic_Model.h5")

# Display the layer-by-layer summary of the best model.
best_model.summary()

# Training Performance

In [None]:
# Make sure a trained model is available. If the tuning cells were not run in
# this session, `best_model` is undefined and the previously saved model is
# loaded from disk instead. Only the name lookup is guarded, so a NameError
# from any other source (e.g. a missing X_train) is raised as-is rather than
# silently triggering a reload.
try:
    best_model
except NameError:
    best_model = models.load_model("prognostic_models/9_Parameter_Prognostic_Model.h5")

# Create predictions against the training data.
y_train_pred = best_model.predict(X_train)

In [None]:
# Score the training predictions against the training ground truth
# (the piece-wise target).
rul_scoring(y_train,y_train_pred)

# Validation Performance

In [None]:
# Create predictions against the validation data.
y_val_pred = best_model.predict(X_val)

In [None]:
# Score the validation predictions against the validation ground truth
# (the piece-wise target).
rul_scoring(y_val,y_val_pred)

# Individual Unit Analysis

In [None]:
# Join the training target with the training predictions ("yhat").
# NOTE(review): create_y() is called without piece_wise=True here, unlike when
# y_train was built -- confirm the default gives the intended target.
# NOTE(review): join() aligns on the index and the prediction frame has a
# default 0..n-1 index -- confirm create_y() returns a matching index.
train_targets_frame = df_training.create_y()
train_pred_frame = pd.DataFrame(y_train_pred, columns=["yhat"])
df_train_results = train_targets_frame.join(train_pred_frame)

# Squared difference between ground truth and prediction, used to calculate
# individual RMSE scores by unit.
train_error = df_train_results["RUL"] - df_train_results["yhat"]
df_train_results["delta_sq"] = train_error ** 2

In [None]:
# Join the validation target with the validation predictions ("yhat").
# NOTE(review): create_y() is called without piece_wise=True here, unlike when
# y_val was built -- confirm the default gives the intended target.
# NOTE(review): join() aligns on the index and the prediction frame has a
# default 0..n-1 index -- confirm create_y() returns a matching index.
val_targets_frame = df_validation.create_y()
val_pred_frame = pd.DataFrame(y_val_pred, columns=["yhat"])
df_val_results = val_targets_frame.join(val_pred_frame)

# Squared difference between ground truth and prediction, used to calculate
# individual RMSE scores by unit.
val_error = df_val_results["RUL"] - df_val_results["yhat"]
df_val_results["delta_sq"] = val_error ** 2

In [None]:
# Stack the training and validation results (training rows first) into a
# single dataframe for plotting.
df_results = pd.concat([df_train_results,df_val_results])

In [None]:
# Plot ground truth versus prediction for each unit, with the RMSE by unit.
unit_plots(df_results,title="9-Parameter Model")