In [1]:
import os
seed = 0
os.environ['PYTHONHASSEED'] = str(seed)

In [2]:
# Disable GPU
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import random
import pickle

from tensorflow.random import set_seed

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model

In [4]:
# Set seed
random.seed(seed)
np.random.seed(seed)
set_seed(seed)

In [5]:
filename = '../data/turbofan_dataset/N-CMAPSS_DS02-006.h5'
output_path = 'DS02/experiment_set_9'

In [6]:
if not os.path.exists(output_path):
    os.makedirs(output_path)

In [7]:
def load_dataset(filename, load_test_set=True):
    """ Reads a dataset from a given .h5 file and compose (in memory) the train and test data. 
    Args:
        filename(str): path to the .h5 file
    Returns:
        train_set(pd.DataFrame), test_set(pd.DataFrame)
    """
    with h5py.File(filename, 'r') as hdf:
        # Development set
        W_dev = np.array(hdf.get('W_dev'))             # W
        X_s_dev = np.array(hdf.get('X_s_dev'))         # X_s
        X_v_dev = np.array(hdf.get('X_v_dev'))         # X_v
        T_dev = np.array(hdf.get('T_dev'))             # T
        Y_dev = np.array(hdf.get('Y_dev'))             # RUL  
        A_dev = np.array(hdf.get('A_dev'))             # Auxiliary

        # Test set
        if load_test_set:
            W_test = np.array(hdf.get('W_test'))           # W
            X_s_test = np.array(hdf.get('X_s_test'))       # X_s
            X_v_test = np.array(hdf.get('X_v_test'))       # X_v
            T_test = np.array(hdf.get('T_test'))           # T
            Y_test = np.array(hdf.get('Y_test'))           # RUL  
            A_test = np.array(hdf.get('A_test'))           # Auxiliary
        
        # Column names
        W_var = np.array(hdf.get('W_var'))
        X_s_var = np.array(hdf.get('X_s_var'))  
        X_v_var = np.array(hdf.get('X_v_var')) 
        T_var = np.array(hdf.get('T_var'))
        A_var = np.array(hdf.get('A_var'))
        
        columns = []
        columns.append(list(np.array(A_var, dtype='U20')))
        columns.append(list(np.array(T_var, dtype='U20')))
        columns.append(list(np.array(X_s_var, dtype='U20')))
        columns.append(list(np.array(X_v_var, dtype='U20')))
        columns.append(list(np.array(W_var, dtype='U20')))
        columns.append(['RUL'])
        
        columns_list = []
        for columns_per_category in columns:
            columns_list += columns_per_category
        
    train_set = np.concatenate((A_dev, T_dev, X_s_dev, X_v_dev, W_dev, Y_dev), axis=1)
    if load_test_set:
        test_set = np.concatenate((A_test, T_test, X_s_test, X_v_test, W_test, Y_test), axis=1)
        return pd.DataFrame(data=train_set, columns=columns_list), pd.DataFrame(data=test_set, columns=columns_list), columns
    else:
        return pd.DataFrame(data=train_set, columns=columns_list), None, columns

In [8]:
def unit_cycle_info(df, compute_cycle_len=False):
    unit_ids = np.unique(df['unit'])
    print('Engine units in df: ', unit_ids)
    for i in unit_ids:
        num_cycles = len(np.unique(df.loc[df['unit'] == i, 'cycle']))
        print('Unit: ', i, ' - Number of flight cycles: ', num_cycles)
        
    if compute_cycle_len:
        cycle_ids = np.unique(df['cycle'])
        print('Total number of cycles: ', len(cycle_ids))
        min_len = np.inf
        max_len = 0
        for i in cycle_ids:
            cycle_len = len(df.loc[df['cycle'] == i, 'cycle'])
            if cycle_len < min_len:
                min_len = cycle_len
            elif cycle_len > max_len:
                max_len = cycle_len
        print('Min cycle length: ', min_len)
        print('Max cycle length: ', max_len)
    
    return unit_ids

In [9]:
# Filter constant and quasi-constant features
def get_quasi_constant_features(dataset, variance_th=0.01, debug=True):
    constant_filter = VarianceThreshold(threshold=variance_th)
    constant_filter.fit(dataset)
    constant_features = [col for col in dataset.columns 
                         if col not in dataset.columns[constant_filter.get_support()]]
    
    if debug:
        print("Number of non-constant features: ", len(dataset.columns[constant_filter.get_support()]))
        
        print("Number of quasi-constant features: ", len(constant_features))
        print("Quasi-constant features: ")
        for col in constant_features:
            print(col)
    return constant_features

def get_non_correlated_features(dataset, corr_th=0.9, debug=True):
    corr_mat = dataset.corr()
    corr_mat = np.abs(corr_mat)
    
    num_cols = corr_mat.shape[0]
    columns = np.full((num_cols,), True, dtype=bool)
    for i in range(num_cols):
        for j in range(i+1, num_cols):
            val = corr_mat.iloc[i, j]
            if val >= corr_th:
                if columns[j]:
                    columns[j] = False
                    if debug:
                        print(dataset.columns[i], "|", dataset.columns[j], "|", round(val, 2))
    if debug:        
        correlated_features = dataset.columns[~columns]
        print("Number of correlated features: ", len(correlated_features))
        print("Correlated features: ", list(correlated_features))
    
    selected_columns = dataset.columns[columns]
    if debug:
        print("Number of selected features: ", len(selected_columns))
        print("Selected features: ", list(selected_columns))
    return selected_columns

def cmapss_score_function(actual, predictions, normalize=True):
    # diff < 0 -> over-estimation
    # diff > 0 -> under-estimation
    diff = actual - predictions
    alpha = np.full_like(diff, 1/13)
    negative_diff_mask = diff < 0
    alpha[negative_diff_mask] = 1/10
    score = np.sum(np.exp(alpha * np.abs(diff)))
    
    if normalize:
        N = len(predictions)
        score /= N
    return score

def compute_evaluation_metrics(actual, predictions, label='Test'):
    mse = mean_squared_error(actual, predictions)
    rmse = np.sqrt(mse)
    cmapss_score = cmapss_score_function(actual, predictions)
    print('{} set:\nMSE: {:.2f}\nRMSE: {:.2f}\nCMAPSS score: {:.2f}\n'.format(label, mse, rmse, 
                                                                     cmapss_score))
    return mse, rmse, cmapss_score
    
def plot_loss_curves(history, output_path=None, y_lim=[0, 150]):
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.ylim(y_lim)
    plt.legend(['train', 'validation'], loc='upper left')
    
    if output_path is not None:
        plt.savefig(os.path.join(output_path, 'loss_curves.png'), format='png', dpi=300) 
    plt.show()
    
def plot_rul(expected, predicted):
    plt.figure()
    plt.plot(range(len(expected)), expected, label='Expected')
    plt.plot(range(len(predicted)), predicted, label='Predicted')
    plt.legend()
    
    
def create_mlp_model(input_dim, hidden_layer_sizes, activation='relu', output_weights_file=None):
    model = Sequential()
    model.add(Dense(hidden_layer_sizes[0], 
                    input_dim=input_dim, 
                    kernel_initializer='random_normal', 
                    activation=activation))

    for layer_size in hidden_layer_sizes[1:]:
        model.add(Dense(layer_size, 
                        kernel_initializer='random_normal', 
                        activation=activation))
    
    model.add(Dense(1, kernel_initializer='random_normal'))
    
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    if output_weights_file is not None:
        model.save_weights(output_weights_file)
    return model

def train_model_existing_weights(model, weights_file, x_train, y_train, x_val, y_val, epochs=200, batch_size=512, callbacks=[]):
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.load_weights(weights_file)
    return model.fit(x_train, y_train,
                     validation_data=(x_val, y_val),
                     epochs=epochs,
                     batch_size=batch_size,
                     verbose=1,
                     callbacks=callbacks)

def save_history(history, output_file=os.path.join(output_path, "history.pkl")):
    with open(output_file, 'wb') as file:
        pickle.dump(history.history, file)
    print("Saved training history to file: {}".format(output_file))

def load_history(file):
    return pickle.load(open(file, "rb"))

def save_object(obj, output_file):
    with open(output_file, 'wb') as file:
        pickle.dump(obj, file)
    print("Saved object to file: {}".format(output_file))
    
def load_object(file):
    return pickle.load(open(file, "rb"))

def model_evaluation(model, x_test, y_test, x_train=None, y_train=None, plot_range=[0, 10**3]):
    if x_train is not None and y_train is not None:
        predictions_train = model.predict(x_train).flatten()
        compute_evaluation_metrics(predictions_train, y_train, 'Train')
        
        expected = y_train[plot_range[0]:plot_range[1]]
        predicted = predictions_train[plot_range[0]:plot_range[1]]
        plot_rul(expected, predicted)
        
    predictions_test = model.predict(x_test).flatten()
    compute_evaluation_metrics(predictions_test, y_test)
    
    expected = y_test[plot_range[0]:plot_range[1]]
    predicted = predictions_test[plot_range[0]:plot_range[1]]
    plot_rul(expected, predicted)

In [10]:
def write_list(string_list, output_file):
    output_file.write("[")
    n = len(string_list)
    for i in range(n - 1):
        output_file.write("{}, ".format(string_list[i]))
    output_file.write("{}]\n".format(string_list[-1]))
    
def feature_list_to_string(feature_list):
    return "__".join(feature_list)

def numbers_list_to_string(num_list):
    return " ".join([str(x) for x in num_list])

In [11]:
start_time = time.process_time()  
train_set, test_set, columns = load_dataset(filename)
print("Operation time (sec): " , (time.process_time() - start_time))
print()
print("Train set shape: " + str(train_set.shape))

columns_aux = columns[0] 
columns_health_params = columns[1] 
columns_sensor_measurements = columns[2] 
columns_virtual_sensors = columns[3]
columns_operating_conditions = columns[4] 
target_col = columns[5]

Operation time (sec):  3.5

Train set shape: (5263447, 47)


In [12]:
y_train = train_set['RUL']
x_train = train_set.drop(['RUL'], axis=1)

y_test = test_set['RUL']
x_test = test_set.drop(['RUL'], axis=1)

In [13]:
x_train.drop(labels=[x for x in columns_aux if x in x_train.columns], axis=1, inplace=True)

constant_features = get_quasi_constant_features(x_train, variance_th=0.0)
x_train.drop(labels=constant_features, axis=1, inplace=True)
print("Train shape: ", x_train.shape)

Number of non-constant features:  35
Number of quasi-constant features:  7
Quasi-constant features: 
fan_eff_mod
fan_flow_mod
LPC_eff_mod
LPC_flow_mod
HPC_eff_mod
HPC_flow_mod
HPT_flow_mod
Train shape:  (5263447, 35)


In [14]:
x_train, x_holdout, y_train, y_holdout = train_test_split(x_train, y_train, test_size=0.3, random_state=seed)

In [15]:
######################################################
# Test effect of correlation threshold + health params
######################################################
NUM_TRIALS = 3

batch_size = 512
epochs = 200
layer_sizes = [256, 256, 512, 64]

initial_columns = x_train.columns
# corr_th_list = [None, 0.99, 0.95, 0.9]
corr_th_list = [0.95, 0.9]

results_file = os.path.join(output_path, "results_corr_th_health_params.csv")
with open(results_file, "w") as file:
    file.write("corr_th,selected_features,num_features,mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")


for corr_th in corr_th_list:
    # Select features based on training set
    if corr_th is not None:
        selected_columns = get_non_correlated_features(x_train, corr_th=corr_th, debug=False)
    else:
        selected_columns = x_train.columns
    
    x_train_feature_selection = x_train[selected_columns]
    
    mse_vals = []
    rmse_vals = []
    cmapss_vals = []
    
    for random_seed in range(NUM_TRIALS):
        # Train-validation split for early stopping
        x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(x_train_feature_selection, 
                                                                                  y_train, 
                                                                                  test_size=0.1, 
                                                                                  random_state=random_seed)
        # Create output path
        results_folder = "results_all" if corr_th is None else "results_{}".format(corr_th)
        results_path_crr_th = os.path.join(output_path, results_folder)
        results_path_crr_split = os.path.join(results_path_crr_th, "split_{}".format(random_seed))
        if not os.path.exists(results_path_crr_split):
            os.makedirs(results_path_crr_split)

        # Standardization
        scaler_file = os.path.join(results_path_crr_split, 'scaler.pkl')
        scaler = StandardScaler()
        x_train_scaled = scaler.fit_transform(x_train_split)
        x_val_scaled = scaler.transform(x_val_split)
        input_dim = x_train_scaled.shape[1]
        save_object(scaler, scaler_file)

        # Create model
        weights_file = os.path.join(results_path_crr_th, 'mlp_initial_weights.h5')
        model_path = os.path.join(results_path_crr_split, 'mlp_model_trained.h5')
        
        # Save initial weights
        if random_seed == 0:
            model = create_mlp_model(input_dim, layer_sizes, activation='tanh',
                                     output_weights_file=weights_file)
        else:
            model = create_mlp_model(input_dim, layer_sizes, activation='tanh')
        model.summary()

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
        mc = ModelCheckpoint(model_path, monitor='val_loss', mode='min', verbose=2, 
                             save_best_only=True)

        # Train model
        history = train_model_existing_weights(model, weights_file, 
                                               x_train_scaled, y_train_split, 
                                               x_val_scaled, y_val_split, 
                                               batch_size=batch_size, 
                                               epochs=epochs, 
                                               callbacks=[es, mc])

        history_file = os.path.join(results_path_crr_split, "history.pkl")
        save_history(history, history_file)

        # Performance evaluation
        x_holdout_feature_selection = x_holdout[selected_columns]
        x_holdout_scaled = scaler.transform(x_holdout_feature_selection)

        loaded_model = load_model(model_path)
        predictions_holdout = loaded_model.predict(x_holdout_scaled).flatten()
        mse, rmse, cmapss_score = compute_evaluation_metrics(predictions_holdout, y_holdout)
        
        mse_vals.append(mse)
        rmse_vals.append(rmse)
        cmapss_vals.append(cmapss_score)
    
    mse_mean = np.mean(mse_vals)
    mse_std = np.std(mse_vals)
    rmse_mean = np.mean(rmse_vals)
    rmse_std = np.std(rmse_vals)
    cmapss_mean = np.mean(cmapss_vals)
    cmapss_std = np.std(cmapss_vals)
    
    with open(results_file, "a") as file:
        th = "all" if corr_th is None else corr_th
        
        file.write(f"{th}, {feature_list_to_string(selected_columns)}, {len(selected_columns)}, {numbers_list_to_string(mse_vals)}, {numbers_list_to_string(rmse_vals)}, {numbers_list_to_string(cmapss_vals)}, {mse_mean}, {mse_std}, {rmse_mean}, {rmse_std}, {cmapss_mean}, {cmapss_std}\n")

Saved object to file: DS02/experiment_set_9\results_0.95\split_0\scaler.pkl
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               2816      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_2 (Dense)              (None, 512)               131584    
_________________________________________________________________
dense_3 (Dense)              (None, 64)                32832     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 65        
Total params: 233,089
Trainable params: 233,089
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
Epoch 00001: val_loss improv

Epoch 28/200
Epoch 00028: val_loss did not improve from 1.23016
Epoch 29/200
Epoch 00029: val_loss did not improve from 1.23016
Epoch 30/200
Epoch 00030: val_loss did not improve from 1.23016
Epoch 31/200
Epoch 00031: val_loss did not improve from 1.23016
Epoch 32/200
Epoch 00032: val_loss did not improve from 1.23016
Epoch 33/200
Epoch 00033: val_loss did not improve from 1.23016
Epoch 34/200
Epoch 00034: val_loss did not improve from 1.23016
Epoch 35/200
Epoch 00035: val_loss did not improve from 1.23016
Epoch 36/200
Epoch 00036: val_loss did not improve from 1.23016
Epoch 37/200
Epoch 00037: val_loss did not improve from 1.23016
Epoch 00037: early stopping
Saved training history to file: DS02/experiment_set_9\results_0.95\split_0\history.pkl
Test set:
MSE: 1.24
RMSE: 1.11
CMAPSS score: 1.05

Saved object to file: DS02/experiment_set_9\results_0.95\split_1\scaler.pkl
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 O

Epoch 18/200
Epoch 00018: val_loss improved from 1.84312 to 1.74409, saving model to DS02/experiment_set_9\results_0.95\split_1\mlp_model_trained.h5
Epoch 19/200
Epoch 00019: val_loss improved from 1.74409 to 1.55508, saving model to DS02/experiment_set_9\results_0.95\split_1\mlp_model_trained.h5
Epoch 20/200
Epoch 00020: val_loss did not improve from 1.55508
Epoch 21/200
Epoch 00021: val_loss improved from 1.55508 to 1.47513, saving model to DS02/experiment_set_9\results_0.95\split_1\mlp_model_trained.h5
Epoch 22/200
Epoch 00022: val_loss did not improve from 1.47513
Epoch 23/200
Epoch 00023: val_loss did not improve from 1.47513
Epoch 24/200
Epoch 00024: val_loss did not improve from 1.47513
Epoch 25/200
Epoch 00025: val_loss improved from 1.47513 to 1.28984, saving model to DS02/experiment_set_9\results_0.95\split_1\mlp_model_trained.h5
Epoch 26/200
Epoch 00026: val_loss did not improve from 1.28984
Epoch 27/200
Epoch 00027: val_loss did not improve from 1.28984
Epoch 28/200
Epoch 0

Epoch 51/200
Epoch 00051: val_loss did not improve from 0.95739
Epoch 52/200
Epoch 00052: val_loss did not improve from 0.95739
Epoch 53/200
Epoch 00053: val_loss did not improve from 0.95739
Epoch 54/200
Epoch 00054: val_loss did not improve from 0.95739
Epoch 55/200
Epoch 00055: val_loss did not improve from 0.95739
Epoch 56/200
Epoch 00056: val_loss improved from 0.95739 to 0.80224, saving model to DS02/experiment_set_9\results_0.95\split_1\mlp_model_trained.h5
Epoch 57/200
Epoch 00057: val_loss did not improve from 0.80224
Epoch 58/200
Epoch 00058: val_loss did not improve from 0.80224
Epoch 59/200
Epoch 00059: val_loss did not improve from 0.80224
Epoch 60/200
Epoch 00060: val_loss did not improve from 0.80224
Epoch 61/200
Epoch 00061: val_loss did not improve from 0.80224
Epoch 62/200
Epoch 00062: val_loss did not improve from 0.80224
Epoch 63/200
Epoch 00063: val_loss did not improve from 0.80224
Epoch 64/200
Epoch 00064: val_loss did not improve from 0.80224
Epoch 65/200
Epoch 

Epoch 12/200
Epoch 00012: val_loss improved from 2.80598 to 2.36036, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5
Epoch 13/200
Epoch 00013: val_loss improved from 2.36036 to 2.06704, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5
Epoch 14/200
Epoch 00014: val_loss did not improve from 2.06704
Epoch 15/200
Epoch 00015: val_loss did not improve from 2.06704
Epoch 16/200
Epoch 00016: val_loss improved from 2.06704 to 2.01306, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5
Epoch 17/200
Epoch 00017: val_loss did not improve from 2.01306
Epoch 18/200
Epoch 00018: val_loss did not improve from 2.01306
Epoch 19/200
Epoch 00019: val_loss improved from 2.01306 to 1.95523, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5
Epoch 20/200
Epoch 00020: val_loss improved from 1.95523 to 1.84638, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5

Epoch 45/200
Epoch 00045: val_loss did not improve from 1.04944
Epoch 46/200
Epoch 00046: val_loss did not improve from 1.04944
Epoch 47/200
Epoch 00047: val_loss did not improve from 1.04944
Epoch 48/200
Epoch 00048: val_loss improved from 1.04944 to 1.04364, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5
Epoch 49/200
Epoch 00049: val_loss did not improve from 1.04364
Epoch 50/200
Epoch 00050: val_loss did not improve from 1.04364
Epoch 51/200
Epoch 00051: val_loss did not improve from 1.04364
Epoch 52/200
Epoch 00052: val_loss did not improve from 1.04364
Epoch 53/200
Epoch 00053: val_loss improved from 1.04364 to 0.87547, saving model to DS02/experiment_set_9\results_0.95\split_2\mlp_model_trained.h5
Epoch 54/200
Epoch 00054: val_loss did not improve from 0.87547
Epoch 55/200
Epoch 00055: val_loss did not improve from 0.87547
Epoch 56/200
Epoch 00056: val_loss did not improve from 0.87547
Epoch 57/200
Epoch 00057: val_loss did not improve from 0.8754

Epoch 00009: val_loss improved from 3.16722 to 3.10204, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 10/200
Epoch 00010: val_loss did not improve from 3.10204
Epoch 11/200
Epoch 00011: val_loss improved from 3.10204 to 2.93472, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 12/200
Epoch 00012: val_loss improved from 2.93472 to 2.44055, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 13/200
Epoch 00013: val_loss did not improve from 2.44055
Epoch 14/200
Epoch 00014: val_loss improved from 2.44055 to 2.40120, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 15/200
Epoch 00015: val_loss improved from 2.40120 to 2.31717, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 16/200
Epoch 00016: val_loss did not improve from 2.31717
Epoch 17/200
Epoch 00017: val_loss improved from 2.31717 to 2.16260, saving model

Epoch 41/200
Epoch 00041: val_loss did not improve from 1.31421
Epoch 42/200
Epoch 00042: val_loss did not improve from 1.31421
Epoch 43/200
Epoch 00043: val_loss did not improve from 1.31421
Epoch 44/200
Epoch 00044: val_loss did not improve from 1.31421
Epoch 45/200
Epoch 00045: val_loss improved from 1.31421 to 1.24733, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 46/200
Epoch 00046: val_loss did not improve from 1.24733
Epoch 47/200
Epoch 00047: val_loss did not improve from 1.24733
Epoch 48/200
Epoch 00048: val_loss did not improve from 1.24733
Epoch 49/200
Epoch 00049: val_loss did not improve from 1.24733
Epoch 50/200
Epoch 00050: val_loss improved from 1.24733 to 1.00316, saving model to DS02/experiment_set_9\results_0.9\split_0\mlp_model_trained.h5
Epoch 51/200
Epoch 00051: val_loss did not improve from 1.00316
Epoch 52/200
Epoch 00052: val_loss did not improve from 1.00316
Epoch 53/200
Epoch 00053: val_loss improved from 1.00316 to 0.99

Epoch 4/200
Epoch 00004: val_loss improved from 6.52382 to 6.48881, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 5/200
Epoch 00005: val_loss improved from 6.48881 to 4.25866, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 6/200
Epoch 00006: val_loss improved from 4.25866 to 3.77151, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 7/200
Epoch 00007: val_loss improved from 3.77151 to 3.34171, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 8/200
Epoch 00008: val_loss improved from 3.34171 to 2.98191, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 9/200
Epoch 00009: val_loss did not improve from 2.98191
Epoch 10/200
Epoch 00010: val_loss did not improve from 2.98191
Epoch 11/200
Epoch 00011: val_loss did not improve from 2.98191
Epoch 12/200
Epoch 00012: val_loss improved from 2.98191 to 2.79978, savin

Epoch 35/200
Epoch 00035: val_loss did not improve from 1.18587
Epoch 36/200
Epoch 00036: val_loss improved from 1.18587 to 1.13014, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 37/200
Epoch 00037: val_loss did not improve from 1.13014
Epoch 38/200
Epoch 00038: val_loss did not improve from 1.13014
Epoch 39/200
Epoch 00039: val_loss did not improve from 1.13014
Epoch 40/200
Epoch 00040: val_loss did not improve from 1.13014
Epoch 41/200
Epoch 00041: val_loss did not improve from 1.13014
Epoch 42/200
Epoch 00042: val_loss did not improve from 1.13014
Epoch 43/200
Epoch 00043: val_loss did not improve from 1.13014
Epoch 44/200
Epoch 00044: val_loss did not improve from 1.13014
Epoch 45/200
Epoch 00045: val_loss improved from 1.13014 to 0.93527, saving model to DS02/experiment_set_9\results_0.9\split_1\mlp_model_trained.h5
Epoch 46/200
Epoch 00046: val_loss did not improve from 0.93527
Epoch 47/200
Epoch 00047: val_loss improved from 0.93527 to 0.89

Epoch 6/200
Epoch 00006: val_loss did not improve from 5.19884
Epoch 7/200
Epoch 00007: val_loss improved from 5.19884 to 4.92447, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 8/200
Epoch 00008: val_loss improved from 4.92447 to 3.36219, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 9/200
Epoch 00009: val_loss did not improve from 3.36219
Epoch 10/200
Epoch 00010: val_loss improved from 3.36219 to 2.88589, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 11/200
Epoch 00011: val_loss did not improve from 2.88589
Epoch 12/200
Epoch 00012: val_loss improved from 2.88589 to 2.76981, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 13/200
Epoch 00013: val_loss improved from 2.76981 to 2.50019, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 14/200
Epoch 00014: val_loss improved from 2.50019 to 2.46100, sav

Epoch 39/200
Epoch 00039: val_loss did not improve from 1.29959
Epoch 40/200
Epoch 00040: val_loss did not improve from 1.29959
Epoch 41/200
Epoch 00041: val_loss improved from 1.29959 to 1.23430, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 42/200
Epoch 00042: val_loss did not improve from 1.23430
Epoch 43/200
Epoch 00043: val_loss did not improve from 1.23430
Epoch 44/200
Epoch 00044: val_loss did not improve from 1.23430
Epoch 45/200
Epoch 00045: val_loss did not improve from 1.23430
Epoch 46/200
Epoch 00046: val_loss improved from 1.23430 to 1.20560, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 47/200
Epoch 00047: val_loss did not improve from 1.20560
Epoch 48/200
Epoch 00048: val_loss did not improve from 1.20560
Epoch 49/200
Epoch 00049: val_loss improved from 1.20560 to 1.06831, saving model to DS02/experiment_set_9\results_0.9\split_2\mlp_model_trained.h5
Epoch 50/200
Epoch 00050: val_loss did not i

Epoch 73/200
Epoch 00073: val_loss did not improve from 0.85018
Epoch 74/200
Epoch 00074: val_loss did not improve from 0.85018
Epoch 75/200
Epoch 00075: val_loss did not improve from 0.85018
Epoch 76/200
Epoch 00076: val_loss did not improve from 0.85018
Epoch 77/200
Epoch 00077: val_loss did not improve from 0.85018
Epoch 78/200
Epoch 00078: val_loss did not improve from 0.85018
Epoch 79/200
Epoch 00079: val_loss did not improve from 0.85018
Epoch 80/200
Epoch 00080: val_loss did not improve from 0.85018
Epoch 81/200
Epoch 00081: val_loss did not improve from 0.85018
Epoch 00081: early stopping
Saved training history to file: DS02/experiment_set_9\results_0.9\split_2\history.pkl
Test set:
MSE: 0.87
RMSE: 0.93
CMAPSS score: 1.04



In [16]:
######################################################
# Results on test set
######################################################
NUM_TRIALS = 3

initial_columns = x_train.columns
# corr_th_list = [None, 0.99, 0.95, 0.9]
corr_th_list = [0.95, 0.9]

results_file = os.path.join(output_path, "results_corr_th_health_params_test_set.csv")
with open(results_file, "w") as file:
    file.write("corr_th,selected_features,num_features,mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")


for corr_th in corr_th_list:
    # Select features based on training set
    if corr_th is not None:
        selected_columns = get_non_correlated_features(x_train, corr_th=corr_th, debug=False)
    else:
        selected_columns = x_train.columns
    
    mse_vals = []
    rmse_vals = []
    cmapss_vals = []
    
    for random_seed in range(NUM_TRIALS):
        results_folder = "results_all" if corr_th is None else "results_{}".format(corr_th)
        results_path_crr_th = os.path.join(output_path, results_folder)
        results_path_crr_split = os.path.join(results_path_crr_th, "split_{}".format(random_seed))
        
        scaler_file = os.path.join(results_path_crr_split, 'scaler.pkl')
        scaler = load_object(scaler_file)

        model_path = os.path.join(results_path_crr_split, 'mlp_model_trained.h5')
        
        # Performance evaluation
        x_test_feature_selection = x_test[selected_columns]
        x_test_scaled = scaler.transform(x_test_feature_selection)

        loaded_model = load_model(model_path)
        predictions_test = loaded_model.predict(x_test_scaled).flatten()
        mse, rmse, cmapss_score = compute_evaluation_metrics(predictions_test, y_test)
        
        mse_vals.append(mse)
        rmse_vals.append(rmse)
        cmapss_vals.append(cmapss_score)
    
    mse_mean = np.mean(mse_vals)
    mse_std = np.std(mse_vals)
    rmse_mean = np.mean(rmse_vals)
    rmse_std = np.std(rmse_vals)
    cmapss_mean = np.mean(cmapss_vals)
    cmapss_std = np.std(cmapss_vals)
    
    with open(results_file, "a") as file:
        th = "all" if corr_th is None else corr_th
        
        file.write(f"{th}, {feature_list_to_string(selected_columns)}, {len(selected_columns)}, {numbers_list_to_string(mse_vals)}, {numbers_list_to_string(rmse_vals)}, {numbers_list_to_string(cmapss_vals)}, {mse_mean}, {mse_std}, {rmse_mean}, {rmse_std}, {cmapss_mean}, {cmapss_std}\n")

Test set:
MSE: 31.89
RMSE: 5.65
CMAPSS score: 1.52

Test set:
MSE: 32.34
RMSE: 5.69
CMAPSS score: 1.53

Test set:
MSE: 38.99
RMSE: 6.24
CMAPSS score: 1.59

Test set:
MSE: 28.58
RMSE: 5.35
CMAPSS score: 1.52

Test set:
MSE: 36.26
RMSE: 6.02
CMAPSS score: 1.58

Test set:
MSE: 37.41
RMSE: 6.12
CMAPSS score: 1.59

