In [1]:
import os
seed = 0
os.environ['PYTHONHASSEED'] = str(seed)

In [2]:
# Disable GPU
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import random
import pickle

from tensorflow.random import set_seed

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import mutual_info_regression

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model

In [4]:
# Set seed
random.seed(seed)
np.random.seed(seed)
set_seed(seed)

In [5]:
filename = '../data/turbofan_dataset/N-CMAPSS_DS02-006.h5'
output_path = 'DS02/experiment_set_11/corr'

In [6]:
if not os.path.exists(output_path):
    os.makedirs(output_path)

In [7]:
def load_dataset(filename, load_test_set=True):
    """ Reads a dataset from a given .h5 file and compose (in memory) the train and test data. 
    Args:
        filename(str): path to the .h5 file
    Returns:
        train_set(pd.DataFrame), test_set(pd.DataFrame)
    """
    with h5py.File(filename, 'r') as hdf:
        # Development set
        W_dev = np.array(hdf.get('W_dev'))             # W
        X_s_dev = np.array(hdf.get('X_s_dev'))         # X_s
        X_v_dev = np.array(hdf.get('X_v_dev'))         # X_v
        T_dev = np.array(hdf.get('T_dev'))             # T
        Y_dev = np.array(hdf.get('Y_dev'))             # RUL  
        A_dev = np.array(hdf.get('A_dev'))             # Auxiliary

        # Test set
        if load_test_set:
            W_test = np.array(hdf.get('W_test'))           # W
            X_s_test = np.array(hdf.get('X_s_test'))       # X_s
            X_v_test = np.array(hdf.get('X_v_test'))       # X_v
            T_test = np.array(hdf.get('T_test'))           # T
            Y_test = np.array(hdf.get('Y_test'))           # RUL  
            A_test = np.array(hdf.get('A_test'))           # Auxiliary
        
        # Column names
        W_var = np.array(hdf.get('W_var'))
        X_s_var = np.array(hdf.get('X_s_var'))  
        X_v_var = np.array(hdf.get('X_v_var')) 
        T_var = np.array(hdf.get('T_var'))
        A_var = np.array(hdf.get('A_var'))
        
        columns = []
        columns.append(list(np.array(A_var, dtype='U20')))
        columns.append(list(np.array(T_var, dtype='U20')))
        columns.append(list(np.array(X_s_var, dtype='U20')))
        columns.append(list(np.array(X_v_var, dtype='U20')))
        columns.append(list(np.array(W_var, dtype='U20')))
        columns.append(['RUL'])
        
        columns_list = []
        for columns_per_category in columns:
            columns_list += columns_per_category
        
    train_set = np.concatenate((A_dev, T_dev, X_s_dev, X_v_dev, W_dev, Y_dev), axis=1)
    if load_test_set:
        test_set = np.concatenate((A_test, T_test, X_s_test, X_v_test, W_test, Y_test), axis=1)
        return pd.DataFrame(data=train_set, columns=columns_list), pd.DataFrame(data=test_set, columns=columns_list), columns
    else:
        return pd.DataFrame(data=train_set, columns=columns_list), None, columns

In [8]:
def unit_cycle_info(df, compute_cycle_len=False):
    unit_ids = np.unique(df['unit'])
    print('Engine units in df: ', unit_ids)
    for i in unit_ids:
        num_cycles = len(np.unique(df.loc[df['unit'] == i, 'cycle']))
        print('Unit: ', i, ' - Number of flight cycles: ', num_cycles)
        
    if compute_cycle_len:
        cycle_ids = np.unique(df['cycle'])
        print('Total number of cycles: ', len(cycle_ids))
        min_len = np.inf
        max_len = 0
        for i in cycle_ids:
            cycle_len = len(df.loc[df['cycle'] == i, 'cycle'])
            if cycle_len < min_len:
                min_len = cycle_len
            elif cycle_len > max_len:
                max_len = cycle_len
        print('Min cycle length: ', min_len)
        print('Max cycle length: ', max_len)
    
    return unit_ids

In [9]:
# Filter constant and quasi-constant features
def get_quasi_constant_features(dataset, variance_th=0.01, debug=True):
    constant_filter = VarianceThreshold(threshold=variance_th)
    constant_filter.fit(dataset)
    constant_features = [col for col in dataset.columns 
                         if col not in dataset.columns[constant_filter.get_support()]]
    
    if debug:
        print("Number of non-constant features: ", len(dataset.columns[constant_filter.get_support()]))
        
        print("Number of quasi-constant features: ", len(constant_features))
        print("Quasi-constant features: ")
        for col in constant_features:
            print(col)
    return constant_features

def get_non_correlated_features(dataset, corr_th=0.9, debug=True):
    corr_mat = dataset.corr()
    corr_mat = np.abs(corr_mat)
    
    num_cols = corr_mat.shape[0]
    columns = np.full((num_cols,), True, dtype=bool)
    for i in range(num_cols):
        for j in range(i+1, num_cols):
            val = corr_mat.iloc[i, j]
            if val >= corr_th:
                if columns[j]:
                    columns[j] = False
                    if debug:
                        print(dataset.columns[i], "|", dataset.columns[j], "|", round(val, 2))
    if debug:        
        correlated_features = dataset.columns[~columns]
        print("Number of correlated features: ", len(correlated_features))
        print("Correlated features: ", list(correlated_features))
    
    selected_columns = dataset.columns[columns]
    if debug:
        print("Number of selected features: ", len(selected_columns))
        print("Selected features: ", list(selected_columns))
    return selected_columns

def cmapss_score_function(actual, predictions, normalize=True):
    # diff < 0 -> over-estimation
    # diff > 0 -> under-estimation
    diff = actual - predictions
    alpha = np.full_like(diff, 1/13)
    negative_diff_mask = diff < 0
    alpha[negative_diff_mask] = 1/10
    score = np.sum(np.exp(alpha * np.abs(diff)))
    
    if normalize:
        N = len(predictions)
        score /= N
    return score

def compute_evaluation_metrics(actual, predictions, label='Test'):
    mse = mean_squared_error(actual, predictions)
    rmse = np.sqrt(mse)
    cmapss_score = cmapss_score_function(actual, predictions)
    print('{} set:\nMSE: {:.2f}\nRMSE: {:.2f}\nCMAPSS score: {:.2f}\n'.format(label, mse, rmse, 
                                                                     cmapss_score))
    return mse, rmse, cmapss_score
    
def plot_loss_curves(history, output_path=None, y_lim=[0, 150]):
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.ylim(y_lim)
    plt.legend(['train', 'validation'], loc='upper left')
    
    if output_path is not None:
        plt.savefig(os.path.join(output_path, 'loss_curves.png'), format='png', dpi=300) 
    plt.show()
    
def plot_rul(expected, predicted):
    plt.figure()
    plt.plot(range(len(expected)), expected, label='Expected')
    plt.plot(range(len(predicted)), predicted, label='Predicted')
    plt.legend()
    
    
def create_mlp_model(input_dim, hidden_layer_sizes, activation='relu', output_weights_file=None):
    model = Sequential()
    model.add(Dense(hidden_layer_sizes[0], 
                    input_dim=input_dim, 
                    kernel_initializer='random_normal', 
                    activation=activation))

    for layer_size in hidden_layer_sizes[1:]:
        model.add(Dense(layer_size, 
                        kernel_initializer='random_normal', 
                        activation=activation))
    
    model.add(Dense(1, kernel_initializer='random_normal'))
    
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    if output_weights_file is not None:
        model.save_weights(output_weights_file)
    return model

def train_model_existing_weights(model, weights_file, x_train, y_train, x_val, y_val, epochs=200, batch_size=512, callbacks=[]):
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.load_weights(weights_file)
    return model.fit(x_train, y_train,
                     validation_data=(x_val, y_val),
                     epochs=epochs,
                     batch_size=batch_size,
                     verbose=1,
                     callbacks=callbacks)

def save_history(history, output_file=os.path.join(output_path, "history.pkl")):
    with open(output_file, 'wb') as file:
        pickle.dump(history.history, file)
    print("Saved training history to file: {}".format(output_file))

def load_history(file):
    return pickle.load(open(file, "rb"))

def save_object(obj, output_file):
    with open(output_file, 'wb') as file:
        pickle.dump(obj, file)
    print("Saved object to file: {}".format(output_file))
    
def load_object(file):
    return pickle.load(open(file, "rb"))

def model_evaluation(model, x_test, y_test, x_train=None, y_train=None, plot_range=[0, 10**3]):
    if x_train is not None and y_train is not None:
        predictions_train = model.predict(x_train).flatten()
        compute_evaluation_metrics(predictions_train, y_train, 'Train')
        
        expected = y_train[plot_range[0]:plot_range[1]]
        predicted = predictions_train[plot_range[0]:plot_range[1]]
        plot_rul(expected, predicted)
        
    predictions_test = model.predict(x_test).flatten()
    compute_evaluation_metrics(predictions_test, y_test)
    
    expected = y_test[plot_range[0]:plot_range[1]]
    predicted = predictions_test[plot_range[0]:plot_range[1]]
    plot_rul(expected, predicted)

In [10]:
def write_list(string_list, output_file):
    output_file.write("[")
    n = len(string_list)
    for i in range(n - 1):
        output_file.write("{}, ".format(string_list[i]))
    output_file.write("{}]\n".format(string_list[-1]))
    
def feature_list_to_string(feature_list):
    return "__".join(feature_list)

def numbers_list_to_string(num_list):
    return " ".join([str(x) for x in num_list])

In [11]:
def get_mi_ranked_features(mi, n):
    mi_sorted = mi.sort_values(by="MI", ascending=False)
    return mi[:n]["Col"].tolist()

def time_window_processing_old(X, y, window_size, stride=1, drop_nan=True):
    df = X
    lag_min = stride
    lag_max = stride + window_size
    
    shifted_cols = {}
    for lag in range(lag_min, lag_max):
        cols = df.groupby("unit").shift(lag)
        shifted_cols[lag] = cols
    
    for lag, cols in shifted_cols.items():
        df = df.join(cols.rename(columns=lambda x: x + "_(t-{})".format(lag)))
    
    df = df.join(y)
    
    if drop_nan:
        df.dropna(inplace=True)
    df.drop(labels=['unit'], axis=1, inplace=True)
    
    feature_columns = [col for col in df.columns.values if col != y.name]    
    return df[feature_columns], df[y.name]

def time_window_processing(X, y, window_size, stride=1):
    df = X
    
    # Create lagged version for each column
    shifted_cols = {}
    for lag in range(window_size):
        cols = df.groupby("unit").shift(lag)
        shifted_cols[lag] = cols
    
    # Add lag columns to initial dataframe
    for lag, cols in shifted_cols.items():
        df = df.join(cols.rename(columns=lambda x: x + "_(t-{})".format(lag)))
    
    # Add output column
    df = df.join(y)
    
    # Drop rows with NaN and the unit column
    df.dropna(inplace=True)
    df.drop(labels=['unit'], axis=1, inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    # Remove overlapping windows with overlap less than stride
    if stride > 1:
        df = df[df.index % (stride + 1) == 0]
    
    feature_columns = [col for col in df.columns.values if col != y.name]    
    return df[feature_columns], df[y.name]

In [12]:
start_time = time.process_time()  
train_set, _, columns = load_dataset(filename, load_test_set=False)
print("Operation time (sec): " , (time.process_time() - start_time))
print("Train set shape: " + str(train_set.shape))

columns_aux = columns[0] 
columns_health_params = columns[1] 
columns_sensor_measurements = columns[2] 
columns_virtual_sensors = columns[3]
columns_operating_conditions = columns[4] 
target_col = columns[5]

Operation time (sec):  3.25
Train set shape: (5263447, 47)


In [13]:
y_train = train_set['RUL']
x_train = train_set.drop(['RUL'], axis=1)

In [14]:
#mutual_info_series = pd.read_csv(os.path.join(output_path, "mutual_info.csv"))
#selected_columns = get_mi_ranked_features(mutual_info_series, 10)
selected_columns = ['T24', 'T30', 'P15', 'SmFan', 'SmLPC']
selected_columns += ['unit']
x_train = x_train[selected_columns]

In [15]:
x_train = x_train.astype(np.float32)
y_train = y_train.astype(np.float32)

In [16]:
x_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5263447 entries, 0 to 5263446
Data columns (total 6 columns):
 #   Column  Dtype  
---  ------  -----  
 0   T24     float32
 1   T30     float32
 2   P15     float32
 3   SmFan   float32
 4   SmLPC   float32
 5   unit    float32
dtypes: float32(6)
memory usage: 120.5 MB


In [17]:
x_train_tw, y_train_tw = time_window_processing(x_train, y_train, 10, 2)

In [18]:
x_train_tw

Unnamed: 0,T24,T30,P15,SmFan,SmLPC,T24_(t-0),T30_(t-0),P15_(t-0),SmFan_(t-0),SmLPC_(t-0),...,T24_(t-8),T30_(t-8),P15_(t-8),SmFan_(t-8),SmLPC_(t-8),T24_(t-9),T30_(t-9),P15_(t-9),SmFan_(t-9),SmLPC_(t-9)
0,600.111267,1438.689209,15.785221,16.642193,9.903013,600.111267,1438.689209,15.785221,16.642193,9.903013,...,600.055908,1438.350220,15.795477,16.639221,9.904926,600.148010,1438.498169,15.806267,16.648832,9.898130
3,600.142395,1438.692017,15.783556,16.658705,9.895153,600.142395,1438.692017,15.783556,16.658705,9.895153,...,600.298218,1439.063965,15.807512,16.649031,9.897465,600.369690,1439.240234,15.816360,16.653812,9.905518
6,600.082153,1438.551758,15.774821,16.660770,9.895080,600.082153,1438.551758,15.774821,16.660770,9.895080,...,600.241394,1438.947021,15.800091,16.654341,9.901053,600.153015,1438.802368,15.792666,16.639463,9.905753
9,599.990112,1438.344604,15.761504,16.660002,9.894007,599.990112,1438.344604,15.761504,16.660002,9.894007,...,600.205200,1438.843628,15.793442,16.660341,9.898022,600.111267,1438.689209,15.785221,16.642193,9.903013
12,600.022766,1438.354736,15.762060,16.676348,9.888162,600.022766,1438.354736,15.762060,16.676348,9.888162,...,600.119446,1438.638794,15.780783,16.658916,9.895327,600.142395,1438.692017,15.783556,16.658705,9.895153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5263380,567.638062,1285.592407,14.283421,20.427475,6.839921,567.638062,1285.592407,14.283421,20.427475,6.839921,...,567.418640,1285.076538,14.251586,20.444849,6.835948,567.386108,1285.004272,14.247150,20.445320,6.836019
5263383,567.729065,1285.798218,14.296276,20.422405,6.840520,567.729065,1285.798218,14.296276,20.422405,6.840520,...,567.494568,1285.264526,14.263375,20.434511,6.838491,567.462219,1285.195557,14.258837,20.436394,6.838428
5263386,567.778870,1285.943604,14.305333,20.408386,6.844467,567.778870,1285.943604,14.305333,20.408386,6.844467,...,567.592468,1285.474731,14.276195,20.434193,6.838289,567.560303,1285.404419,14.272107,20.433979,6.838126
5263389,567.942627,1287.089600,14.322270,20.402950,6.878830,567.942627,1287.089600,14.322270,20.402950,6.878830,...,567.689453,1285.684814,14.289452,20.431725,6.838095,567.638062,1285.592407,14.283421,20.427475,6.839921


In [19]:
x_train_tw.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1754465 entries, 0 to 5263392
Data columns (total 55 columns):
 #   Column       Dtype  
---  ------       -----  
 0   T24          float32
 1   T30          float32
 2   P15          float32
 3   SmFan        float32
 4   SmLPC        float32
 5   T24_(t-0)    float32
 6   T30_(t-0)    float32
 7   P15_(t-0)    float32
 8   SmFan_(t-0)  float32
 9   SmLPC_(t-0)  float32
 10  T24_(t-1)    float32
 11  T30_(t-1)    float32
 12  P15_(t-1)    float32
 13  SmFan_(t-1)  float32
 14  SmLPC_(t-1)  float32
 15  T24_(t-2)    float32
 16  T30_(t-2)    float32
 17  P15_(t-2)    float32
 18  SmFan_(t-2)  float32
 19  SmLPC_(t-2)  float32
 20  T24_(t-3)    float32
 21  T30_(t-3)    float32
 22  P15_(t-3)    float32
 23  SmFan_(t-3)  float32
 24  SmLPC_(t-3)  float32
 25  T24_(t-4)    float32
 26  T30_(t-4)    float32
 27  P15_(t-4)    float32
 28  SmFan_(t-4)  float32
 29  SmLPC_(t-4)  float32
 30  T24_(t-5)    float32
 31  T30_(t-5)    float32
 32

In [20]:
###########################################
# Test effect of window size
###########################################
NUM_TRIALS = 1

batch_size = 512
epochs = 200
layer_sizes = [256, 256, 512, 64]

window_size_list = [10]

results_file = os.path.join(output_path, "results_time_window.csv")
with open(results_file, "w") as file:
    file.write("window_size,mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")


for i in range(len(window_size_list)):
    # Time window processing
    window_size = window_size_list[i]
    stride = window_size // 2
    # stride = 1
    
    x_train_tw, y_train_tw = time_window_processing(x_train, y_train, window_size, stride)
    
    # Train-validation split
    x_train_tw, x_holdout, y_train_tw, y_holdout = train_test_split(x_train_tw, 
                                                                    y_train_tw, 
                                                                    test_size=0.3, 
                                                                    random_state=seed)
    
    mse_vals = []
    rmse_vals = []
    cmapss_vals = []
    
    for random_seed in range(NUM_TRIALS):
        # Train-validation split for early stopping
        x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(x_train_tw, 
                                                                                  y_train_tw, 
                                                                                  test_size=0.1, 
                                                                                  random_state=random_seed)
        # Create output path
        results_folder ="results_{}".format(window_size)
        results_path_crr = os.path.join(output_path, results_folder)
        results_path_crr_split = os.path.join(results_path_crr, "split_{}".format(random_seed))
        if not os.path.exists(results_path_crr_split):
            os.makedirs(results_path_crr_split)

        # Standardization
        scaler_file = os.path.join(results_path_crr_split, 'scaler.pkl')
        scaler = StandardScaler()
        x_train_scaled = scaler.fit_transform(x_train_split)
        x_val_scaled = scaler.transform(x_val_split)
        input_dim = x_train_scaled.shape[1]
        save_object(scaler, scaler_file)

        # Create model
        weights_file = os.path.join(results_path_crr, 'mlp_initial_weights.h5')
        model_path = os.path.join(results_path_crr_split, 'mlp_model_trained.h5')
        
        # Save initial weights
        if random_seed == 0:
            model = create_mlp_model(input_dim, layer_sizes, activation='tanh',
                                     output_weights_file=weights_file)
        else:
            model = create_mlp_model(input_dim, layer_sizes, activation='tanh')
        model.summary()

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
        mc = ModelCheckpoint(model_path, monitor='val_loss', mode='min', verbose=2, 
                             save_best_only=True)

        # Train model
        history = train_model_existing_weights(model, weights_file, 
                                               x_train_scaled, y_train_split, 
                                               x_val_scaled, y_val_split, 
                                               batch_size=batch_size, 
                                               epochs=epochs, 
                                               callbacks=[es, mc])

        history_file = os.path.join(results_path_crr_split, "history.pkl")
        save_history(history, history_file)

        # Performance evaluation
        x_holdout_scaled = scaler.transform(x_holdout)

        loaded_model = load_model(model_path)
        predictions_holdout = loaded_model.predict(x_holdout_scaled).flatten()
        mse, rmse, cmapss_score = compute_evaluation_metrics(predictions_holdout, y_holdout)
        
        mse_vals.append(mse)
        rmse_vals.append(rmse)
        cmapss_vals.append(cmapss_score)
    
    mse_mean = np.mean(mse_vals)
    mse_std = np.std(mse_vals)
    rmse_mean = np.mean(rmse_vals)
    rmse_std = np.std(rmse_vals)
    cmapss_mean = np.mean(cmapss_vals)
    cmapss_std = np.std(cmapss_vals)
    
    with open(results_file, "a") as file:
        file.write(f"{window_size}, {numbers_list_to_string(mse_vals)}, {numbers_list_to_string(rmse_vals)}, {numbers_list_to_string(cmapss_vals)}, {mse_mean}, {mse_std}, {rmse_mean}, {rmse_std}, {cmapss_mean}, {cmapss_std}\n")

Saved object to file: DS02/experiment_set_11/corr\results_10\split_0\scaler.pkl
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               14336     
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_2 (Dense)              (None, 512)               131584    
_________________________________________________________________
dense_3 (Dense)              (None, 64)                32832     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 65        
Total params: 244,609
Trainable params: 244,609
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
Epoch 00001: val_loss im

Epoch 00027: val_loss did not improve from 46.58361
Epoch 28/200
Epoch 00028: val_loss did not improve from 46.58361
Epoch 29/200
Epoch 00029: val_loss did not improve from 46.58361
Epoch 30/200
Epoch 00030: val_loss did not improve from 46.58361
Epoch 31/200
Epoch 00031: val_loss improved from 46.58361 to 44.39869, saving model to DS02/experiment_set_11/corr\results_10\split_0\mlp_model_trained.h5
Epoch 32/200
Epoch 00032: val_loss improved from 44.39869 to 41.76421, saving model to DS02/experiment_set_11/corr\results_10\split_0\mlp_model_trained.h5
Epoch 33/200
Epoch 00033: val_loss did not improve from 41.76421
Epoch 34/200
Epoch 00034: val_loss did not improve from 41.76421
Epoch 35/200
Epoch 00035: val_loss did not improve from 41.76421
Epoch 36/200
Epoch 00036: val_loss did not improve from 41.76421
Epoch 37/200
Epoch 00037: val_loss did not improve from 41.76421
Epoch 38/200
Epoch 00038: val_loss did not improve from 41.76421
Epoch 39/200
Epoch 00039: val_loss did not improve fr

In [21]:
train_set, test_set, columns = load_dataset(filename, load_test_set=True)
y_test = test_set['RUL']
x_test = test_set.drop(['RUL'], axis=1)

# mutual_info_series = pd.read_csv(os.path.join(output_path, "mutual_info.csv"))
# selected_columns = get_mi_ranked_features(mutual_info_series, 10) 
selected_columns = ['T24', 'T30', 'P15', 'SmFan', 'SmLPC']
selected_columns += ['unit']
x_test = x_test[selected_columns]

x_test = x_test.astype(np.float32)
y_test = y_test.astype(np.float32)

In [22]:
##############################
# Final evaluation on test set
##############################
NUM_TRIALS = 1

batch_size = 512
epochs = 200
layer_sizes = [256, 256, 512, 64]

window_size_list = [10]

results_file = os.path.join(output_path, "results_time_window_test_set.csv")
with open(results_file, "w") as file:
    file.write("window_size,mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")


for i in range(len(window_size_list)):
    window_size = window_size_list[i]
    stride = window_size // 2
    x_test_tw, y_test = time_window_processing(x_test, y_test, window_size, stride)
    
    mse_vals = []
    rmse_vals = []
    cmapss_vals = []
    
    for random_seed in range(NUM_TRIALS):
        results_folder ="results_{}".format(window_size)
        results_path_crr = os.path.join(output_path, results_folder)
        results_path_crr_split = os.path.join(results_path_crr, "split_{}".format(random_seed))
        
        scaler_file = os.path.join(results_path_crr_split, 'scaler.pkl')
        scaler = load_object(scaler_file)

        model_path = os.path.join(results_path_crr_split, 'mlp_model_trained.h5')
        
        # Performance evaluation
        x_test_scaled = scaler.transform(x_test_tw)

        loaded_model = load_model(model_path)
        predictions_test = loaded_model.predict(x_test_scaled).flatten()
        mse, rmse, cmapss_score = compute_evaluation_metrics(predictions_test, y_test)
        
        mse_vals.append(mse)
        rmse_vals.append(rmse)
        cmapss_vals.append(cmapss_score)
    
    mse_mean = np.mean(mse_vals)
    mse_std = np.std(mse_vals)
    rmse_mean = np.mean(rmse_vals)
    rmse_std = np.std(rmse_vals)
    cmapss_mean = np.mean(cmapss_vals)
    cmapss_std = np.std(cmapss_vals)
    
    with open(results_file, "a") as file:
        file.write(f"{window_size}, {numbers_list_to_string(mse_vals)}, {numbers_list_to_string(rmse_vals)}, {numbers_list_to_string(cmapss_vals)}, {mse_mean}, {mse_std}, {rmse_mean}, {rmse_std}, {cmapss_mean}, {cmapss_std}\n")

Test set:
MSE: 56.06
RMSE: 7.49
CMAPSS score: 1.72

