In [1]:
import os
import sys

# Single seed shared by every random number generator used in this notebook.
seed = 0
# The variable must be spelled PYTHONHASHSEED (the original 'PYTHONHASSEED' was a
# typo and had no effect). Python reads it at interpreter start-up, so setting it
# here affects interpreters launched from this process; to fix string hashing of
# the running kernel it has to be exported before the kernel starts.
os.environ['PYTHONHASHSEED'] = str(seed)

In [2]:
import random
import time

import numpy as np
import pandas as pd

from tensorflow.random import set_seed

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import load_model

In [3]:
# Make the parent directory importable so that the `src` package can be found
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [4]:
from src.data_reader import DataReader
from src.feature_extraction import get_principal_components, dimensionality_reduction
from src.metrics import compute_evaluation_metrics
from src.model_evaluation import evaluate_mlp, evaluate_mlp_multiple_splits
from src.plotting import plot_loss_curves
from src.save_object import load_object
from src.training import MLPConfigParams

In [5]:
# Seed every random number generator in use with the same value:
set_seed(seed)        # TensorFlow
np.random.seed(seed)  # NumPy legacy global generator
random.seed(seed)     # Python standard library

In [6]:
# Input dataset file and root directory for everything this notebook writes
filename = '../data/N-CMAPSS_DS02-006.h5'
output_path = '../results/experiment_set_18'

# exist_ok=True creates the directory only when it is missing, without the
# separate existence check (which could race with another process).
os.makedirs(output_path, exist_ok=True)

In [7]:
def train_evaluate_mlp_pca(x_train, y_train,
                           x_test, y_test,
                           num_trials,
                           mlp_config_params,
                           results_path,
                           epochs, batch_size, results_file=None):
    """Train and evaluate an MLP on PCA-reduced features over several trials.

    For every trial the training data is split 90/10 into train/validation
    parts (``random_state=trial_num``), standardized with a scaler fitted on
    the training part, reduced with the PCA object obtained from the scaled
    training part, and used to train a fresh MLP. The model stored at
    ``<results_path>/split_<trial>/mlp_model_trained.h5`` is then loaded and
    scored on the test set. The scaler, the PCA object and the training
    history of each trial are saved in the trial directory.

    NOTE(review): ``save_object``, ``create_mlp_model``, ``train_mlp``,
    ``get_callbacks`` and ``numbers_list_to_string`` are used here but are not
    imported in the visible notebook cells; they must be in scope before this
    function is called.

    Args:
        x_train: Training features (anything accepted by ``train_test_split``
            and ``StandardScaler``).
        y_train: Training targets aligned with ``x_train``.
        x_test: Test features with the same columns as ``x_train``.
        y_test: Test targets aligned with ``x_test``.
        num_trials: Number of train/validation splits to run.
        mlp_config_params: Object exposing ``layer_sizes``, ``activation`` and
            ``dropout`` for the network.
        results_path: Directory receiving the per-trial sub-directories and the
            shared initial-weights file.
        epochs: Number of epochs forwarded to ``train_mlp``.
        batch_size: Batch size forwarded to ``train_mlp``.
        results_file: Optional path of a text file; when given, one line with
            the per-trial metrics and their mean/std is appended to it.

    Returns:
        None. The aggregated metrics are printed (and optionally appended to
        ``results_file``).
    """
    mse_vals = []
    rmse_vals = []
    cmapss_vals = []

    # Shared by all trials: written when the first model is created and passed
    # to every training run (presumably so each trial starts from the same
    # initial weights - TODO confirm against train_mlp).
    weights_file = os.path.join(results_path, 'mlp_initial_weights.h5')

    for trial_num in range(num_trials):
        # Train-validation split for early stopping
        x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
            x_train, y_train, test_size=0.1, random_state=trial_num)

        # Create output path
        results_path_crr_split = os.path.join(results_path, f"split_{trial_num}")
        os.makedirs(results_path_crr_split, exist_ok=True)

        # Standardization (statistics come from the training part only)
        scaler_file = os.path.join(results_path_crr_split, 'scaler.pkl')
        scaler = StandardScaler()
        x_train_scaled = scaler.fit_transform(x_train_split)
        x_val_scaled = scaler.transform(x_val_split)
        x_test_scaled = scaler.transform(x_test)
        save_object(scaler, scaler_file)

        # PCA
        pca = get_principal_components(x_train_scaled, debug=False)
        pca_file = os.path.join(results_path_crr_split, 'pca.pkl')
        save_object(pca, pca_file)

        x_train_final = dimensionality_reduction(x_train_scaled, pca)
        x_val_final = dimensionality_reduction(x_val_scaled, pca)
        x_test_final = dimensionality_reduction(x_test_scaled, pca)
        input_dim = x_train_final.shape[1]

        # Create model; only the first trial saves the initial weights
        model_path = os.path.join(results_path_crr_split, 'mlp_model_trained.h5')
        model_kwargs = dict(hidden_layer_sizes=mlp_config_params.layer_sizes,
                            activation=mlp_config_params.activation,
                            dropout=mlp_config_params.dropout)
        if trial_num == 0:
            model_kwargs['output_weights_file'] = weights_file
        model = create_mlp_model(input_dim, **model_kwargs)
        model.summary()

        # Train model on the PCA-reduced features. The network is built with
        # input_dim taken from x_train_final and evaluated on x_test_final, so
        # it must be trained on the reduced data, not the merely scaled data.
        history = train_mlp(model,
                            x_train_final, y_train_split,
                            x_val_final, y_val_split,
                            weights_file=weights_file,
                            batch_size=batch_size,
                            epochs=epochs,
                            callbacks=get_callbacks(model_path))

        history_file = os.path.join(results_path_crr_split, f"history_{trial_num}.pkl")
        plot_loss_curves(history.history)
        save_object(history.history, history_file)

        # Performance evaluation of the model stored at model_path
        loaded_model = load_model(model_path)
        predictions_test = loaded_model.predict(x_test_final).flatten()
        mse, rmse, cmapss_score = compute_evaluation_metrics(predictions_test, y_test)

        mse_vals.append(mse)
        rmse_vals.append(rmse)
        cmapss_vals.append(cmapss_score)

    # Aggregate the per-trial metrics
    mse_mean = np.mean(mse_vals)
    mse_std = np.std(mse_vals)
    rmse_mean = np.mean(rmse_vals)
    rmse_std = np.std(rmse_vals)
    cmapss_mean = np.mean(cmapss_vals)
    cmapss_std = np.std(cmapss_vals)

    if results_file is not None:
        # Append so that a header written by the caller is preserved
        with open(results_file, "a") as file:
            line_to_write = f"{numbers_list_to_string(mse_vals)}, {numbers_list_to_string(rmse_vals)},"
            line_to_write += f"{numbers_list_to_string(cmapss_vals)}, {mse_mean}, {mse_std}, {rmse_mean},"
            line_to_write += f"{rmse_std}, {cmapss_mean}, {cmapss_std}\n"
            file.write(line_to_write)

    print("MSE: mean = {:.2f}   stddev = {:.2f}".format(mse_mean, mse_std))
    print("RMSE: mean = {:.2f}   stddev = {:.2f}".format(rmse_mean, rmse_std))
    print("CMAPSS: mean = {:.2f}   stddev = {:.2f}".format(cmapss_mean, cmapss_std))

SyntaxError: invalid syntax (<ipython-input-7-4657cbf55546>, line 7)

In [None]:
# Load both partitions of the dataset and report the CPU time this took
data_reader = DataReader()

start_time = time.process_time()
data_reader.load_dataset(filename, load_train=True, load_test=True)
print("Operation time (sec): " , (time.process_time() - start_time))

# A partition is only reported when the reader actually populated it
if data_reader.train_set is not None:
    print(f"Train set shape: {data_reader.train_set.shape}")

if data_reader.test_set is not None:
    print(f"Test set shape: {data_reader.test_set.shape}")

In [None]:
# Short aliases for the two partitions held by the reader
train_set, test_set = data_reader.train_set, data_reader.test_set

In [None]:
# Separate the 'RUL' target column from the remaining feature columns
x_train = train_set.drop(columns=['RUL'])
y_train = train_set['RUL']

In [8]:
# Keep only the w_cols and x_s_cols column groups as model inputs
column_names = data_reader.column_names
selected_columns = column_names.w_cols + column_names.x_s_cols
x_train = x_train.loc[:, selected_columns]

# The test set uses the same feature columns, in the same order, as the train set
x_test = test_set[x_train.columns]
y_test = test_set['RUL']

NameError: name 'data_reader' is not defined

In [None]:
# Cast features and targets of both partitions to 32-bit floats
x_train, y_train, x_test, y_test = (
    part.astype(np.float32) for part in (x_train, y_train, x_test, y_test)
)

In [19]:
###########################################
# Test effect of PCA for feature extraction
###########################################
NUM_TRIALS = 3
EPOCHS = 60
BATCH_SIZE = 512

mlp_config_params = MLPConfigParams(layer_sizes=[256, 256, 512, 64], activation='tanh', dropout=0.0)

# Directory for the per-trial artifacts of this configuration
results_path = os.path.join(output_path, "results_pca_a1")
os.makedirs(results_path, exist_ok=True)

# Start a fresh results file containing only the header row
results_file = os.path.join(output_path, "results_pca_a1.csv")
with open(results_file, "w") as file:
    file.write("mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")

# Call the function under its defined name and pass the configuration created in
# this cell (the original used the undefined names `train_evaluate_pca` and
# `mlp_params`, which fail on a fresh kernel).
train_evaluate_mlp_pca(
    x_train, y_train,
    x_test, y_test,
    NUM_TRIALS,
    mlp_config_params,
    results_path,
    EPOCHS, BATCH_SIZE,
    results_file=results_file)

Saved object to file: DS02/experiment_set_6\results_pca\split_0\scaler.pkl
Saved object to file: DS02/experiment_set_6\results_pca\split_0\pca.pkl
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               1792      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_2 (Dense)              (None, 512)               131584    
_________________________________________________________________
dense_3 (Dense)              (None, 64)                32832     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 65        
Total params: 232,065
Trainable params: 232,065
Non-trainable params: 0
___________________________________

Epoch 27/200
Epoch 00027: val_loss improved from 1.93020 to 1.89174, saving model to DS02/experiment_set_6\results_pca\split_0\mlp_model_trained.h5
Epoch 28/200
Epoch 00028: val_loss did not improve from 1.89174
Epoch 29/200
Epoch 00029: val_loss did not improve from 1.89174
Epoch 30/200
Epoch 00030: val_loss did not improve from 1.89174
Epoch 31/200
Epoch 00031: val_loss did not improve from 1.89174
Epoch 32/200
Epoch 00032: val_loss improved from 1.89174 to 1.82247, saving model to DS02/experiment_set_6\results_pca\split_0\mlp_model_trained.h5
Epoch 33/200
Epoch 00033: val_loss did not improve from 1.82247
Epoch 34/200
Epoch 00034: val_loss improved from 1.82247 to 1.75825, saving model to DS02/experiment_set_6\results_pca\split_0\mlp_model_trained.h5
Epoch 35/200
Epoch 00035: val_loss did not improve from 1.75825
Epoch 36/200
Epoch 00036: val_loss did not improve from 1.75825
Epoch 37/200
Epoch 00037: val_loss improved from 1.75825 to 1.74347, saving model to DS02/experiment_set_6\r

Epoch 00009: val_loss did not improve from 3.42666
Epoch 10/200
Epoch 00010: val_loss did not improve from 3.42666
Epoch 11/200
Epoch 00011: val_loss improved from 3.42666 to 3.28600, saving model to DS02/experiment_set_6\results_pca\split_1\mlp_model_trained.h5
Epoch 12/200
Epoch 00012: val_loss improved from 3.28600 to 3.25010, saving model to DS02/experiment_set_6\results_pca\split_1\mlp_model_trained.h5
Epoch 13/200
Epoch 00013: val_loss did not improve from 3.25010
Epoch 14/200
Epoch 00014: val_loss improved from 3.25010 to 2.68919, saving model to DS02/experiment_set_6\results_pca\split_1\mlp_model_trained.h5
Epoch 15/200
Epoch 00015: val_loss did not improve from 2.68919
Epoch 16/200
Epoch 00016: val_loss improved from 2.68919 to 2.26263, saving model to DS02/experiment_set_6\results_pca\split_1\mlp_model_trained.h5
Epoch 17/200
Epoch 00017: val_loss did not improve from 2.26263
Epoch 18/200
Epoch 00018: val_loss did not improve from 2.26263
Epoch 19/200
Epoch 00019: val_loss di

Epoch 42/200
Epoch 00042: val_loss improved from 1.58039 to 1.51682, saving model to DS02/experiment_set_6\results_pca\split_1\mlp_model_trained.h5
Epoch 43/200
Epoch 00043: val_loss did not improve from 1.51682
Epoch 44/200
Epoch 00044: val_loss did not improve from 1.51682
Epoch 45/200
Epoch 00045: val_loss did not improve from 1.51682
Epoch 46/200
Epoch 00046: val_loss did not improve from 1.51682
Epoch 47/200
Epoch 00047: val_loss did not improve from 1.51682
Epoch 48/200
Epoch 00048: val_loss did not improve from 1.51682
Epoch 49/200
Epoch 00049: val_loss did not improve from 1.51682
Epoch 50/200
Epoch 00050: val_loss did not improve from 1.51682
Epoch 51/200
Epoch 00051: val_loss did not improve from 1.51682
Epoch 52/200
Epoch 00052: val_loss improved from 1.51682 to 1.49611, saving model to DS02/experiment_set_6\results_pca\split_1\mlp_model_trained.h5
Epoch 53/200
Epoch 00053: val_loss did not improve from 1.49611
Epoch 54/200
Epoch 00054: val_loss improved from 1.49611 to 1.47

Epoch 6/200
Epoch 00006: val_loss improved from 3.94004 to 3.82042, saving model to DS02/experiment_set_6\results_pca\split_2\mlp_model_trained.h5
Epoch 7/200
Epoch 00007: val_loss improved from 3.82042 to 3.26827, saving model to DS02/experiment_set_6\results_pca\split_2\mlp_model_trained.h5
Epoch 8/200
Epoch 00008: val_loss did not improve from 3.26827
Epoch 9/200
Epoch 00009: val_loss did not improve from 3.26827
Epoch 10/200
Epoch 00010: val_loss improved from 3.26827 to 2.95126, saving model to DS02/experiment_set_6\results_pca\split_2\mlp_model_trained.h5
Epoch 11/200
Epoch 00011: val_loss did not improve from 2.95126
Epoch 12/200
Epoch 00012: val_loss improved from 2.95126 to 2.68940, saving model to DS02/experiment_set_6\results_pca\split_2\mlp_model_trained.h5
Epoch 13/200
Epoch 00013: val_loss did not improve from 2.68940
Epoch 14/200
Epoch 00014: val_loss did not improve from 2.68940
Epoch 15/200
Epoch 00015: val_loss improved from 2.68940 to 2.37209, saving model to DS02/ex

Saved object to file: DS02/experiment_set_6\results_pca\split_3\pca.pkl
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 256)               1792      
_________________________________________________________________
dense_16 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_17 (Dense)             (None, 512)               131584    
_________________________________________________________________
dense_18 (Dense)             (None, 64)                32832     
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 65        
Total params: 232,065
Trainable params: 232,065
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
Epoch 00001: val_loss improved

Epoch 00012: val_loss did not improve from 2.56991
Epoch 13/200
Epoch 00013: val_loss did not improve from 2.56991
Epoch 14/200
Epoch 00014: val_loss improved from 2.56991 to 2.56137, saving model to DS02/experiment_set_6\results_pca\split_4\mlp_model_trained.h5
Epoch 15/200
Epoch 00015: val_loss did not improve from 2.56137
Epoch 16/200
Epoch 00016: val_loss improved from 2.56137 to 2.20523, saving model to DS02/experiment_set_6\results_pca\split_4\mlp_model_trained.h5
Epoch 17/200
Epoch 00017: val_loss improved from 2.20523 to 2.15376, saving model to DS02/experiment_set_6\results_pca\split_4\mlp_model_trained.h5
Epoch 18/200
Epoch 00018: val_loss improved from 2.15376 to 2.06571, saving model to DS02/experiment_set_6\results_pca\split_4\mlp_model_trained.h5
Epoch 19/200
Epoch 00019: val_loss improved from 2.06571 to 2.01537, saving model to DS02/experiment_set_6\results_pca\split_4\mlp_model_trained.h5
Epoch 20/200
Epoch 00020: val_loss did not improve from 2.01537
Epoch 21/200
Epoc

In [None]:
# Second PCA experiment: smaller network with light dropout
NUM_TRIALS = 3
EPOCHS = 60
BATCH_SIZE = 512

mlp_params = MLPConfigParams(layer_sizes=[128, 256, 64], activation='tanh', dropout=0.05)

# Directory for the per-trial artifacts of this configuration
results_path = os.path.join(output_path, "results_pca_a2")
os.makedirs(results_path, exist_ok=True)

# Start a fresh results file containing only the header row
results_file = os.path.join(output_path, "results_pca_a2.csv")
with open(results_file, "w") as file:
    file.write("mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")

# The function is defined as `train_evaluate_mlp_pca`; the original call to
# `train_evaluate_pca` referenced a name that does not exist in this notebook.
train_evaluate_mlp_pca(
    x_train, y_train,
    x_test, y_test,
    NUM_TRIALS,
    mlp_params,
    results_path,
    EPOCHS, BATCH_SIZE,
    results_file=results_file)