In [None]:
import os
import sys

# Single seed shared by every random number generator used in this notebook
seed = 0
# The variable Python recognises is PYTHONHASHSEED (the previous spelling,
# 'PYTHONHASSEED', was silently ignored).
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it here
# only affects child processes spawned from this kernel, not the kernel itself.
os.environ['PYTHONHASHSEED'] = str(seed)

In [None]:
import numpy as np
import pandas as pd
import time
import random

from tensorflow.random import set_seed

from sklearn.model_selection import train_test_split

from keras.models import load_model

In [None]:
# Make the parent directory importable so that the `src` package can be found
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    # Append only once so that re-running the cell does not grow sys.path
    sys.path.append(module_path)

In [None]:
from src.data_reader import DataReader
from src.dataset_exploration import unit_cycle_info
from src.feature_engineering import add_time_lags
from src.metrics import compute_evaluation_metrics
from src.model_evaluation import evaluate_mlp, evaluate_mlp_multiple_splits
from src.plotting import plot_loss_curves
from src.save_object import load_object
from src.training import train_evaluate_mlp, MLPConfigParams
from src.utils import numbers_list_to_string

In [None]:
# Seed the three random number generators in play with the same value:
# Python's `random`, NumPy's global generator and TensorFlow (via set_seed)
for _seed_fn in (random.seed, np.random.seed, set_seed):
    _seed_fn(seed)

In [None]:
# Input dataset (HDF5) and the folder that collects this experiment set's results
filename = '../data/N-CMAPSS_DS02-006.h5'
output_path = '../results/experiment_set_18'

# exist_ok=True creates the folder (and any missing parents) in one step,
# without a separate existence check that could race with another process,
# and makes re-running the cell a no-op.
os.makedirs(output_path, exist_ok=True)

In [None]:
def preprocess_data(train_set, lags_list, selected_columns):
    """Add time-lag features and split the result into inputs and target.

    Args:
        train_set: Data frame handed to ``add_time_lags``; the lagged result
            must contain a 'RUL' column.
        lags_list: Lags forwarded unchanged to ``add_time_lags``.
        selected_columns: Columns forwarded unchanged to ``add_time_lags``.

    Returns:
        Tuple ``(x_train, y_train)``: every column of the lagged frame except
        'RUL', and the 'RUL' column itself.
    """
    lagged = add_time_lags(train_set, lags_list, selected_columns)
    # Everything that is not the target column is treated as a model input
    feature_names = [name for name in lagged.columns if name != 'RUL']
    return lagged[feature_names], lagged['RUL']

In [None]:
# Load both partitions of the dataset and report how long the load took.
# time.process_time() counts CPU time of this process, not wall-clock time.
data_reader = DataReader()

start_time = time.process_time()
data_reader.load_dataset(filename, load_train=True, load_test=True)
elapsed = time.process_time() - start_time
print("Operation time (sec): ", elapsed)

# Either partition may be None, so only report the shapes that are available
if data_reader.train_set is not None:
    print(f"Train set shape: {data_reader.train_set.shape}")

if data_reader.test_set is not None:
    print(f"Test set shape: {data_reader.test_set.shape}")

In [None]:
# Keep a handle on the loaded training partition; the following cells narrow
# its columns and change its dtype.
train_set = data_reader.train_set

In [None]:
# Model inputs are the `w_cols` and `x_s_cols` column groups exposed by the reader
column_names = data_reader.column_names
selected_columns = column_names.w_cols + column_names.x_s_cols
# 'unit' is kept alongside the selected columns; 'RUL' is the regression target
selected_columns_with_unit = selected_columns + ['unit']
columns_to_keep = selected_columns_with_unit + ['RUL']
train_set = train_set[columns_to_keep]

In [None]:
# Cast every remaining column (selected columns, 'unit' and 'RUL') to 32-bit floats
train_set = train_set.astype(np.float32)

In [None]:
#########################################
# Effect of time lags (experiment set 18)
#########################################
# Training settings passed unchanged to train_evaluate_mlp for every lag list
NUM_TRIALS = 3
EPOCHS = 15
BATCH_SIZE = 512

# The network configuration is fixed; only the lag list varies between runs
mlp_params = MLPConfigParams(
    layer_sizes=[128, 256, 64],
    activation='tanh',
    dropout=0.05,
)

# Candidate lag lists, one experiment per entry
lags_lists = [
    [1, 2, 3, 4, 5],
    [1, 5, 10, 15],
    [5, 10, 15, 20],
    [10, 15, 20, 25],
    [10, 20, 30, 40],
]

# Start the summary CSV from scratch ("w" truncates any earlier file) with its header row
results_file = os.path.join(output_path, "results_time_lags.csv")
with open(results_file, "w") as csv_out:
    csv_out.write("time_lags,mse,rmse,cmapss,mse(mean),mse(std),rmse(mean),rmse(std),cmapss(mean),cmapss(std)\n")

# Run one training/evaluation experiment per candidate lag list
for idx, lags in enumerate(lags_lists):
    # Each lag configuration gets its own artefact folder: results_lags_<idx>
    results_folder = f"results_lags_{idx}"
    results_path = os.path.join(output_path, results_folder)
    # exist_ok=True replaces the separate existence check: no check-then-create
    # race, and re-running the cell is a no-op for folders that already exist.
    os.makedirs(results_path, exist_ok=True)

    # Begin this configuration's CSV row with the lag list. The file is closed
    # again before training because train_evaluate_mlp receives the path itself.
    # NOTE(review): presumably train_evaluate_mlp appends the remaining columns
    # and the line break to complete the row -- confirm in src.training.
    with open(results_file, "a") as file:
        file.write(f"{numbers_list_to_string(lags)},")

    # Add time lags
    x_train, y_train = preprocess_data(train_set, lags, selected_columns)

    # Validation split: 70% train / 30% hold-out; the fixed random_state keeps
    # the split reproducible across runs
    x_train_split, x_holdout_split, y_train_split, y_holdout_split = train_test_split(
        x_train,
        y_train,
        test_size=0.3,
        random_state=seed)

    # Train and evaluate the MLP on this split with the shared settings
    mse_vals, rmse_vals, cmapss_vals = train_evaluate_mlp(
        x_train_split, y_train_split,
        x_holdout_split, y_holdout_split,
        NUM_TRIALS,
        mlp_params,
        results_path,
        EPOCHS, BATCH_SIZE,
        results_file=results_file)