## Reference: https://github.com/LahiruJayasinghe/RUL-Net

In [5]:
# The helper cells below rely on `tf.contrib`, which (per the warning printed by the import cell)
# is not part of TensorFlow 2.0 -- hence the pin to a 1.x release.
!pip install tensorflow==1.15.5



In [6]:
# NOTE(review): the recorded output of this cell shows the 0.19.1 build failing and pip rolling
# back to the pre-installed scikit-learn 1.0.2, so this pin is not actually in effect.
!pip install "scikit-learn==0.19.1"

Collecting scikit-learn==0.19.1
  Downloading scikit-learn-0.19.1.tar.gz (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 12.0 MB/s 
[?25hBuilding wheels for collected packages: scikit-learn
  Building wheel for scikit-learn (setup.py) ... [?25lerror
[31m  ERROR: Failed building wheel for scikit-learn[0m
[?25h  Running setup.py clean for scikit-learn
Failed to build scikit-learn
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.0.2
    Uninstalling scikit-learn-1.0.2:
      Successfully uninstalled scikit-learn-1.0.2
    Running setup.py install for scikit-learn ... [?25l[?25herror
  Rolling back uninstall of scikit-learn
  Moving to /usr/local/lib/python3.7/dist-packages/scikit_learn-1.0.2.dist-info/
   from /usr/local/lib/python3.7/dist-packages/~cikit_learn-1.0.2.dist-info
  Moving to /usr/local/lib/python3.7/dist-packages/scikit_learn.libs/
   from /usr/local/lib/python3.7/dist-packages/

In [7]:
# NOTE(review): no visible cell imports `process_data`; the data-processing helpers are defined
# inline further down (see the "data_processing.py" section) -- confirm this install is needed.
!pip install process-data

Collecting process-data
  Downloading process_data-0.3-py3-none-any.whl (3.5 kB)
Installing collected packages: process-data
Successfully installed process-data-0.3


## utils_laj.py

In [8]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
# from data_processing import MAXLIFE

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [9]:
# Mount Google Drive inside the Colab runtime so files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')
import os
# Make the project folder the working directory: the data helpers below use relative paths
# such as "./CMAPSSData/..." and "./PHM08/...".
# NOTE(review): this absolute path is specific to one Drive layout -- adjust before running elsewhere.
os.chdir('/content/drive/My Drive/Colab Notebooks/fourthbrain/capstone_code/')

Mounted at /content/drive


In [10]:
def dense_layer(x, size, activation_fn, batch_norm=False, phase=False, drop_out=False, keep_prob=None,
                scope="fc_layer"):
    """
    Helper function to create a fully connected layer with or without batch normalization or dropout regularization

    :param x: previous layer
    :param size: fully connected layer size
    :param activation_fn: activation function, or None for a linear layer
    :param batch_norm: bool to set batch normalization
    :param phase: if batch normalization is set, then phase variable is to mention the 'training' and 'testing' phases
    :param drop_out: bool to set drop-out regularization
    :param keep_prob: if drop-out is set, then to mention the keep probability of dropout
    :param scope: variable scope name
    :return: fully connected layer
    """
    with tf.variable_scope(scope):
        if batch_norm:
            # Linear projection first; normalisation is applied before the non-linearity.
            projected = tf.contrib.layers.fully_connected(x, size, activation_fn=None)
            normalised = BatchNorm(name="batch_norm_" + scope)(projected, train=phase)
            # The plain branch below accepts activation_fn=None (linear output); do the same
            # here instead of failing on a call to None.
            return_layer = normalised if activation_fn is None else activation_fn(normalised)
        else:
            return_layer = tf.layers.dense(x, size,
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           activation=activation_fn)
        if drop_out:
            return_layer = tf.nn.dropout(return_layer, keep_prob)

        return return_layer


def get_RNNCell(cell_types, keep_prob, state_size, build_with_dropout=True):
    """
    Build a stacked (multi-layer) RNN cell, one layer per entry of ``cell_types``.

    :param cell_types: iterable of layer kinds; each is 'GRU', 'LSTM', 'LSTM_LN', 'GLSTMCell' or 'LSTM_BF'.
                       Any other value (e.g. 'None') yields a BasicRNNCell.
    :param keep_prob: dropout keeping probability for the layer outputs
    :param state_size: number of units in every layer
    :param build_with_dropout: wrap every layer, and then the stacked cell, in a DropoutWrapper
    :return: the stacked cell (a MultiRNNCell, itself dropout-wrapped when build_with_dropout is set)

    Note: with dropout enabled the output of the top layer passes through two DropoutWrappers
    (its own and the one around the stack).
    NOTE(review): confirm that LSTMBlockFusedCell ('LSTM_BF') can be used inside
    DropoutWrapper / MultiRNNCell.
    """
    rnn = tf.contrib.rnn

    def build_layer(kind):
        # One bare cell of the requested kind.
        if kind == 'GRU':
            return rnn.GRUCell(num_units=state_size,
                               bias_initializer=tf.zeros_initializer())  # Or GRU(num_units)
        if kind == 'LSTM':
            return rnn.LSTMCell(num_units=state_size, use_peepholes=True, state_is_tuple=True,
                                initializer=tf.contrib.layers.xavier_initializer())
        if kind == 'LSTM_LN':
            return rnn.LayerNormBasicLSTMCell(state_size)
        if kind == 'GLSTMCell':
            return rnn.GLSTMCell(num_units=state_size, initializer=tf.contrib.layers.xavier_initializer())
        if kind == 'LSTM_BF':
            return rnn.LSTMBlockFusedCell(num_units=state_size, use_peephole=True)
        return tf.nn.rnn_cell.BasicRNNCell(state_size)

    def maybe_dropout(cell):
        # Output dropout only; the cell is returned untouched when dropout is disabled.
        if build_with_dropout:
            return rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
        return cell

    layers = [maybe_dropout(build_layer(kind)) for kind in cell_types]
    return maybe_dropout(rnn.MultiRNNCell(layers))


class BatchNorm(object):
    """
    Thin callable wrapper around ``tf.contrib.layers.batch_norm`` that remembers its settings.

    usage : dence_layer_bn = BatchNorm(name="batch_norm_" + scope)(previous_layer, train=is_train)
    """
    def __init__(self, epsilon=1e-5, momentum=0.999, name="batch_norm"):
        # The variable scope is entered only while the settings are stored; no ops are built here.
        with tf.variable_scope(name):
            self.name = name
            self.momentum = momentum
            self.epsilon = epsilon

    def __call__(self, x, train=True):
        # `momentum` is forwarded as batch_norm's `decay`; updates_collections=None is passed through as-is.
        settings = dict(decay=self.momentum,
                        updates_collections=None,
                        epsilon=self.epsilon,
                        scale=True,
                        is_training=train,
                        scope=self.name)
        return tf.contrib.layers.batch_norm(x, **settings)


def batch_generator(x_train, y_train, batch_size, sequence_length, online=False, online_shift=1):
    """
    Infinite generator of training batches for many-to-many models.

    :param x_train: 2-D array of inputs, one row per time step (shape[1] is the number of sensors)
    :param y_train: labels, one per row of x_train
    :param batch_size: number of windows per batch
    :param sequence_length: number of consecutive rows in every window
    :param online: if False every window starts at a random row; if True windows are taken
                   sequentially, starting at row 0 and wrapping back to 0 when a window would
                   run past the end of the data
    :param online_shift: step between the starts of consecutive windows in online mode
    :return: yields (x_batch, y_batch) float32 arrays of shape
             (batch_size, sequence_length, sensors) and (batch_size, sequence_length)
    :raises ValueError: if sequence_length exceeds the number of rows

    The random start is drawn from every valid position, including the last one
    (num_train - sequence_length), so data holding exactly one window works too.
    """
    num_train = x_train.shape[0]
    num_x_sensors = x_train.shape[1]
    if sequence_length > num_train:
        raise ValueError("sequence_length (%d) exceeds the number of training rows (%d)"
                         % (sequence_length, num_train))

    # Largest start index for which a full window still fits.
    last_start = num_train - sequence_length
    idx = 0

    # Infinite loop.
    while True:
        # Fresh arrays for every batch so previously yielded batches are never overwritten.
        x_batch = np.zeros(shape=(batch_size, sequence_length, num_x_sensors), dtype=np.float32)
        y_batch = np.zeros(shape=(batch_size, sequence_length), dtype=np.float32)

        for i in range(batch_size):
            if online:
                # Sequential mode: wrap around once the window no longer fits.
                if idx > last_start:
                    idx = 0
            else:
                # randint's upper bound is exclusive, hence the +1 to reach the last window.
                idx = np.random.randint(last_start + 1)

            # Copy the sequences of data starting at this index.
            x_batch[i] = x_train[idx:idx + sequence_length]
            y_batch[i] = y_train[idx:idx + sequence_length]

            if online:
                idx = idx + online_shift

        yield (x_batch, y_batch)


def trjectory_generator(x_train, y_train, test_engine_id, sequence_length, graph_batch_size, lower_bound):
    """
    Infinite generator that yields one engine trajectory per step, cut into consecutive windows.

    :param x_train: 2-D array of inputs, one row per time step
    :param y_train: labels, one per row of x_train
    :param test_engine_id: pandas Series with the engine id of every row, e.g. [1,1,1,...,2,2,2,...];
                           its index labels are used as row positions in x_train / y_train
    :param sequence_length: number of rows per window
    :param graph_batch_size: minimum number of windows per yielded batch; shorter batches are
                             padded with windows filled with -0.01
    :param lower_bound: label written to window positions that lie after the trajectory's last row
    :return: yields (x_batch, y_batch) float32 arrays of shape
             (windows, sequence_length, sensors) and (windows, sequence_length)

    Every trajectory gets int(len / sequence_length) + 1 windows. Labels past the end of the
    trajectory are replaced by ``lower_bound``; a window that ends exactly on the last row keeps
    all of its labels.
    NOTE(review): for a window that runs past the end of the whole data set the inputs AND the
    labels wrap around to the first rows (labels are not set to lower_bound there), and rows left
    untouched after an early ``break`` stay zero -- confirm this is intended for the last engine.
    """
    DEBUG = False
    total_rows = x_train.shape[0]
    num_x_sensors = x_train.shape[1]
    engine_ids = test_engine_id.unique()
    if DEBUG: print("total trjectories: ", len(engine_ids))

    while True:
        for engine in engine_ids:
            indexes = test_engine_id[test_engine_id == engine].index
            first_idx, last_idx = indexes[0], indexes[-1]
            if DEBUG: print("engine_id: ", engine, "start", first_idx, "end", last_idx,
                            "trjectory_len:", len(indexes))
            batch_size = int(len(indexes) / sequence_length) + 1

            x_batch = np.zeros(shape=(batch_size, sequence_length, num_x_sensors), dtype=np.float32)
            y_batch = np.zeros(shape=(batch_size, sequence_length), dtype=np.float32)

            idx = first_idx
            for i in range(batch_size):
                if DEBUG: print("current idx=", idx)
                if idx >= total_rows:
                    # Nothing left in the data set; remaining windows stay zero-filled.
                    if DEBUG: print("BREAK")
                    break

                window_end = idx + sequence_length
                if window_end > total_rows:
                    # Window runs past the end of the data: complete it with the first rows.
                    remain = window_end - total_rows
                    if DEBUG: print("BREAK", idx, total_rows, remain)
                    x_batch[i] = np.concatenate((x_train[idx:], x_train[0:remain]))
                    y_batch[i] = np.concatenate((y_train[idx:], y_train[0:remain]))
                    break

                x_batch[i] = x_train[idx:window_end]

                # Number of window positions that fall after this trajectory's last row.
                overshoot = window_end - (last_idx + 1)
                y_window = np.copy(y_train[idx:window_end])
                if overshoot > 0:
                    # Guarded on purpose: with overshoot == 0 the slice [-0:] would select
                    # (and overwrite) the entire window of valid labels.
                    if DEBUG: print("(idx + sequence_length) > trj_len:", "remain", overshoot)
                    y_window[-overshoot:] = lower_bound
                y_batch[i] = y_window

                idx = window_end

            # Pad up to the batch size the graph expects with constant -0.01 windows.
            batch_size_gap = graph_batch_size - x_batch.shape[0]
            if batch_size_gap > 0:
                x_pad = np.full((batch_size_gap, sequence_length, num_x_sensors), -0.01, dtype=np.float32)
                y_pad = np.full((batch_size_gap, sequence_length), -0.01, dtype=np.float32)
                x_batch = np.concatenate((x_batch, x_pad))
                y_batch = np.concatenate((y_batch, y_pad))
            yield (x_batch, y_batch)


def plot_data(data, label=""):
    """
    Draw one or several series on the same axes and show the figure.

    :param data: a single series, or a ``list`` of series that are all drawn on top of each other
    :param label: label attached to every plotted series
    """
    from matplotlib import pyplot as plt
    # Only an actual list is treated as "several series"; anything else is plotted as one.
    series_to_draw = data if type(data) is list else [data]
    for series in series_to_draw:
        plt.plot(series, label=label)
    plt.show()


def model_summary(learning_rate,batch_size,lstm_layers,lstm_layer_size,fc_layer_size,sequence_length,n_channels,path_checkpoint,spacial_note=''):
    """
    Write the hyper-parameters and the list of trainable variables to ``path_checkpoint + ".txt"``.

    The parent directory is created when missing; a path without a directory component is
    written to the current working directory. ``MAXLIFE`` is read from the module globals at
    call time, and the variables come from ``tf.trainable_variables()`` of the current graph.

    :param learning_rate, batch_size, lstm_layers, lstm_layer_size, fc_layer_size,
           sequence_length, n_channels: values reported verbatim in the summary
    :param path_checkpoint: output path without the ".txt" suffix
    :param spacial_note: free text written below the path in the header
    """
    path_checkpoint = path_checkpoint + ".txt"
    target_dir = os.path.dirname(path_checkpoint)
    # dirname is '' for a bare file name, and os.makedirs('') would raise.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(path_checkpoint, "w") as text_file:
        variables = tf.trainable_variables()

        print('---------', file=text_file)
        print(path_checkpoint, file=text_file)
        print(spacial_note, file=text_file)
        print('---------', '\n', file=text_file)

        print('---------', file=text_file)
        print('MAXLIFE: ', MAXLIFE,'\n',  file=text_file)
        print('learning_rate: ', learning_rate, file=text_file)
        print('batch_size: ', batch_size, file=text_file)
        print('lstm_layers: ', lstm_layers, file=text_file)
        print('lstm_layer_size: ', lstm_layer_size, file=text_file)
        print('fc_layer_size: ', fc_layer_size, '\n', file=text_file)
        print('sequence_length: ', sequence_length, file=text_file)
        print('n_channels: ', n_channels, file=text_file)
        print('---------', '\n', file=text_file)

        print('---------', file=text_file)
        print('Variables: name (type shape) [size]', file=text_file)
        print('---------', '\n', file=text_file)
        total_size = 0
        total_bytes = 0
        for var in variables:
            # A shape with an unknown number of elements counts as size 0.
            var_size = var.get_shape().num_elements() or 0
            var_bytes = var_size * var.dtype.size
            total_size += var_size
            total_bytes += var_bytes
            print(var.name, slim.model_analyzer.tensor_description(var), '[%d, bytes: %d]' %
                      (var_size, var_bytes), file=text_file)

        print('\nTotal size of variables: %d' % total_size, file=text_file)
        print('Total bytes of variables: %d' % total_bytes, file=text_file)


def scoring_func(error_arr):
    '''
    Asymmetric exponential score of a set of RUL errors (lower is better).

    Negative errors contribute exp(-error / 13) - 1 and non-negative errors contribute
    exp(error / 10) - 1, so a positive error is penalised more than a negative one of the
    same size.

    :param error_arr: errors for each trajectory; a list, tuple, scalar, NumPy array or pandas Series
    :return: the summed score (0 for empty input)
    '''
    import math

    # Coerce so that plain Python sequences work with the boolean masks below.
    errors = np.atleast_1d(np.asarray(error_arr, dtype=float)).ravel()
    pos_error_arr = errors[errors >= 0]
    neg_error_arr = errors[errors < 0]

    score = 0
    for error in neg_error_arr:
        score = math.exp(-(error / 13)) - 1 + score

    for error in pos_error_arr:
        score = math.exp(error / 10) - 1 + score
    return score


def conv_layer(X,filters,kernel_size,strides,padding,batch_norm,is_train,scope):
    """
    1D convolution followed by ReLU, optionally with batch normalization in between.

    :param X: input tensor handed to tf.layers.conv1d
    :param filters, kernel_size, strides, padding: forwarded unchanged to tf.layers.conv1d
    :param batch_norm:  bool, enable batch normalization (applied before the ReLU)
    :param is_train: bool, mention if current phase is training phase
    :param scope: variable scope
    :return: 1D-convolutional layer
    """
    with tf.variable_scope(scope):
        conv_args = dict(inputs=X, filters=filters, kernel_size=kernel_size, strides=strides,
                         padding=padding, kernel_initializer=tf.contrib.layers.xavier_initializer())

        if not batch_norm:
            # ReLU is fused into the convolution itself.
            return tf.layers.conv1d(activation=tf.nn.relu, **conv_args)

        # Linear convolution -> batch norm -> ReLU.
        linear_conv = tf.layers.conv1d(**conv_args)
        normalised = BatchNorm(name="norm_"+scope)(linear_conv, train=is_train)
        return tf.nn.relu(normalised)


def get_predicted_expected_RUL(__y, __y_pred, lower_bound=-1):
    """
    Read the final predicted and expected RUL of one trajectory.

    The trajectory end is located from the first label equal to ``lower_bound`` (the padding
    marker): ``trj_end`` is the index just before it. Negative predictions are clipped to 0.
    The caller's ``__y_pred`` is left untouched (clipping is done on a copy).

    :param __y: 1-D array of labels, padded with ``lower_bound`` after the trajectory
    :param __y_pred: 1-D array of predictions aligned with ``__y``
    :param lower_bound: padding marker used in ``__y``
    :return: (RUL_predict, RUL_expected), both rounded to 0 decimals

    NOTE(review): the prediction is read at index trj_end - 1 while the expected value is read
    at index trj_end -- confirm the one-step offset is intended. When no label equals
    ``lower_bound``, argmax returns 0 and trj_end becomes -1.
    """
    y_true = np.asarray(__y)
    trj_end = np.argmax(y_true == lower_bound) - 1
    # Copy before clipping: a plain slice is a view and would modify the caller's array.
    trj_pred = np.array(__y_pred[:trj_end], copy=True)
    trj_pred[trj_pred < 0] = 0
    RUL_predict = round(trj_pred[-1], 0)
    RUL_expected = round(y_true[trj_end], 0)

    return RUL_predict, RUL_expected


## data_processing.py

In [11]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import random

# Cap on the RUL labels: kink_RUL() holds the label at MAXLIFE before the linear decline starts.
MAXLIFE = 120
# Multiplier applied to the preprocessed value arrays before they are saved to .npy.
SCALE = 1
# Multiplier applied to the label column in data_augmentation().
RESCALE = 1
# Per-engine RUL values collected by compute_rul_of_one_file() whenever it is given RUL_FD00X;
# the list is appended to on every such call and never cleared by the visible code.
true_rul = []
# Placeholders: the loaders below replace these (via `global`) with the 'engine_id' column
# of the test / training data they process.
test_engine_id = 0
training_engine_id = 0

In [12]:
def kink_RUL(cycle_list, max_cycle, max_life=None):
    r'''
    Piecewise linear RUL target: constant at the cap, then decreasing by one per cycle.

            ^
            |
    MAXLIFE |-----------
            |            \
            |             \
            |              \
            |               \
            |                \
            |----------------------->

    :param cycle_list: the cycles of one engine; only its length is used (one label per entry)
    :param max_cycle: cycle at which the engine fails
    :param max_life: cap on the label; defaults to the module-level MAXLIFE
    :return: list of labels, ``max_life`` for the first ``max_cycle - max_life`` entries and
             then max_cycle - 1 - position (as floats), reaching 0 at position max_cycle - 1

    A trajectory whose failure cycle does not exceed the cap (``max_cycle <= max_life``) has no
    flat part: the labels simply count down from ``max_cycle - 1``.
    '''
    if max_life is None:
        max_life = MAXLIFE

    knee_point = max_cycle - max_life
    labels = []
    for position in range(len(cycle_list)):
        if position < knee_point:
            labels.append(max_life)
        else:
            # The slope max_life / (max_cycle - knee_point) is always one cycle per step.
            # With no flat part there is no previous label, so start the count-down at max_cycle.
            previous = labels[-1] if labels else max_cycle
            labels.append(previous - 1.0)

    return labels


def compute_rul_of_one_id(FD00X_of_one_id, max_cycle_rul=None):
    '''
    RUL labels (as a list) for the rows of a single engine.

    :param FD00X_of_one_id: the rows of one engine_id; must provide a 'cycle' column
    :param max_cycle_rul: cycles remaining after the last recorded one. None means the engine
                          was run to failure, i.e. the last recorded cycle is the failure cycle.
    :return: the list produced by kink_RUL for these cycles
    '''
    cycles = FD00X_of_one_id['cycle'].tolist()
    last_recorded = max(cycles)
    # Failure cycle = last recorded cycle, plus whatever life was still left at that point.
    failure_cycle = last_recorded if max_cycle_rul is None else last_recorded + max_cycle_rul
    return kink_RUL(cycles, failure_cycle)


def compute_rul_of_one_file(FD00X, id='engine_id', RUL_FD00X=None):
    '''
    RUL labels for every row of a data set, returned as one flat list.

    Engines are processed in their order of first appearance, so when the rows of each engine
    are contiguous the returned list lines up row-by-row with ``FD00X`` and can be assigned
    directly as a column.

    :param FD00X: data set with an engine-id column and a 'cycle' column
    :param id: name of the engine-id column
    :param RUL_FD00X: optional remaining life at the last recorded cycle of each engine, looked
                      up by position ``engine_id - 1`` (a one-column DataFrame, Series or
                      sequence). None means every engine ran to failure.
    :return: list of RUL labels

    Side effect: when ``RUL_FD00X`` is given, each engine's value is appended to the
    module-level ``true_rul`` list.
    '''
    engine_column = FD00X[id]

    remaining_life = None
    if RUL_FD00X is not None:
        # First column as a flat array: avoids calling int() on a one-row Series.
        remaining_life = np.asarray(RUL_FD00X).reshape(len(RUL_FD00X), -1)[:, 0]

    rul = []
    # pd.unique keeps first-appearance order; a set gives no ordering guarantee.
    for _id in pd.unique(engine_column):
        engine_rows = FD00X[engine_column == _id]
        if remaining_life is None:
            rul.extend(compute_rul_of_one_id(engine_rows))
        else:
            engine_rul = int(remaining_life[int(_id) - 1])
            true_rul.append(engine_rul)
            rul.extend(compute_rul_of_one_id(engine_rows, engine_rul))
    return rul


def _read_cmapss_table(path):
    """Read one whitespace-delimited C-MAPSS text file and name its 26 columns."""
    column_name = ['engine_id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',
                   's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',
                   's15', 's16', 's17', 's18', 's19', 's20', 's21']
    frame = pd.read_table(path, header=None, sep=r"\s+")
    frame.columns = column_name
    return frame


def _normalise_cmapss_features(frame, min_max_norm):
    """Normalise, in place, every column after 'engine_id' and 'cycle' using the frame's own statistics."""
    feature_cols = list(frame.columns[2:])
    features = frame[feature_cols]
    if min_max_norm:
        frame[feature_cols] = MinMaxScaler().fit_transform(features)
    else:
        mean = features.mean()
        # Constant columns have zero spread; divide by 1 so they become 0 instead of NaN.
        std = features.std().replace(0, 1)
        frame[feature_cols] = (features - mean) / std
    return frame


def get_CMAPSSData(save=False, save_training_data=True, save_testing_data=True, files=[1, 2, 3, 4, 5],
                   min_max_norm=False):
    '''
    Load the preprocessed C-MAPSS data, or build it from the raw text files and save it.

    :param save: False loads the previously saved 'normalized_*' files from the working directory;
                 anything else (re)runs the preprocessing described below
    :param save_training_data: preprocess and save the training data; if False the saved CSV is loaded
    :param save_testing_data: preprocess and save the testing data; if False the saved CSV is loaded
    :param files: sub data set numbers to process; each number N needs ./CMAPSSData/train_FD00N.txt
                  (and test_FD00N.txt / RUL_FD00N.txt for the testing data). Only the requested
                  files are read.
                  NOTE(review): the default still contains 5, although the earlier implementation only
                  ever loaded FD001-FD004 -- pass ``files`` explicitly when preprocessing.
    :param min_max_norm: True for min-max normalization, False for standard (z-score) normalization
    :return: (train_values, test_values, train, test): two NumPy arrays and the matching DataFrames,
             with 'engine_id' and 'cycle' removed and the 'RUL' label as last column

    Side effects: writes normalized_{train,test}_data.{npy,csv}; sets the module-level
    ``training_engine_id`` (always, when training data is processed) and ``test_engine_id``
    (only when exactly one file is requested); the testing pass appends to ``true_rul`` through
    compute_rul_of_one_file.
    NOTE(review): every file, test files included, is normalised with its own statistics rather
    than with those of the training data -- confirm this is intended.
    '''
    global training_engine_id, test_engine_id

    if save == False:
        return np.load("normalized_train_data.npy"), np.load("normalized_test_data.npy"), pd.read_csv(
            'normalized_train_data.csv', index_col=[0]), pd.read_csv('normalized_test_data.csv', index_col=[0])

    if save_training_data:  ### Training ###
        previous_len = 0
        frames = []
        for file_no in files:  # load subdataset by subdataset
            data_file = 'train_FD00' + str(file_no)
            frame = _read_cmapss_table("./CMAPSSData/" + data_file + ".txt")
            frame = _normalise_cmapss_features(frame, min_max_norm)
            frame['RUL'] = compute_rul_of_one_file(frame)

            # Give every sub data set its own index range so rows stay unique after concatenation.
            current_len = len(frame)
            frame.index = range(previous_len, previous_len + current_len)
            previous_len = previous_len + current_len
            frames.append(frame)
            print(data_file)

        train = pd.concat(frames)
        training_engine_id = train['engine_id']
        train = train.drop(columns=['engine_id', 'cycle'])

        train_values = train.values * SCALE
        np.save('normalized_train_data.npy', train_values)
        train.to_csv('normalized_train_data.csv')
    else:
        train = pd.read_csv('normalized_train_data.csv', index_col=[0])
        train_values = train.values

    if save_testing_data:  ### testing ###
        previous_len = 0
        frames = []
        for file_no in files:
            data_file = 'test_FD00' + str(file_no)
            rul_file = 'RUL_FD00' + str(file_no)
            frame = _read_cmapss_table("./CMAPSSData/" + data_file + ".txt")
            frame = _normalise_cmapss_features(frame, min_max_norm)

            # Remaining life of every test engine at its last recorded cycle.
            rul_frame = pd.read_table("./CMAPSSData/" + rul_file + ".txt", header=None, sep=r"\s+")
            rul_frame.columns = ['RUL']
            frame['RUL'] = compute_rul_of_one_file(frame, RUL_FD00X=rul_frame)

            current_len = len(frame)
            frame.index = range(previous_len, previous_len + current_len)
            previous_len = previous_len + current_len
            frames.append(frame)
            print(data_file)
            if len(files) == 1:
                test_engine_id = frame['engine_id']

        test = pd.concat(frames)
        test = test.drop(columns=['engine_id', 'cycle'])

        test_values = test.values * SCALE
        np.save('normalized_test_data.npy', test_values)
        test.to_csv('normalized_test_data.csv')
    else:
        test = pd.read_csv('normalized_test_data.csv', index_col=[0])
        test_values = test.values

    return train_values, test_values, train, test


def get_PHM08Data(save=False):
    """
    Load the PHM 2008 challenge data, or build it from the raw text files and save it.

    :param save: False loads the three arrays previously saved under ./PHM08/processed_data/;
                 anything else preprocesses ./PHM08/train.txt and ./PHM08/final_test.txt
    :return: (training_data, testing_data, phm_testing_data) as NumPy arrays with 'engine_id' and
             'cycle' removed and the RUL label as last column. ``training_data`` and
             ``testing_data`` are a random split (about 80% / 20% of the engines, unseeded) of
             train.txt; ``phm_testing_data`` is final_test.txt with its RUL column set to 0.

    Side effects: sets the module-level ``test_engine_id`` to the engine ids of final_test.txt and
    writes the three arrays (as .npy plus .txt/.csv) to ./PHM08/processed_data/, creating that
    directory when missing.
    """
    global test_engine_id

    if save == False:
        return np.load("./PHM08/processed_data/phm_training_data.npy"), np.load("./PHM08/processed_data/phm_testing_data.npy"), np.load(
            "./PHM08/processed_data/phm_original_testing_data.npy")

    column_name = ['engine_id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',
                   's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',
                   's15', 's16', 's17', 's18', 's19', 's20', 's21']
    phm_training_data = pd.read_table("./PHM08/train.txt", header=None, sep=r"\s+")
    phm_training_data.columns = column_name
    phm_testing_data = pd.read_table("./PHM08/final_test.txt", header=None, sep=r"\s+")
    phm_testing_data.columns = column_name
    feature_cols = column_name[2:]

    def standardise(frame):
        # z-score with the frame's own statistics; constant columns (std 0) are divided by 1
        # so they become 0 instead of NaN, as the other loaders in this notebook do.
        features = frame[feature_cols]
        std = features.std().replace(0, 1)
        frame[feature_cols] = (features - features.mean()) / std

    print("phm training")
    standardise(phm_training_data)
    phm_training_data['RUL'] = compute_rul_of_one_file(phm_training_data)

    print("phm testing")
    standardise(phm_testing_data)
    # No failure information is available for the final test file: placeholder label.
    phm_testing_data['RUL'] = 0

    train_engine_id = phm_training_data['engine_id']
    phm_training_data = phm_training_data.drop(columns=['engine_id', 'cycle'])

    test_engine_id = phm_testing_data['engine_id']
    phm_testing_data = phm_testing_data.drop(columns=['engine_id', 'cycle'])

    phm_training_data = phm_training_data.values
    phm_testing_data = phm_testing_data.values

    # Random hold-out split by engine (each engine goes to the training part with p = 0.8).
    engine_ids = train_engine_id.unique()
    train_test_split = np.random.rand(len(engine_ids)) < 0.80
    train_engine_ids = engine_ids[train_test_split]
    test_engine_ids = engine_ids[~train_test_split]

    def rows_of(selected_ids):
        # Rows of the selected engines, engine by engine; an empty selection gives a 0-row array.
        chunks = [phm_training_data[train_engine_id[train_engine_id == engine].index]
                  for engine in selected_ids]
        return np.concatenate(chunks) if chunks else phm_training_data[:0]

    training_data = rows_of(train_engine_ids)
    testing_data = rows_of(test_engine_ids)

    print(phm_training_data.shape, phm_testing_data.shape, training_data.shape, testing_data.shape)

    os.makedirs("./PHM08/processed_data", exist_ok=True)
    np.save("./PHM08/processed_data/phm_training_data.npy", training_data)
    np.savetxt("./PHM08/processed_data/phm_training_data.txt", training_data, delimiter=" ")
    np.save("./PHM08/processed_data/phm_testing_data.npy", testing_data)
    np.savetxt("./PHM08/processed_data/phm_testing_data.txt", testing_data, delimiter=" ")
    np.save("./PHM08/processed_data/phm_original_testing_data.npy", phm_testing_data)
    np.savetxt("./PHM08/processed_data/phm_original_testing_data.csv", phm_testing_data, delimiter=",")

    return training_data, testing_data, phm_testing_data


def data_augmentation(files=1, low=[10, 40, 90, 170], high=[35, 85, 160, 250], plot=False, combine=False):
    '''
    Augment the training data with truncated copies so that it resembles the testing data.

    Training trajectories always run to failure, whereas testing trajectories mostly stop earlier.
    For every (low[i], high[i]) pair a copy of the training set is made in which each engine is cut
    at a random cycle drawn from that range (or kept whole if it is shorter); the copies get new
    engine ids and are appended to the original data. The engines are then shuffled and the result
    is saved as 'normalized_train_data.npy' / '.csv'.

    :param files: sub C-MAPSS data set number, or "phm" for the processed PHM08 training data
    :param low: lower bounds for the random cutoff cycle, one per augmentation round
    :param high: upper bounds for the random cutoff cycle, one per augmentation round
    :param plot: switch to plot the augmented data
    :param combine: use combine_FD001_and_FD003() (defined outside this block) as the source
    :return: None; the result is written to disk and ``training_engine_id`` is set as a module global

    Engine ids are assumed to be 1..N. Random draws come from ``random`` (cutoffs) and
    ``np.random`` (shuffle); neither is seeded here. compute_rul_of_one_file also appends the
    drawn remaining-life values to the module-level ``true_rul``.
    NOTE(review): get_PHM08Data saves phm_training_data.txt without the 'engine_id' and 'cycle'
    columns, so assigning the 26 column names in the "phm" branch looks inconsistent with that
    file -- confirm before using files="phm".
    '''
    global training_engine_id

    DEBUG = False

    column_name = ['engine_id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',
                   's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',
                   's15', 's16', 's17', 's18', 's19', 's20', 's21']

    ### Loading original data ###
    if files == "phm":
        train_FD00x = pd.read_table("./PHM08/processed_data/phm_training_data.txt", header=None, sep=r"\s+")
        # The last column of the processed file is the RUL label; it is recomputed below.
        train_FD00x.drop(train_FD00x.columns[len(train_FD00x.columns) - 1], axis=1, inplace=True)
        train_FD00x.columns = column_name
    else:
        if combine:
            train_FD00x,_,_ = combine_FD001_and_FD003()
        else:
            file_path = "./CMAPSSData/train_FD00" + str(files) + ".txt"
            train_FD00x = pd.read_table(file_path, header=None, sep=r"\s+")
            train_FD00x.columns = column_name
            print(file_path.split("/")[-1])

        ### Standard normalization (zero spread replaced by 1 to avoid division by zero) ###
        feature_cols = list(train_FD00x.columns[2:])
        mean = train_FD00x[feature_cols].mean()
        std = train_FD00x[feature_cols].std().replace(0, 1)
        train_FD00x[feature_cols] = (train_FD00x[feature_cols] - mean) / std

    final_train_FD = train_FD00x.copy()
    previous_len = 0
    for i in range(len(high)):
        train_FD = train_FD00x.copy()
        train_engine_id = train_FD['engine_id']
        engine_ids = train_engine_id.unique()
        total_ids = len(engine_ids)
        train_rul = []
        print("*************", final_train_FD.shape, total_ids, low[i], high[i], "*****************")

        for engine in range(1, total_ids + 1):

            train_engine_id = train_FD['engine_id']
            indexes = train_engine_id[train_engine_id == engine].index  ### filter indexes related to id
            traj_data = train_FD.loc[indexes]  ### filter trajectory data
            last_cycle = max(traj_data['cycle'])

            cutoff_cycle = random.randint(low[i], high[i])  ### randomly selecting the cutoff point of the engine cycle
            if cutoff_cycle > last_cycle:
                cutoff_cycle = last_cycle

            train_rul.append(last_cycle - cutoff_cycle)  ### collecting remaining cycles

            cutoff_cycle_index = traj_data['cycle'][traj_data['cycle'] == cutoff_cycle].index  ### cutoff cycle index

            if DEBUG:
                print("traj_shape: ", traj_data.shape, "current_engine_id:", engine, "cutoff_cycle:", cutoff_cycle,
                      "cutoff_index", cutoff_cycle_index, "engine_fist_index", indexes[0], "engine_last_index",
                      indexes[-1])

            ### removing rows after cutoff cycle index ###
            # The index is reset after every removal, so labels and positions stay identical.
            if cutoff_cycle_index[0] != indexes[-1]:
                drop_range = list(range(cutoff_cycle_index[0] + 1, indexes[-1] + 1))
                train_FD.drop(train_FD.index[drop_range], inplace=True)
                train_FD.reset_index(drop=True, inplace=True)

        ### calculating the RUL for augmented data
        train_rul = pd.DataFrame.from_dict({'RUL': train_rul})
        train_FD['RUL'] = compute_rul_of_one_file(train_FD, RUL_FD00X=train_rul)

        ### changing the engine_id for augmented data
        # Shifted ids are always larger than total_ids, so they never collide with ids still to be renamed.
        train_engine_id = train_FD['engine_id']
        for engine in range(1, total_ids + 1):
            indexes = train_engine_id[train_engine_id == engine].index
            train_FD.loc[indexes, 'engine_id'] = engine + total_ids * (i + 1)

        if i == 0:  # should only execute at the first iteration
            final_train_FD['RUL'] = compute_rul_of_one_file(final_train_FD)
            current_len = len(final_train_FD)
            final_train_FD.index = range(previous_len, previous_len + current_len)
            previous_len = previous_len + current_len

        ### Re-indexing the augmented data (the frame itself, not a temporary column)
        train_FD.index = range(previous_len, previous_len + len(train_FD))
        previous_len = previous_len + len(train_FD)

        final_train_FD = pd.concat(
            [final_train_FD, train_FD])  # concatanete the newly augmented data with previous data

    train = final_train_FD.reset_index(drop=True)

    ### Shuffle whole engines (rows of one engine stay together and in order)
    train_engine_id = train['engine_id']
    engine_ids = train_engine_id.unique()
    np.random.shuffle(engine_ids)

    trajectories = [train.loc[train_engine_id[train_engine_id == engine].index] for engine in engine_ids]
    training_data = pd.concat(trajectories, ignore_index=True)

    training_engine_id = training_data['engine_id']

    training_data = training_data.drop(columns=['engine_id', 'cycle'])

    training_data_values = training_data.values * SCALE
    np.save('normalized_train_data.npy', training_data_values)
    training_data.to_csv('normalized_train_data.csv')

    train = training_data_values
    x_train = train[:, :train.shape[1] - 1]
    y_train = train[:, train.shape[1] - 1] * RESCALE
    print("training in augmentation", x_train.shape, y_train.shape)

    if plot:
        plt.plot(y_train, label="train")

        plt.figure()
        plt.plot(x_train)
        plt.title("train")

        plt.show()


def analyse_Data(dataset, files=None, plot=True, min_max=False):
    '''
    Generate pre-processed data for the given dataset, print the resulting
    feature/label array shapes and optionally plot them.

    The pre-processing itself is delegated to get_PHM08Data / get_CMAPSSData, both
    called with save=True. Any other value of `dataset` is silently ignored.

    :param dataset: choose between "phm" for PHM 2008 dataset or "cmapss" for CMAPSS data set with file number
    :param files: Only for CMAPSS dataset to select sub dataset (forwarded to get_CMAPSSData);
                  its first element is used in the plot titles
    :param plot: when True, draw matplotlib figures of the labels (and, for CMAPSS, the training features)
    :param min_max: switch to allow min-max normalization (forwarded as min_max_norm)
    :return: None
    '''

    def _split_xy(data):
        # The last column holds the label; every column before it is a feature.
        return data[:, :-1], data[:, -1]

    if dataset == "phm":
        training_data, testing_data, phm_testing_data = get_PHM08Data(save=True)

        x_phmtrain, y_phmtrain = _split_xy(training_data)
        x_phmtest, y_phmtest = _split_xy(testing_data)

        print("phmtrain", x_phmtrain.shape, y_phmtrain.shape)

        # Report the *test* split here (the train shapes used to be printed twice).
        print("phmtest", x_phmtest.shape, y_phmtest.shape)
        print("phmtest", phm_testing_data.shape)

        if plot:
            plt.figure()
            plt.plot(y_phmtrain, label="phmtrain_y")

            plt.figure()
            plt.plot(y_phmtest, label="phmtest_y")

            plt.show()

    elif dataset == "cmapss":
        training_data, testing_data, training_pd, testing_pd = get_CMAPSSData(save=True, files=files,
                                                                              min_max_norm=min_max)
        x_train, y_train = _split_xy(training_data)
        print("training", x_train.shape, y_train.shape)

        x_test, y_test = _split_xy(testing_data)
        print("testing", x_test.shape, y_test.shape)

        if plot:
            # `files` defaults to None; fall back to a generic title instead of
            # raising a TypeError on files[0].
            if files is not None and len(files) > 0:
                train_title = "train: FD00" + str(files[0])
            else:
                train_title = "train"

            plt.plot(y_train, label="train")
            plt.figure()
            plt.plot(y_test, label="test")

            plt.figure()
            plt.plot(x_train)
            plt.title(train_title)
            plt.figure()
            plt.plot(y_train)
            plt.title(train_title)
            plt.show()


def combine_FD001_and_FD003():
    """
    Load the CMAPSS FD001 and FD003 files from ./CMAPSSData and stack them.

    For the train and test tables the FD003 engine ids are shifted past the largest
    FD001 engine id so the two id ranges do not overlap. Every returned frame has a
    fresh 0..n-1 index with the FD001 rows first, followed by the FD003 rows.

    :return: tuple (train, test, RUL) of pandas DataFrames; `RUL` has the single
             column 'RUL' built from RUL_FD001.txt followed by RUL_FD003.txt
    """
    column_name = ['engine_id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',
                   's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',
                   's15', 's16', 's17', 's18', 's19', 's20', 's21']

    def _read(file_name, columns):
        # sep=r"\s+" is the pandas-documented equivalent of delim_whitespace=True.
        frame = pd.read_table("./CMAPSSData/" + file_name, header=None, sep=r"\s+")
        frame.columns = columns
        return frame

    def _stack_units(fd001, fd003):
        # Offset FD003 ids by the largest FD001 id to keep engine ids unique.
        fd003 = fd003.copy()
        fd003['engine_id'] = fd003['engine_id'] + fd001['engine_id'].max()
        return pd.concat([fd001, fd003], ignore_index=True)

    train_combined = _stack_units(_read("train_FD001.txt", column_name),
                                  _read("train_FD003.txt", column_name))

    test_combined = _stack_units(_read("test_FD001.txt", column_name),
                                 _read("test_FD003.txt", column_name))

    # The RUL frame must be built from the RUL files (it used to be a second
    # copy of the concatenated *test* tables).
    RUL_combined = pd.concat([_read("RUL_FD001.txt", ['RUL']),
                              _read("RUL_FD003.txt", ['RUL'])], ignore_index=True)

    return train_combined, test_combined, RUL_combined


## model.py

In [13]:
from matplotlib import pyplot as plt
import time
import datetime
# from utils_laj import *
# from data_processing import get_CMAPSSData, get_PHM08Data, data_augmentation, analyse_Data

today = datetime.date.today()

In [14]:
def CNNLSTM(dataset, file_no, Train=False, trj_wise=False, plot=False):
    '''
    The architecture is a many-to-many model combining CNN and LSTM models.

    Builds the TensorFlow graph (3 conv/max-pool stages -> dense -> stacked LSTM ->
    2 dense layers) and then either trains it and saves a checkpoint, or restores
    the checkpoint and evaluates it on the test split.

    :param dataset: select the specific dataset between PHM08 ("phm") or CMAPSS ("cmapss")
    :param file_no: sub-dataset number, 1-4; only used to pick the checkpoint path
    :param Train: select between training (truthy) and testing (falsy)
    :param trj_wise: Trajectorywise calculate RMSE and scores (testing only)
    :param plot: draw the error / prediction plots guarded by this flag (testing only)
    :raises ValueError: if `file_no` has no checkpoint path or `dataset` is not recognised
    '''

    #### checkpoint saving path ####
    if file_no not in (1, 2, 3, 4):
        raise ValueError("Save path not defined")
    path_checkpoint = './Save/Save_CNNLSTM/CNNLSTM_ML120_GRAD1_kinkRUL_FD00%d/CNN1D_3_lstm_2_layers' % file_no
    ##################################

    if dataset == "cmapss":
        training_data, testing_data, training_pd, testing_pd = get_CMAPSSData(save=False)
        x_train = training_data[:, :training_data.shape[1] - 1]
        y_train = training_data[:, training_data.shape[1] - 1]
        print("training data CNNLSTM: ", x_train.shape, y_train.shape)

        x_test = testing_data[:, :testing_data.shape[1] - 1]
        y_test = testing_data[:, testing_data.shape[1] - 1]
        print("testing data CNNLSTM: ", x_test.shape, y_test.shape)

    elif dataset == "phm":
        training_data, testing_data, phm_testing_data = get_PHM08Data(save=False)
        # The generators below need x_train/y_train/x_test/y_test; this branch used to
        # leave them undefined (NameError). Split them the way analyse_Data does.
        # NOTE(review): assumes the PHM08 arrays carry n_channels feature columns
        # followed by one label column - confirm against get_PHM08Data.
        x_train = training_data[:, :training_data.shape[1] - 1]
        y_train = training_data[:, training_data.shape[1] - 1]
        x_test = testing_data[:, :testing_data.shape[1] - 1]
        y_test = testing_data[:, testing_data.shape[1] - 1]

        x_validation = phm_testing_data[:, :phm_testing_data.shape[1] - 1]
        y_validation = phm_testing_data[:, phm_testing_data.shape[1] - 1]
        print("testing data: ", x_validation.shape, y_validation.shape)

    else:
        # Fail early with a clear message instead of a NameError further down.
        raise ValueError("Unknown dataset: %r (expected \"cmapss\" or \"phm\")" % (dataset,))

    batch_size = 1024  # Batch size
    if not Train:
        batch_size = 5

    sequence_length = 100  # Number of steps
    learning_rate = 0.001  # 0.0001
    # epochs = 5000
    epochs = 10

    ann_hidden = 50

    n_channels = 24

    lstm_size = n_channels * 3  # 3 times the amount of channels
    num_layers = 2  # 2  # Number of layers

    # ---- graph definition (order and scope names kept unchanged so that
    # ---- previously saved checkpoints remain restorable)
    X = tf.placeholder(tf.float32, [None, sequence_length, n_channels], name='inputs')
    Y = tf.placeholder(tf.float32, [None, sequence_length], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    learning_rate_ = tf.placeholder(tf.float32, name='learning_rate')
    is_train = tf.placeholder(dtype=tf.bool, shape=None, name="is_train")

    conv1 = conv_layer(X, filters=18, kernel_size=2, strides=1, padding='same', batch_norm=False, is_train=is_train,
                       scope='conv_1')
    max_pool_1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same', name='maxpool_1')

    conv2 = conv_layer(max_pool_1, filters=36, kernel_size=2, strides=1, padding='same', batch_norm=False,
                       is_train=is_train, scope='conv_2')
    max_pool_2 = tf.layers.max_pooling1d(inputs=conv2, pool_size=2, strides=2, padding='same', name='maxpool_2')

    conv3 = conv_layer(max_pool_2, filters=72, kernel_size=2, strides=1, padding='same', batch_norm=False,
                       is_train=is_train, scope='conv_3')
    max_pool_3 = tf.layers.max_pooling1d(inputs=conv3, pool_size=2, strides=2, padding='same', name='maxpool_3')

    conv_last_layer = max_pool_3

    # Flatten the (time, filters) axes of the last pooling output.
    shape = conv_last_layer.get_shape().as_list()
    CNN_flat = tf.reshape(conv_last_layer, [-1, shape[1] * shape[2]])

    # Project back to sequence_length * n_channels values so the result can be
    # reshaped into a [batch, sequence_length, n_channels] sequence for the LSTM.
    dence_layer_1 = dense_layer(CNN_flat, size=sequence_length * n_channels, activation_fn=tf.nn.relu, batch_norm=False,
                                phase=is_train, drop_out=True, keep_prob=keep_prob,
                                scope="fc_1")
    lstm_input = tf.reshape(dence_layer_1, [-1, sequence_length, n_channels])

    cell = get_RNNCell(['LSTM'] * num_layers, keep_prob=keep_prob, state_size=lstm_size)
    # The initial state is built for exactly `batch_size` sequences, so every batch
    # fed to the graph has to contain that many sequences.
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_output, states = tf.nn.dynamic_rnn(cell, lstm_input, dtype=tf.float32, initial_state=init_state)
    stacked_rnn_output = tf.reshape(rnn_output, [-1, lstm_size])  # change the form into a tensor

    dence_layer_2 = dense_layer(stacked_rnn_output, size=ann_hidden, activation_fn=tf.nn.relu, batch_norm=False,
                                phase=is_train, drop_out=True, keep_prob=keep_prob,
                                scope="fc_2")

    output = dense_layer(dence_layer_2, size=1, activation_fn=None, batch_norm=False, phase=is_train, drop_out=False,
                         keep_prob=keep_prob,
                         scope="fc_3_output")

    prediction = tf.reshape(output, [-1])
    y_flat = tf.reshape(Y, [-1])

    h = prediction - y_flat  # per-time-step residual

    # The optimizer minimises the *sum* of squared errors; RMSE is only reported.
    cost_function = tf.reduce_sum(tf.square(h))
    RMSE = tf.sqrt(tf.reduce_mean(tf.square(h)))
    optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(cost_function)

    saver = tf.train.Saver()
    training_generator = batch_generator(x_train, y_train, batch_size, sequence_length, online=True)
    testing_generator = batch_generator(x_test, y_test, batch_size, sequence_length, online=False)

    def _save_checkpoint(active_session):
        # Write the checkpoint and report whether its .meta file is on disk.
        saver.save(active_session, path_checkpoint)
        if os.path.exists(path_checkpoint + '.meta'):
            print("Model saved to file: %s" % path_checkpoint)
        else:
            print("NOT SAVED!!!", path_checkpoint)

    if Train: model_summary(learning_rate=learning_rate, batch_size=batch_size, lstm_layers=num_layers,
                            lstm_layer_size=lstm_size, fc_layer_size=ann_hidden, sequence_length=sequence_length,
                            n_channels=n_channels, path_checkpoint=path_checkpoint, spacial_note='')

    with tf.Session() as session:
        tf.global_variables_initializer().run()

        if Train:
            # Make sure the checkpoint directory exists before the first save.
            os.makedirs(os.path.dirname(path_checkpoint), exist_ok=True)

            cost = []
            iteration = int(x_train.shape[0] / batch_size)
            print("Training set MSE")
            print("No epoches: ", epochs, "No itr: ", iteration)
            epoch_start = time.time()
            for ep in range(epochs):

                for itr in range(iteration):
                    ## training ##
                    batch_x, batch_y = next(training_generator)
                    session.run(optimizer,
                                feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8, learning_rate_: learning_rate})
                    # Re-evaluate the same batch without dropout for the cost curve.
                    cost.append(
                        RMSE.eval(feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0, learning_rate_: learning_rate}))

                # End-of-epoch report: last training batch vs. one test batch.
                x_test_batch, y_test_batch = next(testing_generator)
                mse_train, rmse_train = session.run([cost_function, RMSE],
                                                    feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0,
                                                               learning_rate_: learning_rate})
                mse_test, rmse_test = session.run([cost_function, RMSE],
                                                  feed_dict={X: x_test_batch, Y: y_test_batch, keep_prob: 1.0,
                                                             learning_rate_: learning_rate})

                time_per_ep = (time.time() - epoch_start)
                # Epoch `ep` (0-based) has just finished, so epochs - ep - 1 remain.
                time_remaining = ((epochs - ep - 1) * time_per_ep) / 3600
                print("CNNLSTM", "epoch:", ep, "\tTrainig-",
                      "MSE:", mse_train, "RMSE:", rmse_train, "\tTesting-", "MSE", mse_test, "RMSE", rmse_test,
                      "\ttime/epoch:", round(time_per_ep, 2), "\ttime_remaining: ",
                      int(time_remaining), " hr:", round((time_remaining % 1) * 60, 1), " min", "\ttime_stamp: ",
                      datetime.datetime.now().strftime("%Y.%m.%d-%H:%M:%S"))
                epoch_start = time.time()

                if ep % 10 == 0 and ep != 0:
                    _save_checkpoint(session)

                # Step decay: divide the learning rate by 10 every 1000 epochs.
                if ep % 1000 == 0 and ep != 0: learning_rate = learning_rate / 10

            _save_checkpoint(session)
            plt.plot(cost)
            plt.show()
        else:
            saver.restore(session, path_checkpoint)
            print("Model restored from file: %s" % path_checkpoint)

            if trj_wise:
                # One evaluation step per engine trajectory in the test set.
                trj_iteration = len(test_engine_id.unique())
                print("total trajectories: ", trj_iteration)
                error_list = []
                pred_list = []
                expected_list = []
                lower_bound = -0.01
                test_trjectory_generator = trjectory_generator(x_test, y_test, test_engine_id, sequence_length,
                                                               batch_size, lower_bound)
                for itr in range(trj_iteration):
                    trj_x, trj_y = next(test_trjectory_generator)

                    y_pred, error, y_true = session.run([prediction, h, y_flat],
                                                        feed_dict={X: trj_x, Y: trj_y, keep_prob: 1.0})

                    RUL_predict, RUL_expected = get_predicted_expected_RUL(y_true, y_pred, lower_bound)

                    error_list.append(RUL_predict - RUL_expected)
                    pred_list.append(RUL_predict)
                    expected_list.append(RUL_expected)

                    print("id: ", itr + 1, "expected: ", RUL_expected, "\t", "predict: ", RUL_predict, "\t", "error: ",
                          RUL_predict - RUL_expected)
                error_list = np.array(error_list)
                error_list = error_list.ravel()
                rmse = np.sqrt(np.sum(np.square(error_list)) / len(error_list))  # RMSE
                print(rmse, scoring_func(error_list))
                if plot:
                    plt.figure()
                    plt.plot(np.sort(error_list), 'o', color='red', label="error")
                    plt.legend()
                    plt.show()
                # NOTE(review): this stem plot is drawn regardless of `plot`; kept
                # as-is to preserve existing behaviour.
                fig, ax = plt.subplots()
                ax.stem(expected_list, linefmt='b-', label="expected")
                ax.stem(pred_list, linefmt='r-', label="predicted")
                plt.legend()
                plt.show()

            else:
                x_validation = x_test
                y_validation = y_test

                validation_generator = batch_generator(x_validation, y_validation, batch_size, sequence_length,
                                                       online=True,
                                                       online_shift=sequence_length)

                full_prediction = []
                actual_rul = []
                error_list = []

                iteration = int(x_validation.shape[0] / (batch_size * sequence_length))
                print("#of validation points:", x_validation.shape[0], "#datapoints covers from minibatch:",
                      batch_size * sequence_length, "iterations/epoch", iteration)

                for itr in range(iteration):
                    x_validate_batch, y_validate_batch = next(validation_generator)
                    y_pred, error, y_true = session.run([prediction, h, y_flat],
                                                        feed_dict={X: x_validate_batch, Y: y_validate_batch,
                                                                   keep_prob: 1.0})
                    # Scale the outputs by the module-level RESCALE factor before reporting.
                    full_prediction.append(y_pred * RESCALE)
                    actual_rul.append(y_true * RESCALE)
                    error_list.append(error * RESCALE)
                full_prediction = np.array(full_prediction)
                full_prediction = full_prediction.ravel()
                actual_rul = np.array(actual_rul)
                actual_rul = actual_rul.ravel()
                error_list = np.array(error_list)
                error_list = error_list.ravel()
                rmse = np.sqrt(np.sum(np.square(error_list)) / len(error_list))  # RMSE

                print(y_validation.shape, full_prediction.shape, "RMSE:", rmse, "Score:", scoring_func(error_list))
                if plot:
                    plt.plot(full_prediction, label="prediction")
                    plt.plot(actual_rul, label="expected")
                    plt.legend()
                    plt.show()

In [None]:
if __name__ == "__main__":
    # Driver: pre-process the selected sub-dataset, optionally augment the training
    # data, then train or evaluate the CNN-LSTM model.

    dataset = "cmapss"
    file = 4  # represent the sub-dataset for cmapss
    TRAIN = True  # set to False to evaluate the saved checkpoint instead of training

    TRJ_WISE = True
    PLOT = True

    # Seed NumPy and TensorFlow before any data shuffling or graph construction so
    # that the run is repeatable after a kernel restart.
    RANDOM_SEED = 42
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)

    analyse_Data(dataset=dataset, files=[file], plot=False, min_max=False)

    if TRAIN:
        # Contiguous augmentation windows: [10, 35), [35, 50), [50, 70), ..., [330, 350).
        # Deriving `low`/`high` from a single boundary list keeps them aligned.
        augment_bounds = [10, 35] + list(range(50, 351, 20))
        data_augmentation(files=file,
                          low=augment_bounds[:-1],
                          high=augment_bounds[1:],
                          plot=False,
                          combine=False)

    CNNLSTM(dataset=dataset, file_no=file, Train=TRAIN, trj_wise=TRJ_WISE, plot=PLOT)

train_FD004




test_FD004




training (61249, 24) (61249,)
testing (41214, 24) (41214,)
train_FD004.txt
************* (61249, 26) 249 10 35 *****************
************* (66746, 27) 249 35 50 *****************
************* (77304, 27) 249 50 70 *****************
************* (92194, 27) 249 70 90 *****************
************* (112057, 27) 249 90 110 *****************
************* (136928, 27) 249 110 130 *****************
************* (166814, 27) 249 130 150 *****************
************* (201702, 27) 249 150 170 *****************
************* (241374, 27) 249 170 190 *****************
************* (285196, 27) 249 190 210 *****************
************* (332784, 27) 249 210 230 *****************
************* (383400, 27) 249 230 250 *****************
************* (436593, 27) 249 250 270 *****************
************* (491849, 27) 249 270 290 *****************
************* (548933, 27) 249 290 310 *****************
************* (607207, 27) 249 310 330 *****************
************* (666383, 27)



training in augmentation (726236, 24) (726236,)
training data CNNLSTM:  (726236, 24) (726236,)
testing data CNNLSTM:  (41214, 24) (41214,)
Instructions for updating:
Use `tf.keras.layers.Conv1D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.MaxPooling1D instead.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instanc