# Initialization

Test notebook for the C-MAPPS benchmark. Test different MLP architectures. 

First we import the necessary packages and create the global variables.

In [1]:
import math
import numpy as np
import csv
import copy
from scipy import stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sys

sys.path.append('/Users/davidlaredorazo/Documents/University_of_California/Research/Projects')
#sys.path.append('/media/controlslab/DATA/Projects')

from ann_framework.data_handlers.data_handler_CMAPSS import CMAPSSDataHandler
from ann_framework.tunable_model.tunable_model import SequenceTunableModelRegression
from ann_framework import aux_functions

#import custom_scores

from keras.models import Sequential, Model
from keras.layers import Dense, Input, Dropout, Reshape, Conv2D, Flatten, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
from keras import backend as K
from keras import regularizers
from keras.layers import LSTM, CuDNNLSTM

Using TensorFlow backend.


# Define architectures

Define each one of the different architectures to be tested.

In [2]:
K.clear_session()  #Clear the previous tensorflow graph

l2_lambda_regularization = 0.20
l1_lambda_regularization = 0.20

def RULmodel_LSTM(input_shape):
    """Define the RNN model"""
    
    #Create a sequential model
    model = Sequential()
    #model.add(Masking(mask_value=0, imput))
    #model.add(LSTM(input_shape=input_shape, units=100, return_sequences=True, name='lstm1')))
    model.add(CuDNNLSTM(input_shape=input_shape, units=20, return_sequences=False, name='lstm2'))
    model.add(Dense(10, input_dim=input_shape, activation='relu', kernel_initializer='glorot_normal', 
                    kernel_regularizer=regularizers.l2(l2_lambda_regularization), name='fc1'))
    model.add(Dense(1, activation='linear', name='out'))
    
    return model

def RULmodel_SN_5(input_shape):
    #Create a sequential model
    model = Sequential()
    
    #Add the layers for the model
    model.add(Dense(20, input_dim=input_shape, activation='relu', kernel_initializer='glorot_normal', 
                    kernel_regularizer=regularizers.l2(l2_lambda_regularization), 
                    name='fc1'))
    model.add(Dense(20, input_dim=input_shape, activation='relu', kernel_initializer='glorot_normal', 
                    kernel_regularizer=regularizers.l2(l2_lambda_regularization), 
                    name='fc2'))
    model.add(Dense(1, activation='linear', name='out'))
    
    return model

In [3]:
def get_compiled_model(model_def, shape, model_type='lstm'):

    #Shared parameters for the models
    optimizer = Adam(lr=0, beta_1=0.5)
    lossFunction = "mean_squared_error"
    metrics = ["mse"]
    model = None

    #Create and compile the models

    if model_type=='ann':
        model = model_def(shape)
        model.compile(optimizer = optimizer, loss = lossFunction, metrics = metrics)
    elif model_type=='lstm':
        model = RULmodel_LSTM(shape)
        model.compile(optimizer = optimizer, loss = lossFunction, metrics = metrics)
    else:
        pass

    return model

In [4]:
#Define the usable models for this notebook

#models = {'shallow-20':RULmodel_SN_5,'rnn-20-10':RULmodel_LSTM}
models = {'shallow-20':RULmodel_SN_5}

# Process Data

In [5]:
features = ['T2', 'T24', 'T30', 'T50', 'P2', 'P15', 'P30', 'Nf', 'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc', 
                     'BPR', 'farB', 'htBleed', 'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32']
selected_indices = np.array([2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21])
selected_features = list(features[i] for i in selected_indices-1)
data_folder = '../CMAPSSData'

window_size = 30
window_stride = 1
max_rul = 128

min_max_scaler = MinMaxScaler(feature_range=(-1, 1))

dHandler_cmaps = CMAPSSDataHandler(data_folder, 1, selected_features, max_rul, 
                                  window_size, window_stride)

# Build the model

In [6]:
optimizer = Adam(lr=0, beta_1=0.5)
lossFunction = "mean_squared_error"
metrics = ["mse"]


#Create and compile the models
nFeatures = len(selected_features)
shapeSN = nFeatures*window_size
shapeLSTM = (window_size,nFeatures)
model = get_compiled_model(models['shallow-20'], shapeSN, model_type='ann')

tModel = SequenceTunableModelRegression('mlpnn', model, lib_type='keras', data_handler=dHandler_cmaps)

# Load Data

In [7]:
#For LSTM
tModel.data_handler.data_scaler = min_max_scaler
tModel.data_scaler = None

#For ANN
#tModel.data_handler.data_scaler = min_max_scaler
#tModel.data_scaler = min_max_scaler

tModel.data_handler.sequence_length = 30
#tModel.data_handler.sequence_length = maxWindowSize[datasetNumber]
tModel.data_handler.sequence_stride = 1
tModel.data_handler.max_rul = 128

tModel.load_data(unroll=False, verbose=1, cross_validation_ratio=0)
tModel.print_data()

Loading data for the first time
Loading data for dataset 1 with window_size of 30, stride of 1 and maxRUL of 128. Cros-Validation ratio 0
Loading data from file and computing dataframes
Printing shapes

Training data (X, y)
(17731, 30, 14)
(17731, 1)
Testing data (X, y)
(100, 30, 14)
(100, 1)
Printing first 5 elements

Training data (X, y)
[[[-0.63253012 -0.18639634 -0.38048616 ... -0.33333333  0.42635659
    0.44932339]
  [-0.43373494 -0.09396119 -0.29473329 ... -0.33333333  0.33333333
    0.46202706]
  [-0.31325301 -0.26095487 -0.25894666 ... -0.66666667  0.25581395
    0.24275062]
  ...
  [-0.31325301 -0.48550251 -0.43011479 ... -0.66666667  0.34883721
    0.07677437]
  [-0.57831325 -0.39873556 -0.36731938 ... -0.16666667  0.2248062
    0.28555648]
  [-0.40361446 -0.01983867 -0.53308575 ... -0.66666667  0.41085271
    0.42723005]]

 [[-0.43373494 -0.09396119 -0.29473329 ... -0.33333333  0.33333333
    0.46202706]
  [-0.31325301 -0.26095487 -0.25894666 ... -0.66666667  0.25581395
   

# Test on dataset 1

In [8]:
iterations = 10
tModel.epochs = 100
lrate = LearningRateScheduler(aux_functions.step_decay)
num_features = len(selected_features)

windowSize = 30
windowStride = 1
constRul = 140

file = open("results/MLP/ResultsDatasets_1_test.csv", "w")
csvfile = csv.writer(file, lineterminator='\n')

for key, model_def in models.items():
  
    print("For model "+str(key))
    #file.write("For model "+str(key)+'\n\n')
  
    for i in range(1,2):

        dataset = i
        print("Computing for dataset "+str(i))
        #file.write("Computing for dataset "+str(i)+'\n\n')
      
        tempScoresRMSE = np.zeros((iterations,1))
        tempScoresRHS = np.zeros((iterations,1))
        tempTime = np.zeros((iterations,1))
      
        input_shape = windowSize*num_features #For simple ANN
      
        tModel.data_handler.change_dataset(i)
        tModel.data_handler.sequence_length = windowSize
        tModel.data_handler.sequence_stride = windowStride
        tModel.data_handler.max_rul = constRul
        tModel.load_data(unroll=True, verbose=0, cross_validation_ratio=0)
        #tModel.print_data()

        for j in range(iterations):

            #Model needs to be recompiled everytime since they are different runs so weights should be reinit
            model = get_compiled_model(model_def, input_shape, model_type='ann')

            tModel.change_model(key, model, 'keras')
            tModel.train_model(learningRate_scheduler=lrate, verbose=0)
            tModel.evaluate_model(['rhs', 'rmse'], round=2)
            #print("scores")
          
            #print(j)

            cScores = tModel.scores
            rmse = math.sqrt(cScores['score_1'])
            rmse2 = cScores['rmse']
            rhs = cScores['rhs']
            time = tModel.train_time
          
            tempScoresRMSE[j] = rmse2
            tempScoresRHS[j] = rhs
            tempTime[j] = time

        print("Results for model " + key)
  
        print(stats.describe(tempScoresRMSE))
        print(stats.describe(tempScoresRHS))
        print(stats.describe(tempTime))
          
        tempScoresRMSE = np.reshape(tempScoresRMSE, (iterations,))
        tempScoresRHS = np.reshape(tempScoresRHS, (iterations,))
        tempTime = np.reshape(tempTime, (iterations,))
        csvfile.writerow(tempScoresRMSE)
        csvfile.writerow(tempScoresRHS)
        csvfile.writerow(tempTime)
        
    file.close()

For model shallow-20
Computing for dataset 1
Reloading data due to parameter change
training without cv
training without cv
training without cv
training without cv
training without cv
training without cv
training without cv
training without cv
training without cv
training without cv
Results for model shallow-20
DescribeResult(nobs=10, minmax=(array([15.91917083]), array([17.67116295])), mean=array([16.78943174]), variance=array([0.37331329]), skewness=array([0.23510294]), kurtosis=array([-1.18510939]))
DescribeResult(nobs=10, minmax=(array([5.94056037]), array([10.04350626])), mean=array([7.63331569]), variance=array([2.07192254]), skewness=array([0.64582011]), kurtosis=array([-1.09664502]))
DescribeResult(nobs=10, minmax=(array([19.991515]), array([21.508875])), mean=array([20.8381423]), variance=array([0.3510754]), skewness=array([-0.27872828]), kurtosis=array([-1.41885278]))


# Test on all Datasets

In [10]:
datasets = [1,2,3,4]
iterations = 2
tModel.epochs = 150
lrate = LearningRateScheduler(aux_functions.step_decay)
scores ={1:[], 2:[], 3:[], 4:[]}
window_sizes = {1:30,2:20,3:30,4:18}
strides = {1:1,2:2,3:1,4:2}
max_ruls = {1:140, 2:134, 3:128, 4:134}
num_features = len(selected_features)

input_shape = None

#For each model
for key, model_def in models.items():
    file = open("results/MLP/ResultsDatasets_1_test"+key+".csv", "w")
    csvfile = csv.writer(file, lineterminator='\n')
    
    print(model.summary())
    
    print("Generating statistics for model " + key)

    #For each dataset
    for i in range(1,2):
        
        print("Working on dataset " + str(i))
        
        tempScoresRMSE = np.zeros((iterations,1))
        tempScoresRHS = np.zeros((iterations,1))
        tempTime = np.zeros((iterations,1))
        
        input_shape = window_sizes[i]*num_features #For simple ANN
        #input_shape = (window_sizes[i],num_features) #For RNN
        
        print(input_shape)
        
        tModel.data_handler.change_dataset(i)
        tModel.data_handler.sequence_length = window_sizes[i]
        tModel.data_handler.sequence_stride = strides[i]
        tModel.data_handler.max_rul = max_ruls[i]
        tModel.load_data(unroll=True, verbose=0, cross_validation_ratio=0)
        #tModel.print_data()
        
        #tModel.print_data()
        
        for j in range(iterations):

            #Model needs to be recompiled everytime since they are different runs so weights should be reinit
            model = get_compiled_model(model_def, input_shape, model_type='ann')

            tModel.change_model(key, model, 'keras')
            tModel.train_model(learningRate_scheduler=lrate, verbose=0)
            tModel.evaluate_model(['rhs', 'rmse'], round=2)
            #print("scores")
            
            #print(j)

            cScores = tModel.scores
            rmse = math.sqrt(cScores['score_1'])
            rmse2 = cScores['rmse']
            rhs = cScores['rhs']
            time = tModel.train_time
            
            tempScoresRMSE[j] = rmse2
            tempScoresRHS[j] = rhs
            tempTime[j] = time
            
        print("Results for model " + key)
    
        print(stats.describe(tempScoresRMSE))
        print(stats.describe(tempScoresRHS))
        print(stats.describe(tempTime))
            
        tempScoresRMSE = np.reshape(tempScoresRMSE, (iterations,))
        tempScoresRHS = np.reshape(tempScoresRHS, (iterations,))
        tempTime = np.reshape(tempTime, (iterations,))
        csvfile.writerow(tempScoresRMSE)
        csvfile.writerow(tempScoresRHS)
        csvfile.writerow(tempTime)
    
    file.close()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc1 (Dense)                  (None, 20)                8420      
_________________________________________________________________
fc2 (Dense)                  (None, 20)                420       
_________________________________________________________________
out (Dense)                  (None, 1)                 21        
Total params: 8,861
Trainable params: 8,861
Non-trainable params: 0
_________________________________________________________________
None
Generating statistics for model shallow-20
Working on dataset 1
420
Reloading data due to parameter change
training without cv
training without cv
Results for model shallow-20
DescribeResult(nobs=2, minmax=(array([16.31073266]), array([16.43045952])), mean=array([16.37059609]), variance=array([0.00716726]), skewness=array([0.]), kurtosis=array([-2.]))
DescribeResult(nobs=2, minmax=(array([6.64598