In [5]:
!pip install talos

Collecting talos
  Downloading https://files.pythonhosted.org/packages/1c/df/fff224d37a79f58f32a19427f6672d817915e9d8e92505f371e18aa12932/talos-0.4.8.tar.gz
Collecting astetik (from talos)
  Downloading https://files.pythonhosted.org/packages/44/5d/2bef4aca2ac9bdd1910abbc09732d5904ab4d841bee71f3835522a29367a/astetik-1.9.7.tar.gz
Collecting chances (from talos)
[?25l  Downloading https://files.pythonhosted.org/packages/ce/71/b0d7d471a812af47f89f016e9f94aade861b8e19a828f3e8a5ec69a88403/chances-0.1.4.tar.gz (45kB)
[K    100% |████████████████████████████████| 51kB 17.4MB/s 
[?25hCollecting kerasplotlib (from talos)
  Downloading https://files.pythonhosted.org/packages/e8/2e/b8628bfef6a817da9be863f650cf67187676b10d27d94b23f248da35d2b4/kerasplotlib-0.1.4.tar.gz
Collecting wrangle (from talos)
  Downloading https://files.pythonhosted.org/packages/71/9b/11fec37414435e24d2592672540a1c3e358c35fe05a98c8df2e08df91000/wrangle-0.6.2.tar.gz
Collecting geonamescache (from astetik->talos)
[?25l  D

In [6]:
import talos as ta
import keras
import time
import keras.backend as K
from keras.layers.core import Activation
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Conv2D, Flatten,Conv1D,Reshape

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn import preprocessing

# Seed NumPy's global random generator so NumPy-driven randomness repeats across runs.
# NOTE(review): the deep-learning backend's own random generator is not seeded here.
np.random.seed(1234)

# Binding a Python variable called PYTHONHASHSEED configures nothing by itself:
# hash randomisation is controlled by the environment variable of that name,
# which the interpreter reads at start-up. Export it so that child processes
# inherit a fixed hash seed; to pin the hash seed of this kernel as well, the
# variable has to be set before the kernel is launched.
# The module-level name is kept for backward compatibility.
PYTHONHASHSEED = 0
os.environ['PYTHONHASHSEED'] = str(PYTHONHASHSEED)

# File that the ModelCheckpoint callback writes the best model to during training.
model_path = 'cnnid_regression_model.h5'

  from pandas.core import datetools
Using TensorFlow backend.


Data Ingestion

In [None]:
# Load the three raw, space-separated, header-less files: the run-to-failure
# training trajectories, the test trajectories and the ground-truth file that
# is later used to label the test data.
train_df, test_df, truth_df = (
    pd.read_csv(file_name, sep=" ", header=None)
    for file_name in ('train_FD003.txt', 'test_FD003.txt', 'RUL_FD003.txt')
)

# Number of consecutive cycles that make up one model input window.
sequence_length = 50


In [None]:
# function to reshape features into (samples, time steps, features) 
def gen_sequence(id_df, seq_length, seq_cols):
    """Yield sliding windows of ``seq_length`` consecutive rows for one unit.

    No padding is applied: a frame with ``seq_length`` rows or fewer yields
    nothing, so callers have to skip (or pad) units that are too short.

    Args:
        id_df: DataFrame holding the rows of a single unit, in time order.
        seq_length: number of rows in every window.
        seq_cols: column labels to include in each window.

    Yields:
        2-D arrays of shape ``(seq_length, len(seq_cols))``. Window ``k``
        covers rows ``k`` .. ``k + seq_length - 1``. With ``n`` rows exactly
        ``n - seq_length`` windows are produced, e.g. 192 rows and a window
        of 50 give 142 windows, the last one covering rows 141..190; the
        window that would end on the final row is not generated.
    """
    values = id_df[seq_cols].values
    n_rows = values.shape[0]
    for first_row in range(n_rows - seq_length):
        yield values[first_row:first_row + seq_length, :]

In [None]:
# function to generate labels
def gen_labels(id_df, seq_length, label):
    """Return the target values that pair with the windows of one unit.

    The first ``seq_length - 1`` rows and the final row are discarded, so
    element ``k`` of the result is the label found on row
    ``k + seq_length - 1`` of ``id_df``.

    Args:
        id_df: DataFrame holding the rows of a single unit, in time order.
        seq_length: window length used when the sequences were generated.
        label: list of label column names (a list, so the result is 2-D).

    Returns:
        2-D array with ``n - seq_length`` rows (``n`` = number of rows in
        ``id_df``) and one column per entry of ``label``; empty when the
        frame has ``seq_length`` rows or fewer.
    """
    labels = id_df[label].values
    final_row = labels.shape[0] - 1
    return labels[seq_length - 1:final_row, :]

In [None]:
# Using the scoring function

def scoring_function(true, pred):
    """Asymmetric exponential score of the prediction errors (lower is better).

    With ``d = pred - true`` each element contributes ``exp(-d / 10) - 1``
    when ``d`` is negative and ``exp(d / 13) - 1`` otherwise; the
    contributions are added up in input order.

    NOTE(review): published variants of this score differ on which side uses
    10 and which uses 13 -- confirm against the reference you report against.

    Args:
        true: array of true values.
        pred: array of predictions with the same shape as ``true``.

    Returns:
        The summed score: a scalar for 1-D inputs, a one-element array for
        ``(n, 1)`` inputs, and the integer ``0`` for empty inputs.
    """
    diff = pred - true
    penalties = (
        np.exp(-diff[k] / 10) - 1 if diff[k] < 0 else np.exp(diff[k] / 13) - 1
        for k in range(len(diff))
    )
    # Built-in sum adds left to right starting from 0, like an explicit loop.
    return sum(penalties, 0)
def rmse(true, pred):
    """Root-mean-square error between ``pred`` and ``true``.

    The squared errors are accumulated along the first axis only, so
    ``(n, 1)`` inputs give a one-element array and 1-D inputs give a scalar.
    Empty inputs raise ``ZeroDivisionError``.
    """
    squared = (pred - true) ** 2
    count = len(squared)
    # Left-to-right accumulation starting from 0.
    total = sum((squared[k] for k in range(count)), 0)
    return np.sqrt(total / count)
def merror(true,pred):
    """Mean absolute error: the sum of ``|pred - true|`` over every element,
    divided by the length of the first axis."""
    abs_err = np.abs(pred - true)
    return np.sum(abs_err) / len(abs_err)
    
    

In [None]:
#Preprocessing
def prep_deeplearn(train_df,test_df,truth_df,sequence_length):
    """Turn the raw train / test / ground-truth frames into windowed arrays.

    Steps: name the 26 data columns, derive the remaining useful life (RUL)
    of every row, drop the sensor/setting columns listed as constant, min-max
    scale the features (scaler fitted on the training data only) and cut each
    unit's history into sliding windows with ``gen_sequence`` / ``gen_labels``.

    None of the three input frames is modified, so the function can be called
    repeatedly with the same raw frames and gives the same result each time.

    Args:
        train_df: raw training frame with integer column labels; labels
            0..25 are used, anything beyond is discarded.
        test_df: raw test frame with the same layout as ``train_df``.
        truth_df: raw ground-truth frame; its first column holds, per test
            unit (in row order), the number of cycles remaining after the
            unit's last recorded cycle, its second column is discarded.
        sequence_length: window length; only units whose largest cycle number
            exceeds it contribute windows.

    Returns:
        Tuple ``(seq_array, seq_test_array, label_array, test_df)``:
        float32 training windows, float32 test windows, float32 training
        labels (one row per training window) and the processed, normalised
        test frame including its ``RUL`` column.
    """
    col_names = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',
                 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',
                 's15', 's16', 's17', 's18', 's19', 's20', 's21']
    # Columns reported as holding repeated values (formerly dropped by position
    # 4, 5, 9, 14, 20, 22, 23 -- the same columns).
    constant_cols = ['setting3', 's1', 's5', 's10', 's16', 's18', 's19']

    # Keep the 26 real columns (the trailing ones only hold NaN) and work on
    # copies so that the caller's frames stay untouched.
    train_df = train_df.loc[:, 0:25].copy()
    test_df = test_df.loc[:, 0:25].copy()
    train_df.columns = col_names
    test_df.columns = col_names
    train_df = train_df.sort_values(['id', 'cycle'])
    test_df = test_df.sort_values(['id', 'cycle'])

    # Training labels: RUL = last observed cycle of the unit - current cycle.
    train_max = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()
    train_max.columns = ['id', 'max']
    train_df = train_df.merge(train_max, on=['id'], how='left')
    train_df['RUL'] = train_df['max'] - train_df['cycle']
    train_df = train_df.drop('max', axis=1)

    train_df = train_df.drop(columns=constant_cols)
    test_df = test_df.drop(columns=constant_cols)

    # Test labels: the ground truth gives the cycles remaining after the last
    # recorded test cycle, so failure cycle = last test cycle + that value.
    test_max = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()
    test_max.columns = ['id', 'max']

    # drop() returns a new frame: the caller's truth_df is never edited.
    truth = truth_df.drop(truth_df.columns[[1]], axis=1)
    truth.columns = ['more']
    truth['id'] = truth.index + 1
    # Relies on row k of the truth file describing unit id k + 1.
    truth['max'] = test_max['max'] + truth['more']
    truth = truth.drop('more', axis=1)

    test_df = test_df.merge(truth, on=['id'], how='left')
    test_df['RUL'] = test_df['max'] - test_df['cycle']
    test_df = test_df.drop('max', axis=1)

    # MinMax normalization (from 0 to 1); the scaler is fitted on the training
    # data and only applied to the test data.
    min_max_scaler = preprocessing.MinMaxScaler()
    train_df['cycle_norm'] = train_df['cycle']
    cols_normalize = train_df.columns.difference(['id', 'cycle', 'RUL'])
    norm_train_df = pd.DataFrame(min_max_scaler.fit_transform(train_df[cols_normalize]),
                                 columns=cols_normalize,
                                 index=train_df.index)
    join_df = train_df[train_df.columns.difference(cols_normalize)].join(norm_train_df)
    # reindex restores the original column order after the join.
    train_df = join_df.reindex(columns=train_df.columns)

    test_df['cycle_norm'] = test_df['cycle']
    norm_test_df = pd.DataFrame(min_max_scaler.transform(test_df[cols_normalize]),
                                columns=cols_normalize,
                                index=test_df.index)
    test_join_df = test_df[test_df.columns.difference(cols_normalize)].join(norm_test_df)
    test_df = test_join_df.reindex(columns=test_df.columns)
    test_df = test_df.reset_index(drop=True)

    # Feature columns fed to the network (everything except id, cycle, RUL).
    sequence_cols = train_df.columns.difference(['id', 'cycle', 'RUL'])

    # Split each frame once per unit (in order of first appearance) and keep
    # only the units that are long enough to produce a window.
    train_units = [unit for _, unit in train_df.groupby('id', sort=False)
                   if unit['cycle'].max() > sequence_length]
    test_units = [unit for _, unit in test_df.groupby('id', sort=False)
                  if unit['cycle'].max() > sequence_length]

    seq_array = np.concatenate(
        [list(gen_sequence(unit, sequence_length, sequence_cols)) for unit in train_units]
    ).astype(np.float32)

    seq_test_array = np.concatenate(
        [list(gen_sequence(unit, sequence_length, sequence_cols)) for unit in test_units]
    ).astype(np.float32)

    # One label per training window, in the same unit order as seq_array.
    label_array = np.concatenate(
        [gen_labels(unit, sequence_length, ['RUL']) for unit in train_units]
    ).astype(np.float32)

    return seq_array,seq_test_array,label_array,test_df




    

CNN architecture

In [None]:
def minimal(architecture='cnn1d'):
    """Run a talos hyperparameter scan over one of the CNN regressors.

    The scan trains on the notebook-level ``seq_array`` / ``label_array`` and
    checkpoints the best model (lowest validation loss) to ``model_path``.

    Args:
        architecture: ``'cnn1d'`` (default, the behaviour of a plain
            ``minimal()`` call) or ``'cnn2d'``.
            NOTE(review): no recorded output in this notebook exercises the
            ``'cnn2d'`` path -- verify it before relying on it.

    Returns:
        The ``ta.Scan`` object produced by the scan.

    Raises:
        ValueError: if ``architecture`` is not one of the supported names.
    """
    # Hyperparameter grid explored by the scan.
    p = {'activation':['relu', 'elu', 'tanh'],
         'optimizer': ['Nadam', 'Adam'],
         'losses': ['logcosh','mean_squared_error'],
         'kernel':[3,5,10],
         'batch_size': [16,32,128,512],
         'hidden_layers': [50,100,200],
         'dropout': [0.2,0.5]
        }

    def _add_regression_head(model, nb_out, params):
        """Append the flatten / dropout / dense layers shared by both models."""
        model.add(Flatten())
        model.add(Dropout(rate=params['dropout']))
        model.add(Dense(units=params['hidden_layers'], activation=params['activation']))
        # NOTE(review): the output layer reuses the scanned activation, so
        # e.g. 'tanh' bounds the predictions to [-1, 1] -- confirm this is
        # intended for the label range being regressed.
        model.add(Dense(nb_out, activation=params['activation']))

    def _compile_and_fit(model, inputs, labels, params):
        """Compile ``model`` and train it; returns ``(history, model)``."""
        model.compile(loss=params['losses'], optimizer=params['optimizer'], metrics=['mae','acc'])
        # NOTE(review): early stopping watches 'val_acc' while the checkpoint
        # watches 'val_loss' -- confirm accuracy is the intended stop signal
        # for a regression target.
        history = model.fit(inputs, labels, epochs=100, batch_size=params['batch_size'],
                            validation_split=0.1, verbose=2,
                            callbacks=[keras.callbacks.EarlyStopping(monitor='val_acc', min_delta=0, patience=15, verbose=0, mode='max'),
                                       keras.callbacks.ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0)])
        return history, model

    def cnn1d_train(seq_array,label_array,x_val, y_val,params):
        """Five stacked Conv1D layers over (window, features) inputs.

        ``x_val`` / ``y_val`` belong to the signature talos calls but are not
        used: validation data comes from ``validation_split``.
        """
        window = seq_array.shape[1]
        nb_features = seq_array.shape[2]
        nb_out = label_array.shape[1]

        model = Sequential()
        model.add(Conv1D(10, kernel_size=params['kernel'], activation=params['activation'],
                         data_format='channels_last',
                         input_shape=(window, nb_features)))
        for _ in range(4):
            model.add(Conv1D(10, kernel_size=params['kernel'], activation=params['activation']))
        _add_regression_head(model, nb_out, params)

        return _compile_and_fit(model, seq_array, label_array, params)

    def cnn2d_train(seq_array,label_array,x_val, y_val,params):
        """Five stacked Conv2D layers; each window is treated as a
        single-channel image of shape (window, features, 1).

        ``x_val`` / ``y_val`` are accepted for the talos signature but unused.
        """
        # The dimensions must be read before they are used in the reshape.
        nb_in, window, nb_features = seq_array.shape
        nb_out = label_array.shape[1]
        # Add a trailing channel axis of size 1.
        seq_array_expanded = seq_array.reshape(nb_in, window, nb_features, 1)

        model = Sequential()
        model.add(Conv2D(filters=10, kernel_size=params['kernel'], strides=(1,1),
                         padding='same',
                         activation=params['activation'],
                         data_format='channels_last',
                         input_shape=(window, nb_features, 1)))
        for _ in range(4):
            model.add(Conv2D(filters=10, kernel_size=params['kernel'], strides=(1,1),
                             activation=params['activation'],
                             padding='same'))
        _add_regression_head(model, nb_out, params)

        return _compile_and_fit(model, seq_array_expanded, label_array, params)

    model_builders = {'cnn1d': cnn1d_train, 'cnn2d': cnn2d_train}
    if architecture not in model_builders:
        raise ValueError("architecture must be one of %s, got %r"
                         % (sorted(model_builders), architecture))

    scan_object = ta.Scan(seq_array, label_array, model=model_builders[architecture],
                          params=p, grid_downsample=0.1)

    return scan_object


    
    
        
    



In [None]:
# Build the windowed training/test arrays and the training labels.
# NOTE(review): `test_df` is rebound to the processed frame returned by
# prep_deeplearn, so re-running this cell without first re-running the data
# ingestion cell passes an already-processed frame back in as raw input.
seq_array,seq_test_array,label_array,test_df=prep_deeplearn(train_df,test_df,truth_df,sequence_length)

In [97]:
# Run the talos hyperparameter scan defined in minimal(). This is the expensive
# cell: the recorded run below needed about 23 minutes for 14 rounds.
scan_object=minimal()







  0%|          | 0/14 [00:00<?, ?it/s][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 6375.4294 - mean_absolute_error: 61.8029 - acc: 0.0067 - val_loss: 3353.5899 - val_mean_absolute_error: 47.1712 - val_acc: 0.0064
Epoch 2/100
 - 1s - loss: 3299.0989 - mean_absolute_error: 46.5291 - acc: 0.0059 - val_loss: 3336.2069 - val_mean_absolute_error: 46.8358 - val_acc: 0.0046
Epoch 3/100
 - 1s - loss: 3049.1879 - mean_absolute_error: 44.0009 - acc: 0.0075 - val_loss: 2187.2907 - val_mean_absolute_error: 34.3823 - val_acc: 0.0100
Epoch 4/100
 - 1s - loss: 1996.6153 - mean_absolute_error: 34.2068 - acc: 0.0100 - val_loss: 1818.9579 - val_mean_absolute_error: 31.6100 - val_acc: 0.0082
Epoch 5/100
 - 1s - loss: 1663.9523 - mean_absolute_error: 31.0858 - acc: 0.0117 - val_loss: 1748.4571 - val_mean_absolute_error: 30.6691 - val_acc: 0.0119
Epoch 6/100
 - 1s - loss: 1633.4552 - mean_absolute_error: 30.5011 - acc: 0.0127 - val_loss: 1549.0139 - val_mean_absolute_error: 29.4801 - val_acc: 0.0137
Epoch 7/100
 - 1







  7%|▋         | 1/14 [00:38<08:16, 38.16s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 3s - loss: 30.4694 - mean_absolute_error: 31.1503 - acc: 0.0160 - val_loss: 22.4517 - val_mean_absolute_error: 23.1307 - val_acc: 0.0137
Epoch 2/100
 - 3s - loss: 21.1856 - mean_absolute_error: 21.8623 - acc: 0.0196 - val_loss: 19.1387 - val_mean_absolute_error: 19.8102 - val_acc: 0.0201
Epoch 3/100
 - 3s - loss: 19.1119 - mean_absolute_error: 19.7864 - acc: 0.0232 - val_loss: 17.5792 - val_mean_absolute_error: 18.2505 - val_acc: 0.0292
Epoch 4/100
 - 3s - loss: 18.3173 - mean_absolute_error: 18.9878 - acc: 0.0270 - val_loss: 16.0757 - val_mean_absolute_error: 16.7350 - val_acc: 0.0438
Epoch 5/100
 - 3s - loss: 17.7746 - mean_absolute_error: 18.4427 - acc: 0.0321 - val_loss: 17.1846 - val_mean_absolute_error: 17.8521 - val_acc: 0.0292
Epoch 6/100
 - 3s - loss: 17.4542 - mean_absolute_error: 18.1239 - acc: 0.0272 - val_loss: 15.1916 - val_mean_absolute_error: 15.8514 - val_acc: 0.0438
Epoch 7/100
 - 3s - loss: 17.1595 - mean







 14%|█▍        | 2/14 [02:15<11:11, 56.00s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 10186.9903 - mean_absolute_error: 83.0467 - acc: 0.0036 - val_loss: 7354.2954 - val_mean_absolute_error: 65.9643 - val_acc: 0.0027
Epoch 2/100
 - 1s - loss: 4660.3405 - mean_absolute_error: 54.0635 - acc: 0.0067 - val_loss: 3937.9362 - val_mean_absolute_error: 48.5579 - val_acc: 0.0046
Epoch 3/100
 - 1s - loss: 3582.7153 - mean_absolute_error: 48.0184 - acc: 0.0051 - val_loss: 3309.0533 - val_mean_absolute_error: 45.4300 - val_acc: 0.0037
Epoch 4/100
 - 1s - loss: 3061.5726 - mean_absolute_error: 44.3178 - acc: 0.0072 - val_loss: 2783.6003 - val_mean_absolute_error: 42.1555 - val_acc: 0.0064
Epoch 5/100
 - 1s - loss: 2497.9246 - mean_absolute_error: 39.8898 - acc: 0.0067 - val_loss: 2128.9072 - val_mean_absolute_error: 35.9329 - val_acc: 0.0110
Epoch 6/100
 - 1s - loss: 1888.5001 - mean_absolute_error: 33.5138 - acc: 0.0082 - val_loss: 1774.9076 - val_mean_absolute_error: 30.8885 - val_acc: 0.0110
Epoch 7/100
 - 







 21%|██▏       | 3/14 [03:11<10:15, 56.00s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 55.0503 - mean_absolute_error: 55.7386 - acc: 0.0059 - val_loss: 34.9930 - val_mean_absolute_error: 35.6808 - val_acc: 0.0064
Epoch 2/100
 - 2s - loss: 30.7551 - mean_absolute_error: 31.4382 - acc: 0.0122 - val_loss: 29.0304 - val_mean_absolute_error: 29.7122 - val_acc: 0.0119
Epoch 3/100
 - 1s - loss: 27.2442 - mean_absolute_error: 27.9266 - acc: 0.0123 - val_loss: 26.8110 - val_mean_absolute_error: 27.4921 - val_acc: 0.0174
Epoch 4/100
 - 1s - loss: 25.1559 - mean_absolute_error: 25.8355 - acc: 0.0171 - val_loss: 22.7777 - val_mean_absolute_error: 23.4502 - val_acc: 0.0301
Epoch 5/100
 - 1s - loss: 21.9286 - mean_absolute_error: 22.6060 - acc: 0.0191 - val_loss: 19.7534 - val_mean_absolute_error: 20.4265 - val_acc: 0.0265
Epoch 6/100
 - 1s - loss: 20.7394 - mean_absolute_error: 21.4160 - acc: 0.0205 - val_loss: 19.9374 - val_mean_absolute_error: 20.6147 - val_acc: 0.0164
Epoch 7/100
 - 1s - loss: 20.4354 - mean







 29%|██▊       | 4/14 [04:19<09:55, 59.54s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 4s - loss: 2550.8366 - mean_absolute_error: 37.5879 - acc: 0.0100 - val_loss: 1459.6878 - val_mean_absolute_error: 27.6258 - val_acc: 0.0183
Epoch 2/100
 - 3s - loss: 1267.0654 - mean_absolute_error: 25.9337 - acc: 0.0145 - val_loss: 1031.2172 - val_mean_absolute_error: 22.8312 - val_acc: 0.0219
Epoch 3/100
 - 3s - loss: 1078.2683 - mean_absolute_error: 23.3824 - acc: 0.0166 - val_loss: 991.5731 - val_mean_absolute_error: 20.1078 - val_acc: 0.0192
Epoch 4/100
 - 3s - loss: 993.6076 - mean_absolute_error: 22.1335 - acc: 0.0165 - val_loss: 811.6858 - val_mean_absolute_error: 18.8421 - val_acc: 0.0365
Epoch 5/100
 - 3s - loss: 923.4182 - mean_absolute_error: 21.2710 - acc: 0.0190 - val_loss: 827.4164 - val_mean_absolute_error: 20.0649 - val_acc: 0.0237
Epoch 6/100
 - 3s - loss: 916.7382 - mean_absolute_error: 21.1946 - acc: 0.0192 - val_loss: 868.2907 - val_mean_absolute_error: 18.8266 - val_acc: 0.0311
Epoch 7/100
 - 3s - los







 36%|███▌      | 5/14 [07:08<13:51, 92.42s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 5423.0853 - mean_absolute_error: 57.2193 - acc: 0.0067 - val_loss: 3323.9872 - val_mean_absolute_error: 46.9356 - val_acc: 0.0037
Epoch 2/100
 - 1s - loss: 3014.4362 - mean_absolute_error: 43.7327 - acc: 0.0074 - val_loss: 1889.2676 - val_mean_absolute_error: 33.4698 - val_acc: 0.0082
Epoch 3/100
 - 1s - loss: 1727.1825 - mean_absolute_error: 31.7069 - acc: 0.0096 - val_loss: 1786.5114 - val_mean_absolute_error: 31.0370 - val_acc: 0.0128
Epoch 4/100
 - 1s - loss: 1662.7051 - mean_absolute_error: 30.5325 - acc: 0.0122 - val_loss: 1565.2011 - val_mean_absolute_error: 30.1032 - val_acc: 0.0137
Epoch 5/100
 - 1s - loss: 1612.3911 - mean_absolute_error: 30.0098 - acc: 0.0105 - val_loss: 1607.6742 - val_mean_absolute_error: 31.4261 - val_acc: 0.0128
Epoch 6/100
 - 1s - loss: 1484.0895 - mean_absolute_error: 28.4611 - acc: 0.0146 - val_loss: 1452.6280 - val_mean_absolute_error: 28.1386 - val_acc: 0.0155
Epoch 7/100
 - 1







 43%|████▎     | 6/14 [07:57<10:35, 79.38s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 5s - loss: 33.7303 - mean_absolute_error: 34.4128 - acc: 0.0136 - val_loss: 26.4352 - val_mean_absolute_error: 27.1114 - val_acc: 0.0201
Epoch 2/100
 - 4s - loss: 20.9678 - mean_absolute_error: 21.6431 - acc: 0.0200 - val_loss: 17.2068 - val_mean_absolute_error: 17.8746 - val_acc: 0.0274
Epoch 3/100
 - 4s - loss: 19.0880 - mean_absolute_error: 19.7596 - acc: 0.0246 - val_loss: 20.1138 - val_mean_absolute_error: 20.7955 - val_acc: 0.0128
Epoch 4/100
 - 4s - loss: 17.8416 - mean_absolute_error: 18.5108 - acc: 0.0282 - val_loss: 18.2321 - val_mean_absolute_error: 18.9009 - val_acc: 0.0292
Epoch 5/100
 - 4s - loss: 17.2218 - mean_absolute_error: 17.8889 - acc: 0.0332 - val_loss: 15.3383 - val_mean_absolute_error: 15.9951 - val_acc: 0.0411
Epoch 6/100
 - 4s - loss: 16.8338 - mean_absolute_error: 17.4995 - acc: 0.0329 - val_loss: 17.0643 - val_mean_absolute_error: 17.7251 - val_acc: 0.0429
Epoch 7/100
 - 4s - loss: 16.7006 - mean







 50%|█████     | 7/14 [10:54<12:39, 108.54s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 5s - loss: 32.3412 - mean_absolute_error: 33.0238 - acc: 0.0126 - val_loss: 27.9532 - val_mean_absolute_error: 28.6388 - val_acc: 0.0091
Epoch 2/100
 - 4s - loss: 25.8838 - mean_absolute_error: 26.5627 - acc: 0.0173 - val_loss: 26.4896 - val_mean_absolute_error: 27.1662 - val_acc: 0.0183
Epoch 3/100
 - 4s - loss: 24.0090 - mean_absolute_error: 24.6846 - acc: 0.0221 - val_loss: 23.6924 - val_mean_absolute_error: 24.3631 - val_acc: 0.0274
Epoch 4/100
 - 4s - loss: 22.4104 - mean_absolute_error: 23.0839 - acc: 0.0239 - val_loss: 23.7870 - val_mean_absolute_error: 24.4565 - val_acc: 0.0329
Epoch 5/100
 - 4s - loss: 21.3997 - mean_absolute_error: 22.0723 - acc: 0.0235 - val_loss: 25.9795 - val_mean_absolute_error: 26.6583 - val_acc: 0.0192
Epoch 6/100
 - 4s - loss: 20.5863 - mean_absolute_error: 21.2553 - acc: 0.0310 - val_loss: 18.3523 - val_mean_absolute_error: 19.0185 - val_acc: 0.0356
Epoch 7/100
 - 4s - loss: 19.4420 - mean







 57%|█████▋    | 8/14 [12:30<10:29, 104.87s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 76.4578 - mean_absolute_error: 77.1466 - acc: 0.0058 - val_loss: 48.6985 - val_mean_absolute_error: 49.3864 - val_acc: 0.0073
Epoch 2/100
 - 1s - loss: 48.7415 - mean_absolute_error: 49.4294 - acc: 0.0062 - val_loss: 45.6553 - val_mean_absolute_error: 46.3448 - val_acc: 0.0027
Epoch 3/100
 - 1s - loss: 43.2697 - mean_absolute_error: 43.9579 - acc: 0.0061 - val_loss: 40.4388 - val_mean_absolute_error: 41.1284 - val_acc: 0.0018
Epoch 4/100
 - 1s - loss: 36.1430 - mean_absolute_error: 36.8294 - acc: 0.0079 - val_loss: 31.0749 - val_mean_absolute_error: 31.7599 - val_acc: 0.0128
Epoch 5/100
 - 1s - loss: 30.4914 - mean_absolute_error: 31.1727 - acc: 0.0139 - val_loss: 29.4919 - val_mean_absolute_error: 30.1737 - val_acc: 0.0128
Epoch 6/100
 - 1s - loss: 28.8049 - mean_absolute_error: 29.4855 - acc: 0.0155 - val_loss: 28.5284 - val_mean_absolute_error: 29.2115 - val_acc: 0.0110
Epoch 7/100
 - 1s - loss: 27.8988 - mean







 64%|██████▍   | 9/14 [14:02<08:25, 101.02s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 5s - loss: 38.3435 - mean_absolute_error: 39.0274 - acc: 0.0103 - val_loss: 22.9558 - val_mean_absolute_error: 23.6337 - val_acc: 0.0183
Epoch 2/100
 - 4s - loss: 21.6554 - mean_absolute_error: 22.3321 - acc: 0.0195 - val_loss: 19.1390 - val_mean_absolute_error: 19.8132 - val_acc: 0.0183
Epoch 3/100
 - 5s - loss: 19.9785 - mean_absolute_error: 20.6512 - acc: 0.0252 - val_loss: 18.9097 - val_mean_absolute_error: 19.5715 - val_acc: 0.0356
Epoch 4/100
 - 4s - loss: 19.5903 - mean_absolute_error: 20.2617 - acc: 0.0283 - val_loss: 19.2206 - val_mean_absolute_error: 19.8874 - val_acc: 0.0301
Epoch 5/100
 - 4s - loss: 18.9932 - mean_absolute_error: 19.6620 - acc: 0.0295 - val_loss: 16.9952 - val_mean_absolute_error: 17.6539 - val_acc: 0.0493
Epoch 6/100
 - 4s - loss: 18.9467 - mean_absolute_error: 19.6156 - acc: 0.0315 - val_loss: 18.0129 - val_mean_absolute_error: 18.6886 - val_acc: 0.0164
Epoch 7/100
 - 4s - loss: 18.2582 - mean







 71%|███████▏  | 10/14 [16:28<07:37, 114.44s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 3s - loss: 42.1929 - mean_absolute_error: 42.8793 - acc: 0.0079 - val_loss: 29.3670 - val_mean_absolute_error: 30.0414 - val_acc: 0.0256
Epoch 2/100
 - 3s - loss: 26.0959 - mean_absolute_error: 26.7748 - acc: 0.0174 - val_loss: 24.9670 - val_mean_absolute_error: 25.6448 - val_acc: 0.0192
Epoch 3/100
 - 3s - loss: 23.8084 - mean_absolute_error: 24.4854 - acc: 0.0191 - val_loss: 22.3374 - val_mean_absolute_error: 23.0127 - val_acc: 0.0174
Epoch 4/100
 - 3s - loss: 21.9160 - mean_absolute_error: 22.5900 - acc: 0.0248 - val_loss: 20.0478 - val_mean_absolute_error: 20.7213 - val_acc: 0.0228
Epoch 5/100
 - 3s - loss: 20.6582 - mean_absolute_error: 21.3324 - acc: 0.0242 - val_loss: 18.7442 - val_mean_absolute_error: 19.4156 - val_acc: 0.0265
Epoch 6/100
 - 3s - loss: 19.5958 - mean_absolute_error: 20.2698 - acc: 0.0219 - val_loss: 17.6698 - val_mean_absolute_error: 18.3397 - val_acc: 0.0256
Epoch 7/100
 - 3s - loss: 19.2082 - mean







 79%|███████▊  | 11/14 [18:30<05:50, 116.68s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 5s - loss: 3320.9431 - mean_absolute_error: 45.6662 - acc: 0.0070 - val_loss: 1745.0690 - val_mean_absolute_error: 33.0654 - val_acc: 0.0100
Epoch 2/100
 - 4s - loss: 1403.9024 - mean_absolute_error: 27.4556 - acc: 0.0131 - val_loss: 1265.9291 - val_mean_absolute_error: 26.1027 - val_acc: 0.0228
Epoch 3/100
 - 4s - loss: 1149.2722 - mean_absolute_error: 24.2596 - acc: 0.0182 - val_loss: 1033.6536 - val_mean_absolute_error: 23.0775 - val_acc: 0.0183
Epoch 4/100
 - 4s - loss: 1014.4106 - mean_absolute_error: 22.6845 - acc: 0.0181 - val_loss: 922.3241 - val_mean_absolute_error: 22.5267 - val_acc: 0.0073
Epoch 5/100
 - 4s - loss: 937.9676 - mean_absolute_error: 21.6859 - acc: 0.0163 - val_loss: 792.7070 - val_mean_absolute_error: 19.8361 - val_acc: 0.0210
Epoch 6/100
 - 4s - loss: 909.5128 - mean_absolute_error: 21.3494 - acc: 0.0175 - val_loss: 767.6908 - val_mean_absolute_error: 18.8186 - val_acc: 0.0183
Epoch 7/100
 - 4s - l







 86%|████████▌ | 12/14 [21:44<04:39, 139.95s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 9285.5708 - mean_absolute_error: 77.8568 - acc: 0.0066 - val_loss: 7349.3252 - val_mean_absolute_error: 65.9848 - val_acc: 0.0073
Epoch 2/100
 - 1s - loss: 5912.7544 - mean_absolute_error: 58.6832 - acc: 0.0061 - val_loss: 4337.6644 - val_mean_absolute_error: 49.4292 - val_acc: 0.0073
Epoch 3/100
 - 1s - loss: 3746.2234 - mean_absolute_error: 47.5741 - acc: 0.0066 - val_loss: 3373.6891 - val_mean_absolute_error: 46.3824 - val_acc: 0.0046
Epoch 4/100
 - 1s - loss: 3323.7240 - mean_absolute_error: 46.3179 - acc: 0.0049 - val_loss: 3344.4208 - val_mean_absolute_error: 46.9727 - val_acc: 0.0037
Epoch 5/100
 - 1s - loss: 3312.9978 - mean_absolute_error: 46.4251 - acc: 0.0060 - val_loss: 3343.4130 - val_mean_absolute_error: 46.9299 - val_acc: 0.0037
Epoch 6/100
 - 1s - loss: 3312.9648 - mean_absolute_error: 46.3851 - acc: 0.0070 - val_loss: 3342.1685 - val_mean_absolute_error: 46.8436 - val_acc: 0.0046
Epoch 7/100
 - 1







 93%|█████████▎| 13/14 [22:07<01:44, 104.97s/it][A[A[A[A[A[A

Train on 9846 samples, validate on 1095 samples
Epoch 1/100
 - 2s - loss: 4748.7363 - mean_absolute_error: 53.2382 - acc: 0.0056 - val_loss: 2015.8236 - val_mean_absolute_error: 31.8381 - val_acc: 0.0073
Epoch 2/100
 - 1s - loss: 1544.5216 - mean_absolute_error: 29.4831 - acc: 0.0128 - val_loss: 1484.5902 - val_mean_absolute_error: 29.1318 - val_acc: 0.0110
Epoch 3/100
 - 1s - loss: 1407.5442 - mean_absolute_error: 27.9123 - acc: 0.0152 - val_loss: 1492.4281 - val_mean_absolute_error: 28.2404 - val_acc: 0.0146
Epoch 4/100
 - 1s - loss: 1379.0758 - mean_absolute_error: 27.4990 - acc: 0.0172 - val_loss: 1382.1009 - val_mean_absolute_error: 27.5546 - val_acc: 0.0201
Epoch 5/100
 - 1s - loss: 1303.9067 - mean_absolute_error: 26.4459 - acc: 0.0156 - val_loss: 1328.7662 - val_mean_absolute_error: 26.4914 - val_acc: 0.0201
Epoch 6/100
 - 1s - loss: 1271.1697 - mean_absolute_error: 25.8803 - acc: 0.0159 - val_loss: 1310.4832 - val_mean_absolute_error: 25.7487 - val_acc: 0.0201
Epoch 7/100
 - 1







100%|██████████| 14/14 [23:05<00:00, 90.78s/it] [A[A[A[A[A[A





[A[A[A[A[A[A

TESTING RESULTS

In [None]:
# Wrap the finished scan in talos' prediction and evaluation helpers; both
# objects are handed to cnn1d_val further down.
p=ta.Predict(scan_object)
e=ta.Evaluate(scan_object)

In [None]:
# Load an experiment log for reporting.
# NOTE(review): the file name is hard-coded and looks like a timestamp of one
# particular scan run -- presumably a fresh scan writes its log under a
# different name, so confirm/update this path after re-running the scan.
r=ta.Reporting('030519215433_.csv')


In [101]:
# Display the parameter combinations that the loaded report ranks as best.
r.best_params()

array([[13.661055597975919, 15.443959118170685, 'logcosh', 'Nadam', 3,
        'tanh', 32, 0],
       [12.94378938805567, 15.045963096928599, 'logcosh', 'Adam', 5,
        'tanh', 32, 1],
       [12.98980499163066, 14.795931387790306, 'logcosh', 'Nadam', 3,
        'tanh', 16, 2],
       [15.14410452472565, 16.938595340766724, 'logcosh', 'Adam', 3,
        'relu', 128, 3],
       [14.235895648067947, 16.23408677222984, 'logcosh', 'Nadam', 5,
        'elu', 16, 4],
       [11.702946186501142, 13.406005870998086, 'mean_squared_error',
        'Adam', 5, 'elu', 32, 5],
       [15.102110302720439, 16.04134943483807, 'logcosh', 'Adam', 10,
        'relu', 16, 6],
       [17.61267311061354, 17.855643389599365, 'logcosh', 'Adam', 10,
        'relu', 512, 7],
       [10.2986853185854, 12.249598861685035, 'mean_squared_error',
        'Adam', 5, 'tanh', 16, 8],
       [16.468773960305132, 17.977037238416067, 'mean_squared_error',
        'Adam', 3, 'tanh', 128, 9]], dtype=object)

In [None]:
def cnn1d_val(pred,eva,test_df,sequence_length):
    """Score a model on the last window of every sufficiently long test unit.

    Only units whose largest cycle number exceeds ``sequence_length`` are
    used. Prints the mean absolute error, the asymmetric score and the RMSE,
    in that order; nothing is returned.

    Args:
        pred: object exposing ``predict(x)`` (``ta.Predict`` at the call site).
        eva: object exposing ``evaluate(x, y)`` (``ta.Evaluate`` at the call
            site); it is called but its result is not used.
        test_df: processed test frame with ``id``, ``cycle``, ``RUL`` and the
            feature columns.
        sequence_length: window length the model was trained with.
    """
    feature_cols = test_df.columns.difference(['id','cycle','RUL'])

    # Slice the frame once per unit, in order of first appearance of the id.
    per_unit = [test_df[test_df['id'] == unit_id] for unit_id in test_df['id'].unique()]
    y_mask = [unit['cycle'].max() > sequence_length for unit in per_unit]

    # Last `sequence_length` rows of every kept unit; the extra list level
    # gives each window a leading axis of size 1 for the concatenation.
    last_windows = [[unit[feature_cols].values[-sequence_length:]]
                    for unit, keep in zip(per_unit, y_mask) if keep]
    seq_array_test_last = np.concatenate(last_windows).astype(np.float32)
    smp = seq_array_test_last.shape[0]

    # RUL on the final row of each unit, filtered with the same mask.
    final_rul = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values
    y_true_test = final_rul.reshape(smp, 1).astype(np.float32)

    eva.evaluate(seq_array_test_last, y_true_test)

    y_pred_test = pred.predict(seq_array_test_last)

    sf = scoring_function(y_true_test, y_pred_test)
    rmse_err = rmse(y_true_test, y_pred_test)
    m_err = merror(y_true_test, y_pred_test)

    print(m_err,sf,rmse_err)

In [None]:
def cnn2d_val(pred,eva,test_df,sequence_length):
    """Score a 2-D CNN on the last window of every sufficiently long test unit.

    Same procedure as for the 1-D model, except that every window receives a
    trailing channel axis of size 1 before it is passed on. Prints the mean
    absolute error, the asymmetric score and the RMSE, in that order; nothing
    is returned.

    Args:
        pred: object exposing ``predict(x)``.
        eva: object exposing ``evaluate(x, y)``; it is called but its result
            is not used.
        test_df: processed test frame with ``id``, ``cycle``, ``RUL`` and the
            feature columns.
        sequence_length: window length the model was trained with.
    """
    feature_cols = test_df.columns.difference(['id','cycle','RUL'])

    # Slice the frame once per unit, in order of first appearance of the id.
    per_unit = [test_df[test_df['id'] == unit_id] for unit_id in test_df['id'].unique()]
    y_mask = [unit['cycle'].max() > sequence_length for unit in per_unit]

    # Last `sequence_length` rows of every kept unit; the extra list level
    # gives each window a leading axis of size 1 for the concatenation.
    last_windows = [[unit[feature_cols].values[-sequence_length:]]
                    for unit, keep in zip(per_unit, y_mask) if keep]
    stacked = np.concatenate(last_windows).astype(np.float32)
    smp = stacked.shape[0]
    n = stacked.shape[2]

    # (samples, window, features) -> (samples, window, features, 1)
    seq_array_test_last = stacked.reshape(smp, sequence_length, n, 1)

    # RUL on the final row of each unit, filtered with the same mask.
    final_rul = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values
    y_true_test = final_rul.reshape(smp, 1).astype(np.float32)

    eva.evaluate(seq_array_test_last, y_true_test)

    y_pred_test = pred.predict(seq_array_test_last)

    sf = scoring_function(y_true_test, y_pred_test)
    rmse_err = rmse(y_true_test, y_pred_test)
    m_err = merror(y_true_test, y_pred_test)

    print(m_err,sf,rmse_err)

In [102]:
# Score the scanned model on the test set; prints the mean absolute error,
# the asymmetric score and the RMSE, in that order.
cnn1d_val(p,e,test_df,sequence_length)

11.423786992612092 [1153.0573] [18.002577]


In [None]:
# NOTE(review): no visible cell assigns `run_time` at notebook (global) scope,
# so on a fresh kernel this line is expected to raise NameError. The value
# shown below comes from earlier kernel state -- TODO define it explicitly
# (e.g. by timing the scan) or remove this cell.
print(run_time)

300.3336408138275
