<a href="https://colab.research.google.com/github/arthursl12/POC1/blob/main/POC2_FD001_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
%pip install scikeras;
%pip install -U tensorflow-addons;

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikeras
  Downloading scikeras-0.9.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.9.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-addons
  Downloading tensorflow_addons-0.18.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 29.8 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.18.0


In [2]:
import os
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import datetime

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.compose import TransformedTargetRegressor

from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [4]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

from tensorflow.keras.metrics import RootMeanSquaredError as RMSE

from keras.callbacks import LambdaCallback
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.optimizers.schedules import ExponentialDecay

import tensorflow_addons as tfa
from tensorflow_addons.metrics import RSquare as R2



from scikeras.wrappers import KerasRegressor

In [5]:
sns.set_palette('colorblind')

In [6]:
# Reproducibility: seed every random number generator this notebook relies on
seed = 42
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes — confirm.
os.environ['PYTHONHASHSEED']=str(seed)
random.seed(seed)         # Python's built-in `random` module
np.random.seed(seed)      # NumPy's global random state
tf.random.set_seed(seed)  # TensorFlow's global seed

In [7]:
tf.config.experimental.enable_op_determinism()

# Data Preparation

In [8]:
# Dataset Download
# Clone the dataset repository, move its `CMaps` and `data_processing` folders
# to /content, then delete what is left of the clone.
# NOTE(review): the exit statuses returned by os.system are not checked, so a
# failed clone or move only surfaces when `data_processing` is imported later.
# NOTE(review): the absolute /content paths presumably assume a Colab runtime — confirm.
os.system('git clone https://github.com/arthursl12/dataset_2')
os.system('mv /content/dataset_2/CMaps /content/CMaps')
os.system('mv /content/dataset_2/data_processing /content/data_processing')
os.system('rm -rf dataset_2')

0

In [9]:
from data_processing.processing import DatasetProcessing
from data_processing.training import HyperparameterSearch, reclipper_scorer
from data_processing.eval import Evaluation

In [10]:
proc = DatasetProcessing()

## Data Integration

The data are provided as a zip-compressed text file with 26 columns of numbers, separated by spaces. Each row is a snapshot of data taken during a single operational cycle, each column is a different variable. The columns correspond to:  

1) unit number   
2) time, in cycles  
3) operational setting 1  
4) operational setting 2  
5) operational setting 3    
6) sensor measurement 1    
7) sensor measurement 2  
...  
26) sensor measurement 21


There are 6 conditions (or combinations) which the 3 operational settings can take.  
Condition 1: Altitude = 0, Mach Number = 0, TRA = 100  
Condition 2: Altitude = 10, Mach Number = 0.25, TRA = 100  
Condition 3: Altitude = 20, Mach Number = 0.7, TRA = 100  
Condition 4: Altitude = 25, Mach Number = 0.62, TRA = 60  
Condition 5: Altitude = 35, Mach Number = 0.84, TRA = 100  
Condition 6: Altitude = 42, Mach Number = 0.84, TRA = 100  
  
There is slight variation in all these conditions so you may get numbers like 24.453 instead of 25 exactly.

FD001: Condition 1 only  
FD002: Mix of all the conditions  
FD003: Condition 1 only  
FD004: Mix of all conditions  


In [11]:
index_cols, settings_cols, sensors_cols, cols = proc.column_names()
train, test, y_test = proc.read_dataset(1)
train

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,100.0,518.67,643.49,1597.98,1428.63,14.62,...,519.49,2388.26,8137.60,8.4956,0.03,397,2388,100.0,38.49,22.9735
20627,100,197,-0.0016,-0.0005,100.0,518.67,643.54,1604.50,1433.58,14.62,...,519.68,2388.22,8136.50,8.5139,0.03,395,2388,100.0,38.30,23.1594
20628,100,198,0.0004,0.0000,100.0,518.67,643.42,1602.46,1428.18,14.62,...,520.01,2388.24,8141.05,8.5646,0.03,398,2388,100.0,38.44,22.9333
20629,100,199,-0.0011,0.0003,100.0,518.67,643.23,1605.26,1426.53,14.62,...,519.67,2388.23,8139.29,8.5389,0.03,395,2388,100.0,38.29,23.0640


## Preprocessing

### Validation Set

In [12]:
from sklearn.model_selection import GroupShuffleSplit

def train_val_split(train, train_size=0.80, random_state=42, group_col="unit_number"):
    """Split a frame into training and validation parts without splitting a group.

    All rows that share the same value of ``group_col`` end up on the same side
    of the split, so no engine unit contributes rows to both parts.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame to split; must contain the column ``group_col``.
    train_size : float, default 0.80
        Passed to ``GroupShuffleSplit`` as ``train_size``.
    random_state : int, default 42
        Seed passed to ``GroupShuffleSplit`` so the split is repeatable.
    group_col : str, default "unit_number"
        Column whose values define the groups.

    Returns
    -------
    (df_train, df_val) : tuple of pandas.DataFrame
        Independent copies of the selected rows.
    """
    gss = GroupShuffleSplit(n_splits=1, train_size=train_size, random_state=random_state)

    # n_splits=1, so the splitter produces exactly one (train, validation) index pair
    idx_train, idx_val = next(gss.split(train, groups=train[group_col]))

    df_train = train.iloc[idx_train].copy()
    df_val = train.iloc[idx_val].copy()

    return df_train, df_val

In [13]:
df_train, df_val = train_val_split(train)

In [14]:
X_t_train, y_t_train = proc.X_y_train_divide(df_train)
X_t_val, y_t_val = proc.X_y_train_divide(df_val)

### Test Set Transformation 
The test set contains samples for every cycle, but the RUL annotation is given only for the last cycle of each unit.

In [15]:
test.shape, y_test.shape

((13096, 26), (100, 1))

In [16]:
test_last = proc.transform_test(test)
test_last.head()

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,s_9,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,518.67,642.58,1581.22,1398.91,14.62,21.61,554.42,2388.08,9056.4,1.3,...,521.79,2388.06,8130.11,8.4024,0.03,393,2388,100.0,38.81,23.3552
1,518.67,642.55,1586.59,1410.83,14.62,21.61,553.52,2388.1,9044.77,1.3,...,521.74,2388.09,8126.9,8.4505,0.03,391,2388,100.0,38.81,23.2618
2,518.67,642.88,1589.75,1418.89,14.62,21.61,552.59,2388.16,9049.26,1.3,...,520.83,2388.14,8131.46,8.4119,0.03,395,2388,100.0,38.93,23.274
3,518.67,642.78,1594.53,1406.88,14.62,21.61,552.64,2388.13,9051.3,1.3,...,521.88,2388.11,8133.64,8.4634,0.03,395,2388,100.0,38.58,23.2581
4,518.67,642.27,1589.94,1419.36,14.62,21.61,553.29,2388.1,9053.99,1.3,...,521.0,2388.15,8125.74,8.4362,0.03,394,2388,100.0,38.75,23.4117


In [17]:
X_test = test_last

### Remaining Useful Life (RUL)

In [18]:
train = proc.add_remaining_useful_life_linear(train)
train[index_cols+['RUL']].head()

Unnamed: 0,unit_number,time,RUL
0,1,1,191
1,1,2,190
2,1,3,189
3,1,4,188
4,1,5,187


## Attributes and target separation

In [19]:
X_train, y_train = proc.X_y_train_divide(train)

In [20]:
y_train.head()

Unnamed: 0,RUL
0,191
1,190
2,189
3,188
4,187


In [21]:
X_train.head()

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,s_9,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,518.67,641.82,1589.7,1400.6,14.62,21.61,554.36,2388.06,9046.19,1.3,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,2388.04,9044.07,1.3,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,518.67,642.35,1587.99,1404.2,14.62,21.61,554.26,2388.08,9052.94,1.3,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,2388.11,9049.48,1.3,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,518.67,642.37,1582.85,1406.22,14.62,21.61,554.0,2388.06,9055.15,1.3,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


## Training and Evaluation functions

In [22]:
eval = Evaluation()

In [23]:
search = HyperparameterSearch()

# LSTM Construction

## Callbacks

In [None]:
# Early Stopping Callback: configured with a patience of 5 epochs and with
# restoring of the best weights enabled.
# NOTE(review): it monitors the training 'loss', not 'val_loss' — confirm this is intended.
es = tf.keras.callbacks.EarlyStopping(monitor='loss', 
                                      patience=5, restore_best_weights=True)

In [None]:
# Printing Callback
def printLog(epoch, logs):
    """Print a one-line summary of the metrics reported for an epoch.

    Intended as the ``on_epoch_end`` hook of a ``LambdaCallback``.

    Parameters
    ----------
    epoch : int
        Index of the epoch, printed as-is.
    logs : dict or None
        Mapping from metric name to value. The keys read are ``loss``,
        ``root_mean_squared_error``, ``r_square`` and their ``val_`` variants.

    A metric that is absent from ``logs`` (for example when the model was
    compiled without RMSE/R2, or no validation data is used) is printed as
    ``n/a`` instead of raising ``KeyError`` in the middle of training.
    """
    logs = logs or {}

    def fmt(key):
        # Three decimals for reported metrics, a placeholder for missing ones
        value = logs.get(key)
        return "n/a" if value is None else f"{value:.3f}"

    print(
        f"E {epoch}\t: loss={fmt('loss')}, "+
        f"rmse={fmt('root_mean_squared_error')}, "+
        f"r2={fmt('r_square')}; "+
        f"v_loss={fmt('val_loss')}, "+
        f"v_rmse={fmt('val_root_mean_squared_error')}, "+
        f"v_r2={fmt('val_r_square')}; "
    )

printerCallback = LambdaCallback(on_epoch_end=printLog)

## Wrapper

In [120]:
from sklearn.base import BaseEstimator,RegressorMixin

class LSTMWrapperRegressor(BaseEstimator,RegressorMixin):
    """Scikit-learn style wrapper that feeds sliding windows of per-unit rows to ``basemodel``.

    ``fit`` scales the setting/sensor columns, cuts the rows of every unit into
    overlapping windows of ``seq_length`` rows and trains ``basemodel`` on them,
    using the clipped RUL of each window's last row as target. ``predict``
    applies the same scaling and windowing before delegating to ``basemodel``.

    The class reads the notebook globals ``settings_cols``, ``sensors_cols`` and
    ``SEQ_COLS`` and writes the global ``INPUT_SHAPE`` (read by ``create_model``).

    Parameters
    ----------
    basemodel : object with ``fit(X, y)`` and ``predict(X)``
        Model trained on the windowed data.
    clip_y : number, default 50
        Upper bound applied to the RUL target before training.
    scaler : transformer, default ``StandardScaler()``
        Template scaler; ``fit`` fits a clone of it and stores it as ``scaler_``.
    seq_length : int, default 40
        Number of consecutive rows in each window.
    """

    def __init__(self, basemodel=None, clip_y=50, scaler=StandardScaler(), seq_length=40):
        self.basemodel = basemodel
        self.clip_y = clip_y
        self.scaler = scaler
        self.seq_length = seq_length

    def fit(self, X=None, y=None):
        """Scale, window and clip the data, then fit ``basemodel`` on the result.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``unit_number`` plus the columns listed in the globals
            ``settings_cols``, ``sensors_cols`` and ``SEQ_COLS``.
        y : RUL target, row-aligned with ``X``.

        Returns
        -------
        self
        """
        from sklearn.base import clone

        # create_model() reads this global to size the network's input layer
        global INPUT_SHAPE

        # Merge features and target again so both are windowed per unit
        data = X.copy()
        data["RUL"] = y

        # Fit a private clone: the constructor default is one StandardScaler
        # instance shared by every wrapper, so fitting it in place would let
        # one wrapper overwrite the statistics used by another.
        feature_cols = settings_cols + sensors_cols
        self.scaler_ = clone(self.scaler)
        data[feature_cols] = self.scaler_.fit_transform(data[feature_cols])

        # Transform into time series
        print(">> Wrapping")
        X_train = self.gen_X_wrapper(data, self.seq_length, SEQ_COLS)

        # Explicit assignment: a chained ``data["RUL"].clip(..., inplace=True)``
        # does not modify the frame under pandas Copy-on-Write.
        data["RUL"] = data["RUL"].clip(upper=self.clip_y)
        y_train = self.gen_y_wrapper(data, self.seq_length, ["RUL"])

        INPUT_SHAPE = (X_train.shape[1], X_train.shape[2])

        self.basemodel.fit(X_train, y_train)

        return self

    def predict(self, X=None):
        """Predict with ``basemodel`` for every full window found in ``X``.

        ``X`` is scaled with the scaler fitted in ``fit`` before windowing, so
        the model sees features on the same scale it was trained on. The output
        has one prediction per window, not one per row of ``X``.
        """
        data = X.copy()
        feature_cols = settings_cols + sensors_cols
        data[feature_cols] = self.scaler_.transform(data[feature_cols])

        X_windows = self.gen_X_wrapper(data, self.seq_length, SEQ_COLS)
        return self.basemodel.predict(X_windows)

    def gen_X_data(self, df, sequence_length, columns):
        """Yield every window of ``sequence_length`` consecutive rows of ``df[columns]``.

        Nothing is yielded when ``df`` has fewer rows than ``sequence_length``.
        """
        data = df[columns].values
        num_elements = data.shape[0]

        # Last valid start index is num_elements - sequence_length (inclusive)
        for start in range(num_elements - sequence_length + 1):
            yield data[start:start + sequence_length, :]

    def gen_X_wrapper(self, df, sequence_length, columns, unit_nrs=np.array([]), idx_col="unit_number"):
        """Stack the windows of all requested units into one float32 array.

        Parameters
        ----------
        df : pandas.DataFrame containing ``idx_col`` and ``columns``.
        sequence_length : int, rows per window.
        columns : list of column names placed in each window.
        unit_nrs : array-like of unit ids; empty (default) means every unit in ``df``.
        idx_col : name of the column holding the unit id.

        Returns
        -------
        numpy.ndarray of shape (n_windows, sequence_length, len(columns)).
        Units with fewer than ``sequence_length`` rows contribute no window,
        which mirrors what ``gen_y_wrapper`` does for the targets.
        """
        unit_nrs = np.asarray(unit_nrs)
        if unit_nrs.size <= 0:
            unit_nrs = df[idx_col].unique()

        per_unit = (list(self.gen_X_data(df[df[idx_col] == unit_nr], sequence_length, columns))
                    for unit_nr in unit_nrs)
        # An empty window list would become a 1-D array and make np.concatenate
        # fail against the 3-D arrays of the other units, so drop it.
        windows = [unit_windows for unit_windows in per_unit if unit_windows]
        if not windows:
            return np.empty((0, sequence_length, len(columns)), dtype=np.float32)
        return np.concatenate(windows).astype(np.float32)

    def gen_y(self, df, sequence_length, label):
        """Return the target rows that match the windows produced by ``gen_X_data``.

        ``label`` is a list of column names, so the result is two-dimensional.
        """
        data_matrix = df[label].values
        num_elements = data_matrix.shape[0]

        # -1 because the target is the RUL of the last row in each window, not of the next row
        return data_matrix[sequence_length-1:num_elements, :]

    def gen_y_wrapper(self, df, sequence_length, label, unit_nrs=np.array([]), idx_col="unit_number"):
        """Stack the targets of all requested units into one float32 array.

        Parameters mirror ``gen_X_wrapper``; ``label`` is a list of target column names.
        """
        unit_nrs = np.asarray(unit_nrs)
        if unit_nrs.size <= 0:
            unit_nrs = df[idx_col].unique()

        label_gen = [self.gen_y(df[df[idx_col] == unit_nr], sequence_length, label)
                     for unit_nr in unit_nrs]
        label_array = np.concatenate(label_gen).astype(np.float32)
        return label_array

In [86]:
# Bare expression: it has no effect here, because only the last expression of a cell is displayed
index_cols, settings_cols, sensors_cols, cols
# Columns placed in each window: the second index column followed by all setting and sensor columns
seq_cols = [index_cols[1]]+settings_cols+sensors_cols
# Upper-case alias; LSTMWrapperRegressor reads SEQ_COLS as a global
SEQ_COLS = seq_cols

In [123]:
train3 = train.copy()

# Including indices as well
X_train_ = train3.drop(columns=["RUL"])

In [121]:
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking, TimeDistributed

def create_model():
    """Build and compile the sequence regressor.

    The network is a Masking layer (mask value -99, input shape taken from the
    global ``INPUT_SHAPE``), an LSTM with 32 units and a single-output Dense
    layer, compiled with mean squared error loss and the Adam optimizer.
    """
    print(">> Model Creation")
    layers = [
        Masking(mask_value=-99., input_shape=INPUT_SHAPE),
        LSTM(32, activation='tanh'),
        Dense(1),
    ]
    network = Sequential(layers)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

In [None]:
# Wrap a scikeras KerasRegressor (built by `create_model`) in the windowing
# wrapper: RUL is clipped at 105 and windows are 45 rows long.
model = LSTMWrapperRegressor(
        clip_y=105,
        seq_length=45,
        basemodel=
            KerasRegressor(model=create_model,
                           validation_split=0.1, 
                           epochs=10, 
                           batch_size=16,
                           )
    )
# X_train_ still contains the index columns, which the wrapper needs to window per unit
model.fit(X_train_, y_train)

>> Wrapping
>> Model Creation
Epoch 1/10


## Test Data

In [126]:
def gen_test_data(df, sequence_length, columns, mask_value):
    """Yield the single, most recent window of ``sequence_length`` rows of ``df[columns]``.

    Parameters
    ----------
    df : pandas.DataFrame with the rows to window.
    sequence_length : int, number of rows in the window.
    columns : list of column names placed in the window.
    mask_value : number used to fill the rows that are missing when ``df`` has
        fewer than ``sequence_length`` rows; the padding goes at the start.

    Yields
    ------
    numpy.ndarray of shape (sequence_length, len(columns)), exactly once.
    """
    values = df[columns].values
    num_rows = values.shape[0]

    if num_rows < sequence_length:
        # Force a floating dtype: np.full would otherwise take its dtype from
        # mask_value, and an integer mask (e.g. -99) would silently truncate
        # fractional readings when the data is copied in.
        padded_dtype = np.result_type(values.dtype, np.float32)
        data_matrix = np.full(shape=(sequence_length, len(columns)),
                              fill_value=mask_value, dtype=padded_dtype)  # pad
        data_matrix[sequence_length - num_rows:, :] = values  # fill with available data
    else:
        data_matrix = values

    # specifically yield the last possible sequence
    stop = data_matrix.shape[0]
    start = stop - sequence_length
    yield data_matrix[start:stop, :]

# OFF

In [99]:
train3 = train.copy()

X_train_ = train3.drop(columns=["RUL"])
X_train_

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,100.0,518.67,643.49,1597.98,1428.63,14.62,...,519.49,2388.26,8137.60,8.4956,0.03,397,2388,100.0,38.49,22.9735
20627,100,197,-0.0016,-0.0005,100.0,518.67,643.54,1604.50,1433.58,14.62,...,519.68,2388.22,8136.50,8.5139,0.03,395,2388,100.0,38.30,23.1594
20628,100,198,0.0004,0.0000,100.0,518.67,643.42,1602.46,1428.18,14.62,...,520.01,2388.24,8141.05,8.5646,0.03,398,2388,100.0,38.44,22.9333
20629,100,199,-0.0011,0.0003,100.0,518.67,643.23,1605.26,1426.53,14.62,...,519.67,2388.23,8139.29,8.5389,0.03,395,2388,100.0,38.29,23.0640


In [97]:
train3 = train.copy()

X_train_ = train3.drop(columns=["RUL"])
pd.DataFrame(model.fit_transform(X_train_, y_train))

KeyError: ignored

In [75]:
model.get_feature_names_out()

array(['__op_1', '__op_2', '__op_3', '__s_0', '__s_1', '__s_2', '__s_3',
       '__s_4', '__s_5', '__s_6', '__s_7', '__s_8', '__s_9', '__s_10',
       '__s_11', '__s_12', '__s_13', '__s_14', '__s_15', '__s_16',
       '__s_17', '__s_18', '__s_19', '__s_20', 'remainder__time'],
      dtype=object)

In [58]:
train3

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20,RUL
0,1,-1.565170,-0.315980,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-1.058890,-0.269071,-0.603816,-1.387779e-17,-0.781710,0.0,0.0,1.348493,1.194427,191
1,1,-1.550652,0.872722,-1.031720,0.0,0.0,-1.061780,0.211528,-0.643726,-1.776357e-15,...,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.781710,0.0,0.0,1.016528,1.236922,190
2,1,-1.536134,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0.0,0.0,0.739891,0.503423,189
3,1,-1.521616,0.324090,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.781710,0.0,0.0,0.352598,0.777792,188
4,1,-1.507098,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0.0,0.0,0.463253,1.059552,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,1.265868,-0.178822,-1.031720,0.0,0.0,1.618000,1.216258,2.188375,-1.776357e-15,...,2.278282,-0.322542,1.425294,-1.387779e-17,2.446751,0.0,0.0,-1.805173,-2.921113,4
20627,100,1.280386,-0.727453,-1.714186,0.0,0.0,1.717992,2.279706,2.738351,-1.776357e-15,...,1.722087,-0.380207,1.913240,-1.387779e-17,1.155367,0.0,0.0,-2.856395,-1.203764,3
20628,100,1.294904,0.186933,-0.008022,0.0,0.0,1.478011,1.946971,2.138377,-1.776357e-15,...,2.000184,-0.141684,3.265092,-1.387779e-17,3.092444,0.0,0.0,-2.081810,-3.292481,2
20629,100,1.309423,-0.498857,1.015677,0.0,0.0,1.098043,2.403666,1.955051,-1.776357e-15,...,1.861136,-0.233948,2.579834,-1.387779e-17,1.155367,0.0,0.0,-2.911722,-2.085072,1


In [65]:
model.get_feature_names_out()

array(['num_transformer__time', 'num_transformer__op_1',
       'num_transformer__op_2', 'num_transformer__op_3',
       'num_transformer__s_0', 'num_transformer__s_1',
       'num_transformer__s_2', 'num_transformer__s_3',
       'num_transformer__s_4', 'num_transformer__s_5',
       'num_transformer__s_6', 'num_transformer__s_7',
       'num_transformer__s_8', 'num_transformer__s_9',
       'num_transformer__s_10', 'num_transformer__s_11',
       'num_transformer__s_12', 'num_transformer__s_13',
       'num_transformer__s_14', 'num_transformer__s_15',
       'num_transformer__s_16', 'num_transformer__s_17',
       'num_transformer__s_18', 'num_transformer__s_19',
       'num_transformer__s_20'], dtype=object)

In [68]:
train3

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20,RUL
0,1,-1.565170,-0.315980,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-1.058890,-0.269071,-0.603816,-1.387779e-17,-0.781710,0.0,0.0,1.348493,1.194427,191
1,1,-1.550652,0.872722,-1.031720,0.0,0.0,-1.061780,0.211528,-0.643726,-1.776357e-15,...,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.781710,0.0,0.0,1.016528,1.236922,190
2,1,-1.536134,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0.0,0.0,0.739891,0.503423,189
3,1,-1.521616,0.324090,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.781710,0.0,0.0,0.352598,0.777792,188
4,1,-1.507098,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0.0,0.0,0.463253,1.059552,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,1.265868,-0.178822,-1.031720,0.0,0.0,1.618000,1.216258,2.188375,-1.776357e-15,...,2.278282,-0.322542,1.425294,-1.387779e-17,2.446751,0.0,0.0,-1.805173,-2.921113,4
20627,100,1.280386,-0.727453,-1.714186,0.0,0.0,1.717992,2.279706,2.738351,-1.776357e-15,...,1.722087,-0.380207,1.913240,-1.387779e-17,1.155367,0.0,0.0,-2.856395,-1.203764,3
20628,100,1.294904,0.186933,-0.008022,0.0,0.0,1.478011,1.946971,2.138377,-1.776357e-15,...,2.000184,-0.141684,3.265092,-1.387779e-17,3.092444,0.0,0.0,-2.081810,-3.292481,2
20629,100,1.309423,-0.498857,1.015677,0.0,0.0,1.098043,2.403666,1.955051,-1.776357e-15,...,1.861136,-0.233948,2.579834,-1.387779e-17,1.155367,0.0,0.0,-2.911722,-2.085072,1


In [66]:
pd.DataFrame(model.fit_transform(X_train_, y_train))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,-1.565170,-0.315980,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,0.0,0.141683,...,0.334262,-1.058890,-0.269071,-0.603816,0.0,-0.781710,0.0,0.0,1.348493,1.194427
1,-1.550652,0.872722,-1.031720,0.0,0.0,-1.061780,0.211528,-0.643726,0.0,0.141683,...,1.174899,-0.363646,-0.642845,-0.275852,0.0,-0.781710,0.0,0.0,1.016528,1.236922
2,-1.536134,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,0.0,0.141683,...,1.364721,-0.919841,-0.551629,-0.649144,0.0,-2.073094,0.0,0.0,0.739891,0.503423
3,-1.521616,0.324090,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,0.0,0.141683,...,1.961302,-0.224597,-0.520176,-1.971665,0.0,-0.781710,0.0,0.0,0.352598,0.777792
4,-1.507098,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,0.0,0.141683,...,1.052871,-0.780793,-0.521748,-0.339845,0.0,-0.136018,0.0,0.0,0.463253,1.059552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,1.265868,-0.178822,-1.031720,0.0,0.0,1.618000,1.216258,2.188375,0.0,0.141683,...,-2.607969,2.278282,-0.322542,1.425294,0.0,2.446751,0.0,0.0,-1.805173,-2.921113
20627,1.280386,-0.727453,-1.714186,0.0,0.0,1.717992,2.279706,2.738351,0.0,0.141683,...,-2.350355,1.722087,-0.380207,1.913240,0.0,1.155367,0.0,0.0,-2.856395,-1.203764
20628,1.294904,0.186933,-0.008022,0.0,0.0,1.478011,1.946971,2.138377,0.0,0.141683,...,-1.902919,2.000184,-0.141684,3.265092,0.0,3.092444,0.0,0.0,-2.081810,-3.292481
20629,1.309423,-0.498857,1.015677,0.0,0.0,1.098043,2.403666,1.955051,0.0,0.141683,...,-2.363913,1.861136,-0.233948,2.579834,0.0,1.155367,0.0,0.0,-2.911722,-2.085072


In [52]:
train3

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20,RUL
0,1,-1.565170,-0.315980,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-1.058890,-0.269071,-0.603816,-1.387779e-17,-0.781710,0.0,0.0,1.348493,1.194427,191
1,1,-1.550652,0.872722,-1.031720,0.0,0.0,-1.061780,0.211528,-0.643726,-1.776357e-15,...,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.781710,0.0,0.0,1.016528,1.236922,190
2,1,-1.536134,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0.0,0.0,0.739891,0.503423,189
3,1,-1.521616,0.324090,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.781710,0.0,0.0,0.352598,0.777792,188
4,1,-1.507098,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0.0,0.0,0.463253,1.059552,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,1.265868,-0.178822,-1.031720,0.0,0.0,1.618000,1.216258,2.188375,-1.776357e-15,...,2.278282,-0.322542,1.425294,-1.387779e-17,2.446751,0.0,0.0,-1.805173,-2.921113,4
20627,100,1.280386,-0.727453,-1.714186,0.0,0.0,1.717992,2.279706,2.738351,-1.776357e-15,...,1.722087,-0.380207,1.913240,-1.387779e-17,1.155367,0.0,0.0,-2.856395,-1.203764,3
20628,100,1.294904,0.186933,-0.008022,0.0,0.0,1.478011,1.946971,2.138377,-1.776357e-15,...,2.000184,-0.141684,3.265092,-1.387779e-17,3.092444,0.0,0.0,-2.081810,-3.292481,2
20629,100,1.309423,-0.498857,1.015677,0.0,0.0,1.098043,2.403666,1.955051,-1.776357e-15,...,1.861136,-0.233948,2.579834,-1.387779e-17,1.155367,0.0,0.0,-2.911722,-2.085072,1


In [61]:
sc.fit_transform(X_train, y_train).shape

(20631, 21)

In [46]:
train3 = train.copy()

# SCALING
sc = StandardScaler()
train3[SEQ_COLS] = sc.fit_transform(train3[SEQ_COLS])

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20,RUL
0,1,-1.565170,-0.315980,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-1.058890,-0.269071,-0.603816,-1.387779e-17,-0.781710,0.0,0.0,1.348493,1.194427,191
1,1,-1.550652,0.872722,-1.031720,0.0,0.0,-1.061780,0.211528,-0.643726,-1.776357e-15,...,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.781710,0.0,0.0,1.016528,1.236922,190
2,1,-1.536134,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0.0,0.0,0.739891,0.503423,189
3,1,-1.521616,0.324090,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.781710,0.0,0.0,0.352598,0.777792,188
4,1,-1.507098,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0.0,0.0,0.463253,1.059552,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,1.265868,-0.178822,-1.031720,0.0,0.0,1.618000,1.216258,2.188375,-1.776357e-15,...,2.278282,-0.322542,1.425294,-1.387779e-17,2.446751,0.0,0.0,-1.805173,-2.921113,4
20627,100,1.280386,-0.727453,-1.714186,0.0,0.0,1.717992,2.279706,2.738351,-1.776357e-15,...,1.722087,-0.380207,1.913240,-1.387779e-17,1.155367,0.0,0.0,-2.856395,-1.203764,3
20628,100,1.294904,0.186933,-0.008022,0.0,0.0,1.478011,1.946971,2.138377,-1.776357e-15,...,2.000184,-0.141684,3.265092,-1.387779e-17,3.092444,0.0,0.0,-2.081810,-3.292481,2
20629,100,1.309423,-0.498857,1.015677,0.0,0.0,1.098043,2.403666,1.955051,-1.776357e-15,...,1.861136,-0.233948,2.579834,-1.387779e-17,1.155367,0.0,0.0,-2.911722,-2.085072,1


In [63]:
X_train_ = train3[SEQ_COLS]

In [45]:
X2 = X_train.copy()
X2["RUL"] = y_train
X2["time"] = train["time"]
X2

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,s_9,...,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20,RUL,time
0,518.67,641.82,1589.70,1400.60,14.62,21.61,554.36,2388.06,9046.19,1.3,...,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190,191,1
1,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,2388.04,9044.07,1.3,...,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236,190,2
2,518.67,642.35,1587.99,1404.20,14.62,21.61,554.26,2388.08,9052.94,1.3,...,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189,3
3,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,2388.11,9049.48,1.3,...,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188,4
4,518.67,642.37,1582.85,1406.22,14.62,21.61,554.00,2388.06,9055.15,1.3,...,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044,187,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,518.67,643.49,1597.98,1428.63,14.62,21.61,551.43,2388.19,9065.52,1.3,...,8137.60,8.4956,0.03,397,2388,100.0,38.49,22.9735,4,196
20627,518.67,643.54,1604.50,1433.58,14.62,21.61,550.86,2388.23,9065.11,1.3,...,8136.50,8.5139,0.03,395,2388,100.0,38.30,23.1594,3,197
20628,518.67,643.42,1602.46,1428.18,14.62,21.61,550.94,2388.24,9065.90,1.3,...,8141.05,8.5646,0.03,398,2388,100.0,38.44,22.9333,2,198
20629,518.67,643.23,1605.26,1426.53,14.62,21.61,550.68,2388.25,9073.72,1.3,...,8139.29,8.5389,0.03,395,2388,100.0,38.29,23.0640,1,199


In [40]:
X_train, y_train

(          s_0     s_1      s_2      s_3    s_4    s_5     s_6      s_7  \
 0      518.67  641.82  1589.70  1400.60  14.62  21.61  554.36  2388.06   
 1      518.67  642.15  1591.82  1403.14  14.62  21.61  553.75  2388.04   
 2      518.67  642.35  1587.99  1404.20  14.62  21.61  554.26  2388.08   
 3      518.67  642.35  1582.79  1401.87  14.62  21.61  554.45  2388.11   
 4      518.67  642.37  1582.85  1406.22  14.62  21.61  554.00  2388.06   
 ...       ...     ...      ...      ...    ...    ...     ...      ...   
 20626  518.67  643.49  1597.98  1428.63  14.62  21.61  551.43  2388.19   
 20627  518.67  643.54  1604.50  1433.58  14.62  21.61  550.86  2388.23   
 20628  518.67  643.42  1602.46  1428.18  14.62  21.61  550.94  2388.24   
 20629  518.67  643.23  1605.26  1426.53  14.62  21.61  550.68  2388.25   
 20630  518.67  643.85  1600.38  1432.14  14.62  21.61  550.79  2388.26   
 
            s_8  s_9  ...    s_11     s_12     s_13    s_14  s_15  s_16  s_17  \
 0      9046.19  

In [None]:
# Regressor whose target is clipped to [0, 50] before the Keras model is fitted.
# check_inverse=False because np.clip has no inverse.
# NOTE(review): `create_model` as defined in this notebook takes no parameters,
# so the `model__degree`, `model__metrics` and `model__loss` arguments
# presumably cannot be routed to it — confirm against the scikeras docs.
base = TransformedTargetRegressor(
        check_inverse=False,
        regressor   = KerasRegressor(
                                model=create_model,verbose=0, callbacks=[es], 
                                model__degree=1,
                                validation_split=0.2, 
                                model__metrics=[RMSE(), R2()],
                                model__loss='mse'),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':50}))

## Time Series Generation

In [None]:
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
def gen_X_data(df, sequence_length, columns):
    """Yield every window of ``sequence_length`` consecutive rows of ``df[columns]``.

    Windows overlap and advance one row at a time. Nothing is yielded when
    ``df`` has fewer rows than ``sequence_length``.
    """
    data = df[columns].values
    num_elements = data.shape[0]

    # Last valid start index is num_elements - sequence_length (inclusive)
    for start in range(num_elements - sequence_length + 1):
        yield data[start:start + sequence_length, :]

def gen_X_wrapper(df, sequence_length, columns, unit_nrs=np.array([]), idx_col="unit_number"):
    """Stack the windows of all requested units into one float32 array.

    Parameters
    ----------
    df : pandas.DataFrame containing ``idx_col`` and ``columns``.
    sequence_length : int, rows per window.
    columns : list of column names placed in each window.
    unit_nrs : array-like of unit ids; empty (default) means every unit in ``df``.
    idx_col : name of the column holding the unit id.

    Returns
    -------
    numpy.ndarray of shape (n_windows, sequence_length, len(columns)).
    Units with fewer than ``sequence_length`` rows contribute no window,
    which mirrors what ``gen_y_wrapper`` does for the targets.
    """
    unit_nrs = np.asarray(unit_nrs)
    if unit_nrs.size <= 0:
        unit_nrs = df[idx_col].unique()

    per_unit = (list(gen_X_data(df[df[idx_col] == unit_nr], sequence_length, columns))
                for unit_nr in unit_nrs)
    # An empty window list would become a 1-D array and make np.concatenate
    # fail against the 3-D arrays of the other units, so drop it.
    windows = [unit_windows for unit_windows in per_unit if unit_windows]
    if not windows:
        return np.empty((0, sequence_length, len(columns)), dtype=np.float32)
    return np.concatenate(windows).astype(np.float32)

In [None]:
train3 = train.copy()
sc = StandardScaler()
train3[seq_cols] = sc.fit_transform(train3[seq_cols])
train3

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20,RUL
0,1,-1.565170,-0.315980,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-1.058890,-0.269071,-0.603816,-1.387779e-17,-0.781710,0.0,0.0,1.348493,1.194427,191
1,1,-1.550652,0.872722,-1.031720,0.0,0.0,-1.061780,0.211528,-0.643726,-1.776357e-15,...,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.781710,0.0,0.0,1.016528,1.236922,190
2,1,-1.536134,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0.0,0.0,0.739891,0.503423,189
3,1,-1.521616,0.324090,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.781710,0.0,0.0,0.352598,0.777792,188
4,1,-1.507098,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0.0,0.0,0.463253,1.059552,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,1.265868,-0.178822,-1.031720,0.0,0.0,1.618000,1.216258,2.188375,-1.776357e-15,...,2.278282,-0.322542,1.425294,-1.387779e-17,2.446751,0.0,0.0,-1.805173,-2.921113,4
20627,100,1.280386,-0.727453,-1.714186,0.0,0.0,1.717992,2.279706,2.738351,-1.776357e-15,...,1.722087,-0.380207,1.913240,-1.387779e-17,1.155367,0.0,0.0,-2.856395,-1.203764,3
20628,100,1.294904,0.186933,-0.008022,0.0,0.0,1.478011,1.946971,2.138377,-1.776357e-15,...,2.000184,-0.141684,3.265092,-1.387779e-17,3.092444,0.0,0.0,-2.081810,-3.292481,2
20629,100,1.309423,-0.498857,1.015677,0.0,0.0,1.098043,2.403666,1.955051,-1.776357e-15,...,1.861136,-0.233948,2.579834,-1.387779e-17,1.155367,0.0,0.0,-2.911722,-2.085072,1


In [None]:
def gen_y(df, sequence_length, label):
    """Return the target rows that line up with full windows of ``sequence_length`` rows.

    ``label`` is a list of column names, so the result is two-dimensional. The
    first ``sequence_length - 1`` rows are dropped because each window is
    labelled with the target of its last row, not of the row after it.
    """
    targets = df[label].values
    first_labelled_row = sequence_length - 1
    return targets[first_labelled_row:targets.shape[0], :]

def gen_y_wrapper(df, sequence_length, label, unit_nrs=np.array([]), idx_col="unit_number"):
    """Stack the window targets of all requested units into one float32 array.

    Parameters
    ----------
    df : pandas.DataFrame containing ``idx_col`` and the ``label`` columns.
    sequence_length : int, rows per window.
    label : list of target column names.
    unit_nrs : numpy array of unit ids; empty (default) means every unit in ``df``.
    idx_col : name of the column holding the unit id.
    """
    units = unit_nrs if unit_nrs.size > 0 else df[idx_col].unique()

    per_unit_targets = []
    for unit_nr in units:
        unit_rows = df[df[idx_col] == unit_nr]
        per_unit_targets.append(gen_y(unit_rows, sequence_length, label))

    return np.concatenate(per_unit_targets).astype(np.float32)

In [None]:
def gen_test_data(df, sequence_length, columns, mask_value):
    """Yield the single most recent window of ``df``, front-padded when too short.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to take the window from.
    sequence_length : int
        Number of rows in the yielded window.
    columns : list of str
        Columns that make up the window.
    mask_value : int or float
        Value written into the padding rows when ``df`` has fewer than
        ``sequence_length`` rows.

    Yields
    ------
    numpy.ndarray
        One array of shape ``(sequence_length, len(columns))`` holding the last
        ``sequence_length`` rows, with any padding placed before the data.
    """
    values = df[columns].values
    num_rows = values.shape[0]

    if num_rows < sequence_length:
        # Pad at the front. The dtype is pinned to float: np.full otherwise
        # takes its dtype from mask_value, so an integer mask such as -99
        # would create an integer array and silently truncate the data.
        data_matrix = np.full(shape=(sequence_length, len(columns)),
                              fill_value=mask_value, dtype=float)
        data_matrix[sequence_length - num_rows:, :] = values  # fill with available data
    else:
        data_matrix = values

    # Only the last possible window is produced
    yield data_matrix[data_matrix.shape[0] - sequence_length:, :]

In [None]:
# Bare tuple expression: it is not the last statement of the cell, so nothing
# is displayed; its only effect is a NameError if one of the names is undefined.
index_cols, settings_cols, sensors_cols, cols
# Model input columns: index_cols[1] followed by settings_cols and sensors_cols
seq_cols = [index_cols[1]]+settings_cols+sensors_cols
# Upper-case alias of the same list object
SEQ_COLS = seq_cols

In [None]:
# Window length: number of consecutive rows in each input sequence
SEQ_LENGTH = 50
# Columns used to build each window (the original assignment had no right-hand
# side, which is a SyntaxError); seq_cols is defined in the preceding cell.
SEQ_COLS = seq_cols

In [None]:
# Build the windowed model inputs; gen_X_wrapper and train3 are defined outside this section.
# NOTE(review): the inputs come from `train3` while the targets further down are
# built from a copy of `train` - confirm both frames hold the same rows in the same order.
X_train = gen_X_wrapper(train3,SEQ_LENGTH,seq_cols)

In [None]:
# Per-sample input shape for the network: axes 1 and 2 of X_train
INPUT_SHAPE = (X_train.shape[1],X_train.shape[2])
# Last expression of the cell, so the tuple is displayed
INPUT_SHAPE

(50, 25)

In [None]:
# Cap the RUL target at 125 on a copy, so `train` itself is left untouched.
# The clipped column is assigned back explicitly: clip(inplace=True) on
# train2['RUL'] is a chained in-place operation that does not reliably write
# through to train2 (it has no effect under pandas copy-on-write).
train2 = train.copy()
train2['RUL'] = train2['RUL'].clip(upper=125)
y_train = gen_y_wrapper(train2,SEQ_LENGTH,["RUL"])

In [None]:
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking, TimeDistributed

def create_model():
    """Build and compile a masked single-layer LSTM regressor.

    The network reads inputs of shape ``INPUT_SHAPE`` (module-level), masks
    timesteps equal to -99, feeds them to a 32-unit tanh LSTM and maps the
    result to one linear output. It is compiled with mean squared error loss
    and the 'adam' optimizer.

    Returns
    -------
    The compiled Sequential model.
    """
    layers = [
        Masking(mask_value=-99., input_shape=INPUT_SHAPE),
        LSTM(32, activation='tanh'),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
# Masked single-layer LSTM regressor: timesteps equal to -99 are masked,
# then a 32-unit tanh LSTM feeds one linear output unit.
model = Sequential([
    Masking(mask_value=-99., input_shape=INPUT_SHAPE),
    LSTM(32, activation='tanh'),
    Dense(1),
])

# Mean squared error loss with the 'adam' optimizer
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Short training run: 5 epochs, batches of 16, with validation_split=0.1
# reserving 10% of the samples for validation. The value returned by fit()
# is kept in `history`.
history = model.fit(X_train, y_train,
                    validation_split=0.1,
                    epochs=5,
                    batch_size=16)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# function to reshape features into (samples, time steps, features) 
def gen_sequence(id_df, seq_length, seq_cols):
    """Yield overlapping windows of ``seq_length`` consecutive rows of ``id_df``.

    No padding is used: a frame with ``seq_length`` rows or fewer yields
    nothing, so shorter frames have to be dropped (or padded elsewhere)
    before testing.

    Window ``k`` covers rows ``k`` .. ``k + seq_length - 1``. The number of
    windows is ``n_rows - seq_length``, i.e. the window that would end on the
    very last row is not produced.
    """
    # All selected columns of this frame as one matrix
    feature_matrix = id_df[seq_cols].values
    window_count = feature_matrix.shape[0] - seq_length

    # Example: 192 rows with seq_length = 50 gives 142 windows:
    #   rows   0 ..  49
    #   rows   1 ..  50
    #   ...
    #   rows 141 .. 190
    for first_row in range(window_count):
        yield feature_matrix[first_row:first_row + seq_length, :]



In [None]:
def gen_labels(df, sequence_length, label):
    """Return the label rows that line up with the full-length windows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the label column(s); ``gen_label_wrapper`` passes the
        rows of one unit at a time.
    sequence_length : int
        Window length. The first ``sequence_length - 1`` rows are dropped so
        that each remaining row is the label of the last row of a window.
    label : str or list of str
        Label column(s). A bare column name is accepted as well as a list;
        the result is 2-D in both cases.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max(n_rows - sequence_length + 1, 0), n_labels)``.
    """
    data_matrix = df[label].values
    if data_matrix.ndim == 1:
        # A single column name selects a Series (1-D values); keep the output
        # 2-D so the column slice below does not raise IndexError.
        data_matrix = data_matrix.reshape(-1, 1)

    # -1 because the target is the label of the window's last row, not of the next row
    return data_matrix[sequence_length - 1:, :]

def gen_label_wrapper(df, sequence_length, label, unit_nrs=np.array([]), idx_col="unit_number"):
    """Collect the ``gen_labels`` output of each unit into one float32 array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``idx_col`` and the label column(s).
    sequence_length : int
        Window length forwarded to ``gen_labels``.
    label : list of str
        Label column(s) forwarded to ``gen_labels``.
    unit_nrs : numpy.ndarray, optional
        Units to include; when empty (the default) every distinct value of
        ``idx_col`` is used.
    idx_col : str, optional
        Column identifying the unit each row belongs to.

    Returns
    -------
    numpy.ndarray
        Per-unit label arrays concatenated along axis 0, cast to float32.
    """
    # Fall back to all units when no explicit selection was given
    if not unit_nrs.size > 0:
        unit_nrs = df[idx_col].unique()

    blocks = []
    for unit_nr in unit_nrs:
        rows_of_unit = df[df[idx_col] == unit_nr]
        blocks.append(gen_labels(rows_of_unit, sequence_length, label))

    stacked = np.concatenate(blocks)
    return stacked.astype(np.float32)

In [None]:
def gen_test_data(df, sequence_length, columns, mask_value):
    """Yield the last ``sequence_length`` rows of ``df`` as a single window.

    When ``df`` has fewer than ``sequence_length`` rows, the window is padded
    at the front with ``mask_value`` and the available rows fill the end.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to take the window from.
    sequence_length : int
        Number of rows in the yielded window.
    columns : list of str
        Columns that make up the window.
    mask_value : int or float
        Padding value for the missing leading rows.

    Yields
    ------
    numpy.ndarray
        Exactly one array of shape ``(sequence_length, len(columns))``.
    """
    values = df[columns].values
    num_rows = values.shape[0]

    if num_rows < sequence_length:
        # Force a float buffer: np.full infers its dtype from mask_value, so
        # an integer mask (e.g. -99) would produce an integer array and the
        # assignment below would silently truncate fractional data.
        data_matrix = np.full(shape=(sequence_length, len(columns)),
                              fill_value=mask_value, dtype=float)  # pad
        data_matrix[sequence_length - num_rows:, :] = values  # fill with available data
    else:
        data_matrix = values

    # specifically yield the last possible sequence
    yield data_matrix[data_matrix.shape[0] - sequence_length:, :]

- Criar um transformer customizado que recebe o standard ou o minmax para transformar os dados por experimento e também redimensioná-los
- Na hora de fazer o predict no teste, transforma ele separado e usa apenas o predictor do pipeline

In [None]:
# Bare tuple expression; it is not the last statement of the cell, so nothing is displayed for it
index_cols, settings_cols, sensors_cols, cols
# Feature columns: index_cols[1] followed by settings_cols and sensors_cols
seq_cols = [index_cols[1]]+settings_cols+sensors_cols
# Last expression of the cell: displays the resulting column list
seq_cols

['time',
 'op_1',
 'op_2',
 'op_3',
 's_0',
 's_1',
 's_2',
 's_3',
 's_4',
 's_5',
 's_6',
 's_7',
 's_8',
 's_9',
 's_10',
 's_11',
 's_12',
 's_13',
 's_14',
 's_15',
 's_16',
 's_17',
 's_18',
 's_19',
 's_20']

In [None]:
# Per-unit sliding windows of 50 rows, produced lazily one unit at a time
seq_gen = (
    list(gen_sequence(train.loc[train['unit_number'] == unit_id], 50, seq_cols))
    for unit_id in train['unit_number'].unique()
)
# Consume the generator and stack every window into one float32 array
seq_array = np.concatenate([unit_windows for unit_windows in seq_gen]).astype(np.float32)
print(seq_array.shape)

(15631, 50, 25)


In [None]:
# function to generate labels
def gen_labels(id_df, seq_length, label):
    """Return the label rows of ``id_df`` from row ``seq_length`` onward.

    The first ``seq_length`` rows are discarded, so row ``k`` of the result is
    the label stored in input row ``seq_length + k``. The result therefore has
    ``n_rows - seq_length`` rows, one per window yielded by ``gen_sequence``
    for the same frame and window length.

    ``label`` must be a list of column names so that the selected values form
    a 2-D matrix, e.g. ``[[1], [4], [1], ..., [200]]`` for one label column.
    """
    labels_2d = id_df[label].values
    row_count = labels_2d.shape[0]
    # Keep every row after the first seq_length ones, all label columns
    return labels_2d[seq_length:row_count, :]

In [None]:
# One label block per unit, matching the 50-row windows built for the inputs
label_gen = [
    gen_labels(train.loc[train['unit_number'] == unit_id], 50, ['RUL'])
    for unit_id in train['unit_number'].unique()
]
# Stack the per-unit blocks into one float32 array and show its shape
label_array = np.concatenate(label_gen).astype(np.float32)
label_array.shape

(15631, 1)

In [None]:
# Display the stacked label array as the cell output
label_array

array([[141.],
       [140.],
       [139.],
       ...,
       [  2.],
       [  1.],
       [  0.]], dtype=float32)

In [None]:
# Dimensions taken from the arrays built in the previous cells
nb_features = seq_array.shape[2]
nb_out = label_array.shape[1]
seq_length = 50

# Training configuration
optim=Adam
loss='mean_squared_error'
metrics=[tf.keras.metrics.MeanSquaredError()]
learning_rate=1e-4

# Two stacked LSTM layers (100 then 50 units), each followed by 20% dropout,
# and a linear output layer with nb_out units.
model = Sequential()
model.add(LSTM(
         input_shape=(seq_length, nb_features),
         units=100,
         return_sequences=True))  # pass the full sequence on to the next LSTM
model.add(Dropout(0.2))
model.add(LSTM(
          units=50,
          return_sequences=False))  # only the final state feeds the output layer
model.add(Dropout(0.2))
model.add(Dense(units=nb_out, activation="linear"))
model.compile(loss=loss, optimizer=optim(learning_rate=learning_rate), 
                  metrics=metrics)

# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 50, 100)           50400     
                                                                 
 dropout_2 (Dropout)         (None, 50, 100)           0         
                                                                 
 lstm_3 (LSTM)               (None, 50)                30200     
                                                                 
 dropout_3 (Dropout)         (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 51        
                                                                 
Total params: 80,651
Trainable params: 80,651
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# File name handed to the ModelCheckpoint callback in the training cell
model_path = 'regression_model.h5'

In [None]:
# fit the network
# Up to 100 epochs with batches of 200 and validation_split=0.05.
# Callbacks, both monitoring val_loss in 'min' mode:
#   - EarlyStopping with patience=10 and min_delta=0
#   - ModelCheckpoint writing to model_path with save_best_only=True
history = model.fit(seq_array, label_array, epochs=100, batch_size=200, validation_split=0.05, verbose=2,
          callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min'),
                       tf.keras.callbacks.ModelCheckpoint(model_path,monitor='val_loss', save_best_only=True, mode='min', verbose=0)]
          )

# list all data in history
print(history.history.keys())

Epoch 1/100
75/75 - 9s - loss: 10223.8975 - mean_squared_error: 10223.8975 - val_loss: 9789.6182 - val_mean_squared_error: 9789.6182 - 9s/epoch - 122ms/step
Epoch 2/100
75/75 - 1s - loss: 9681.2988 - mean_squared_error: 9681.2988 - val_loss: 9228.5029 - val_mean_squared_error: 9228.5029 - 740ms/epoch - 10ms/step
Epoch 3/100
75/75 - 1s - loss: 9246.7285 - mean_squared_error: 9246.7295 - val_loss: 8916.2256 - val_mean_squared_error: 8916.2256 - 744ms/epoch - 10ms/step
Epoch 4/100
75/75 - 1s - loss: 8980.3496 - mean_squared_error: 8980.3496 - val_loss: 8706.0137 - val_mean_squared_error: 8706.0137 - 739ms/epoch - 10ms/step
Epoch 5/100
75/75 - 1s - loss: 8809.0957 - mean_squared_error: 8809.0957 - val_loss: 8564.6621 - val_mean_squared_error: 8564.6621 - 748ms/epoch - 10ms/step
Epoch 6/100
75/75 - 1s - loss: 8684.6152 - mean_squared_error: 8684.6152 - val_loss: 8457.2520 - val_mean_squared_error: 8457.2520 - 730ms/epoch - 10ms/step
Epoch 7/100
75/75 - 1s - loss: 8585.2500 - mean_squared_er

## Model Instantiation

In [None]:
def create_model(optim=Adam, layer_nodes=[16,32,64], dropout=0.1, 
                 activation="relu", learning_rate=1e-4, degree=1,
                 print_summary=False, loss='mean_squared_error',
                 metrics=[tf.keras.metrics.MeanSquaredError()]):
    """
    Build and compile a Keras MLP regressor with the specified parameters.

    Parameters
    ----------
    optim : callable
        Optimizer class; called as ``optim(learning_rate=learning_rate)``.
    layer_nodes : sequence of int
        Width of each Dense layer before the output layer; must hold at least
        one entry. The argument is only read, never modified.
    dropout : float
        Rate of the Dropout layer added after every Dense layer except the
        output layer.
    activation : str
        Activation of every Dense layer except the output layer.
    learning_rate : float or schedule
        Passed to the optimizer.
    degree : int
        Degree of the polynomial expansion the input width is sized for.
    print_summary : bool
        When true, print the model summary after compiling.
    loss : str
        Loss passed to ``compile``.
    metrics : list
        Metrics passed to ``compile``. The default list is created once, when
        the function is defined, so every model built with the default shares
        the same ``MeanSquaredError`` instance; pass a fresh list to avoid that.

    Returns
    -------
    The compiled Sequential model: Dense/Dropout pairs followed by one linear
    output unit.

    Notes
    -----
    The input width is derived from the module-level ``X_train``.
    NOTE(review): earlier cells assign a windowed 3-D array to ``X_train``,
    while the polynomial expansion presumably needs a 2-D matrix - confirm
    which ``X_train`` is in scope when this function is called.
    """
    # Only the width of the expanded feature matrix is needed, so fit the
    # expansion and read its feature count rather than transforming all of
    # X_train just to look at the shape of the result.
    input_dim = PolynomialFeatures(degree=degree, include_bias=False) \
                        .fit(X_train).n_output_features_

    model = Sequential()

    # Input Layer
    model.add(Dense(layer_nodes[0], input_dim=input_dim, activation=activation))
    model.add(Dropout(dropout))

    # Hidden Layers
    for width in layer_nodes[1:]:
        model.add(Dense(width, activation=activation))
        model.add(Dropout(dropout))

    # Output Layer
    model.add(Dense(1))

    model.compile(loss=loss, optimizer=optim(learning_rate=learning_rate),
                  metrics=metrics)
    if print_summary:
        model.summary()
    return model

## Auxiliary HyperParameters

In [None]:
# Powers of two from 16 to 512.
# NOTE(review): presumably the pool of layer widths from which `layer_nodes`
# combinations for create_model are drawn - confirm at the point of use.
layer_sizes=[16,32,64,128,256,512]

In [None]:
# Exponential Decay Schedules
# All three use decay_steps=100000; they differ in the initial learning rate
# and in the decay rate.
# ED1: starts at 1e-2, decay_rate 0.96
ED1 = ExponentialDecay(initial_learning_rate=1e-2, name="ED1",
                       decay_steps=100000, decay_rate=0.96)
# ED2: starts at 1e-2, decay_rate 0.8
ED2 = ExponentialDecay(initial_learning_rate=1e-2, name="ED2",
                       decay_steps=100000, decay_rate=0.8)
# ED3: starts at 1e-1, decay_rate 0.96
ED3 = ExponentialDecay(initial_learning_rate=1e-1, name="ED3",
                       decay_steps=100000, decay_rate=0.96)

In [None]:
# Generation of list combinations
from itertools import chain, permutations

def all_permutations(lst, size):
    """Return every ordered arrangement of ``size`` distinct positions of ``lst``.

    Parameters
    ----------
    lst : sequence
        Items to arrange.
    size : int
        Number of items in each arrangement.

    Returns
    -------
    list of list
        The arrangements in the order ``itertools.permutations`` produces
        them. Empty when ``size`` is negative, larger than ``len(lst)`` or not
        a whole number; ``[[]]`` when ``size`` is 0.
    """
    # Sizes that no permutation can have give an empty result instead of an error
    if size < 0 or size > len(lst) or size != int(size):
        return []
    # Ask for the requested length directly rather than generating the
    # permutations of every length and discarding all but one length.
    return [list(perm) for perm in permutations(lst, int(size))]