In [1]:
#imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import regularizers
import keras.backend.tensorflow_backend as tfb
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import random
from sklearn.model_selection import train_test_split
from tensorflow.keras import metrics 




Using TensorFlow backend.


In [2]:
#get normalized data from s3
import pandas as pd
import time
start = time.time()
print("downloading features...")
features = pd.read_csv('s3://tennessee-eastman-process-alarm-management-dataset/data/sensors_original.csv')
print(time.time()-start)
start = time.time()
print("downloading labels...")
labels = pd.read_csv('s3://tennessee-eastman-process-alarm-management-dataset/data/alarms_filtered.csv')
print(time.time()-start)

downloading features...
257.52050280570984
downloading labels...
32.83607339859009


In [3]:
import keras.backend as K
def weighted_binary_crossentropy(target, output):
    """
    Weighted binary crossentropy between an output tensor 
    and a target tensor. POS_WEIGHT is used as a multiplier 
    for the positive targets.

    Combination of the following functions:
    * keras.losses.binary_crossentropy
    * keras.backend.tensorflow_backend.binary_crossentropy
    * tf.nn.weighted_cross_entropy_with_logits
    """
    # transform back to logits
    _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
    output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
    output = tf.log(output / (1 - output))
    # compute weighted loss
    loss = tf.nn.weighted_cross_entropy_with_logits(targets=target,
                                                    logits=output,
                                                    pos_weight=POS_WEIGHT)
    return tf.reduce_mean(loss, axis=-1)

In [4]:
model = Sequential()
lookback = 90
batch_size = 128
testcases = 250000
POS_WEIGHT = 10
#LSTM 1 returns full sequence
model.add(
    LSTM(
        256, 
        input_shape=(lookback, 81),
        activation='tanh',
        recurrent_activation='sigmoid',
        stateful=False, 
        recurrent_dropout=0.2,
        return_sequences=True,
        recurrent_regularizer=regularizers.l2(l=0.001),
        kernel_regularizer=regularizers.l2(l=0.001),
        bias_regularizer=regularizers.l2(l=0.001)
    )
)

#Batch norm layer
model.add(
    BatchNormalization
    (
        axis=-1,
        momentum=0.99,
        epsilon=0.001,
        center=True,
        scale=True,
        beta_initializer="zeros",
        gamma_initializer="ones",
        moving_mean_initializer="zeros",
        moving_variance_initializer="ones",
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None
    )
)

#LSTM 2 return last output only
model.add(
    LSTM(
        256, 
        input_shape=(81, lookback),
        activation='tanh',
        recurrent_activation='sigmoid',
        stateful=False, 
        recurrent_dropout=0.2,
        return_sequences=False,
        recurrent_regularizer=regularizers.l2(l=0.001),
        kernel_regularizer=regularizers.l2(l=0.001),
        bias_regularizer=regularizers.l2(l=0.001)
    )
)

#Batch norm layer
model.add(
    BatchNormalization
    (
        axis=-1,
        momentum=0.99,
        epsilon=0.001,
        center=True,
        scale=True,
        beta_initializer="zeros",
        gamma_initializer="ones",
        moving_mean_initializer="zeros",
        moving_variance_initializer="ones",
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None
    )
)

#Hidden layers
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))

#Prediction layer
model.add(Dense(81, activation='sigmoid'))

#Compile model
model.compile(
    loss=weighted_binary_crossentropy, 
    optimizer='adam', 
    metrics=[
        metrics.Accuracy(),
        metrics.AUC(), 
        metrics.CategoricalAccuracy(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.FalseNegatives(),
        metrics.FalsePositives(),
        metrics.TrueNegatives(),
        metrics.TruePositives()
    ]
)

model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
targets is deprecated, use labels instead
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 90, 256)           346112    
_________________________________________________________________
batch_normalization (BatchNo (None, 90, 256)           1024      
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               525312    
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
dense (Dense)                (None, 256)               65792     
_________________________________________________________________
dense_1 (Dense)              (None, 12

In [5]:
#convert normalized data to sequences, shuffle, and reduce to desired testcase count
timeseries_data = []
for i in range(1,51):
    x = features[features['TEST_NO']==i].drop(columns=['Unnamed: 0', 'TEST_NO']).to_numpy()
    y = labels[labels['TEST_NO']==i].drop(columns=['Unnamed: 0', 'TEST_NO']).to_numpy()
    for state in range(lookback, len(x)):
        timeseries_data.append((x[state-lookback:state],y[state]))
random.shuffle(timeseries_data)
timeseries_data = random.sample(timeseries_data, testcases)

In [6]:
#split train and test
x_train, x_test, y_train, y_test = train_test_split(
    list(example[0] for example in timeseries_data),
    list(example[1] for example in timeseries_data),
    test_size=0.20, 
    random_state=777, 
    shuffle=True
)

In [7]:
#free up memory
del timeseries_data
del features
del labels

In [8]:
#convert train and test splits to numpy arrays
x_train = np.array(x_train, dtype=np.float16)
y_train = np.array(y_train, dtype=np.int8)
x_test = np.array(x_test, dtype=np.float16)
y_test = np.array(y_test, dtype=np.int8)

In [9]:
tf.test.is_gpu_available()

False

In [10]:
#configure hardware for training
config = tf.ConfigProto(device_count = {'GPU': 0 , 'CPU': 16} ) 
sess = tf.Session(config=config) 
keras.backend.set_session(sess)

In [11]:
#train and save model
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=20, batch_size=batch_size, verbose=1, callbacks=[es])



[2021-08-20 04:22:09.746 ip-172-16-142-200:32222 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2021-08-20 04:22:09.774 ip-172-16-142-200:32222 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 200000 samples, validate on 50000 samples
Epoch 1/20
Epoch 2/20
Epoch 00002: early stopping


In [12]:
model.save('500000t-128b-180l-20e-adam-weightedbinarycrossentropy')

In [13]:
print(history.history.keys())

dict_keys(['loss', 'accuracy', 'auc', 'categorical_accuracy', 'precision', 'recall', 'false_negatives', 'false_positives', 'true_negatives', 'true_positives', 'val_loss', 'val_accuracy', 'val_auc', 'val_categorical_accuracy', 'val_precision', 'val_recall', 'val_false_negatives', 'val_false_positives', 'val_true_negatives', 'val_true_positives'])
