In [10]:
import tensorflow as tf
from tensorflow.keras import Model

class BaseModel(Model):
    '''Base class holding the parameters and the forward pass of the autoencoder.

    It inherits from tensorflow.keras.Model and is meant to be subclassed by the
    models used for training and for inference, so that both share the same
    variable definitions and the same forward computation.
    '''
    def __init__(self):

        super(BaseModel, self).__init__()

        # Weights are drawn from a normal distribution (mean 0, stddev 0.25);
        # biases start at zero.
        self.weight_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.25)
        self.bias_initializer=tf.zeros_initializer()

    def init_variables(self):
        '''Create the weight matrices and bias vectors of the neural network.

        The layer sizes are 29 -> 20 -> 8 -> 20 -> 29. Only the three hidden
        layers get a bias vector; the output layer is a pure matrix product.
        '''

        self.W1=tf.compat.v1.get_variable('W1',shape=[29,20], initializer=self.weight_initializer, dtype=tf.float32)
        self.W2=tf.compat.v1.get_variable('W2',shape=[20,8], initializer=self.weight_initializer, dtype=tf.float32)
        self.W3=tf.compat.v1.get_variable('W3',shape=[8,20], initializer=self.weight_initializer, dtype=tf.float32)
        # Each variable needs its own name: the output weights were previously
        # registered as 'W3' as well, colliding with the third layer's weights.
        self.W4=tf.compat.v1.get_variable('W4',shape=[20,29], initializer=self.weight_initializer, dtype=tf.float32)

        self.b1=tf.compat.v1.get_variable('b1',shape=[20], initializer=self.bias_initializer, dtype=tf.float32)
        self.b2=tf.compat.v1.get_variable('b2',shape=[8], initializer=self.bias_initializer, dtype=tf.float32)
        self.b3=tf.compat.v1.get_variable('b3',shape=[20], initializer=self.bias_initializer, dtype=tf.float32)


    def forward_propagation(self, x):
        '''Compute the forward pass given the input features x.

        init_variables() must have been called before, since this method reads
        the weights W1..W4 and the biases b1..b3 from the instance.

        @param x: input features; the last dimension must be 29 so that x can
                  be multiplied with W1 (shape [29, 20])

        @return prediction: the reconstructed input features (last dimension 29)
        '''

        with tf.name_scope('feed_forward'):

            # First hidden layer (29 -> 20)
            z1=tf.linalg.matmul(x, self.W1)+self.b1
            a1=tf.nn.relu(z1)

            # Second hidden layer (20 -> 8), the bottleneck
            z2=tf.linalg.matmul(a1,self.W2)+self.b2
            a2=tf.nn.relu(z2)

            # Third hidden layer (8 -> 20)
            z3=tf.linalg.matmul(a2,self.W3)+self.b3
            a3=tf.nn.relu(z3)

            # Linear output layer (20 -> 29), no bias and no activation
            prediction=tf.linalg.matmul(a3,self.W4)

        return prediction

In [None]:
class AnomalyDetector(BaseModel):
    '''Training model of the autoencoder used for anomaly detection.

    This class is used for training only. The learned weights and biases are
    meant to be used later by the inference model to make the actual anomaly
    detection in a production environment.
    '''

    def __init__(self):

        super(AnomalyDetector, self).__init__()
        # Create W1..W4 and b1..b3 so that they show up in trainable_variables
        self.init_variables()

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)


    def compute_loss(self, x_train):
        '''Compute the MSE loss between the input and its reconstruction.

        @param x_train: input features

        @return loss: mean squared error between x_train and the output of
                      forward_propagation(x_train)
        '''

        mse = tf.keras.losses.MeanSquaredError()
        loss = mse(x_train, self.forward_propagation(x_train))

        return loss


    def train(self, x_train):
        '''Perform one optimization step of the autoencoder on a batch.

        @param x_train: training input features

        @return loss: the loss on x_train computed before the weights are updated
        '''

        # Record only the forward pass on the tape ...
        with tf.GradientTape() as tape:
            loss = self.compute_loss(x_train)

        # ... and compute/apply the gradients once the tape context is closed,
        # so that the backward pass and the optimizer update are not executed
        # inside the recording scope.
        gradients = tape.gradient(loss, self.trainable_variables)
        gradient_variables = zip(gradients, self.trainable_variables)
        self.optimizer.apply_gradients(gradient_variables)

        return loss

In [None]:
### Define some hyperparameters ###

# NOTE: this cell relies on `os`, `Performance`, `get_training_data` and
# `get_test_data` being imported/defined by cells that ran earlier.

# Number of training samples
n_training_samples=100000
# Batch size
batch_size=32
# Learning rate
# NOTE: this value is not passed to AnomalyDetector, which hard-codes its own
# Adam learning rate of 0.001 in its constructor.
learning_rate=0.001
# Number of test data samples
n_test_samples=34806
# Number of epochs
num_epoch=50
# Number of (full) batches per epoch
n_batches=n_training_samples//batch_size
# Evaluate model after number of steps
eval_after=1000
# Path of the TF Records datasets for the training.
# dirname("__file__") is the dirname of a string literal and therefore empty,
# so the path is resolved relative to the current working directory.
train_path=os.path.abspath(os.path.join(os.path.dirname("__file__"), '..', 'data/tf_records/train/'))
# Path of the TF Records datasets for the testing
test_path=os.path.abspath(os.path.join(os.path.dirname("__file__"), '..', 'data/tf_records/test/'))

# Loss value threshold
THRESHOLD=10

# Initialize the instance of the class for anomaly detection
model=AnomalyDetector()

# Initialize the instance of the class for the performance measurements
performance=Performance(THRESHOLD)

# Get the training and test datasets
training_dataset=get_training_data(train_path)
test_dataset=get_test_data(test_path)

# Iterate over the epochs
for epoch in range(num_epoch):

    # Running sum of the per-batch training losses of the current epoch
    temp_loss=0

    # Iterate over the batches
    for step, x_train in enumerate(training_dataset):

        # Extract features and labels (the labels are not used for training)
        features, labels=x_train

        # Train the model
        model.train(features)

        # Compute the loss on the same batch after the update
        loss_values=model.compute_loss(features)
        temp_loss+=loss_values

        # Evaluate the model on the test set
        if step>0 and step%eval_after==0:

            # Iterate over the test dataset
            for x_test in test_dataset:

                # Extract the features and labels; a separate name is used so
                # that the training batch in `features` is not overwritten
                test_features, label=x_test

                label=label.numpy()

                # Compute the loss
                test_loss=model.compute_loss(test_features)

                # Evaluate the test data sample
                performance.eval_prediciton(test_loss, label)


            # `step` is zero-based, so step+1 batch losses have been summed up
            print('epoch_nr: %d, batch: %d/%d, mse_loss: %.3f'%(epoch, step, n_batches, (temp_loss/(step+1))))

            # Plot ROC curve and show other evaluation metrics
            performance.evaluate_model()
            performance.reset()

In [None]:
# Absolute, machine-specific location of the raw measurements CSV export.
raw_csv_path = 'C:\\Users\\en89912\\Downloads\\machine_measurements.csv'

# Load the complete CSV file into a DataFrame.
df_raw = pd.read_csv(raw_csv_path)

In [6]:
# Render the raw frame as rich output to inspect its columns and first/last rows.
display(df_raw)

Unnamed: 0,OperationType,TimeKey,QuadKey,OrgId,SessionGuid,EpochSequenceNumber,Timestamp,Timestamp_ms,Latitude_dd,Longitude_dd,...,ImageControllerEthernetLinkStatus,ImageControllerPreviousShutdownReason,HarvestMoisture_prcnt,HarvestWetMassFlow_kg_per_sec,ArticulationAngle_arcdeg,BrakePedalPosition_prcnt,TransmissionGearTarget_gear,IVTAutoModeState,IdleTime_hr,AutomaticTemperatureControlAutomaticModeRequest
0,Unknown,20221004,3000,273019,db629ac2-66a0-f440-aadc-365f61b3c80a,1,2022-10-04T16:02:49.671Z,1664899369671,,,...,,,,,,,,,,
1,Unknown,20221004,3000,273019,db629ac2-66a0-f440-aadc-365f61b3c80a,2,2022-10-04T16:03:19.672Z,1664899399672,0.000000,0.000000,...,,,,,,,,,,
2,Unknown,20221004,3000,273019,db629ac2-66a0-f440-aadc-365f61b3c80a,2,2022-10-04T16:03:20.672Z,1664899400672,0.000000,0.000000,...,,,,,,,,,,
3,Unknown,20221004,3000,273019,db629ac2-66a0-f440-aadc-365f61b3c80a,2,2022-10-04T16:03:21.672Z,1664899401672,0.000000,0.000000,...,,,,,,,,,,
4,Unknown,20221004,3000,273019,db629ac2-66a0-f440-aadc-365f61b3c80a,2,2022-10-04T16:03:22.672Z,1664899402672,0.000000,0.000000,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,Unknown,20221006,213332301,275130,9ac17135-41ac-fa42-9edf-b3735f57ec14,359,2022-10-06T22:09:29.720Z,1665094169720,41.775609,-93.750092,...,,,,,,,,,,
99996,Unknown,20221006,213332301,275130,9ac17135-41ac-fa42-9edf-b3735f57ec14,359,2022-10-06T22:09:30.720Z,1665094170720,41.775621,-93.750092,...,,,,,,,,,,
99997,Unknown,20221006,213332301,275130,9ac17135-41ac-fa42-9edf-b3735f57ec14,359,2022-10-06T22:09:31.720Z,1665094171720,41.775634,-93.750092,...,,,,,,,,,,
99998,Unknown,20221006,213332301,275130,9ac17135-41ac-fa42-9edf-b3735f57ec14,359,2022-10-06T22:09:32.720Z,1665094172720,41.775646,-93.750092,...,,,,,,,,,,


In [8]:
# Integer and floating-point dtypes that are kept as numeric columns.
numerics = [
    'int16', 'int32', 'int64',
    'float16', 'float32', 'float64',
]

# New frame containing only the columns of df_raw with one of the dtypes above.
df = df_raw.select_dtypes(include=numerics)

In [9]:
# Show which columns survived the numeric dtype selection.
print(df.columns)

Index(['TimeKey', 'QuadKey', 'OrgId', 'EpochSequenceNumber', 'Timestamp_ms',
       'Latitude_dd', 'Longitude_dd', 'Heading_ad', 'LoadedTimestamp_ms',
       'VersionTimestamp_ms',
       ...
       'ImageControllerEthernetLinkStatus',
       'ImageControllerPreviousShutdownReason', 'HarvestMoisture_prcnt',
       'HarvestWetMassFlow_kg_per_sec', 'ArticulationAngle_arcdeg',
       'BrakePedalPosition_prcnt', 'TransmissionGearTarget_gear',
       'IVTAutoModeState', 'IdleTime_hr',
       'AutomaticTemperatureControlAutomaticModeRequest'],
      dtype='object', length=556)


In [12]:
# Configure the autoencoder for training with the Adam optimizer and a
# mean-squared-error loss.
# NOTE(review): `autoencoder` and `losses` are not defined in the cells shown
# here — confirm they are created/imported before this cell is run.
autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())

In [None]:
# Train the autoencoder to reconstruct its input: `df` is passed both as the
# input and as the target, and `x_test` plays the same double role for validation.
# NOTE(review): `x_test` is not defined in the cells shown here — confirm it
# exists before this cell is run.
# NOTE(review): the displayed rows of df_raw show empty values in columns that
# are also part of `df` — confirm missing values are handled before fitting.
autoencoder.fit(df, df,
                epochs=10,
                shuffle=True,
                validation_data=(x_test, x_test))

In [None]:
# Run the test data through the encoder, then feed the encoded result through
# the decoder; both results are converted to NumPy arrays.
# NOTE(review): assumes `autoencoder` exposes `encoder` and `decoder` callables
# whose results have a `.numpy()` method — confirm against the model definition.
encoded_imgs = autoencoder.encoder(x_test).numpy()
decoded_imgs = autoencoder.decoder(encoded_imgs).numpy()