# Train Anomaly Model

## Initialize Parameters from Data-processing notebook

In [None]:
!pip install -r requirements.txt

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.utils import Sequence
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from official.nlp import optimization
from sklearn.utils import shuffle

In [None]:
# Load the pre-processed dataset archive produced outside this notebook.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# so this path must only ever point at an archive from a trusted source.
np_data = np.load('../../NeuralLog/demo/processed_data.npz', allow_pickle=True)
# Test split: sequences and their labels, consumed by the evaluation cell.
x_te = np_data['x_test']
y_te = np_data['y_test']
# Size of each embedding vector; BatchGenerator reads this module-level name.
embed_dim = 768
# Number of time steps every sequence is truncated/padded to; BatchGenerator
# reads this module-level name as well.
max_len = 75
# Not referenced by any cell visible in this notebook.
fine_tune_model_remote_path = None
# Number of sequences per batch.
batch_size = 64
# Not referenced by any cell visible in this notebook.
epochs = 1

## Helper Functions for Model Training

### Create positional embedding layer (Custom Layer)

In [None]:
def get_angles(pos, i, d_model):
    """Return the sinusoid arguments ``pos / 10000 ** (2 * (i // 2) / d_model)``.

    Args:
        pos: Position index (scalar or array).
        i: Embedding-dimension index (scalar or array); consecutive even/odd
            indices share the same rate because of the ``i // 2``.
        d_model: Embedding width used to scale the exponent.

    Returns:
        ``pos`` multiplied by the per-dimension angle rate, broadcast by NumPy.
    """
    exponent = (2 * (i // 2)) / np.float32(d_model)
    rate_per_dim = 1 / np.power(10000, exponent)
    return pos * rate_per_dim

def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table as a float32 tensor.

    Args:
        position: Number of positions (rows) to generate.
        d_model: Number of embedding dimensions (columns).

    Returns:
        A ``tf.float32`` tensor of shape ``(1, position, d_model)`` holding the
        sine of the angle in even columns and the cosine in odd columns.
    """
    row_index = np.arange(position)[:, np.newaxis]
    col_index = np.arange(d_model)[np.newaxis, :]
    angles = get_angles(row_index, col_index, d_model)

    # Even columns (2i) take the sine, odd columns (2i+1) take the cosine.
    is_even_column = col_index % 2 == 0
    table = np.where(is_even_column, np.sin(angles), np.cos(angles))

    # Leading axis of size 1 so the table broadcasts over a batch dimension.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

class PositionEmbedding(layers.Layer):
    """Add a fixed sinusoidal positional encoding to its input.

    The input is not looked up in an embedding table: ``call`` only adds the
    precomputed encoding to ``x``. The ``token_emb`` layer is constructed for
    backward compatibility but is never applied.

    Args:
        max_len: Number of positions in the precomputed encoding table.
        vocab_size: ``input_dim`` of the (unused) ``token_emb`` layer.
        embed_dim: Width of the encoding and of ``token_emb``'s output.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from its config.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_encoding = positional_encoding(max_len, embed_dim)

    def call(self, x):
        """Return ``x`` plus the encoding for its first ``shape[1]`` positions."""
        # Axis 1 is treated as the sequence axis; the table is cut to that length.
        seq_len = tf.shape(x)[1]
        return x + self.pos_encoding[:, :seq_len, :]

    def get_config(self):
        """Return the constructor arguments so Keras can serialize the layer."""
        config = super().get_config()
        config.update({
            "max_len": self.max_len,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

### Batch generator class 

In [None]:
class BatchGenerator(Sequence):
    """Keras ``Sequence`` that yields fixed-size, zero-padded batches.

    Each sample is cut to its last ``max_len`` steps and left-padded with zero
    rows up to ``max_len``. ``max_len`` and ``embed_dim`` are read from the
    module-level names of the same spelling.

    Args:
        X: Indexable collection of sequences; each is expected to convert to a
            ``(steps, embed_dim)`` array.
        Y: Indexable collection of labels aligned with ``X``.
        batch_size: Maximum number of samples per batch.
    """

    def __init__(self, X, Y, batch_size):
        # Let the Keras base class run its own initialisation, if it has any.
        super().__init__()
        self.X, self.Y = X, Y
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.X) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return ``(features, labels)`` for batch number ``idx``.

        Returns:
            ``features`` of shape ``(n, max_len, embed_dim)`` and ``labels`` of
            shape ``(n,)``, where ``n`` is the number of samples in the batch.

        Raises:
            ValueError: If a non-empty sample is not ``(steps, embed_dim)``.
        """
        start = idx * self.batch_size
        stop = min(start + self.batch_size, len(self.X))
        count = max(stop - start, 0)

        features = np.zeros((count, max_len, embed_dim))
        # Two label columns are kept so a scalar or a 2-element label both fit;
        # only column 0 is returned.
        labels = np.zeros((count, 2))

        for row, sample_index in enumerate(range(start, stop)):
            steps = np.asarray(self.X[sample_index])[-max_len:]
            n_steps = len(steps)
            # An empty sample simply stays an all-zero (fully padded) row.
            if n_steps:
                if steps.ndim != 2 or steps.shape[1] != embed_dim:
                    raise ValueError(
                        f"sample {sample_index} has shape {steps.shape}; "
                        f"expected (steps, {embed_dim})"
                    )
                # Writing into the tail of the zero buffer is the left-padding.
                features[row, max_len - n_steps:] = steps
            labels[row] = self.Y[sample_index]

        return features, labels[:, 0]

### Anomaly Model

In [None]:
class TransformerBlock(layers.Layer):
    """Self-attention block: attention and feed-forward, each with a residual.

    Each sub-layer's output goes through dropout, is added to the sub-layer's
    input and is then layer-normalised.

    Args:
        embed_dim: Output width of the feed-forward network; also passed as
            ``key_dim`` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate used after both sub-layers.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from its config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        # NOTE(review): key_dim is the full embed_dim rather than
        # embed_dim // num_heads. Changing it would alter the weight shapes, so
        # it is left as is for compatibility with existing checkpoints.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim), ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to both dropout layers; ``None`` leaves the
                choice to Keras.

        Returns:
            The layer-normalised output of the feed-forward sub-layer.
        """
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so Keras can serialize the layer."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config
    
    
def transformer_classifer(embed_dim, ff_dim, max_len, num_heads, dropout=0.1):
    """Build the two-class transformer classifier over embedded sequences.

    The network is: positional encoding, one ``TransformerBlock``, global
    average pooling, dropout, ``Dense(32, relu)``, dropout and a 2-unit softmax.

    Args:
        embed_dim: Width of each input vector.
        ff_dim: Hidden width of the transformer block's feed-forward network.
        max_len: Number of time steps in every input sequence.
        num_heads: Number of attention heads in the transformer block.
        dropout: Dropout rate of the two classifier-head dropout layers.

    Returns:
        An uncompiled ``keras.Model`` taking ``(max_len, embed_dim)`` inputs and
        producing a 2-way softmax.
    """
    inputs = layers.Input(shape=(max_len, embed_dim))
    # NOTE(review): the block keeps its own default dropout rate; `dropout`
    # only controls the classifier head below.
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    # The encoding table is sized from max_len: a fixed length of 100 broke the
    # broadcast add for longer inputs. The values for each position do not
    # depend on the table length, so shorter inputs get the same encoding.
    # The 2000 only sizes PositionEmbedding's unused token embedding.
    embedding_layer = PositionEmbedding(max_len, 2000, embed_dim)
    x = embedding_layer(inputs)
    x = transformer_block(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(32, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(2, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

## Helper functions for evaluating the model

In [None]:
def load_model(path):
    """Rebuild the classifier, load its weights from ``path`` and compile it.

    The architecture is fixed here (768-wide embeddings, 75 steps, 12 heads,
    feed-forward width 2048) and must match the checkpoint being loaded.

    Args:
        path: Location of the saved weights, passed to ``model.load_weights``.

    Returns:
        The compiled model, with sparse categorical cross-entropy loss and an
        accuracy metric.
    """
    init_lr = 3e-4
    # NOTE(review): tfa's AdamW presumably takes weight_decay as its first
    # positional argument, in which case init_lr is used as the weight decay
    # and the learning rate stays at the library default. This does not matter
    # for evaluation; confirm before using the model for further training.
    optimizer = tfa.optimizers.AdamW(init_lr)
    model = transformer_classifer(768, ff_dim=2048, max_len=75, num_heads=12, dropout=0.1)
    loss_object = SparseCategoricalCrossentropy()
    model.load_weights(path)
    model.compile(loss=loss_object, metrics=['accuracy'],
                  optimizer=optimizer)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model

## Evaluate the Model

In [None]:
# Rebuild the classifier and load its weights from the checkpoint file in the
# current working directory.
model = load_model("hdfs_transformer.hdf5")

In [None]:
# Evaluate on the test split. The notebook-level batch_size parameter is used
# instead of a repeated literal so that changing the parameter takes effect.
# score holds the loss followed by the compiled metrics.
score = model.evaluate(BatchGenerator(x_te, y_te, batch_size=batch_size))

In [None]:
# model.save("saved_model")
# new_model = keras.models.load_model('saved_model', custom_objects={'TransformerBlock': TransformerBlock, "PositionEmbedding": PositionEmbedding})

In [None]:
# Package the evaluation results as name/value records: score[0] is reported as
# the loss and score[1] as the accuracy.
metrics = {
    'metrics': [
        {"name": metric_name, "numberValue": metric_value}
        for metric_name, metric_value in (
            ("evaluation_loss", score[0]),
            ("evaluation_accuracy", score[1]),
        )
    ]}

In [None]:
import json

# Serialise the metrics to a JSON string first, then write it to
# mlpipeline-metrics.json in the working directory in a single call.
with open('mlpipeline-metrics.json', 'w') as f:
    f.write(json.dumps(metrics))