# MinIO - Prepare Models for Deployment (NeuralLog and BERT Embedding Model)

In [32]:
# Install the packages listed in requirements.txt. Both stdout and stderr are
# redirected to /dev/null, so installation errors are not shown in the notebook.
!pip install -r requirements.txt > /dev/null 2>&1

In [48]:
import os

import fsspec
import s3fs
import pandas as pd
import tensorflow as tf

In [49]:
# Bucket name and MinIO endpoint, each overridable through an environment
# variable and falling back to the literal default otherwise.
bucket = os.environ.get("BUCKET_NAME", "ml-data")
minio_endpoint = os.environ.get("S3_ENDPOINT", "https://YOUR_MINIO_SERVER")

In [54]:

# Set fsspec's configuration to a single "s3" entry carrying the MinIO
# credentials and endpoint, then create the S3 filesystem handle used by the
# cells below.
# NOTE(review): the fallback key/secret are hard-coded literals; confirm they
# are only meant for local or demo deployments.
_s3_client_kwargs = {
    "endpoint_url": os.getenv("S3_ENDPOINT", minio_endpoint),
}
_s3_settings = {
    "key": os.getenv("AWS_ACCESS_KEY_ID", "console"),
    "secret": os.getenv("AWS_SECRET_ACCESS_KEY", "console123"),
    "client_kwargs": _s3_client_kwargs,
}
fsspec.config.conf = {"s3": _s3_settings}
s3 = s3fs.S3FileSystem()

## BERT Embedding model

In [55]:
# Export BERT as a TensorFlow SavedModel and upload it to the bucket, unless
# an entry named "bert_model" is already listed under logs/saved_models/tfs/.
try:
    models_list = s3.ls(f"{bucket}/logs/saved_models/tfs/")
except FileNotFoundError:
    # Nothing exists under this prefix yet (e.g. the very first run). Treat
    # the listing as empty so the export below runs instead of crashing here.
    models_list = []

# Only the last path component is compared; a trailing "/" is stripped so a
# directory-style entry still matches.
bert_model_exists = any(
    model_path.rstrip("/").split("/")[-1] == 'bert_model'
    for model_path in models_list
)

if not bert_model_exists:
    # Imported lazily: transformers is only needed when the export has to run.
    from transformers import TFBertModel

    # Copy the checkpoint stored in the bucket into ./bert-base-uncased.
    model_remote_path = f"s3://{bucket}/logs/saved_models/bert-base-uncased/"
    s3.download(rpath=model_remote_path, lpath="bert-base-uncased", recursive=True)
    # NOTE(review): presumably resolves to the directory downloaded above
    # rather than fetching from the model hub -- confirm.
    bert_model = TFBertModel.from_pretrained("bert-base-uncased")
    # Written locally under bert_model/1 before the upload.
    tf.saved_model.save(bert_model, 'bert_model/1')

    s3.put('bert_model/', f"{bucket}/logs/saved_models/tfs", recursive=True)

## Anomaly Model

In [88]:
# List the entries currently stored under the anomaly model's prefix.
# detail=False asks for plain path strings, which the version parsing in a
# later cell relies on.
model_list = s3.ls(
    f"{bucket}/logs/saved_models/tfs/log_model",
    detail=False,
)

In [80]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.utils import Sequence
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from official.nlp import optimization
from sklearn.utils import shuffle


class TransformerBlock(layers.Layer):
    """Self-attention followed by a two-layer feed-forward network.

    Each of the two stages is followed by dropout, a residual addition and
    layer normalization.

    Args:
        embed_dim: Output width of the feed-forward network; also used as the
            ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) feed-forward layer.
        rate: Dropout rate used by both dropout layers.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        # Sub-layers are assigned in the same order as before so that the
        # layer's weight ordering is unchanged.
        self.att = layers.MultiHeadAttention(key_dim=embed_dim, num_heads=num_heads)
        feed_forward_layers = [
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.ffn = keras.Sequential(feed_forward_layers)
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        """Apply the block to ``inputs``; ``training`` is passed to both dropouts."""
        # Attention stage: the input is used as both query and value.
        attended = self.dropout1(self.att(inputs, inputs), training=training)
        residual = self.layernorm1(inputs + attended)
        # Feed-forward stage on the normalized residual.
        projected = self.dropout2(self.ffn(residual), training=training)
        return self.layernorm2(residual + projected)
    
    
def transformer_classifer(embed_dim, ff_dim, max_len, num_heads, dropout=0.1):
    """Build the two-class transformer classifier as a Keras functional model.

    The model takes an input of shape ``(max_len, embed_dim)`` named
    ``input_1``, adds positional encodings, applies one ``TransformerBlock``,
    average-pools over the sequence axis and classifies with a 32-unit ReLU
    layer followed by a 2-unit softmax.

    Args:
        embed_dim: Width of each input vector.
        ff_dim: Hidden width of the transformer block's feed-forward network.
        max_len: Sequence length of the input (``None`` is passed through to
            ``layers.Input`` unchanged).
        num_heads: Number of attention heads in the transformer block.
        dropout: Rate of the two dropout layers in the classification head.
            The transformer block is built with its own default rate.

    Returns:
        An uncompiled ``keras.Model``.
    """
    inputs = layers.Input(shape=(max_len, embed_dim), name='input_1')
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    # The positional table needs at least max_len rows: PositionEmbedding
    # slices it to the input length, and a shorter table cannot be added to
    # the input. 100 stays as the minimum so the table is unchanged for
    # max_len <= 100 (and for max_len=None).
    table_len = 100 if max_len is None else max(100, max_len)
    embedding_layer = PositionEmbedding(table_len, 2000, embed_dim)
    x = embedding_layer(inputs)
    x = transformer_block(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(32, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(2, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

def get_angles(pos, i, d_model):
    """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``.

    Args:
        pos: Position index or array of position indices.
        i: Dimension index or array of dimension indices; indices ``2k`` and
            ``2k + 1`` share the same rate because of the floor division.
        d_model: Total number of dimensions used to scale the exponent.

    Returns:
        The product of ``pos`` and the per-dimension rate, broadcast by NumPy.
    """
    exponent = (2 * (i // 2)) / np.float32(d_model)
    return pos * (1 / np.power(10000, exponent))

def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table.

    Args:
        position: Number of positions (rows) in the table.
        d_model: Number of dimensions (columns) in the table.

    Returns:
        A ``tf.float32`` tensor of shape ``(1, position, d_model)``.
    """
    positions = np.arange(position)[:, np.newaxis]
    dimensions = np.arange(d_model)[np.newaxis, :]
    table = get_angles(positions, dimensions, d_model)

    # Even columns (2i) hold the sine of the angle.
    table[:, 0::2] = np.sin(table[:, 0::2])

    # Odd columns (2i+1) hold the cosine of the angle.
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Add a leading length-1 axis before converting to a tensor.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

class PositionEmbedding(layers.Layer):
    """Add a precomputed positional encoding to the input.

    Args:
        max_len: Number of positions in the precomputed encoding table.
        vocab_size: ``input_dim`` of the token embedding layer.
        embed_dim: Width of the encoding table and of the token embedding.
    """

    def __init__(self, max_len, vocab_size, embed_dim):
        super().__init__()
        # Created here but not applied in call(): the input is used as given.
        self.token_emb = layers.Embedding(output_dim=embed_dim, input_dim=vocab_size)
        self.pos_encoding = positional_encoding(max_len, embed_dim)

    def call(self, x):
        """Return ``x`` plus the first ``tf.shape(x)[1]`` rows of the table."""
        length = tf.shape(x)[1]
        return x + self.pos_encoding[:, :length, :]

In [81]:
def load_model(path):
    """Rebuild the classifier, load its weights from ``path`` and compile it.

    The architecture is fixed here: 768-wide inputs, sequence length 75,
    12 attention heads, feed-forward width 2048 and dropout 0.1.

    Args:
        path: Location of the weights file passed to ``load_weights``.

    Returns:
        The compiled Keras model.
    """
    init_lr = 3e-4
    # NOTE(review): tfa.optimizers.AdamW takes weight_decay as its first
    # positional parameter, so this value is presumably used as the weight
    # decay rather than the learning rate -- confirm the intent.
    adamw = tfa.optimizers.AdamW(init_lr)
    classifier = transformer_classifer(
        768,
        ff_dim=2048,
        max_len=75,
        num_heads=12,
        dropout=0.1,
    )
    classifier.load_weights(path)
    classifier.compile(
        optimizer=adamw,
        loss=SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return classifier

In [82]:
# Build the anomaly classifier from the weights file at this relative path.
model = load_model(path="bert_hdfs_weights.hdf5")

In [89]:
# Derive the next version number from the entries listed in model_list.
# Versions are compared numerically: in a listing sorted as strings "9" comes
# after "10", so taking the last entry would eventually reuse an existing
# version. Entries whose last path component is not a number are ignored.
existing_versions = [
    int(name)
    for name in (entry.rstrip("/").split("/")[-1] for entry in model_list)
    if name.isdecimal()
]
# With no numeric entries, version is 0 so that numbering starts at "1".
version = max(existing_versions, default=0)
new_version = f"{version + 1}"

In [90]:
# Write the model as a SavedModel into a local directory named after the
# new version number.
tf.saved_model.save(obj=model, export_dir=f'log_model/{new_version}')





INFO:tensorflow:Assets written to: log_model/5/assets


INFO:tensorflow:Assets written to: log_model/5/assets


In [92]:
# Recursively upload the local log_model directory to the bucket.
s3.put(lpath='log_model/', rpath=f"{bucket}/logs/saved_models/tfs", recursive=True)

[None, None, None, None, None, None, None, None, None, None, None, None]