In [14]:
from opsml.registry import ModelCard, DataCard, CardRegistry
from opsml.connector import SnowflakeQueryRunner
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

### Get Sample Data

In [3]:
# Dataset configuration.
vocab_size = 500  # Keep only the 500 most frequent words (passed as num_words below)
maxlen = 100  # Every review is padded/truncated to exactly 100 tokens

# Load the IMDB reviews as integer-encoded sequences with binary sentiment labels.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)
print(f"{len(x_train)} Training sequences")
print(f"{len(x_val)} Validation sequences")

# pad_sequences is called with its defaults, which pad and truncate at the
# *start* of a sequence, so long reviews keep their last `maxlen` tokens.
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

25000 Training sequences
25000 Validation sequences


### Create Model

In [4]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization.

    Args:
        embed_dim: Size of the last axis of the input; also the output size of
            the feed-forward network so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden Dense layer in the feed-forward network.
        rate: Dropout rate applied after attention and after the feed-forward
            network.
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``,
            ``dtype``, ...). Accepting them lets ``from_config`` rebuild the
            layer from the dictionary produced by ``get_config``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        # key_dim (the per-head projection size) is set to embed_dim.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor whose last axis has size ``embed_dim``.
            training: Forwarded to both dropout layers. Defaults to ``None``
                so the layer can be called without the argument; Keras then
                decides from the surrounding call context.

        Returns:
            Tensor produced by the second layer normalization; the residual
            additions require it to have the same shape as ``inputs``.
        """
        # Self-attention: the sequence is both query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position-embedding table, i.e. the
            longest sequence the layer can embed.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Size of each embedding vector.
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``,
            ``dtype``, ...). Accepting them lets ``from_config`` rebuild the
            layer from the dictionary produced by ``get_config``.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids and add the embedding of each position.

        Args:
            x: Tensor of token ids; its last axis is treated as the sequence
                axis. NOTE(review): the length of that axis must not exceed
                ``maxlen`` for the position lookup to be valid - confirm
                callers guarantee this.

        Returns:
            Token embeddings plus position embeddings (the position term is
            broadcast over the leading axes).
        """
        # Use the actual sequence length of this input rather than the
        # constructor's maxlen, so shorter inputs are handled as well.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

### Combine model layers
- The model is deliberately kept very small so that this example runs quickly

In [5]:
# Hyperparameters - kept tiny on purpose so the example trains quickly.
embed_dim = 4  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 4  # Hidden layer size in the feed-forward network inside the transformer block

# Functional-API graph: embeddings -> transformer block -> pooling -> classifier head.
# NOTE: none of the layers is given an explicit name, so Keras auto-generates
# them in construction order (e.g. the 'dense_3' output name shown by the
# output-signature cell further down). Reordering these statements would
# change those names.
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
# Collapse the sequence axis by average pooling before the dense head.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(4, activation="relu")(x)
x = layers.Dropout(0.1)(x)
# Two-unit softmax output, one unit per class.
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

2023-02-01 21:11:43.437330: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-02-01 21:11:43.438328: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-02-01 21:11:43.438379: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (steven-dev): /proc/driver/nvidia/version does not exist
2023-02-01 21:11:43.439807: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Fit Model

In [6]:
# Integer class labels with a softmax output -> sparse categorical cross-entropy.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Short demonstration run: a single epoch limited to 20 steps of 64 samples.
# Features are cast to float32 before being handed to Keras.
history = model.fit(
    x_train.astype(np.float32),
    y_train,
    batch_size=64,
    epochs=1,
    steps_per_epoch=20,
    validation_data=(x_val.astype(np.float32), y_val),
)



### Test ModelCard

In [16]:
# Card metadata.
MODEL_NAME = "transformer"
TEAM = "SPMS"
USER_EMAIL = "steven.forrester@shipt.com"

# Ownership fields shared by both cards.
card_owner = {"team": TEAM, "user_email": USER_EMAIL}

# Data card wrapping the padded training sequences.
data_card = DataCard(name="keras_test_data", data=x_train, **card_owner)

# Model card for the fitted Keras model; one training row serves as the sample input.
# NOTE(review): training casts the features to float32, whereas this sample
# keeps x_train's own dtype - confirm which dtype the signature should carry.
model_card = ModelCard(
    name=MODEL_NAME,
    trained_model=model,
    sample_input_data=x_train[:1],
    **card_owner,
)

2023-02-01 21:14:33.618105: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-02-01 21:14:33.618338: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-02-01 21:14:33.806015: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-02-01 21:14:33.806210: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session


{"level": "INFO", "message": "Validating converted onnx model", "timestamp": "2023-02-01T21:14:34.782461Z", "app_env": "development", "host": null, "pid": 339443}
{"level": "INFO", "message": "Onnx model validated", "timestamp": "2023-02-01T21:14:34.808398Z", "app_env": "development", "host": null, "pid": 339443}


In [8]:
# Call the card's model() accessor and keep the result; the next cells read
# its input_sig and output_sig attributes.
onnx_model = model_card.model()



In [9]:
# Display the schema of the model's input signature.
onnx_model.input_sig.schema()

{'title': 'Features',
 'type': 'object',
 'properties': {'input_1': {'title': 'Input 1',
   'minItems': 100,
   'maxItems': 100,
   'type': 'array',
   'items': {'type': 'number'}}},
 'required': ['input_1']}

In [10]:
# Display the schema of the model's output signature.
onnx_model.output_sig.schema()

{'title': 'Features',
 'type': 'object',
 'properties': {'dense_3': {'title': 'Dense 3',
   'minItems': 2,
   'maxItems': 2,
   'type': 'array',
   'items': {'type': 'number'}}},
 'required': ['dense_3']}

In [17]:
# One registry handle per card type: "data" for DataCards, "model" for ModelCards.
data_registry = CardRegistry(registry_name="data")
model_registry = CardRegistry(registry_name="model")

In [18]:
# Register the data card first: its uid is read afterwards to link the model card to it.
data_registry.register_card(data_card)

{"level": "INFO", "message": "DATA_REGISTRTY: keras_test_data registered as version 1", "timestamp": "2023-02-01T21:15:05.877276Z", "app_env": "development", "host": null, "pid": 339443}


In [19]:
# Link the model card to the data card it was trained on via the data card's uid.
model_card.data_card_uid = data_card.uid

In [20]:
# Register the model card (by this point it carries the data card's uid).
model_registry.register_card(model_card)



INFO:tensorflow:Assets written to: /tmp/tmpf4egvg_4/e57d21d188da4d73bac8e7853ea1cdb7/assets


INFO:tensorflow:Assets written to: /tmp/tmpf4egvg_4/e57d21d188da4d73bac8e7853ea1cdb7/assets


{"level": "INFO", "message": "MODEL_REGISTRY: transformer registered as version 1", "timestamp": "2023-02-01T21:15:37.838463Z", "app_env": "development", "host": null, "pid": 339443}
