# MinIO - Deploy Models (BERT and NeuralLog) to KServe

In [None]:
!pip install pydantic
!pip3 install "ray[serve]==1.10.0"
!pip install kserve

In [42]:
import os
from kubernetes import client
from kserve import KServeClient
from kserve import constants
from kserve import V1beta1PredictorSpec
from kserve import V1beta1TFServingSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService

In [43]:
# Shared KServe client, built with the constructor's default arguments; the
# helper functions and the final wait cell in this notebook all go through it.
kserve = KServeClient()

In [44]:
# Deployment settings. Every value can be overridden through the environment
# variable named in the corresponding os.getenv call; the second argument is
# the default used when that variable is not set.
bucket = os.getenv("BUCKET_NAME", "ml-data")
S3_SERVICE_ACCOUNT = os.getenv("S3_SERVICE_ACCOUNT", "minio-sa")
NAMESPACE = os.getenv("NAMESPACE", 'kubeflow-user-example-com')
BERT_INFERENCE_NAME = os.getenv("BERT_INFERENCE_NAME", 'bert-minio')
LOG_ANOMOLY_INFERENCE_NAME = os.getenv("LOG_ANOMOLY_INFERENCE_NAME", 'log-bert-minio')
# NOTE: the override variable for this setting is S3_BERT_MODEL_URL (not
# BERT_MODEL_URL); the name is kept as-is for backward compatibility.
# The default must have a "/" between the bucket and the key prefix, exactly
# like LOG_ANOMOLY_MODEL_URL below; without it the URL points at a bucket
# named "<bucket>logs".
BERT_MODEL_URL = os.getenv("S3_BERT_MODEL_URL", f"s3://{bucket}/logs/saved_models/tfs/bert_model/")
LOG_ANOMOLY_MODEL_URL = os.getenv("LOG_ANOMOLY_MODEL_URL", f"s3://{bucket}/logs/saved_models/tfs/log_model")
TF_RUNTIME_VERSION = os.getenv("TF_RUNTIME_VERSION", "2.8.0")

## Helper Functions

In [1]:
# CHECK IF SERVICE EXISTS
def service_exists(namespace, inference_name):
    """Report whether an inference service with the given name is listed.

    Asks the shared ``kserve`` client for everything in ``namespace`` and
    scans the entries stored under the ``'items'`` key of the response.

    Args:
        namespace: Namespace passed to ``kserve.get``.
        inference_name: Value compared against each entry's
            ``['metadata']['name']``.

    Returns:
        True if some listed entry carries that name, otherwise False.
    """
    listing = kserve.get(namespace=namespace)
    return any(
        entry['metadata']['name'] == inference_name
        for key, entries in listing.items()
        if key == 'items'
        for entry in entries
    )


# CREATE INFERENCE SERVICE
def create_inference_service(service_account, storage_url, runtime_version, namespace, name):
    """Build a TensorFlow-serving InferenceService and submit it via ``kserve.create``.

    Args:
        service_account: Set as the predictor's ``service_account_name``.
        storage_url: Set as the TF serving spec's ``storage_uri``.
        runtime_version: Set as the TF serving spec's ``runtime_version``.
        namespace: Namespace recorded in the service metadata.
        name: Name recorded in the service metadata. It is also used as a
            label key with the value "yes"; ``restart_service`` selects pods
            on that same ``<name>=yes`` label.
    """
    # MODEL SPECIFICATION
    tf_serving = V1beta1TFServingSpec(
        storage_uri=storage_url,
        runtime_version=runtime_version,
    )
    predictor = V1beta1PredictorSpec(
        service_account_name=service_account,
        tensorflow=tf_serving,
    )
    model_spec = V1beta1InferenceServiceSpec(predictor=predictor)

    # INFERENCE SERVICE SPECIFICATION
    metadata = client.V1ObjectMeta(
        name=name,
        namespace=namespace,
        labels={name: "yes"},
    )
    inference_service = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=metadata,
        spec=model_spec,
    )
    kserve.create(inference_service)
    
    
# RESTART INFERENCE SERVICE PODS
def restart_service(namespace, service_name):
    """Delete every pod in ``namespace`` labelled ``<service_name>=yes``.

    The label matches the one ``create_inference_service`` puts on the
    service metadata. This function only issues the deletes; presumably the
    cluster then recreates the pods (which would reload the model) -- confirm
    against the deployment's behaviour.

    Args:
        namespace: Namespace whose pods are listed.
        service_name: Inference service name, used as the label key.
    """
    core_api = client.CoreV1Api()
    selector = f"{service_name}=yes"
    matching_pods = core_api.list_namespaced_pod(namespace, label_selector=selector, watch=False)
    for pod in matching_pods.items:
        pod_meta = pod.metadata
        core_api.delete_namespaced_pod(pod_meta.name, pod_meta.namespace)

## Deploy BERT Embeddings model

In [59]:
# CREATE THE BERT SERVICE WHEN IT IS MISSING; OTHERWISE RUN restart_service ON IT
bert_inference_exists = service_exists(NAMESPACE, BERT_INFERENCE_NAME)
if not bert_inference_exists:
    create_inference_service(
        service_account=S3_SERVICE_ACCOUNT,
        storage_url=BERT_MODEL_URL,
        runtime_version=TF_RUNTIME_VERSION,
        namespace=NAMESPACE,
        name=BERT_INFERENCE_NAME,
    )
else:
    restart_service(NAMESPACE, BERT_INFERENCE_NAME)

## Deploy the trained Log Anomaly model

In [65]:
# CREATE THE LOG ANOMALY SERVICE WHEN IT IS MISSING; OTHERWISE RUN restart_service ON IT
log_inference_exists = service_exists(NAMESPACE, LOG_ANOMOLY_INFERENCE_NAME)
if not log_inference_exists:
    create_inference_service(
        service_account=S3_SERVICE_ACCOUNT,
        storage_url=LOG_ANOMOLY_MODEL_URL,
        runtime_version=TF_RUNTIME_VERSION,
        namespace=NAMESPACE,
        name=LOG_ANOMOLY_INFERENCE_NAME,
    )
else:
    restart_service(NAMESPACE, LOG_ANOMOLY_INFERENCE_NAME)

## Wait until both services are ready

In [66]:
# Wait on each inference service in turn: BERT first, then the log anomaly model.
for isvc_name in (BERT_INFERENCE_NAME, LOG_ANOMOLY_INFERENCE_NAME):
    kserve.wait_isvc_ready(isvc_name, namespace=NAMESPACE)