In [None]:
# Project Model-Mesh Serving Sprint 3 Demo Script

### Local Setup (Run this before the demo)

In [None]:
# Switch to the install configuration directory. The later cells use relative
# paths (rbac, crd, controller, runtimes) and are presumably meant to run from here.
# NOTE(review): the path is relative, so re-running this cell changes directory again.
%cd ../../config/install

In [None]:
# install python dependencies
!pip install grpcio grpcio-tools numpy matplotlib

In [None]:
# Download data for MNIST
!wget -nv https://s3.amazonaws.com/img-datasets/mnist.npz

# Download triton grpc protos
!wget -nv -O model_config.proto "https://github.com/triton-inference-server/server/blob/master/src/core/model_config.proto?raw=true"
!wget -nv -O grpc_service.proto "https://github.com/triton-inference-server/server/blob/master/src/core/grpc_service.proto?raw=true"

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Run the following command in a separate terminal to forward gRPC calls to the cluster. This is needed later for inference.

`kubectl port-forward --address 0.0.0.0 service/model-mesh 8033 -n model-serving`

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

In [None]:
# Generate the Python message modules (*_pb2.py) and gRPC stub modules
# (*_pb2_grpc.py) from the two downloaded .proto files into the current directory.
!python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. grpc_service.proto model_config.proto

In [None]:
import grpc
import grpc_service_pb2
import grpc_service_pb2_grpc

### Cluster Setup (Run this before the demo)

#### Pre-requisites 
- ETCD for meta store
- COS/S3 for model storage

#### Ensure you are logged into the cluster with kubectl or oc

In [None]:
# Optional: point kubectl at a specific kubeconfig instead of relying on an existing login.
#import os
#os.environ['KUBECONFIG']='/path/to/kube-config.yml'
# Tested on an OCP cluster using `oc login`

#### Create a namespace named 'model-serving' and set it as the default

#### Create the service accounts and roles with the required authorization

In [None]:
# Render the kustomization in the rbac directory and apply the resulting manifests.
!kustomize build rbac | kubectl apply -f -

#### Create a pull secret for the public Artifactory (update the Docker user name and API key)

In [None]:
# Create the image pull secret for the Artifactory docker registry.
# Replace <artifactory user> and <artifactory api key> with real values before
# running; as written, the shell would treat the unquoted `<` as a redirection.
# NOTE(review): avoid saving or committing the notebook with real credentials in this cell.
!kubectl create secret docker-registry swg-devops-registry --docker-server=wcp-ai-foundation-team-docker-virtual.artifactory.swg-devops.com --docker-username=<artifactory user> --docker-password=<artifactory api key>

# Attach the pull secret to both the wmlserving and wmlserving-controller service accounts
!kubectl patch serviceaccount wmlserving -p '{"imagePullSecrets": [{"name": "swg-devops-registry"}]}'
!kubectl patch serviceaccount wmlserving-controller -p '{"imagePullSecrets": [{"name": "swg-devops-registry"}]}'

#### The etcd secret is required before installing the controller; verify that it exists

In [None]:
# Show the model-serving-etcd secret to confirm that it is present.
!kubectl describe secret model-serving-etcd

#### CRD Installation

In [None]:
# Render the kustomization in the crd directory and apply the resulting manifests.
! kustomize build crd | kubectl apply -f -

#### Model Serve Controller Installation

In [None]:
# Render the kustomization in the controller directory and apply the resulting manifests.
! kustomize build controller | kubectl apply -f -

In [None]:
# List the deployments labelled control-plane=wmlserving-controller to check the controller.
!kubectl get deployment -l control-plane=wmlserving-controller

#### Prerequisite for Triton serving on OCP

In [None]:
# Grant the anyuid SCC to the wmlserving service account in the model-serving namespace.
!oc adm policy add-scc-to-user anyuid system:serviceaccount:model-serving:wmlserving

# Sprint 3 Demo Summary

- Model serve controller deployment
- CRDs deployment
- Runtime CR deployment (can be TF and/or Triton)
- Model deployment and Serving using KFServing V2 Protocol 

## ServingRuntime(s) CR installation

In [None]:
# List the ServingRuntime resources that exist before the runtimes are installed below.
!kubectl get servingruntime

In [None]:
# List the deployments that carry the wmlserving-service label (state before installing the runtimes).
!kubectl get deployment -l wmlserving-service

In [None]:
# Print the Triton runtime definition; presumably it is part of the runtimes
# kustomization applied in the next cell.
! cat runtimes/triton-2.30.yaml

In [None]:
# Render the kustomization in the runtimes directory and apply the resulting manifests.
!kustomize build runtimes | kubectl apply -f -

In [None]:
# List the wmlserving-service deployments again, now that the runtimes have been applied.
!kubectl get deployment -l wmlserving-service

## Model management using Predictor CR

#### The storage secret key must be added to the "storage-config" secret

```yaml
apiVersion: v1
kind: Secret
metadata:
    name: storage-config
stringData:
    myStorage: |
        {
            "type": "s3",
            "access_key_id": "xxx",
            "secret_access_key": "xxx",
            "endpoint_url": "https://s3.us-south.cloud-object-storage.appdomain.cloud",
            "region": "us-south",
            "default_bucket": ""
        }

```

In [None]:
# Add the "myStorage" key to the storage-config secret.
# Replace <base64 encoded json> with the base64-encoded JSON storage
# configuration (see the YAML example above) before running this cell.
!kubectl patch secret storage-config -p '{"data":{"myStorage":"<base64 encoded json>"}}'

### Submit Predictor CR

In [None]:
# List the Predictor resources that exist before the new ones are submitted.
!kubectl get predictors

In [None]:
%%bash
# Submit the TensorFlow predictor. Its s3 storage refers to the "myStorage"
# secret key that was added to the storage-config secret above.
kubectl apply -f - <<EOF
apiVersion: ai.ibm.com/v1
kind: Predictor
metadata:
  name: minimal-tf-predictor
spec:
  modelType:
    name: tensorflow
  path: tfmnist
  storage:
    s3:
      secretKey: myStorage
      bucket: triton-models
EOF

In [None]:
%%bash
# Submit the MLeap predictor. Its s3 storage refers to the "myStorage"
# secret key that was added to the storage-config secret above.
kubectl apply -f - <<EOF
apiVersion: ai.ibm.com/v1
kind: Predictor
metadata:
  name: minimal-mleap-predictor
spec:
  modelType:
    name: mleap
  path: example-model/airbnb.model.lr.zip
  storage:
    s3:
      secretKey: myStorage
      bucket: mleap-models
EOF

###  Predictors Status

In [None]:
# List the Predictor resources to check the status of the ones submitted above.
!kubectl get predictors

## Inference using KFServing V2 dataplane API

### Plotting test data

In [None]:
import numpy as np

# Read the test images from the MNIST archive downloaded during setup.
# np.load on a .npz archive returns an NpzFile that keeps the file open, so it
# is used as a context manager to close the handle once x_test has been read.
with np.load("mnist.npz") as dataset:
    x_test = dataset['x_test']

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Draw the first test image in the first cell of a 2x2 subplot grid, without axes.
samples = x_test[:1]
for position, digit in enumerate(samples, start=1):
    plt.subplot(2, 2, position)
    plt.axis('off')
    plt.imshow(digit, cmap=plt.cm.gray_r, interpolation='nearest')

In [None]:
# Flatten the first test image into a plain Python list; it is used below as
# the payload of the inference request.
first_image = x_test[0]
score_0 = first_image.reshape(-1).tolist()

### Inference

In [None]:
# Open a plaintext (non-TLS) gRPC channel to local port 8033, the port exposed
# by the `kubectl port-forward` command shown in the local setup section.
channel = grpc.insecure_channel('localhost:8033')

In [None]:
# Create the GRPCInferenceService client stub on top of the channel.
infer_client = grpc_service_pb2_grpc.GRPCInferenceServiceStub(channel)

In [None]:
# Wrap the flattened image values as the FP32 contents of the input tensor.
tensor_contents = grpc_service_pb2.InferTensorContents(fp32_contents=score_0)

# InferInputTensor is a message nested inside ModelInferRequest; reference it
# through the class instead of constructing a throwaway ModelInferRequest
# instance just to reach the nested type.
infer_input = grpc_service_pb2.ModelInferRequest.InferInputTensor(
    name="inputs",
    shape=[1, 784],
    datatype="FP32",
    contents=tensor_contents,
)

In [None]:
# The 'mm-vmodel-id' metadata entry presumably tells model-mesh which predictor
# should serve the request -- TODO confirm.
metadata = (('mm-vmodel-id', 'minimal-tf-predictor'),)
inputs = [infer_input]
request = grpc_service_pb2.ModelInferRequest(model_name="minimal-tf-predictor", inputs=inputs)

# with_call returns the response together with the call object.
results, call = infer_client.ModelInfer.with_call(request=request, metadata=metadata)

# The prediction shown is the first byte of the first raw output buffer.
first_output = results.raw_output_contents[0]
print(
    "model_name : " + results.model_name,
    "model_version : " + results.model_version,
    "prediction : " + str(first_output[0]),
    sep="\n",
)

### Patch Predictor CR

#### Patch an existing Predictor with a new version of the same model from another path

In [None]:
%%bash
# Re-apply the minimal-tf-predictor definition with a different model path
# (tfmnistnew); applying under the same name updates the existing resource.
kubectl apply -f - <<EOF
apiVersion: ai.ibm.com/v1
kind: Predictor
metadata:
  name: minimal-tf-predictor
spec:
  modelType:
    name: tensorflow
  path: tfmnistnew
  storage:
    s3:
      secretKey: myStorage
      bucket: triton-models
EOF

In [None]:
# List the Predictor resources again to check the status after the patch.
!kubectl get predictors

### Inference on patched CR

In [None]:
# Run the same inference against the patched predictor.
# The request is rebuilt here so this cell does not silently depend on the
# `request` object left over from the earlier inference cell; previously the
# `inputs` list built in this cell was never used.
metadata=(('mm-vmodel-id','minimal-tf-predictor'),)
inputs=[infer_input]
request=grpc_service_pb2.ModelInferRequest(model_name="minimal-tf-predictor",inputs=inputs)

results,call=infer_client.ModelInfer.with_call(request=request,metadata=metadata)
# The prediction shown is the first byte of the first raw output buffer.
print("model_name : "+results.model_name+"\n"+"model_version : "+results.model_version + "\n"
      "prediction : "+str(list(results.raw_output_contents[0])[0]))