# Load packages

In [1]:
# !pip uninstall -y kfp
# !pip install kfp

! pip install git+https://github.com/kubeflow/pipelines.git#subdirectory=sdk/python

Collecting git+https://github.com/kubeflow/pipelines.git#subdirectory=sdk/python
  Cloning https://github.com/kubeflow/pipelines.git to /tmp/pip-req-build-c1a_dm08
  Running command git clone -q https://github.com/kubeflow/pipelines.git /tmp/pip-req-build-c1a_dm08


In [2]:
import kfp
import json
import os
from kfp.onprem import use_k8s_secret
from kfp import components
from kfp.components import load_component_from_file, load_component_from_url
from kfp import dsl
from kfp import compiler
from pathlib import Path

kfp.__version__

'1.6.0-rc.0'

# Enter your gateway load balancer address and the session token from the browser cookie
[Use this Chrome extension to read the token from the cookie](https://chrome.google.com/webstore/detail/editthiscookie/fngmhnnpilhplaeedifhccceomclgfbg?hl=en)

![image.png](./image.png)

In [23]:
# Update the load balancer address and the namespace for your cluster.
HOST='http://istio-ingressgateway.istio-system.svc.cluster.local'
# Do not commit a real session cookie. Export it before starting Jupyter, e.g.
#   export KUBEFLOW_AUTH_COOKIE="authservice_session=<token>"
# The fallback below is only a placeholder and will not authenticate.
AUTH = os.environ.get("KUBEFLOW_AUTH_COOKIE", "authservice_session=<your-session-token>")
namespace="kubeflow-user-example-com"

In [11]:
# Connect to the Pipelines API behind the gateway, authenticating with the session cookie.
pipeline_endpoint = HOST + "/pipeline"
client = kfp.Client(host=pipeline_endpoint, cookies=AUTH)

# Use the first experiment listed for the namespace as the target for runs.
experiments = client.list_experiments(namespace=namespace)
my_experiment = experiments.experiments[0]
my_experiment

{'created_at': datetime.datetime(2021, 4, 22, 8, 44, 39, tzinfo=tzlocal()),
 'description': None,
 'id': 'aac96a63-616e-4d88-9334-6ca8df2bb956',
 'name': 'Default',
 'resource_references': [{'key': {'id': 'kubeflow-user-example-com',
                                  'type': 'NAMESPACE'},
                          'name': None,
                          'relationship': 'OWNER'}],
 'storage_state': 'STORAGESTATE_AVAILABLE'}

### Using the DSL for model archiver and TorchServe integration
#### An init container is used for MAR file generation

In [12]:
# DEPLOY names the InferenceService created by the pipeline and queried with kubectl;
# MODEL is the model name used in the /v1/models/<MODEL> request URLs.
DEPLOY, MODEL = "bertserve", "bert"

In [13]:
# Load the pipeline step factories from the BERT example's component definitions.
prepare_tensorboard_op = components.load_component_from_file(
    "./examples/bert/yaml/tensorboard/component.yaml"
)
prep_op = load_component_from_file("./examples/bert/yaml/pre_process/component.yaml")
train_op = load_component_from_file("./examples/bert/yaml/train/component.yaml")
deploy_op = components.load_component_from_file(
    "./examples/bert/yaml/deploy/component.yaml"
)

In [14]:
# Component used by the pipeline to push training outputs to MinIO.
minio_op = load_component_from_file("./examples/bert/yaml/minio/component.yaml")

In [15]:
def _push_to_minio(source_task, output_name, folder_name, filename, display_name,
                   bucket_name="mlpipeline"):
    """Create a MinIO upload step for one output of ``source_task``.

    Args:
        source_task: Task whose output is uploaded; the upload runs after it.
        output_name: Key into ``source_task.outputs`` selecting what to upload.
        folder_name: Destination folder passed to the MinIO component.
        filename: File name passed to the MinIO component.
        display_name: Name shown for the step in the pipeline UI.
        bucket_name: Destination bucket passed to the MinIO component.

    Returns:
        The configured upload task.
    """
    # Maps the keys of the Kubernetes secret to the env var names given in
    # k8s_secret_key_to_env.
    minio_credentials = use_k8s_secret(
        secret_name="mlpipeline-minio-artifact",
        k8s_secret_key_to_env={
            "secretkey": "MINIO_SECRET_KEY",
            "accesskey": "MINIO_ACCESS_KEY",
        },
    )
    upload_task = minio_op(
        bucket_name=bucket_name,
        folder_name=folder_name,
        input_path=source_task.outputs[output_name],
        filename=filename,
    )
    return (
        upload_task.apply(minio_credentials)
        .after(source_task)
        .set_display_name(display_name)
    )


@dsl.pipeline(name="Training pipeline", description="Sample training job test")
def pytorch_bert(minio_endpoint='http://minio-service.kubeflow:9000',
    log_bucket='mlpipeline',
    log_dir=f'tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}',
    mar_path=f'mar/{dsl.RUN_ID_PLACEHOLDER}/model-store',
    config_prop_path=f'mar/{dsl.RUN_ID_PLACEHOLDER}/config',
    model_uri = f's3://mlpipeline/mar/{dsl.RUN_ID_PLACEHOLDER}',
    tf_image='jagadeeshj/tb_plugin:v1.8'):
    """Preprocess, train, upload artifacts to MinIO and deploy an InferenceService.

    The InferenceService name and namespace come from the notebook-level
    ``DEPLOY`` and ``namespace`` variables, read when this function is executed.

    Args:
        minio_endpoint: Value of the S3_ENDPOINT env var in the TensorBoard pod spec.
        log_bucket: Bucket holding the TensorBoard logs.
        log_dir: Folder inside ``log_bucket`` for the TensorBoard events.
        mar_path: MinIO folder that receives ``bert_test.mar``.
        config_prop_path: MinIO folder that receives ``config.properties``.
        model_uri: ``storageUri`` written into the InferenceService spec.
        tf_image: Image passed to the TensorBoard preparation component.
    """
    # Pod template for the TensorBoard step: MinIO credentials come from the
    # mlpipeline-minio-artifact secret, plus the S3 endpoint settings.
    tensorboard_pod_spec = {
        'spec': {
            'containers': [{
                'env': [{
                    'name': 'AWS_ACCESS_KEY_ID',
                    'valueFrom': {
                        'secretKeyRef': {
                            'name': 'mlpipeline-minio-artifact',
                            'key': 'accesskey'
                        }
                    }
                }, {
                    'name': 'AWS_SECRET_ACCESS_KEY',
                    'valueFrom': {
                        'secretKeyRef': {
                            'name': 'mlpipeline-minio-artifact',
                            'key': 'secretkey'
                        }
                    }
                }, {
                    'name': 'AWS_REGION',
                    'value': 'minio'
                }, {
                    'name': 'S3_ENDPOINT',
                    'value': f'{minio_endpoint}',
                }, {
                    'name': 'S3_USE_HTTPS',
                    'value': '0',
                }, {
                    'name': 'S3_VERIFY_SSL',
                    'value': '0',
                }]
            }],
        },
    }
    prepare_tb_task = prepare_tensorboard_op(
        log_dir_uri=f's3://{log_bucket}/{log_dir}',
        image=tf_image,
        pod_template_spec=json.dumps(tensorboard_pod_spec),
    ).set_display_name("Visualization")

    prep_task = prep_op().after(prepare_tb_task).set_display_name("Preprocess & Transform")
    train_task = (
        train_op(input_data=prep_task.outputs['output_data'], profiler="pytorch")
        .after(prep_task)
        .set_display_name("Training")
    )

    # Push the events to the same bucket the TensorBoard step reads from
    # (s3://{log_bucket}/{log_dir}); the default bucket is unchanged.
    minio_tb_upload = _push_to_minio(
        train_task, "tensorboard_root", log_dir, "",
        "Tensorboard Events Pusher", bucket_name=log_bucket,
    )
    minio_mar_upload = _push_to_minio(
        train_task, "checkpoint_dir", mar_path, "bert_test.mar", "Mar Pusher",
    )
    minio_config_upload = _push_to_minio(
        train_task, "checkpoint_dir", config_prop_path, "config.properties",
        "Config Pusher",
    )

    model_uri= str(model_uri)
    isvc_yaml = '''
    apiVersion: "serving.kubeflow.org/v1beta1"
    kind: "InferenceService"
    metadata:
      name: {}
      namespace: {}
    spec:
      predictor:
        serviceAccountName: sa
        pytorch:
          storageUri: {}
          resources:
            limits:
              memory: 4Gi   
    '''.format(DEPLOY, namespace, model_uri)
    # Deploy only after both the model archive and config.properties uploads
    # have finished, so neither file can be missing when the service starts.
    deploy_task = deploy_op(
        action='apply',
        inferenceservice_yaml=isvc_yaml
    ).after(minio_mar_upload, minio_config_upload).set_display_name("Deployer")
    

In [16]:
# Compile the pipeline function into a package, with component type checking enabled
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=pytorch_bert,
    package_path='pytorch.tar.gz',
    type_check=True,
)

In [17]:
# Submit the compiled package as a run named 'pytorch-bert' in the selected experiment
run = client.run_pipeline(
    experiment_id=my_experiment.id,
    job_name='pytorch-bert',
    pipeline_package_path='pytorch.tar.gz',
)

### Wait for inference service below to go to `READY True` state.

In [18]:
# Show the InferenceService named by DEPLOY; re-run until the READY column reads True.
!kubectl get isvc $DEPLOY

NAME        URL                                                      READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION                 AGE
bertserve   http://bertserve.kubeflow-user-example-com.example.com   True           100                              bertserve-predictor-default-7s2k8   3d7h


# Inference

In [19]:
# Print the service host name: jsonpath selects status.url and cut keeps the
# third "/"-separated field, i.e. the part after "http://".
!kubectl get isvc $DEPLOY -o jsonpath='{.items[0].status.url}' | cut -d "/" -f 3

bertserve.kubeflow-user-example-com.example.com


In [20]:
# Capture the command's output lines in S_HOSTNAME and keep the first line as
# SERVICE_HOSTNAME, which the curl requests below send as the Host header.
S_HOSTNAME=!kubectl get isvc $DEPLOY  -o jsonpath='{.items[0].status.url}' | cut -d "/" -f 3
SERVICE_HOSTNAME=S_HOSTNAME[0]
SERVICE_HOSTNAME

'bertserve.kubeflow-user-example-com.example.com'

# Prediction

In [28]:
# POST the sample request to the model's :predict endpoint through the gateway and
# save the response body to bert_prediction_output.json.
# NOTE: -v echoes the request headers, including the Cookie, into the cell output;
# clear the output before sharing the notebook.
!curl -v -H "Host: $SERVICE_HOSTNAME" -H "Cookie: $AUTH" "$HOST/v1/models/$MODEL:predict" -d @./examples/bert/sample.txt > bert_prediction_output.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 10.100.251.14:80...
* TCP_NODELAY set
* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.251.14) port 80 (#0)
> POST /v1/models/bert:predict HTTP/1.1
> Host: bertserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.68.0
> Accept: */*
> Cookie: authservice_session=MTYyMTAxNTA4NXxOd3dBTkVRelExcENRVVZhUVZaWVUwUktVMDVLVVZjMFJWVk5VVVUxV0VWUlVrNUZXRVpRTlRWTlFrVlVTbFpCUnpkTFRFbFdSVkU9fL25mh6uPkLhC0wgFU9dy_0xCkxK72iW8xtXgK5lOmHK
> Content-Length: 84
> Content-Type: application/x-www-form-urlencoded
> 
} [84 bytes data]
* upload completely sent off: 84 out of 84 bytes
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-length: 33
< content-type: application/json; charset=UTF-8
< date: Fri, 14 May 202

In [29]:
# Show the raw prediction response saved by the curl call.
! cat bert_prediction_output.json

{"predictions": ["\"Sci/Tech\""]}

In [31]:
# POST the same sample request to the model's :explain endpoint and save the
# response body to bert_explaination_output.json.
# NOTE: -v echoes the request headers, including the Cookie, into the cell output;
# clear the output before sharing the notebook.
!curl -v -H "Host: $SERVICE_HOSTNAME" -H "Cookie: $AUTH" "$HOST/v1/models/$MODEL:explain" -d @./examples/bert/sample.txt  > bert_explaination_output.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 10.100.251.14:80...
* TCP_NODELAY set
* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.251.14) port 80 (#0)
> POST /v1/models/bert:explain HTTP/1.1
> Host: bertserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.68.0
> Accept: */*
> Cookie: authservice_session=MTYyMTAxNTA4NXxOd3dBTkVRelExcENRVVZhUVZaWVUwUktVMDVLVVZjMFJWVk5VVVUxV0VWUlVrNUZXRVpRTlRWTlFrVlVTbFpCUnpkTFRFbFdSVkU9fL25mh6uPkLhC0wgFU9dy_0xCkxK72iW8xtXgK5lOmHK
> Content-Length: 84
> Content-Type: application/x-www-form-urlencoded
> 
} [84 bytes data]
* upload completely sent off: 84 out of 84 bytes
100    84    0     0  100    84      0      1  0:01:24  0:00:48  0:00:36     0* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-leng

In [32]:
# Show the raw explanation response saved by the curl call.
! cat bert_explaination_output.json

{"explanations": [{"words": ["[CLS]", "bloomberg", "has", "reported", "on", "the", "economy", "[SEP]"], "importances": [0.49803317807827413, -0.04228915625436579, -0.22691037181108395, 0.15573719339552444, 0.08677259891698845, 0.1791962203959244, 0.525546079847318, -0.5988261343532961], "delta": 0.12081549835977756}]}

In [33]:
import json

# Parse the saved explain response. The context manager closes the file as soon
# as it has been read instead of leaving the handle open.
with open("./bert_explaination_output.json", "r") as explanation_file:
    explanations_json = json.load(explanation_file)
explanations_json

{'explanations': [{'words': ['[CLS]',
    'bloomberg',
    'has',
    'reported',
    'on',
    'the',
    'economy',
    '[SEP]'],
   'importances': [0.49803317807827413,
    -0.04228915625436579,
    -0.22691037181108395,
    0.15573719339552444,
    0.08677259891698845,
    0.1791962203959244,
    0.525546079847318,
    -0.5988261343532961],
   'delta': 0.12081549835977756}]}

In [34]:
# Parse the saved predict response. The context manager closes the file as soon
# as it has been read instead of leaving the handle open.
with open("./bert_prediction_output.json", "r") as prediction_file:
    prediction_json = json.load(prediction_file)

In [35]:
import torch

# Unpack the first explanation entry: tokens, per-token importances and delta.
first_explanation = explanations_json["explanations"][0]
tokens = first_explanation['words']
delta = first_explanation['delta']
attributions = torch.tensor(first_explanation['importances'])

# Values shown in the visualization table. Only pred_class comes from the
# prediction response; the probability and the two labels are hard-coded.
pred_class = prediction_json["predictions"][0]
pred_prob = 0.75
true_class = "Business"
attr_class ="world"

In [36]:
from captum.attr import visualization

# One record for the sample sentence; the summed attributions are passed as the
# attribution score and delta as the last argument.
bert_record = visualization.VisualizationDataRecord(
    attributions,
    pred_prob,
    pred_class,
    true_class,
    attr_class,
    attributions.sum(),
    tokens,
    delta,
)
vis_data_records = [bert_record]

In [37]:
# visualize_text renders the table itself; the trailing ";" stops the notebook
# from echoing the returned object, which would draw the same table a second time.
visualization.visualize_text(vis_data_records);

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Business,"""Sci/Tech"" (0.75)",world,0.58,[CLS] bloomberg has reported on the economy [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Business,"""Sci/Tech"" (0.75)",world,0.58,[CLS] bloomberg has reported on the economy [SEP]
,,,,
