In [6]:
# Clone the repository if needed
# ! git clone -b main https://github.com/chauhang/pytorch-pipeline
import os

# Name of the checkout directory that the rest of the notebook runs from.
REPO_DIR_NAME = "pytorch-pipeline"

cwd = os.getcwd()

# Only descend into the checkout when we are not already inside it, so that
# re-running this cell does not try to enter a nested
# pytorch-pipeline/pytorch-pipeline directory and fail.
if os.path.basename(cwd) != REPO_DIR_NAME:
    os.chdir(os.path.join(cwd, REPO_DIR_NAME))

## Prerequisites

#### The following packages need to be installed before running the build script

1. kfp
2. docker - should be logged in using docker login
3. yq - install using snap - https://github.com/mikefarah/yq


#### ./build.sh takes two arguments

1. path to example - For ex: pytorch_pipeline/examples/bert
2. dockerhub username - For ex: shrinathsuresh


In [74]:
# Run the build script for the BERT example. The first argument is the path
# to the example and the second is the Docker Hub username.
! ./build.sh pytorch_pipeline/examples/bert shrinathsuresh

++++ Building component images with tag=13-05-2021-19-39-34.528751
IMAGE TO BUILD: shrinathsuresh/pytorch_pipeline:13-05-2021-19-39-34.528751
Sending build context to Docker daemon  3.708MB
Step 1/9 : FROM pytorch/pytorch:latest
 ---> 5ffed6c83695
Step 2/9 : COPY requirements.txt requirements.txt
 ---> a75911f19818
Step 3/9 : RUN apt-get update
 ---> Running in 917fe9ea37ae
Get:1 http://archive.ubuntu.com/ubuntu bionic InRelease [242 kB]
Get:2 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:4 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]
Get:5 http://archive.ubuntu.com/ubuntu bionic/multiverse amd64 Packages [186 kB]
Get:6 http://archive.ubuntu.com/ubuntu bionic/restricted amd64 Packages [13.5 kB]
Get:7 http://archive.ubuntu.com/ubuntu bionic/universe amd64 Packages [11.3 MB]
Get:8 http://security.ubuntu.com/ubuntu bionic-security/multiverse amd64 Packages [24.7 k

## At the end of this step, the pytorch_bert.yaml file will be generated

In [31]:
import kfp
import json
import os
from kfp import components
from kfp.components import load_component_from_file, load_component_from_url
from kfp import dsl
from kfp import compiler
from pathlib import Path

kfp.__version__

'1.4.0'

## Update AUTH based on the environment

In [32]:
# KFP_URL='istio-ingressgateway.istio-system.svc.cluster.local'
KFP_URL = 'http://localhost:8080'

# The auth-service session cookie is a credential, so it is read from the
# environment instead of being stored in the notebook. Expected form:
#   KFP_AUTH_COOKIE="authservice_session=<session cookie value>"
AUTH = os.environ.get("KFP_AUTH_COOKIE", "")
if not AUTH:
    raise RuntimeError(
        "Set the KFP_AUTH_COOKIE environment variable to "
        "'authservice_session=<session cookie value>' before running this cell."
    )

namespace = "kubeflow-user-example-com"
client = kfp.Client(host=KFP_URL + "/pipeline", cookies=AUTH)

# Fail with a clear message when the namespace has no experiments, rather
# than with an opaque error from the [0] lookup below.
experiments = client.list_experiments(namespace=namespace)
if not experiments.experiments:
    raise RuntimeError(
        "No experiments found in namespace {!r}".format(namespace)
    )

# The first listed experiment is used for the pipeline run.
my_experiment = experiments.experiments[0]
my_experiment

{'created_at': datetime.datetime(2021, 4, 22, 8, 44, 39, tzinfo=tzutc()),
 'description': None,
 'id': 'aac96a63-616e-4d88-9334-6ca8df2bb956',
 'name': 'Default',
 'resource_references': [{'key': {'id': 'kubeflow-user-example-com',
                                  'type': 'NAMESPACE'},
                          'name': None,
                          'relationship': 'OWNER'}],
 'storage_state': 'STORAGESTATE_AVAILABLE'}

## Click on Run Details to navigate to the pipeline

In [36]:
# Submit the generated pipeline package as a new run in the selected experiment.
run_name = 'pytorch-bert'
run = client.run_pipeline(
    experiment_id=my_experiment.id,
    job_name=run_name,
    pipeline_package_path="pytorch_bert.yaml",
)

In [56]:
# Ask kubectl for the status URL of the `bertserve` InferenceService and keep
# the third "/"-separated field, i.e. the host name without the scheme.
# NOTE(review): the namespace is hard-coded here rather than reusing the
# `namespace` variable defined earlier in the notebook.
TEST = ! kubectl get isvc bertserve -n kubeflow-user-example-com -o jsonpath='{.status.url}'| cut -d "/" -f 3

# The `!` capture is a list of output lines; the first line is the host name.
HOST = TEST[0]
HOST

'bertserve.kubeflow-user-example-com.example.com'

In [57]:
# Model name used in the /v1/models/<name>:predict and :explain request paths.
MODEL_NAME = "bert"

In [76]:
# POST the sample request to the model's `:predict` endpoint through KFP_URL,
# with HOST as the Host header and AUTH as the Cookie header, and save the
# response body to ./bert_prediction_output.json.
# NOTE(review): `-v` echoes the request headers, including the Cookie value,
# into the cell output.
!curl -v -H "Host: $HOST" -H "Cookie: $AUTH" "$KFP_URL/v1/models/$MODEL_NAME:predict" -d @./pytorch_pipeline/examples/bert/sample.txt > ./bert_prediction_output.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 127.0.0.1:8080...
* Connected to localhost (127.0.0.1) port 8080 (#0)
> POST /v1/models/bert:predict HTTP/1.1
> Host: bertserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.71.1
> Accept: */*
> Cookie: authservice_session=MTYyMDg4MzA3MHxOd3dBTkZWU1R6WXpWRTVNV0VOUldVTk1UVXROUlRkRVFWRlVORGRGTmxoVk5sbFhRVlpZVmpKVE5FdFVXa2xQU1RKQ1YxQllSVUU9fHLZy5Ig4sSAXjIoMiqDsiZ3awo46HK_cD4OVcdleI0-
> Content-Length: 84
> Content-Type: application/x-www-form-urlencoded
> 
} [84 bytes data]
* upload completely sent off: 84 out of 84 bytes
100    84    0     0  100    84      0     69  0:00:01  0:00:01 --:--:--    69* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-length: 33
< content-type: application/json; charset=UTF-8
< date: T

In [77]:
# Show the raw prediction response saved by the predict request.
! cat ./bert_prediction_output.json

{"predictions": ["\"Sci/Tech\""]}

In [78]:
# POST the same sample request to the model's `:explain` endpoint and save the
# response body to bert_explaination_output.json.
# NOTE(review): `-v` echoes the request headers, including the Cookie value,
# into the cell output.
!curl -v -H "Host: $HOST" -H "Cookie: $AUTH" "$KFP_URL/v1/models/$MODEL_NAME:explain" -d @./pytorch_pipeline/examples/bert/sample.txt  > bert_explaination_output.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 127.0.0.1:8080...
* Connected to localhost (127.0.0.1) port 8080 (#0)
> POST /v1/models/bert:explain HTTP/1.1
> Host: bertserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.71.1
> Accept: */*
> Cookie: authservice_session=MTYyMDg4MzA3MHxOd3dBTkZWU1R6WXpWRTVNV0VOUldVTk1UVXROUlRkRVFWRlVORGRGTmxoVk5sbFhRVlpZVmpKVE5FdFVXa2xQU1RKQ1YxQllSVUU9fHLZy5Ig4sSAXjIoMiqDsiZ3awo46HK_cD4OVcdleI0-
> Content-Length: 84
> Content-Type: application/x-www-form-urlencoded
> 
} [84 bytes data]
* upload completely sent off: 84 out of 84 bytes
100    84    0     0  100    84      0      1  0:01:24  0:00:51  0:00:33     0:00:28  0:00:25  0:00:03     0* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-length: 319
< content-type: applicatio

In [79]:
# Show the raw explanation response saved by the explain request.
! cat bert_explaination_output.json

{"explanations": [{"words": ["[CLS]", "bloomberg", "has", "reported", "on", "the", "economy", "[SEP]"], "importances": [0.49803317807827413, -0.04228915625436579, -0.22691037181108395, 0.15573719339552444, 0.08677259891698845, 0.1791962203959244, 0.525546079847318, -0.5988261343532961], "delta": 0.12081549835977756}]}

In [80]:
import json

# Parse the saved explain response. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open.
with open("./bert_explaination_output.json", "r") as explanation_file:
    explanations_json = json.load(explanation_file)
explanations_json

{'explanations': [{'words': ['[CLS]',
    'bloomberg',
    'has',
    'reported',
    'on',
    'the',
    'economy',
    '[SEP]'],
   'importances': [0.49803317807827413,
    -0.04228915625436579,
    -0.22691037181108395,
    0.15573719339552444,
    0.08677259891698845,
    0.1791962203959244,
    0.525546079847318,
    -0.5988261343532961],
   'delta': 0.12081549835977756}]}

In [81]:
# Parse the saved predict response. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open.
with open("./bert_prediction_output.json", "r") as prediction_file:
    prediction_json = json.load(prediction_file)

In [82]:
import torch

# Unpack the first explanation record from the explain response.
first_explanation = explanations_json["explanations"][0]
tokens = first_explanation['words']
delta = first_explanation['delta']

# The per-token importances are converted to a tensor.
attributions = torch.tensor(first_explanation['importances'])

# Label metadata shown next to the attributions in the visualization.
# NOTE(review): pred_prob, true_class and attr_class are hard-coded here;
# they are not read from the prediction or explanation responses.
pred_class = prediction_json["predictions"][0]
pred_prob = 0.75
true_class = "Business"
attr_class = "world"

In [83]:
from captum.attr import visualization

# Build one visualization record for the sample. The summed attributions are
# passed as the overall attribution score.
bert_record = visualization.VisualizationDataRecord(
    attributions,
    pred_prob,
    pred_class,
    true_class,
    attr_class,
    attributions.sum(),
    tokens,
    delta,
)
vis_data_records = [bert_record]

In [84]:
# Render the word-importance table. The trailing semicolon stops the notebook
# from also displaying the call's return value, which showed the table twice.
visualization.visualize_text(vis_data_records);

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Business,"""Sci/Tech"" (0.75)",world,0.58,[CLS] bloomberg has reported on the economy [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Business,"""Sci/Tech"" (0.75)",world,0.58,[CLS] bloomberg has reported on the economy [SEP]
,,,,
