# Load packages

In [1]:
!pip uninstall -y kfp
!pip install kfp

Found existing installation: kfp 1.4.0
Uninstalling kfp-1.4.0:
  Successfully uninstalled kfp-1.4.0
Collecting kfp
  Using cached kfp-1.4.0-py3-none-any.whl
Installing collected packages: kfp
Successfully installed kfp-1.4.0


In [2]:
# Standard library
import json
import os

# Kubeflow Pipelines SDK
import kfp
from kfp import compiler
from kfp import components
from kfp import dsl
from kfp.components import load_component_from_file, load_component_from_url

# Show the SDK version in use as the cell output.
kfp.__version__

'1.4.0'

# Enter your gateway load balancer address and the session token from the cookie
[Use this Chrome extension (EditThisCookie) to read the token](https://chrome.google.com/webstore/detail/editthiscookie/fngmhnnpilhplaeedifhccceomclgfbg?hl=en)

![image.png](./image.png)

In [3]:
# Update these values for your cluster's load balancer, auth session and namespace.
HOST = 'istio-ingressgateway.istio-system.svc.cluster.local'

# The session cookie is a credential, so it must not be stored in the notebook.
# Export it before starting Jupyter, in the form "authservice_session=<token>":
#     export KFP_AUTH_COOKIE="authservice_session=<token>"
AUTH = os.environ.get("KFP_AUTH_COOKIE", "")
if not AUTH:
    print(
        "WARNING: KFP_AUTH_COOKIE is not set; requests that need the "
        "authservice_session cookie will be sent without it."
    )

namespace = "kubeflow-user-example-com"

In [8]:
# Connect to the Kubeflow Pipelines API behind the ingress gateway, authenticating
# with the session cookie, then pick the first experiment visible in the namespace.
client = kfp.Client(host=HOST + "/pipeline", cookies=AUTH)
experiments = client.list_experiments(namespace=namespace)

# Fail with a readable message instead of an opaque TypeError/IndexError when the
# response carries no experiments.
if not experiments.experiments:
    raise RuntimeError(
        f"No experiments found in namespace {namespace!r}; "
        "create one in the Kubeflow Pipelines UI first."
    )

my_experiment = experiments.experiments[0]
my_experiment

{'created_at': datetime.datetime(2021, 4, 22, 8, 44, 39, tzinfo=tzlocal()),
 'description': None,
 'id': 'aac96a63-616e-4d88-9334-6ca8df2bb956',
 'name': 'Default',
 'resource_references': [{'key': {'id': 'kubeflow-user-example-com',
                                  'type': 'NAMESPACE'},
                          'name': None,
                          'relationship': 'OWNER'}],
 'storage_state': 'STORAGESTATE_AVAILABLE'}

### Using the DSL for model archiver and TorchServe integration
#### An init container is used for MAR file generation

In [9]:
# Name given to the InferenceService created by the pipeline's deploy step.
DEPLOY = "torchserve"
# Name of the model this notebook works with.
MODEL = "cifar10"

In [10]:
# Build one op factory per pipeline step from its component definition on disk.
# All four go through the same loader (components.load_component_from_file).
prepare_tensorboard_op = components.load_component_from_file(
    "./examples/cifar10/yaml/tensorboard/component.yaml"
)
prep_op = components.load_component_from_file(
    "./examples/cifar10/yaml/pre_process/component.yaml"
)
train_op = components.load_component_from_file(
    "./examples/cifar10/yaml/train/component.yaml"
)
deploy_op = components.load_component_from_file(
    "./examples/cifar10/yaml/deploy/component.yaml"
)

In [11]:
@dsl.pipeline(name="Training pipeline", description="Sample training job test")
def pytorch_cifar10(
    minio_endpoint='minio-service.kubeflow:9000',
    log_bucket='mlpipeline',
    log_dir='tensorboard',
    tf_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest',
):
    """Define the CIFAR-10 pipeline: TensorBoard prep, pre-process, train, deploy.

    The four steps run strictly in sequence; each is chained to the previous one
    with ``.after(...)``. The deploy step applies an InferenceService manifest
    whose name and namespace come from the notebook-level ``DEPLOY`` and
    ``namespace`` variables, read when this function body runs.

    Args:
        minio_endpoint: Value passed to the TensorBoard pod as ``S3_ENDPOINT``.
        log_bucket: Bucket part of the ``s3://`` TensorBoard log URI.
        log_dir: Path part of the ``s3://`` TensorBoard log URI.
        tf_image: Container image passed to the TensorBoard component.
    """
    # Environment for the TensorBoard container: S3 credentials are pulled from
    # the 'mlpipeline-minio-artifact' secret, the rest are plain values.
    tensorboard_env = [
        {
            'name': 'AWS_ACCESS_KEY_ID',
            'valueFrom': {
                'secretKeyRef': {
                    'name': 'mlpipeline-minio-artifact',
                    'key': 'accesskey'
                }
            }
        },
        {
            'name': 'AWS_SECRET_ACCESS_KEY',
            'valueFrom': {
                'secretKeyRef': {
                    'name': 'mlpipeline-minio-artifact',
                    'key': 'secretkey'
                }
            }
        },
        {'name': 'AWS_REGION', 'value': 'minio'},
        {'name': 'S3_ENDPOINT', 'value': f'{minio_endpoint}'},
        {'name': 'S3_USE_HTTPS', 'value': '0'},
        {'name': 'S3_VERIFY_SSL', 'value': '0'},
    ]

    prepare_tb_task = prepare_tensorboard_op(
        log_dir_uri=f's3://{log_bucket}/{log_dir}',
        image=tf_image,
        pod_template_spec=json.dumps({
            'spec': {
                'containers': [{
                    'env': tensorboard_env,
                }],
            },
        })
    )

    prep_task = prep_op().after(prepare_tb_task)
    train_task = train_op(input_data=prep_task.outputs['output_data']).after(prep_task)

    # Manifest for the InferenceService; the placeholders are filled, in order,
    # with DEPLOY, namespace and model_uri.
    model_uri = "s3://mlpipeline/mar"
    isvc_yaml = '''
    apiVersion: "serving.kubeflow.org/v1beta1"
    kind: "InferenceService"
    metadata:
      name: {}
      namespace: {}
    spec:
      predictor:
        serviceAccountName: sa
        pytorch:
          storageUri: {}
          resources:
            limits:
              memory: 4Gi   
    '''.format(DEPLOY, namespace, model_uri)
    deploy_task = deploy_op(
        action='apply',
        inferenceservice_yaml=isvc_yaml
    ).after(train_task)
    

In [12]:
# Compile the pipeline function into a package file, with type checking enabled.
compiler.Compiler().compile(
    pipeline_func=pytorch_cifar10,
    package_path='pytorch.tar.gz',
    type_check=True,
)

In [13]:
# Submit the compiled package as a run named 'pytorch-cifar10' in the selected experiment.
run = client.run_pipeline(
    experiment_id=my_experiment.id,
    job_name='pytorch-cifar10',
    pipeline_package_path='pytorch.tar.gz',
)

### Wait for inference service below to go to `READY True` state.

In [14]:
!kubectl get isvc $DEPLOY

NAME         URL                                                       READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION                  AGE
torchserve   http://torchserve.kubeflow-user-example-com.example.com   True           100                              torchserve-predictor-default-dzzhb   4d


# Inference

In [16]:
!kubectl get isvc $DEPLOY -o jsonpath='{.items[0].status.url}' | cut -d "/" -f 3

torchserve.kubeflow-user-example-com.example.com


In [17]:
S_HOSTNAME=!kubectl get isvc $DEPLOY  -o jsonpath='{.items[0].status.url}' | cut -d "/" -f 3
SERVICE_HOSTNAME=S_HOSTNAME[0]
SERVICE_HOSTNAME

'torchserve.kubeflow-user-example-com.example.com'

# Prediction

In [97]:
!curl -v -H "Host: torchserve.kubeflow-user-example-com.example.com" -H "Cookie: authservice_session=MTYxOTc1NzIzNHxOd3dBTkVNM1Qwb3pWVGRJV0VKSlVUSllWMGhFTTBORU5WVXlOVE16V1RaVVEwMUdVVVl5VlZwRVZEVkZVRlJEVEVSVVExZElOa0U9fLS1hlH4DJZ2Qi0reGBgIo-qBZqPJHygxiaXUTGEooKQ" "http://istio-ingressgateway.istio-system.svc.cluster.local/v1/models/cifar10:predict" -d @./examples/cifar10/input.json

*   Trying 10.100.251.14:80...
* TCP_NODELAY set
* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.251.14) port 80 (#0)
> POST /v1/models/cifar10:predict HTTP/1.1
> Host: torchserve.kubeflow-user-example-com.example.com
> User-Agent: curl/7.68.0
> Accept: */*
> Cookie: authservice_session=<redacted>
> Content-Length: 148025
> Content-Type: application/x-www-form-urlencoded
> Expect: 100-continue
> 
* Mark bundle as not supporting multiuse
< HTTP/1.1 100 Continue
* We are completely uploaded and fine
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< content-length: 150
< content-type: application/json; charset=UTF-8
< date: Fri, 30 Apr 2021 14:10:51 GMT
< server: istio-envoy
< x-envoy-upstream-service-time: 338
< 
* Connection #0 to host istio-ingressgateway.istio-system.svc.cluster.local left intact
{"p

In [20]:
!curl -v -H "Host: torchserve.kubeflow-user-example-com.example.com" -H "Cookie: authservice_session=MTYxOTc1NzIzNHxOd3dBTkVNM1Qwb3pWVGRJV0VKSlVUSllWMGhFTTBORU5WVXlOVE16V1RaVVEwMUdVVVl5VlZwRVZEVkZVRlJEVEVSVVExZElOa0U9fLS1hlH4DJZ2Qi0reGBgIo-qBZqPJHygxiaXUTGEooKQ" "http://istio-ingressgateway.istio-system.svc.cluster.local/v1/models/cifar10:explain" -d @./examples/cifar10/input.json