# About this Jupyter Notebook

@author: Yingding Wang

This notebook is adapted from the Kubeflow Pipelines sample that visualizes metrics in the KF Pipelines UI (https://github.com/kubeflow/pipelines/blob/master/samples/test/metrics_visualization_v1.py)

In [1]:
import sys
# Install a pinned kfp SDK by running pip through sys.executable, i.e. the same
# interpreter this kernel runs on, so the package is installed for this kernel.
# --user installs into the per-user site-packages directory.
!{sys.executable} -m pip install --upgrade --user kfp==1.8.12



## (optional) Restart your notebook server in Kubeflow by stopping and starting it

## (optional) Upgrade your kfp-server-api

If you see a headtoken issue while starting a Kubeflow pipeline from your notebook, execute this optional step to change the kfp-server-api version so that it matches the Kubeflow Pipelines backend.

In [2]:
# List the installed kfp packages to check the kfp-server-api version.
# Expected versions: 1.7.0 for KF 1.4, 1.7.1 for KF 1.4.1 and 1.8.1 for KF 1.5.0.
# Relies on `sys` having been imported by the install cell at the top of the notebook.
!{sys.executable} -m pip list | grep kfp

kfp                      1.8.12
kfp-pipeline-spec        0.1.16
kfp-server-api           1.8.1


In [3]:
# Optional step: every command below is commented out, so running this cell
# installs nothing. Uncomment the uninstall/install pair that matches your
# Kubeflow release to pin kfp-server-api to that version.
# The bare strings act as labels only; the last one is echoed as the cell output.
"""upgrade the kfp server api version to 1.7.0 for KF 1.4"""
# !{sys.executable} -m pip uninstall -y kfp-server-api
# !{sys.executable} -m pip install --user --upgrade kfp-server-api==1.7.0
"""upgrade the kfp server api version to 1.8.1 for KF 1.5"""
# !{sys.executable} -m pip uninstall -y kfp-server-api
# !{sys.executable} -m pip install --user --upgrade kfp-server-api==1.8.1

'upgrade the kfp server api version to 1.8.1 for KF 1.5'

In [4]:
# import sys
# !{sys.executable} -m pip install --upgrade --user kfp==1.8.12
# !{sys.executable} -m pip install --upgrade --user kubernetes==18.20.0

# Restart the kernel
After updating kfp, restart this notebook kernel.

Jupyter notebook: Menu -> Kernel -> Restart Kernel

## set up resource limit helper function

In [5]:
from kfp.dsl import ContainerOp
def pod_resource_transformer(op: ContainerOp, mem_req="200Mi", cpu_req="2000m", mem_lim="1000Mi", cpu_lim='2000m'):
    """Apply memory and CPU requests/limits to a pipeline step.

    The four values are forwarded unchanged to the matching ``set_*`` methods
    in this order: memory request, memory limit, CPU request, CPU limit. Each
    setter is invoked on the object returned by the previous setter.

    Args:
        op: The step to configure.
        mem_req: Memory request, e.g. ``"200Mi"``.
        cpu_req: CPU request in millicores, e.g. ``"1000m"`` is one CPU core.
        mem_lim: Memory limit, e.g. ``"1000Mi"`` is roughly 1 GB.
        cpu_lim: CPU limit in millicores.

    Returns:
        The value returned by the final ``set_cpu_limit`` call.
    """
    # Memory settings first, then CPU settings.
    with_mem_request = op.set_memory_request(mem_req)
    with_mem_limit = with_mem_request.set_memory_limit(mem_lim)
    with_cpu_request = with_mem_limit.set_cpu_request(cpu_req)
    return with_cpu_request.set_cpu_limit(cpu_lim)

In [6]:
from typing import NamedTuple
import kfp
from kfp.dsl import pipeline
from kfp.components import create_component_from_func

# Namespace handed to client.set_user_namespace() and to the run submission further down.
NAME_SPACE="kubeflow-kindfor"
EXPERIMENT_NAME="kf-metrics"        # Experiment under which runs are grouped in the GUI; also used as the pipeline name
EXPERIMENT_DESC = 'visualization examples for kf pipeline UI'  # Used as the pipeline description

@create_component_from_func
def confusion_visualization(matrix_uri: str = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/samples/core/visualization/confusion_matrix.csv') -> NamedTuple('VisualizationOutput', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Provide confusion matrix csv file to visualize as metrics."""
    # The docstring above is kept verbatim because the decorator receives it
    # together with the function (NOTE(review): kfp presumably surfaces it as
    # the component description; confirm before rewording).
    # Imports stay inside the body, as in the original, so the function does
    # not depend on names from the surrounding notebook.
    import json
    from collections import namedtuple

    # Columns of the CSV referenced by `matrix_uri`: two category columns
    # followed by a numeric count.
    csv_schema = [
        {'name': column, 'type': kind}
        for column, kind in (
            ('target', 'CATEGORY'),
            ('predicted', 'CATEGORY'),
            ('count', 'NUMBER'),
        )
    ]

    # A single viewer entry describing the confusion matrix.
    matrix_view = {
        'type': 'confusion_matrix',
        'format': 'csv',
        'schema': csv_schema,
        'source': matrix_uri,
        'labels': ['rose', 'lily', 'iris'],
    }
    ui_metadata_json = json.dumps({'outputs': [matrix_view]})

    # Return a one-field named tuple whose field name matches the output
    # declared in the return annotation.
    VisualizationOutput = namedtuple('VisualizationOutput', ['mlpipeline_ui_metadata'])
    return VisualizationOutput(ui_metadata_json)

# @create_component_from_func
# def produce_markdown() -> NamedTuple('Outputs', [('MLPipeline_UI_metadata', 'UI_metadata')]):
#     import sys, json, subprocess
#     subprocess.run([sys.executable, '-m', 'pip', 'install','pandas'])
#     import pandas as pd 
#     matrix = [
#         ['y', 'y', 10],
#         ['y', 'n', 9],
#         ['n', 'y', 6],
#         ['n', 'y', 7]
#     ]
#     df = pd.DataFrame(matrix,columns=['target','predicted','count'])
#     metadata = {
#         "outputs": [
#             {
#                 "type": "confusion_matrix",
#                 "format": "csv",
#                 "schema": [
#                     {
#                         "name": "target",
#                         "type": "CATEGORY"
#                     },
#                     {
#                         "name": "predicted",
#                         "type": "CATEGORY"
#                     },
#                     {
#                         "name": "count",
#                         "type": "NUMBER"
#                     }
#                 ],
#                 "source": df.to_csv(header=False, index=False),
#                 "storage": "inline",
#                 "labels": [
#                     "y",
#                     "n"
#                 ]
#             }
#         ]
#     }
#     return [json.dumps(metadata)]

# Pipeline with a single step that publishes a confusion-matrix visualization.
# Comments are used instead of a docstring so the function's __doc__ stays
# unset, exactly as in the original definition.
@pipeline(
    name=EXPERIMENT_NAME,
    description=EXPERIMENT_DESC,
)
def my_pipeline():
    # Create the confusion-matrix step with its default CSV source and apply
    # resource settings: 200Mi memory / 500m CPU requested; the limits fall
    # back to the defaults of pod_resource_transformer.
    matrix_step = pod_resource_transformer(
        confusion_visualization(),
        mem_req="200Mi",
        cpu_req="500m",
    )
    # Label shown for this step.
    matrix_step.set_display_name("confusion matrix")
    
    

In [7]:
# NAME_SPACE and EXPERIMENT_NAME are defined once in the configuration cell
# that also imports kfp and declares the pipeline. They are deliberately not
# redefined here, so the namespace/experiment used for the run cannot drift
# away from the values used when the pipeline was declared.

# Create the Kubeflow Pipelines client with its default connection settings
# (NOTE(review): assumes the notebook runs where kfp.Client() can discover the
# API endpoint without arguments; confirm for your deployment).
client = kfp.Client()
# Make NAME_SPACE the user namespace for this client.
client.set_user_namespace(NAME_SPACE)

In [8]:
# Submit `my_pipeline` as a run in EXPERIMENT_NAME within NAME_SPACE.
# The pipeline takes no parameters, hence the empty `arguments` dict.
# Left as a bare expression so the returned run result is shown as cell output.
client.create_run_from_pipeline_func(
    pipeline_func=my_pipeline,
    arguments={},
    experiment_name=EXPERIMENT_NAME,
    namespace=NAME_SPACE,
    pipeline_conf=kfp.dsl.PipelineConf(),
    mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY,
)

RunPipelineResult(run_id=6d3a2f5e-3e68-4a16-ade6-bab6098428bd)