# About this Jupyter Notebook

@author: Yingding Wang

This notebook demonstrates how to define components that run in a fixed order in a Kubeflow (KF) pipeline.

In [1]:
import sys
# Install the pinned kfp SDK (1.8.12) with pip, using the same Python
# interpreter that runs this kernel (sys.executable).
!{sys.executable} -m pip install --upgrade --user kfp==1.8.12



# (optional step) Upgrade jupyterlab
Run the following cells to upgrade the jupyterlab server; you need to restart the notebook server afterwards for the upgrade to take effect.

In [2]:
# Show the metadata of the installed jupyterlab package.
# NOTE(review): the trailing "3.0.16" looks like a version noted on an earlier
# run; the recorded output of this cell reports 3.4.3.
!{sys.executable} -m pip show jupyterlab # 3.0.16
# !{sys.executable} -m pip show jupyter_contrib_nbextensions

Name: jupyterlab
Version: 3.4.3
Summary: JupyterLab computational environment
Home-page: https://jupyter.org
Author: Jupyter Development Team
Author-email: jupyter@googlegroups.com
License: None
Location: /home/jovyan/.local/lib/python3.8/site-packages
Requires: nbclassic, packaging, jupyterlab-server, jupyter-server, jupyter-core, jinja2, ipython, tornado
Required-by: 


In [3]:
# update the jupyter lab
# uncomment the following command to update jupyterlab
# !{sys.executable} -m pip install --upgrade --user jupyterlab==3.4.3

## (optional) Restart your notebook server in Kubeflow by stopping and starting it

## (optional) Upgrade your kfp-server-api
If you see a headtoken issue while starting the Kubeflow pipeline from your notebook, execute this optional step to upgrade kfp-server-api so that it matches the Kubeflow pipeline backend.


In [4]:
# List the installed kfp packages to check the kfp-server-api version.
# Expected kfp-server-api version: 1.7.0 for KF 1.4, 1.7.1 for KF 1.4.1
# and 1.8.1 for KF 1.5.0.
!{sys.executable} -m pip list | grep kfp

kfp                      1.8.12
kfp-pipeline-spec        0.1.16
kfp-server-api           1.8.1


In [5]:
# Uncomment the pair of commands that matches your Kubeflow version.
#
# Upgrade the kfp server api version to 1.7.0 for KF 1.4:
# !{sys.executable} -m pip uninstall -y kfp-server-api
# !{sys.executable} -m pip install --user --upgrade kfp-server-api==1.7.0
#
# Upgrade the kfp server api version to 1.8.1 for KF 1.5:
# !{sys.executable} -m pip uninstall -y kfp-server-api
# !{sys.executable} -m pip install --user --upgrade kfp-server-api==1.8.1

'upgrade the kfp server api version to 1.8.1 for KF 1.5'

## Define a function to restrict ContainerOp resources for kfp with multi-tenancy

In [6]:
from kfp.dsl import ContainerOp
def pod_resource_transformer(op: ContainerOp, mem_req="200Mi", cpu_req="2000m", mem_lim="1000Mi", cpu_lim='2000m'):
    """
    Apply memory and CPU requests/limits to a pipeline op.

    The setters are invoked in the order memory request, memory limit,
    CPU request, CPU limit; each one is called on the value returned by the
    previous setter, and the value returned by the last setter is returned.

    Args:
        op: the op to configure.
        mem_req: memory request, e.g. "200Mi".
        cpu_req: CPU request, e.g. "2000m".
        mem_lim: memory limit, e.g. '1000Mi' (about 1GB).
        cpu_lim: CPU limit, e.g. '1000m' (1 cpu core).

    Returns:
        Whatever the final ``set_cpu_limit`` call returns.
    """
    configured = op.set_memory_request(mem_req)
    configured = configured.set_memory_limit(mem_lim)
    configured = configured.set_cpu_request(cpu_req)
    return configured.set_cpu_limit(cpu_lim)

## Define the variables

In [7]:
from platform import python_version

# Name of the experiment in the UI, plus a short description of it.
EXPERIMENT_NAME = "kf v1.5 test"
EXPERIMENT_DESC = "testing KF platform"

# Base image used for components in the pipeline.
# Alternative that follows the Python version of this notebook:
#   BASE_IMAGE = f"library/python:{python_version()}"
# NOTE(review): the original remark on that alternative read "which has not
# root" -- presumably the image does not run as root; confirm.
BASE_IMAGE = "python:3.8.13"

# Change the namespace if necessary.
NAME_SPACE = "kubeflow-kindfor"

## Create kfp pipeline with KFP python sdk

In [8]:
import kfp
import kubernetes
import kfp.dsl as dsl
import kfp.compiler as compiler
import kfp.components as components
from kfp.components import create_component_from_func

## Create components from functions

In [9]:
from kfp.components import create_component_from_func
from functools import partial

@partial(
    create_component_from_func,
    output_component_file='step_1_component.yaml',
    base_image=BASE_IMAGE,
    # Must be None or a list of real package names such as ["pandas"];
    # [""] cannot be used as a requirement.
    packages_to_install=None,
)
def print_op_1():
    '''Prints a message identifying step 1 of the pipeline.'''
    print("I am step 1")

In [10]:
@partial(
    create_component_from_func,
    output_component_file='step_2_component.yaml',
    base_image=BASE_IMAGE,
    # Must be None or a list of real package names such as ["pandas"];
    # [""] cannot be used as a requirement.
    packages_to_install=None,
)
def print_op_2():
    '''Prints a message identifying step 2 of the pipeline.'''
    print("I am step 2")

In [11]:


@dsl.pipeline(
    name='ordered-dag-pipeline',
    description='A sample pipeline that runs two print components in a fixed order.'
)
def ordered_dag_pipeline():
    """Run print_op_1 and then print_op_2, with the order declared explicitly."""
    print_task_1 = print_op_1()
    print_task_1 = pod_resource_transformer(print_task_1, mem_req="500Mi", cpu_req="500m")
    # NOTE(review): "P0D" is a zero-length duration, presumably set so that
    # step 1 never reuses a cached result. Step 2 keeps the default caching
    # behaviour -- confirm this asymmetry is intended.
    print_task_1.execution_options.caching_strategy.max_cache_staleness = "P0D"
    print_task_2 = print_op_2()
    print_task_2 = pod_resource_transformer(print_task_2, mem_req="500Mi", cpu_req="500m")
    # The two tasks exchange no data, so the execution order has to be
    # declared explicitly: step 2 runs after step 1.
    print_task_2.after(print_task_1)

# Alias under which the pipeline function is submitted later in the notebook.
my_pipeline = ordered_dag_pipeline

In [12]:
# NAME_SPACE is taken from the "Define the variables" cell; it is no longer
# re-assigned here so that the namespace has a single source of truth.
# NOTE(review): this deliberately keeps the override of the EXPERIMENT_NAME
# defined in the "Define the variables" cell ('kf v1.5 test') -- confirm which
# experiment name is the intended one and drop the other.
EXPERIMENT_NAME = "kf-metrics"

# Create the KFP client and make NAME_SPACE its user namespace.
client = kfp.Client()
client.set_user_namespace(NAME_SPACE)

In [13]:
# Submit my_pipeline as a run in EXPERIMENT_NAME / NAME_SPACE, using a default
# PipelineConf and the V1_LEGACY execution mode. The pipeline takes no
# arguments, hence arguments=None.
client.create_run_from_pipeline_func(
    pipeline_func=my_pipeline,
    arguments=None,
    experiment_name=EXPERIMENT_NAME,
    namespace=NAME_SPACE,
    pipeline_conf=dsl.PipelineConf(),
    mode=dsl.PipelineExecutionMode.V1_LEGACY,
)

RunPipelineResult(run_id=23ef5fc4-38cc-42d0-849c-d45823505da7)