# Test KFP Integration

- create an experiment
- create a run
- check that the run passes. This happens only when both of the following are true:
    * the run's pod is scheduled on a node with an NVIDIA GPU
    * the code, and more specifically the TensorFlow framework, has access to a GPU

In [None]:
# Please check the requirements.in file for more details
!pip install -r requirements.txt

In [None]:
import kfp
import os

from kfp import dsl
from tenacity import retry, stop_after_attempt, wait_exponential

In [None]:
# KFP API client used by every later cell; no host or credentials are passed,
# so the SDK's default connection settings apply.
client = kfp.Client()

In [None]:
# Name of the KFP experiment the run is created under and that is fetched
# again in the final verification cell.
EXPERIMENT_NAME = 'Check access to GPU'

In [None]:
# Proxy configuration is all-or-nothing: read the three variables from the
# notebook's environment, and unless every one of them is set to a non-empty
# value, reset all three to None.
HTTP_PROXY = os.environ.get('HTTP_PROXY')
HTTPS_PROXY = os.environ.get('HTTPS_PROXY')
NO_PROXY = os.environ.get('NO_PROXY')

if not (HTTP_PROXY and HTTPS_PROXY and NO_PROXY):
    HTTP_PROXY = HTTPS_PROXY = NO_PROXY = None

def add_proxy(task: dsl.PipelineTask, http_proxy=HTTP_PROXY, https_proxy=HTTPS_PROXY, no_proxy=NO_PROXY) -> dsl.PipelineTask:
    """Set the proxy environment variables on a pipeline task.

    Both the lower-case and upper-case spelling of each variable are set.

    Args:
        task: The task whose container should receive the variables.
        http_proxy: Value for ``http_proxy`` / ``HTTP_PROXY``.
        https_proxy: Value for ``https_proxy`` / ``HTTPS_PROXY``.
        no_proxy: Value for ``no_proxy`` / ``NO_PROXY``.

    The defaults are the module-level proxy constants as they were when this
    function was defined.

    Returns:
        The object returned by the last ``set_env_variable`` call.
    """
    # Same (name, value) pairs, in the same order, as the variables are applied.
    proxy_env = (
        ('http_proxy', http_proxy),
        ('https_proxy', https_proxy),
        ('HTTP_PROXY', http_proxy),
        ('HTTPS_PROXY', https_proxy),
        ('no_proxy', no_proxy),
        ('NO_PROXY', no_proxy),
    )
    for env_name, env_value in proxy_env:
        # Each call's result is carried forward to the next call.
        task = task.set_env_variable(name=env_name, value=env_value)
    return task

def proxy_envs_set():
    """Return True only when all three module-level proxy values are set.

    A value counts as set when it is truthy, so None and the empty string
    both make the result False.
    """
    return bool(HTTP_PROXY and HTTPS_PROXY and NO_PROXY)

In [None]:
@dsl.component(base_image="kubeflownotebookswg/jupyter-tensorflow-cuda:v1.9.0")
def gpu_check() -> str:
    """Check access to a GPU."""
    # Imported inside the function body rather than at the top of the notebook.
    import tensorflow as tf

    detected = tf.config.list_physical_devices('GPU')
    print("GPU list:", detected)

    # Raise when TensorFlow reports no GPU device at all.
    if not detected:
        raise RuntimeError("No GPU has been detected.")

    # At this point the list is non-empty, so the returned string is "True".
    return str(len(detected) > 0)

def add_gpu_request(task: dsl.PipelineTask) -> dsl.PipelineTask:
    """Request one NVIDIA GPU for the given pipeline task.

    Adds a node selector constraint for the ``nvidia.com/gpu`` accelerator and
    sets the accelerator limit to 1.

    Args:
        task: The task to configure.

    Returns:
        The object returned by ``set_accelerator_limit``.
    """
    constrained = task.add_node_selector_constraint(accelerator="nvidia.com/gpu")
    return constrained.set_accelerator_limit(limit=1)

In [None]:
@dsl.pipeline
def gpu_check_pipeline() -> str:
    """Create a pipeline that runs code to check access to a GPU."""
    # Single-step pipeline: instantiate the check, then attach the GPU request.
    check_task = gpu_check()
    gpu_task = add_gpu_request(check_task)
    # The pipeline's output is the string produced by the check step.
    return gpu_task.output

@dsl.pipeline
def gpu_check_pipeline_proxy() -> str:
    """Create a pipeline that runs code to check access to a GPU and sets the appropriate proxy ENV variables."""
    # Same single step as the non-proxy pipeline: the GPU request is attached
    # first, then the proxy environment variables are added on top.
    check_task = gpu_check()
    gpu_task = add_gpu_request(check_task)
    proxied_task = add_proxy(gpu_task)
    # The pipeline's output is the string produced by the check step.
    return proxied_task.output

In [None]:
# Caching is disabled to work around
# https://github.com/canonical/bundle-kubeflow/issues/1067
# The proxy-aware pipeline is submitted only when all proxy values are set;
# otherwise the plain pipeline is used. Everything else about the run is the same.
run = client.create_run_from_pipeline_func(
    gpu_check_pipeline_proxy if proxy_envs_set() else gpu_check_pipeline,
    experiment_name=EXPERIMENT_NAME,
    enable_caching=False,
)

In [None]:
# Query the experiments known to the KFP API; as the last expression of the
# cell, the resulting list is shown in the notebook output for inspection.
client.list_experiments().experiments

In [None]:
# Show the current state of the run submitted above. This is a one-off
# snapshot for the notebook output; the waiting/asserting happens further down.
client.get_run(run.run_id).state

In [None]:
@retry(
    reraise=True,
    stop=stop_after_attempt(30),
    wait=wait_exponential(multiplier=2, min=1, max=10),
)
def assert_run_succeeded(client, run_id):
    """Assert that the KFP run has reached the ``SUCCEEDED`` state.

    The ``retry`` decorator re-invokes this check when the assertion fails,
    for at most 30 attempts with an exponential wait bounded between 1 and 10.
    With ``reraise=True`` the last ``AssertionError`` is raised to the caller
    once the attempts are exhausted.

    Args:
        client: KFP client used to look up the run.
        run_id: Identifier of the run to check.

    Raises:
        AssertionError: If the run is in any state other than ``SUCCEEDED``.
    """
    run_details = client.get_run(run_id)
    status = run_details.state
    assert status == "SUCCEEDED", f"KFP run in {status} state."

In [None]:
# fetch KFP experiment to ensure it exists
# (the returned object is discarded; presumably the call raises when the
# experiment cannot be found — TODO confirm against the KFP client docs)
client.get_experiment(experiment_name=EXPERIMENT_NAME)

# Final check of the notebook: poll the submitted run until it reports
# SUCCEEDED, or fail with an AssertionError once the retries are exhausted.
assert_run_succeeded(client, run.run_id)