Everything below is taken from the Kubeflow "Simple Notebook Pipeline" example: https://github.com/kubeflow/examples/blob/master/pipelines/simple-notebook-pipeline/Simple%20Notebook%20Pipeline.ipynb

### Preliminary

In [1]:
from my_utils.os_utils import subprocess_execute

# Smoke test: run a trivial shell command through the my_utils helper and
# display whatever it returns.
subprocess_execute("echo 'hello'")

... cmd to execute:
echo 'hello'
... result:


'hello\n'

In [4]:
# NOTE: this only ever needs to be done once.
# Authenticate gcloud using the GCP service-account credentials:
#!gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}

In [2]:
# Verify the gcloud credentials by listing the projects they can see.
!gcloud projects list

PROJECT_ID   NAME         PROJECT_NUMBER
andrewm4894  andrewm4894  776608946719


In [5]:
# Create the GCS bucket that OUTPUT_DIR points into.
# When the bucket is already there, gsutil reports
# "409 Bucket ... already exists" instead of creating it.
!gsutil mb gs://andrewm4894-tmp/

Creating gs://andrewm4894-tmp/...
ServiceException: 409 Bucket andrewm4894-tmp already exists.


### Set Up

In [5]:
# Set your project name and output directory.
# PROJECT_NAME is interpolated into the gcr.io path of the component image;
# OUTPUT_DIR is the GCS staging path handed to the component build.
PROJECT_NAME =  'andrewm4894'
OUTPUT_DIR = 'gs://andrewm4894-tmp/assets' 

In [22]:
# Experiment that runs are submitted under, and the image the component is built on.
# NOTE(review): BASE_IMAGE pins a TensorFlow 2.0 beta tag (2.0.0b0) — confirm
# this is still the intended base image.
EXPERIMENT_NAME = 'dev'                    # Name of the experiment in the UI
BASE_IMAGE = 'tensorflow/tensorflow:2.0.0b0-py3'    # Base image used for components in the pipeline

In [7]:
import kfp
import kfp.dsl as dsl
from kfp import compiler

### Create pipeline component

#### Create python function

In [8]:
@dsl.python_component(
    name='add_op',
    description='adds two numbers',
    base_image=BASE_IMAGE,  # may be set here or later, when the component is built
)
def add(a: float, b: float) -> float:
    '''Return a + b, echoing the calculation to stdout first.'''
    total = a + b
    # Print "a + b = total" so the calculation is visible in the step's output.
    print(a, '+', b, '=', total)
    return total

#### Build pipeline component from the function

In [14]:
# Docker image (under this project's gcr.io path) that the built component is pushed to.
TARGET_IMAGE = f'gcr.io/{PROJECT_NAME}/add-op:latest' 

# Build the `add` function into a container image and keep the result as
# `add_op`; the pipeline definition calls `add_op(...)` once for every step
# it wants to create.
# Per the build log, the image is based on BASE_IMAGE, built by a kaniko job
# and pushed to TARGET_IMAGE, with OUTPUT_DIR checked as the GCS staging path.
# NOTE(review): timeout is presumably in seconds — the captured log was still
# "waiting for job to complete" at 35 seconds when the cell was interrupted;
# confirm that 100 is long enough for the build to finish.
add_op = compiler.build_python_component(
    component_func=add,
    staging_gcs_path=OUTPUT_DIR,
    base_image=BASE_IMAGE,
    target_image=TARGET_IMAGE,
    namespace='kubeflow-andrewm4894',
    timeout=100)

2019-10-19 20:36:38:INFO:Build an image that is based on tensorflow/tensorflow:2.0.0b0-py3 and push the image to gcr.io/andrewm4894/add-op:latest
2019-10-19 20:36:38:INFO:Checking path: gs://andrewm4894-tmp/assets...
2019-10-19 20:36:38:INFO:Generate entrypoint and serialization codes.
2019-10-19 20:36:38:INFO:Generate build files.
2019-10-19 20:36:38:INFO:Start a kaniko job for build.
2019-10-19 20:36:38:INFO:Cannot Find local kubernetes config. Trying in-cluster config.
2019-10-19 20:36:38:INFO:Initialized with in-cluster config.
2019-10-19 20:36:44:INFO:5 seconds: waiting for job to complete
2019-10-19 20:36:49:INFO:10 seconds: waiting for job to complete
2019-10-19 20:36:54:INFO:15 seconds: waiting for job to complete
2019-10-19 20:36:59:INFO:20 seconds: waiting for job to complete
2019-10-19 20:37:04:INFO:25 seconds: waiting for job to complete
2019-10-19 20:37:09:INFO:30 seconds: waiting for job to complete
2019-10-19 20:37:14:INFO:35 seconds: waiting for job to complete
2019-10-

KeyboardInterrupt: 

In [15]:
@dsl.pipeline(
    name='Calculation pipeline',
    description='A toy pipeline that performs arithmetic calculations.',
)
def calc_pipeline(a='0', b='7', c='17'):
    """Wire three add_op steps together: (a + 4), (a + b), then the sum of both results.

    a and b are pipeline parameters fed into the first two steps. c is
    accepted as a pipeline parameter but is not used by any step.
    """
    # An op argument can be a pipeline parameter or a plain constant.
    # Each add_op(...) call returns a dsl.ContainerOp instance.
    a_plus_four = add_op(a, 4)

    # Explicit ordering between tasks is also possible with
    # xyz_task.after(abc_task); it is not needed here.
    a_plus_b = add_op(a, b)

    # Consumes the outputs of the two steps above.
    sum_of_results = add_op(a_plus_four.output, a_plus_b.output)

In [16]:
# Compile the pipeline function into a package file named after the function
# (function name + '.pipeline.zip'); the submission cell passes this file to the client.
pipeline_func = calc_pipeline
pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'
compiler.Compiler().compile(pipeline_func, pipeline_filename)

In [24]:
# Create a Kubeflow Pipelines client and obtain the experiment that runs are submitted under.
client = kfp.Client()
experiment = client.create_experiment(EXPERIMENT_NAME)
# Alternative: look up an existing experiment. The name has to be passed by
# keyword, because the first positional parameter of get_experiment is the
# experiment id, not its name.
#experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)

In [26]:
# Pipeline argument values for this run; only `a` and `b` are overridden here.
arguments = {'a': '1', 'b': '1'}

# Submit a run of the compiled pipeline package under the experiment.
# The run is named after the pipeline function.
run_name = pipeline_func.__name__ + ' run'
run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)

#This link leads to the run information page. 
#Note: There is a bug in JupyterLab that modifies the URL and makes the link stop working