## Add transformers to the 05_01_Pipeline_SDK example and use node selectors to schedule pipeline steps on the preferred node group labeled role=workers

### Define the node selector transformer

Ensure that the following labels and taints exist on the nodes: `role=workers`

In [1]:
import kfp.dsl as dsl
from kfp import compiler

#from irml_tim.kubeflow import transformers
from kubernetes import client as k8s_client
from kubernetes.client.models import V1EnvVar, V1SecretKeySelector

In [2]:
def node_selector(op):
    """Op transformer that pins container steps to nodes labeled ``role=workers``.

    Intended to be registered with ``pipeline_conf.add_op_transformer`` so it
    is applied to every op of a pipeline.

    Args:
        op: The pipeline op being transformed. Only ``dsl.ContainerOp``
            instances are modified; any other op is passed through untouched.

    Returns:
        The same ``op`` object, so the function follows the "op in, op out"
        transformer convention and can also be used where the result is consumed.
    """
    if not isinstance(op, dsl.ContainerOp):
        return op

    op.add_node_selector_constraint('role', 'workers')
    # Optional extra constraints / resource requests; uncomment as needed:
    # op.add_node_selector_constraint('single-az', 'true')
    # op.add_node_selector_constraint('spot', 'false')
    # op.container.set_memory_request("2G")
    # op.container.set_cpu_request("1")
    return op

### Build simple components and pipelines

In [3]:
import os

import kfp
from kfp import dsl

# Endpoint of the Kubeflow Pipelines API. Override with the KFP_HOST
# environment variable; the default is the endpoint this example was written for.
KFP_HOST = os.environ.get(
    'KFP_HOST',
    'http://3e100955-istiosystem-istio-2af2-1671188516.us-west-2.elb.amazonaws.com/pipeline',
)

# The session cookie is a credential and must never be committed to the notebook.
# Export it before starting Jupyter, e.g.:
#   export KFP_AUTHSERVICE_SESSION=<value of the authservice_session cookie>
# NOTE(review): the cookie value that used to be hardcoded here should be
# treated as leaked and invalidated.
_session_cookie = os.environ.get('KFP_AUTHSERVICE_SESSION', '')
if not _session_cookie:
    raise RuntimeError(
        'Set the KFP_AUTHSERVICE_SESSION environment variable to the value of '
        'the "authservice_session" cookie before running this cell.'
    )
# Accept either the bare cookie value or the full "authservice_session=<value>" form.
if not _session_cookie.startswith('authservice_session='):
    _session_cookie = 'authservice_session=' + _session_cookie

authservice_session = _session_cookie
client = kfp.Client(host=KFP_HOST, cookies=authservice_session)
#client.list_experiments(namespace="eksworkshop")

def add_two_numbers(a, b):
    """Build a ``calculate_sum`` container step that adds ``a`` and ``b``.

    The step runs an inline Python snippet in the ``python:3.6.8`` image that
    writes ``str(a + b)`` to ``/tmp/results.txt``; that file is exposed as the
    step's ``data`` output.

    Args:
        a: First operand, formatted into the inline script.
        b: Second operand, formatted into the inline script.

    Returns:
        The ``dsl.ContainerOp`` describing the step.
    """
    result_path = '/tmp/results.txt'
    # Operands are substituted into the script text before it is handed to the op.
    script = 'with open("/tmp/results.txt", "a") as file: file.write(str({} + {}))'.format(a, b)
    outputs = {'data': result_path}

    return dsl.ContainerOp(
        name='calculate_sum',
        image='python:3.6.8',
        command=['python', '-c'],
        arguments=[script],
        file_outputs=outputs,
    )

def echo_op(text):
    """Build an ``echo`` container step that prints ``Result: <text>``.

    Args:
        text: Value formatted into the shell ``echo`` command.

    Returns:
        The ``dsl.ContainerOp`` describing the step, run with ``sh -c`` in the
        ``library/bash:4.4.23`` image.
    """
    shell_command = 'echo "Result: {}"'.format(text)

    return dsl.ContainerOp(
        name='echo',
        image='library/bash:4.4.23',
        command=['sh', '-c'],
        arguments=[shell_command],
    )

### Define your pipeline as a Python function

In [4]:
@dsl.pipeline(
  name='Calculate sum pipeline',
  description='Calculate sum of numbers and prints the result.'
)
def calculate_sum(
    a=7,
    b=10,
    c=4,
    d=7
):
    """A four-step pipeline with first two running in parallel.

    Steps:
      1. ``a + b`` and ``c + d`` are computed by two independent steps.
      2. A third step adds the outputs of the first two.
      3. A final step echoes the overall result.

    The ``node_selector`` transformer is registered on the pipeline
    configuration so it is applied to the ops of this pipeline.

    Args:
        a: First operand of the first sum.
        b: Second operand of the first sum.
        c: First operand of the second sum.
        d: Second operand of the second sum.
    """
    sum1 = add_two_numbers(a, b)
    sum2 = add_two_numbers(c, d)
    # Named `total` rather than `sum` to avoid shadowing the builtin.
    total = add_two_numbers(sum1.output, sum2.output)

    echo_op(total.output)

    # Register the transformer on the pipeline-level configuration.
    dsl.get_pipeline_conf().add_op_transformer(node_selector)

### Compile the pipeline

In [5]:
# Compile the pipeline function into a package file that can be submitted for a run.
pipeline_compiler = kfp.compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=calculate_sum,
    package_path='calculate-sum-pipeline.zip',
)



### Deploy pipeline

In [6]:
# Uses the `client` created earlier in the notebook (a bare `kfp.Client()`
# would be the alternative when no host/cookie is required).
aws_experiment = client.create_experiment(name='aws', namespace='eksworkshop')

# Submit the compiled package as a run inside that experiment.
my_run = client.run_pipeline(
    experiment_id=aws_experiment.id,
    job_name='calculate-sum-pipeline',
    pipeline_package_path='calculate-sum-pipeline.zip',
)