# Super simple Kubeflow Pipelines

Here we will schedule a pipeline that downloads artifacts from minio buckets.

In [None]:
# Install the local ./extensions package and the KFP SDK pinned to release 0.1.8.
# stdout is redirected to /dev/null to keep the cell output quiet.
!pip3 install --upgrade './extensions' > /dev/null
!pip3 install --upgrade 'https://storage.googleapis.com/ml-pipeline/release/0.1.8/kfp.tar.gz' > /dev/null

# Load the Jupyter notebook extensions
# (presumably these provide the %%template / %templatefile magics used later - confirm)
%load_ext extensions

# imports go here
from kubernetes import client as kube_client
from ipython_secrets import get_secret
from os import environ
import kfp
import kfp.compiler as compiler
import boto3
import tarfile
import kfp.dsl as dsl


## Environment setup

In [None]:
def _env_or_secret(name):
    """Return environment variable `name`, or ask `get_secret` for it when unset or empty."""
    return environ.get(name) or get_secret(name)


# Name of the Kubeflow Pipelines experiment used throughout the notebook
EXPERIMENT_NAME = 'Das-Experiment-1'

# Object storage settings; the access key pair is always read through get_secret
AWS_DEFAULT_REGION = environ.get('AWS_DEFAULT_REGION', 'us-east-1')
AWS_S3_BUCKET = _env_or_secret('AWS_S3_BUCKET')
AWS_S3_ENDPOINT = environ.get('AWS_S3_ENDPOINT', 's3.amazonaws.com')
AWS_ACCESS_KEY_ID = get_secret('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = get_secret('AWS_SECRET_ACCESS_KEY')
AWS_SECRET_NAME = _env_or_secret('AWS_SECRET_NAME')

# Docker registry settings and the fully qualified name of the image to build
TAG = 'latest'
DOCKER_REGISTRY = _env_or_secret('DOCKER_REGISTRY')
DOCKER_REGISTRY_SECRET = get_secret('DOCKER_REGISTRY_SECRET')
DOCKER_IMAGE = f'{DOCKER_REGISTRY}/library/kubectl:{TAG}'

Create API clients for pipelines and object storage

In [None]:
# Object storage client, authenticated with the region and key pair configured earlier.
# NOTE(review): AWS_S3_ENDPOINT is not passed to boto3 here, so this client talks to
# the default AWS endpoint - confirm whether a custom (minio) endpoint is intended.
s3_options = {
    'region_name': AWS_DEFAULT_REGION,
    'aws_access_key_id': AWS_ACCESS_KEY_ID,
    'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
}
s3_client = boto3.client('s3', **s3_options)

# Kubeflow Pipelines API client, constructed with its default settings
client = kfp.Client()

*Create experiment*: we retrieve the experiment by name, or create a new one if it does not exist yet

In [None]:
# Reuse the experiment when it already exists; otherwise create it.
# `except Exception` is used instead of a bare `except:` so that
# KeyboardInterrupt and SystemExit are not swallowed during the lookup.
try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except Exception:
    experiment = client.create_experiment(EXPERIMENT_NAME)

# Build an experiment related docker image

Below we generate a Dockerfile that will be used to add some `minio` awareness:
- `Dockerfile` - a docker container that will be built and pushed into the private docker registry
- `Kaniko` - deployment job to carry out our container build

In [None]:
%%template Dockerfile
FROM gcr.io/google-samples/ml-pipeline-t2ttrain:latest
RUN echo "{{DOCKER_IMAGE}}"

In [None]:
# Render the Kaniko workflow template into kaniko.yaml (the file named after -o).
# NOTE(review): -v is presumably a verbose switch of the custom magic - confirm.
%templatefile extensions/templates/kaniko-workflow.yaml -o kaniko.yaml -v

Generated files must be uploaded to an object storage bucket (e.g. s3, minio). The Docker build process (Kaniko) will need access to the s3 bucket

In [None]:
# Bundle the generated Dockerfile and the rendered Kaniko workflow; the
# workflow is stored in the archive under the name pipeline.yaml.
with tarfile.open('dockerbuild.tar.gz', 'w:gz') as archive:
    for source_path, member_name in (('Dockerfile', 'Dockerfile'),
                                     ('kaniko.yaml', 'pipeline.yaml')):
        archive.add(source_path, arcname=member_name)

# Publish the archive to the bucket under a key prefixed with the experiment name
s3_client.upload_file(
    'dockerbuild.tar.gz',
    AWS_S3_BUCKET,
    f'{EXPERIMENT_NAME}/dockerbuild.tar.gz',
)

# Submit the archive as a pipeline run, passing the target image as a parameter
r = client.run_pipeline(
    experiment.id,
    f'build {DOCKER_IMAGE}',
    'dockerbuild.tar.gz',
    params={'image': DOCKER_IMAGE},
)

In [None]:
# Block until the image build run completes (timeout=180), then display its status
build_result = client.wait_for_run_completion(r.id, timeout=180)
build_result.run.status

# Define an Experiment Pipeline

In [None]:
@dsl.pipeline(
  name='Super simple minio integration',
  description='I as a pipeline want to read a file from minio bucket'
)
def hello_minio_pipeline(filename: dsl.PipelineParam):
    """Download a file from the `minio` alias with `mc` and echo its content.

    Args:
        filename: pipeline parameter with the object path below the `minio`
            alias (the run in this notebook passes 'default/hello.txt').
    """
    # Step 1: copy the object to /tmp/results.txt and expose that file as the
    # step output named 'downloaded'. The `minio` alias is supplied to `mc`
    # through the MC_HOSTS_minio environment variable.
    # NOTE(review): the access key pair is embedded in plain text in the
    # environment variable value - consider sourcing it from a Kubernetes secret.
    op1 = dsl.ContainerOp(
        name='download',
        image='minio/mc',
        command=['mc', '--no-color'],
        arguments=['cp', f'minio/{filename}', '/tmp/results.txt'],
        file_outputs={'downloaded': '/tmp/results.txt'}
    ).add_env_variable(
        # Use the names this notebook actually defines: the kubernetes client
        # is imported as `kube_client`, and the credentials/endpoint live in
        # the AWS_* configuration variables.
        kube_client.V1EnvVar(
            name='MC_HOSTS_minio',
            value=f'https://{AWS_ACCESS_KEY_ID}:{AWS_SECRET_ACCESS_KEY}@{AWS_S3_ENDPOINT}'
        ))
    # Step 2: echo the output of the download step
    op2 = dsl.ContainerOp(
        name='echo',
        image='library/bash:4.4.23',
        command=['sh', '-c'],
        arguments=[f'echo {op1.output}'])

### Execute the pipeline

The code below compiles the **Hello Minio** pipeline and runs it within the experiment

In [None]:
# Compile the pipeline function into a package that can be uploaded
compiler.Compiler().compile(hello_minio_pipeline, 'pipeline.tar.gz')

# Submit a run to the experiment; the experiment object is bound to the name
# `experiment` in this notebook (the previous `exp` was never defined).
run = client.run_pipeline(experiment.id,
                          'pipeline 1',
                          'pipeline.tar.gz',
                          params={'filename': 'default/hello.txt'})
