# Super simple Kubeflow Pipelines

Here we will schedule a single pipeline that downloads artifacts from minio buckets.

In [None]:
# Install the notebook extensions from the local './extensions' path and the kfp SDK
# from the 0.1.8 release tarball; pip's standard output is discarded.
!pip3 install --upgrade './extensions' > /dev/null
!pip3 install --upgrade 'https://storage.googleapis.com/ml-pipeline/release/0.1.8/kfp.tar.gz' > /dev/null

# Jupyter notebook extensions
# (presumably provides the `%%template` cell magic used further down - confirm)
%load_ext extensions

# Imports go here
from ipython_secrets import get_secret

from os import environ
import kfp
import kfp.compiler as compiler
import boto3
import tarfile
import kfp.dsl as dsl
# NOTE(review): this rebinds `compiler`, replacing the `kfp.compiler` alias imported
# above - confirm that `kfp.dsl` really exposes a `compiler` name and which binding
# is intended.
from kfp.dsl import compiler


## Environment setup

In [None]:
# Name of the Kubeflow Pipelines experiment that is looked up (or created) below;
# it is also used as the key prefix for the Docker build package in the bucket.
EXPERIMENT_NAME = 'Der-Experiment-1'

# Object storage bucket name, obtained through ipython_secrets instead of being hardcoded.
AWS_S3_BUCKET = get_secret('AWS_S3_BUCKET')

# Docker registry address and the name of the secret passed to `add_pull_secret`
# (presumably a Kubernetes secret holding registry credentials - confirm).
DOCKER_REGISTRY = get_secret('DOCKER_REGISTRY')
DOCKER_REGISTRY_SECRET = get_secret('DOCKER_REGISTRY_SECRET')
# Repository and tag that form the destination of the Kaniko build.
DOCKER_IMAGE = 'library/experiment'
DOCKER_TAG = 'latest'

Create API clients for pipelines and object storage

In [None]:
# Kubeflow Pipelines API client used by every cell below.
client = kfp.Client()

# Reuse the experiment when it already exists, otherwise create it.
# Catching `Exception` instead of using a bare `except:` keeps the same
# "create on any lookup failure" fallback, but no longer swallows
# KeyboardInterrupt / SystemExit when the cell is interrupted.
try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except Exception:
    experiment = client.create_experiment(EXPERIMENT_NAME)

# Build an experiment related docker image

Below we generate a Dockerfile that will be used to add some `minio` awareness:
- `Dockerfile` - a docker container that will be built and pushed into the private docker registry
- `Kaniko` - deployment job that carries out our container build

In [None]:
%%template Dockerfile
# Base image for the experiment container.
FROM gcr.io/google-samples/ml-pipeline-t2ttrain:latest
# The AWS_S3_BUCKET placeholder below is presumably filled in by the template magic
# from the notebook variable of the same name - confirm.
RUN echo 'hello, bucket {{AWS_S3_BUCKET}}!'

In [None]:
import extensions
from extensions.kubeflow import KanikoOp

@dsl.pipeline(
    name='Build',
    description='Kaniko docker ubild operations',
)
def kaniko():
    """Declare a one-step pipeline consisting of a single `KanikoOp`.

    The step runs the `gcr.io/kaniko-project/executor:latest` image with:

    * ``destination`` set to ``DOCKER_REGISTRY/DOCKER_IMAGE:DOCKER_TAG``;
    * ``package`` set to ``dockerbuild.tar.gz`` under the ``EXPERIMENT_NAME``
      prefix of the ``AWS_S3_BUCKET`` bucket.

    A pull secret and an AWS secret are then attached to the step.
    """
    image_destination = f"{DOCKER_REGISTRY}/{DOCKER_IMAGE}:{DOCKER_TAG}"
    build_package = f"s3://{AWS_S3_BUCKET}/{EXPERIMENT_NAME}/dockerbuild.tar.gz"

    build_step = KanikoOp(
        name='kaniko',
        image='gcr.io/kaniko-project/executor:latest',
        destination=image_destination,
        package=build_package,
    )
    # Attach the registry secret first, then the AWS secret built from a fresh
    # boto3 session (same call order as a fluent chain).
    build_step = build_step.add_pull_secret(secret_name=DOCKER_REGISTRY_SECRET)
    build_step.add_aws_secret(
        secret_name='kaniko-awscreds',
        session=boto3.session.Session(),
    )

# No visible import binds the name `Compiler`, so import it explicitly here;
# otherwise this cell fails with NameError on a fresh kernel.
from kfp.compiler import Compiler

# Compile the `kaniko` pipeline function into a package file for `run_pipeline`.
Compiler().compile(kaniko, 'kaniko.tar.gz')

In [None]:
# Submit the compiled 'kaniko.tar.gz' package as a run of the experiment.
# The returned object is kept in `r` because a later cell waits on `r.id`.
r = client.run_pipeline(experiment.id, f'build {DOCKER_IMAGE}', 'kaniko.tar.gz')

# uncomment this to block until completion
# client.wait_for_run_completion(r.id, timeout=400).run.status

Define a pipeline that will build and compile a docker container.

Generated files must be uploaded to an object storage bucket (e.g. S3, minio). The Docker build process (Kaniko) will need access to the S3 bucket.

In [None]:
# Block until the build run submitted above completes (timeout=400, presumably
# seconds - confirm) and display the run status as the cell output.
client.wait_for_run_completion(r.id, timeout=400).run.status

# Define an Experiment Pipeline

In [None]:
@dsl.pipeline(
    name='Super simple minio integration',
    description='I as a pipeline want to read a file from minio bucket',
)
def hello_minio_pipeline(filename: dsl.PipelineParam):
    """Declare a two-step pipeline: copy a file with `mc`, then echo the result.

    Args:
        filename: pipeline parameter appended to ``minio/`` to form the source
            path given to ``mc cp``.

    Steps:
        1. ``download`` runs ``mc --no-color cp minio/<filename> /tmp/results.txt``
           and exposes ``/tmp/results.txt`` as the ``downloaded`` file output.
        2. ``echo`` runs ``sh -c 'echo <output of step 1>'`` in a bash image.

    NOTE(review): `IMAGE_NAME`, `k8sc`, `S3_ACCESS_KEY`, `S3_SECRET_KEY` and
    `S3_ENDPOINT` are not bound in the cells visible before this one - confirm
    they are defined before the pipeline is compiled.
    """
    download_step = dsl.ContainerOp(
        name='download',
        image=IMAGE_NAME,
        command=['mc', '--no-color'],
        arguments=['cp', f'minio/{filename}', '/tmp/results.txt'],
        file_outputs={'downloaded': '/tmp/results.txt'},
    )

    # The endpoint URL, with the access and secret keys interpolated into it,
    # is handed to the container through the MC_HOSTS_minio environment variable.
    minio_host_env = k8sc.V1EnvVar(
        name='MC_HOSTS_minio',
        value=f'https://{S3_ACCESS_KEY}:{S3_SECRET_KEY}@{S3_ENDPOINT}',
    )
    download_step = download_step.add_env_variable(minio_host_env)

    # Second step: only its construction matters, so the result is not bound.
    dsl.ContainerOp(
        name='echo',
        image='library/bash:4.4.23',
        command=['sh', '-c'],
        arguments=[f'echo {download_step.output}'],
    )

### Execute the pipeline

The code below will create a new experiment **Hello Minio** and run it