## Building Docker Artifacts

In [1]:
cd farmer

/home/jupyter/kubeflow-sdk-demo/farmer


In [2]:
import getpass
import subprocess

In [10]:
# Ask for the Docker Hub credentials interactively; getpass does not echo the typed text.
username = getpass.getpass(prompt='username')
password = getpass.getpass(prompt='password')


username ········
password ········


In [15]:
# NOTE(review): echoing `username` stores the account name in the saved notebook output.
username

'apoorv01gupta'

In [16]:
# Log in to Docker Hub with the `username` / `password` values collected via getpass.
# The password is sent on stdin (--password-stdin) so it never appears in the
# notebook source, on the command line, or in the process list.
login_result = subprocess.run(
    ["docker", "login", "--username", username, "--password-stdin"],
    input=password,
    text=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)
print(login_result.stdout)
# Fail the cell loudly if the login did not succeed.
login_result.check_returncode()

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [None]:
%%writefile config.py
gs_bucket_name="data-labeling-demo"
Bucket_uri="gs://data-labeling-demo"
version=1
store_artifacts=Bucket_uri + "/" + str(version)
data_path=Bucket_uri + "/" + "data/data_raw.csv"
processed_data=Bucket_uri + "/" + "processed/data_processed.csv"

In [17]:
%%writefile Dockerfile
FROM tensorflow/tensorflow:2.2.0-gpu
ARG DEBIAN_FRONTEND=noninteractive

# Remove and re-fetch NVIDIA's CUDA repository signing key (3bf863cc) before
# the first `apt-get update`.
RUN apt-key del 3bf863cc
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub

# Install apt dependencies
RUN apt-get update && apt-get install -y \
    git \
    gpg-agent \
    python3-cairocffi \
    protobuf-compiler \
    python3-pil \
    python3-lxml \
    python3-tk \
    wget

# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
    echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
    apt-get update -y && apt-get install google-cloud-sdk -y

WORKDIR /pipeline

# Install the Python dependencies before copying the rest of the build context,
# so that editing other files does not invalidate the cached pip layers.
COPY requirements.txt ./
RUN pip install -r requirements.txt
RUN pip install "dask[dataframe]" --upgrade

# Copy the whole build context into /pipeline.
COPY ./ ./

ENV TF_CPP_MIN_LOG_LEVEL=3

Writing Dockerfile


## Build Docker Image

In [20]:
# Build an image from the current directory and tag it apoorv01gupta/farmer_docker_image.
!docker build -t apoorv01gupta/farmer_docker_image .

Sending build context to Docker daemon  15.36kB
Step 1/11 : FROM tensorflow/tensorflow:2.2.0-gpu
 ---> f5ba7a196d56
Step 2/11 : ARG DEBIAN_FRONTEND=noninteractive
 ---> Using cache
 ---> 0a700e73df75
Step 3/11 : RUN apt-key del 3bf863cc
 ---> Using cache
 ---> cb31fb30a7f6
Step 4/11 : RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
 ---> Using cache
 ---> dc8d279da1a7
Step 5/11 : RUN apt-get update && apt-get install -y     git     gpg-agent     python3-cairocffi     protobuf-compiler     python3-pil     python3-lxml     python3-tk     wget
 ---> Using cache
 ---> 157368107bc1
Step 6/11 : RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" &&     echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list &&     curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - &&     apt-get update -y && apt-get install google-cloud-sdk -y

## Push Docker Image to Docker Hub

In [21]:
# Push the tagged image; no tag is given, so Docker uses its default tag.
!docker push apoorv01gupta/farmer_docker_image

Using default tag: latest
The push refers to repository [docker.io/apoorv01gupta/farmer_docker_image]

[1B520da984: Preparing 
[1B31cde575: Preparing 
[1B9176f7b7: Preparing 
[1B1b2dd4c6: Preparing 
[1B3b38691e: Preparing 
[1B6a826791: Preparing 
[1B42fe40dd: Preparing 
[1B6c3dfb90: Preparing 
[1Be55f84c6: Preparing 
[1Bb0f92c14: Preparing 
[1Bcf4cd527: Preparing 
[1Bc1f74e01: Preparing 
[1B9e4b0fc9: Preparing 
[1Be3b79e0a: Preparing 
[1Be43735a0: Preparing 
[1B3918ca41: Preparing 
[1B768f66a4: Preparing 
[1Bd332a58a: Preparing 
[1Bf11cbf29: Preparing 
[1Ba4b22186: Preparing 
[1Bafb09dc3: Preparing 
[17Ba826791: Waiting g 
[17B2fe40dd: Waiting g 
[20Bb38691e: Pushed   977.5MB/944.2MBtensorflow 20A[2K[20A[2K[20A[2K[23A[2K[20A[2K[20A[2K[23A[2K[23A[2K[23A[2K[20A[2K[23A[2K[20A[2K[23A[2K[20A[2K[23A[2K[23A[2K[20A[2K[24A[2K[23A[2K[20A[2K[23A[2K[18A[2K[23A[2K[20A[2K[23A[2K[20A[2K[19A[2K[20A[2K[23A[2K[20A[2K[23A

## Install Kubeflow SDK

In [49]:
!python3 -m pip install kfp


[0m

In [55]:
# NOTE(review): without -y/--yes pip stops at a "Proceed (Y/n)?" prompt; the recorded
# run of this cell was cancelled at that prompt. Confirm whether this uninstall is still wanted.
!python3 -m pip uninstall kfp.pipeline_spec

[0mFound existing installation: kfp-pipeline-spec 0.1.16
Uninstalling kfp-pipeline-spec-0.1.16:
  Would remove:
    /home/jupyter/.local/lib/python3.7/site-packages/kfp/pipeline_spec/*
    /home/jupyter/.local/lib/python3.7/site-packages/kfp_pipeline_spec-0.1.16.dist-info/*
Proceed (Y/n)? ^C
[31mERROR: Operation cancelled by user[0m[31m
[0m

# Building Kubeflow Pipeline

In [61]:
import kfp
import kfp.components as comp
from kubernetes.client.models import V1EnvVar

ModuleNotFoundError: No module named 'deprecated'

## Define Kubeflow Pipeline Components

In [None]:
@kfp.dsl.component
def get_data():
    # Container step named 'Data-Preparation' that runs `python get_data.py`.
    # NOTE(review): this image is not the apoorv01gupta/farmer_docker_image that is
    # built and pushed earlier in this notebook — confirm which one is intended.
    return kfp.dsl.ContainerOp(
        name='Data-Preparation',
        image='docker.io/ankitaj86/kubeflow-sdk-farmer',
        command=['python', 'get_data.py'],
    )

In [None]:
@kfp.dsl.component
def data_preprocessing():
    # Container step named 'data-preprocessing' that runs `python process_data.py`.
    # NOTE(review): this image is not the apoorv01gupta/farmer_docker_image that is
    # built and pushed earlier in this notebook — confirm which one is intended.
    return kfp.dsl.ContainerOp(
        name='data-preprocessing',
        image='docker.io/ankitaj86/kubeflow-sdk-farmer',
        command=['python', 'process_data.py'],
    )

In [None]:
@kfp.dsl.component
def training():
    # Maps each output name to the file path handed to `file_outputs`.
    produced_files = {
        'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
        "mlpipeline-metrics": '/mlpipeline-metrics.json',
    }
    # Container step named 'training' that runs `python train.py`.
    # NOTE(review): this image is not the apoorv01gupta/farmer_docker_image that is
    # built and pushed earlier in this notebook — confirm which one is intended.
    return kfp.dsl.ContainerOp(
        name='training',
        image='docker.io/ankitaj86/kubeflow-sdk-farmer',
        command=['python', 'train.py'],
        file_outputs=produced_files,
    )

In [None]:
# Inspect the component configuration: build the training op and print it.
# NOTE(review): this calls training() outside of a pipeline function — confirm the
# installed kfp version allows constructing a ContainerOp that way.
debug = True
if debug :
    training_component_vis = training()
    print(training_component_vis)

## Final Kubeflow pipeline Definition

In [None]:
@kfp.dsl.pipeline(
  name="Modeling Swiss farmer's attitudes about",
  description="Modeling Swiss farmer's attitudes about"
)
def farmer():
    # Chain the three steps so each one starts after the previous one:
    # get_data -> data_preprocessing -> training.
    fetch_step = get_data()
    preprocess_step = data_preprocessing().after(fetch_step)
    train_step = training().after(preprocess_step)

    # Every step gets a max_cache_staleness of "P0D" (an ISO 8601 zero-day duration).
    # NOTE(review): presumably this prevents cached step results from being reused —
    # confirm against the KFP caching documentation.
    for step in (fetch_step, preprocess_step, train_step):
        step.execution_options.caching_strategy.max_cache_staleness = "P0D"


In [None]:
# Inspect the pipeline configuration by calling the pipeline function and printing the result.
# NOTE(review): farmer() has no return statement, so unless the decorator changes
# that, this prints None — confirm this cell is still useful.
debug = True
if debug :
    training_pipeline_output = farmer()
    print(training_pipeline_output)

## Compile Kubeflow Pipeline 
* Compiling generates a .zip archive containing the YAML file that holds the Kubeflow pipeline configuration.

In [None]:
# Compile the `farmer` pipeline function into the package 'farmer-kubeflow-sdk-demo.zip'.
kfp.compiler.Compiler().compile(farmer, 'farmer-kubeflow-sdk-demo.zip')

## Connect to deployed kubeflow pipeline Endpoint (GCP)

In [None]:
# Create the kfp client.
# Note: pass the Kubeflow Pipelines endpoint as `host` when the client is not
# running on the same cluster as the pipelines deployment.
client = kfp.Client(host="57f227181c3ac171-dot-us-central1.pipelines.googleusercontent.com")

### Create Experiment 

In [None]:
# Create an experiment named EXPERIMENT_NAME through the client and keep the returned object.
EXPERIMENT_NAME = 'farmer-1'
experiment = client.create_experiment(name=EXPERIMENT_NAME)

### Deploy pipeline to kubeflow pipeline Endpoint

In [None]:
# Submit the compiled package as run 'farmer-run-3' under the experiment's id.
run = client.run_pipeline(
    experiment_id=experiment.id,
    job_name='farmer-run-3',
    pipeline_package_path='farmer-kubeflow-sdk-demo.zip',
)