## Building Docker Artifacts

In [1]:
cd farmer

/home/jupyter/kubeflow-sdk-demo/farmer


In [2]:
!docker login -u username -p password

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [4]:
%%writefile config.py
gs_bucket_name="kubeflow-depankar-demo"
Bucket_uri="gs://kubeflow-depankar-demo"
version=1
store_artifacts=Bucket_uri + "/" + str(version)
data_path=Bucket_uri + "/" + "data/data_raw.csv"
processed_data=Bucket_uri + "/" + "processed/data_processed.csv"

Overwriting config.py


In [54]:
%%writefile Dockerfile
FROM tensorflow/tensorflow
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y \
    git \
    gpg-agent \
    python3-cairocffi \
    protobuf-compiler \
    python3-pil \
    python3-lxml \
    python3-tk \
    wget
# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
#RUN apt-get update && apt-get install -y lsb-release && apt-get clean all
RUN echo "deb https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
    apt-get update -y && apt-get install google-cloud-sdk -y
WORKDIR /pipeline
COPY ./ ./
#RUN pip install -r requirements.txt
RUN /usr/bin/python3 -m pip install --upgrade pip
RUN pip install google-cloud-storage
RUN pip install google-cloud
RUN pip install gcsfs
RUN pip install dask[dataframe]
RUN pip install google-api-python-client
RUN pip install matplotlib
RUN pip install seaborn
RUN pip install pandas
RUN pip install sklearn
RUN pip install wget
#RUN pip install dvc
RUN pip install "dask[dataframe]" --upgrade
ENV TF_CPP_MIN_LOG_LEVEL 3

Overwriting Dockerfile


## Build Docker Image

In [55]:
!docker build -t depankars/kubeflow-sdk-farmer .

Sending build context to Docker daemon  27.65kB
Step 1/19 : FROM tensorflow/tensorflow
 ---> b51f642475ab
Step 2/19 : ARG DEBIAN_FRONTEND=noninteractive
 ---> Using cache
 ---> e2b9630a5545
Step 3/19 : RUN apt-get update && apt-get install -y     git     gpg-agent     python3-cairocffi     protobuf-compiler     python3-pil     python3-lxml     python3-tk     wget
 ---> Using cache
 ---> 9f6f79a076a4
Step 4/19 : RUN echo "deb https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list &&     curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - &&     apt-get update -y && apt-get install google-cloud-sdk -y
 ---> Using cache
 ---> 16aa13e5b9f8
Step 5/19 : WORKDIR /pipeline
 ---> Using cache
 ---> 145dfdc47e6f
Step 6/19 : COPY ./ ./
 ---> e87278efb138
Step 7/19 : RUN /usr/bin/python3 -m pip install --upgrade pip
 ---> Running in fced597bbe55
Collecting pip
  Downloading pip-21.3.1-py3-none-any.whl (1.7 MB)
Installing c

## Push Docker Image to Docker hub

In [56]:
!docker push depankars/kubeflow-sdk-farmer

Using default tag: latest
The push refers to repository [docker.io/depankars/kubeflow-sdk-farmer]

[1B231dfd28: Preparing 
[1Bd9c2d84f: Preparing 
[1B899e1d08: Preparing 
[1Be6ca7c94: Preparing 
[1B5015742d: Preparing 
[1Bdfbd6969: Preparing 
[1B2ddf8d1a: Preparing 
[1Ba8013090: Preparing 
[1B3f5ad6cc: Preparing 
[1B4a2dc57b: Preparing 
[1B976a1b86: Preparing 
[1B77227b48: Preparing 
[1B03a8310a: Preparing 
[1Bef00e0cf: Preparing 
[1B7bcdea39: Preparing 
[1B984157d1: Preparing 
[1B8d4da588: Preparing 
[1Bf6d04422: Preparing 
[1B45155a19: Preparing 
[1Bb1a4d7c3: Preparing 
[1Baf4dda95: Preparing 
[1B6741f73b: Preparing 
[1B2698ea9f: Preparing 
[10Bbcdea39: Pushed   611.5MB/585.5MBtensorflow 20A[2K[22A[2K[22A[2K[20A[2K[22A[2K[22A[2K[22A[2K[22A[2K[22A[2K[20A[2K[20A[2K[24A[2K[20A[2K[20A[2K[20A[2K[20A[2K[20A[2K[22A[2K[19A[2K[20A[2K[17A[2K[19A[2K[18A[2K[17A[2K[19A[2K[22A[2K[20A[2K[17A[2K[18A[2K[19A[2K[22A[2K

## Install Kubeflow SDk

In [8]:
!pip install kfp

Collecting kfp
  Downloading kfp-1.8.10.tar.gz (298 kB)
     |████████████████████████████████| 298 kB 6.7 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting absl-py<=0.11,>=0.9
  Downloading absl_py-0.11.0-py3-none-any.whl (127 kB)
     |████████████████████████████████| 127 kB 83.8 MB/s            
[?25hCollecting PyYAML<6,>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
     |████████████████████████████████| 636 kB 82.5 MB/s            
Collecting kubernetes<19,>=8.0.0
  Downloading kubernetes-18.20.0-py2.py3-none-any.whl (1.6 MB)
     |████████████████████████████████| 1.6 MB 87.9 MB/s            
[?25hCollecting google-api-python-client<2,>=1.7.8
  Downloading google_api_python_client-1.12.8-py2.py3-none-any.whl (61 kB)
     |████████████████████████████████| 61 kB 38 kB/s              
[?25hCollecting google-auth<2,>=1.6.1
  Downloading google_auth-1.35.0-py2.py3-none-any.whl (152 kB)
     |████████████████████████

# Building Kubeflow Pipeline

In [11]:
import kfp
import kfp.components as comp
from kubernetes.client.models import V1EnvVar

## Define kubeflow pipeline Component

In [57]:
@kfp.dsl.component
def get_data():
    # Defining component configuration
    getdata_component = kfp.dsl.ContainerOp(
        name='Data-Preparation',
        image='docker.io/depankars/kubeflow-sdk-farmer',
        command=['python', 'get_data.py'],
        )
    return getdata_component

In [58]:
@kfp.dsl.component
def data_preprocessing():    
    # Defining component configuration
    data_preprocessing = kfp.dsl.ContainerOp(
        name='data-preprocessing',
        image='docker.io/depankars/kubeflow-sdk-farmer',
        command=['python', 'process_data.py'],
        )
    return data_preprocessing

In [59]:
@kfp.dsl.component
def training():
    # Defining component configuration
    training_component = kfp.dsl.ContainerOp(
        name='training',
        image='docker.io/depankars/kubeflow-sdk-farmer',
        command=['python', 'train.py'],
        file_outputs={'mlpipeline-ui-metadata':'/mlpipeline-ui-metadata.json', "mlpipeline-metrics":'/mlpipeline-metrics.json'}
        )
    
    return training_component

In [60]:
# Let see output of component configuration
debug = True
if debug :
    training_component_vis = training()
    print(training_component_vis)

{'ContainerOp': {'is_exit_handler': False, 'human_name': 'training', 'display_name': None, 'name': 'training 800007fa4dc6f649', 'node_selector': {}, 'volumes': [], 'tolerations': [], 'affinity': {}, 'pod_annotations': {}, 'pod_labels': {}, 'num_retries': 0, 'retry_policy': None, 'backoff_factor': None, 'backoff_duration': None, 'backoff_max_duration': None, 'timeout': 0, 'init_containers': [], 'sidecars': [], 'loop_args': None, '_component_spec_inputs_with_pipeline_params': [], '_inputs': [], 'dependent_names': [], 'enable_caching': True, 'attrs_with_pipelineparams': ['node_selector', 'volumes', 'pod_annotations', 'pod_labels', 'num_retries', 'init_containers', 'sidecars', 'tolerations', '_container', 'artifact_arguments', '_parameter_arguments'], '_is_v2': False, '_container': {'args': None,
 'command': ['python', 'train.py'],
 'env': None,
 'env_from': None,
 'image': 'docker.io/depankars/kubeflow-sdk-farmer',
 'image_pull_policy': None,
 'lifecycle': None,
 'liveness_probe': None,
 

## Final Kubeflow pipeline Definition

In [61]:
@kfp.dsl.pipeline(
  name="Modeling Swiss farmer's attitudes about",
  description="Modeling Swiss farmer's attitudes about"
)
def farmer():
    download_data = get_data()
    download_data.execution_options.caching_strategy.max_cache_staleness = "P0D"
    data_processing = data_preprocessing().after(download_data)
    data_processing.execution_options.caching_strategy.max_cache_staleness = "P0D"
    train = training().after(data_processing)
    train.execution_options.caching_strategy.max_cache_staleness = "P0D"


In [62]:
# Let see output of pipeline configuration
debug = True
if debug :
    training_pipeline_output = farmer()
    print(training_pipeline_output)

None


## Compile Kubeflow Pipeline 
* It will Generate .zip file inside this contain YAMl file which contain the configuration of kubeflow pipeline

In [63]:
kfp.compiler.Compiler().compile(farmer, 'farmer-kubeflow-sdk-demo.zip')

## Connect to deployed kubeflow pipeline Endpoint (GCP)

In [64]:
# Create kfp client
# Note: Add the KubeFlow Pipeline endpoint below if the client is not running on the same cluster.
client = kfp.Client("https://4cc472dcfd645f-dot-us-central1.pipelines.googleusercontent.com")

### Create Experiment 

In [65]:
EXPERIMENT_NAME = 'farmer-3'
experiment = client.create_experiment(name=EXPERIMENT_NAME)

### Deploy pipeline to kubeflow pipeline Endpoint

In [66]:
run = client.run_pipeline(experiment.id, 'farmer-run-1', 'farmer-kubeflow-sdk-demo.zip')