In [None]:
##### Copyright 2020 Google LLC.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Demo for NitroML on Cloud using Kubeflow

## Step 1: Get `kfp` and `skaffold`. 

In [None]:
import sys

# install kfp (https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.html)
!{sys.executable} -m pip install --user --upgrade -q kfp==0.5.1

# Download skaffold and set it executable.
# !curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-amd64 && chmod +x skaffold && mv skaffold /home/jupyter/.local/bin/
    
# Set `PATH` to include user python binary directory and a directory containing `skaffold`.
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

## Step 2: Check and install TFX (if necessary)
#### If TFX is not installed, uncomment the pip install commands below. We have tested this example with `tfx==0.22.0`.

In [None]:
# Optional installs (commented out): pinned TFX and TensorFlow Datasets versions,
# installed with the kernel's interpreter (`sys.executable`).
# !{sys.executable} -m pip install --user --upgrade -q tfx==0.22.0
# !{sys.executable} -m pip install --user --upgrade -q tensorflow_datasets==3.1.0
# Import both packages in a separate `python3` process and print their versions.
# NOTE(review): this uses whichever `python3` is first on PATH, while the installs
# above target `sys.executable` — confirm both refer to the same interpreter.
!python3 -c "import tfx; print('TFX version: {}'.format(tfx.__version__)); import tensorflow_datasets as tfds; print('TFDS version: {}'.format(tfds.__version__))"

## Step 3: Get the GCP project ID and create Docker image name

In [None]:
# Read GCP project id from env.
shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
GCP_PROJECT_ID=shell_output[0]
print("GCP project ID:" + GCP_PROJECT_ID)

In [None]:
# Name of the Docker image for the pipeline.
# Alternative name kept for reference:
# IMAGE_NAME = 'nitroml_benchmark4'
IMAGE_NAME = 'nitroml_tfx_0130.dev'

# Full image reference: gcr.io/<project id>/<image name>.
CUSTOM_TFX_IMAGE = f'gcr.io/{GCP_PROJECT_ID}/{IMAGE_NAME}'

## Step 4: Set the KFP cluster endpoint

In [None]:
# ENDPOINT is the KFP cluster endpoint passed to the `tfx` CLI calls in this notebook.
# How to find it: Google_Project_Console -> AI_PLATFORMS -> PIPELINES, then click
# "Open Pipeline Dashboard" for the cluster the pipeline should run on and copy the
# "*.googleusercontent.com" url.

# ENDPOINT = '40acc30b0dc82d1d-dot-us-east1.pipelines.googleusercontent.com'  # cluster 4
ENDPOINT = '70a793405e3e430c-dot-us-east1.pipelines.googleusercontent.com'  # cluster 1

if not ENDPOINT:
    # absl is imported only on this path; it is needed solely to report the missing value.
    from absl import logging
    logging.error('Set your ENDPOINT in this cell.')

In [None]:
import sys, os
PROJECT_DIR=os.path.join(sys.path[0], '..')
%cd {PROJECT_DIR}

In [None]:
# Load the project's example configuration module and take the base pipeline name from it.
# `config` is also used by a later cell to read `TFX_IMAGE`.
from examples import config
PIPELINE_NAME=config.PIPELINE_NAME

In [None]:
# Display the base pipeline name read from `config` as the cell output.
PIPELINE_NAME

## Step 5: Create the tfx pipeline

In [None]:
import getpass
import os

# Name of the environment variable that holds the OpenML API key.
_OPENML_API_KEY = 'OPENML_API_KEY'

# SECURITY: the key must never be hard-coded in the notebook, since anything committed
# here is visible to everyone with access to the repository. Use the value already
# present in the environment, otherwise prompt for it without echoing the input.
# A key that was previously committed in this cell should be revoked and replaced.
if not os.environ.get(_OPENML_API_KEY):
    os.environ[_OPENML_API_KEY] = getpass.getpass('OpenML API key: ')

# Report only whether the key is set; displaying the value would store the
# secret in the saved cell output.
print('{} is set: {}'.format(_OPENML_API_KEY, bool(os.environ.get(_OPENML_API_KEY))))

In [None]:
# Which example pipeline to create and run; must be one of the keys of `_EXAMPLES`.
example = 'metalearning'

# example name -> (pipeline definition file, suffix appended to PIPELINE_NAME)
_EXAMPLES = {
    'titanic': ('examples/titanic_benchmark.py', 'titanic'),
    'openml_cc18': ('examples/openml_cc18_benchmark.py', 'openML'),
    'demo': ('examples/demo.py', 'demo'),
    'metalearning': ('examples/meta_learning_benchmark.py', 'metalearning'),
}

# Fail loudly on an unknown name. Otherwise `pipeline_path` / `pipeline_name` would be
# undefined, or silently keep the values from an earlier run of this cell.
if example not in _EXAMPLES:
    raise ValueError(
        'Unknown example {!r}; expected one of: {}'.format(example, ', '.join(sorted(_EXAMPLES))))

pipeline_path, _name_suffix = _EXAMPLES[example]
pipeline_name = f'{PIPELINE_NAME}_{_name_suffix}'
    

In [None]:
# Image name taken from the project config; passed as `--build-base-image` when the pipeline is created below.
TFX_IMAGE=config.TFX_IMAGE

In [None]:
# Display the full target image name (gcr.io/<project id>/<image name>) as the cell output.
CUSTOM_TFX_IMAGE

In [None]:
# Create the pipeline from the selected example file on the KFP endpoint using the Kubeflow engine.
# IPython substitutes the `{...}` placeholders with the notebook variables set in earlier cells.
# CUSTOM_TFX_IMAGE is passed as the build target image and TFX_IMAGE as the build base image.
!tfx pipeline create  \
--pipeline-path={pipeline_path} \
--endpoint={ENDPOINT} \
--build-target-image={CUSTOM_TFX_IMAGE} \
--build-base-image={TFX_IMAGE} \
--engine='kubeflow'

## Step 6: Run the created tfx pipeline (use the `tfx run create` cell at the end of this notebook)

## Step 7 (Optional): If the pipeline source has changed, the pipeline at the endpoint must be updated as well. The following cells update the pipeline and then run it.

In [None]:
# If we update the pipeline
!tfx pipeline update \
--pipeline-path={pipeline_path} \
--endpoint={ENDPOINT} \
--engine='kubeflow'

In [None]:
# Show which pipeline the run below will be started for.
print(pipeline_name)

In [None]:
# Start a run of the pipeline named `pipeline_name` on the KFP endpoint (Kubeflow engine).
!tfx run create --pipeline-name={pipeline_name}  --endpoint={ENDPOINT} --engine='kubeflow'

In [None]:
# !kfp --endpoint {ENDPOINT} --namespace kubeflow diagnose_me