In [None]:
##### Copyright 2020 Google LLC.

In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Demo for NitroML on Google Cloud using Kubeflow

## Step 1: Get `kfp` and `skaffold`. 

In [2]:
import sys

# install kfp (https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.html)
!{sys.executable} -m pip install --user --upgrade -q kfp==0.5.1

# Download skaffold and set it executable.
# !curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-amd64 && chmod +x skaffold && mv skaffold /home/jupyter/.local/bin/
    
# Set `PATH` to include user python binary directory and a directory containing `skaffold`.
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

env: PATH=/usr/local/cuda/bin:/opt/conda/bin:/opt/conda/condabin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/jupyter/.local/bin


## Step 2: Check and install TFX (if necessary)
#### If TFX is not installed, uncomment the pip install commands below. We have tested this example with `tfx==0.22.0`.

In [3]:
# Uncomment the two pip commands below to install pinned TFX / TFDS versions
# into the interpreter backing this kernel.
# !{sys.executable} -m pip install --user --upgrade -q tfx==0.22.0
# !{sys.executable} -m pip install --user --upgrade -q tensorflow_datasets==3.1.0
# Print the installed TFX and TFDS versions from a separate `python3` process.
# NOTE(review): `python3` is resolved via PATH and may not be the same
# interpreter as `sys.executable` used by the pip commands above - confirm
# both point to the same environment.
!python3 -c "import tfx; print('TFX version: {}'.format(tfx.__version__)); import tensorflow_datasets as tfds; print('TFDS version: {}'.format(tfds.__version__))"

TFX version: 0.23.0.dev
TFDS version: 3.1.0


## Step 3: Get the GCP project ID and create Docker image name

In [4]:
# Read GCP project id from env.
shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
GCP_PROJECT_ID=shell_output[0]
print("GCP project ID:" + GCP_PROJECT_ID)

GCP project ID:nitroml-brain-xgcp


In [5]:
# Name of the Docker image built for the pipeline.
# Alternative image name used previously:
# IMAGE_NAME = 'nitroml_benchmark4'
IMAGE_NAME = 'nitroml_tfx_0130.dev'

# Full image reference in the project's Google Container Registry.
CUSTOM_TFX_IMAGE = f'gcr.io/{GCP_PROJECT_ID}/{IMAGE_NAME}'

## Step 4: Set KFP Cluster Endpoint

In [6]:
# ENDPOINT is the KFP cluster endpoint the pipeline is deployed to.
#
# To find it: Google_Project_Console -> AI_PLATFORMS -> PIPELINES, pick the
# cluster you want to run on and click "Open Pipeline Dashboard". Copy the
# "*.googleusercontent.com" URL; that value is ENDPOINT.

# ENDPOINT='40acc30b0dc82d1d-dot-us-east1.pipelines.googleusercontent.com' # cluster 4
ENDPOINT = '70a793405e3e430c-dot-us-east1.pipelines.googleusercontent.com'  # cluster 1

if not ENDPOINT:
    # absl is imported lazily: it is only needed to report the missing value.
    from absl import logging
    logging.error('Set your ENDPOINT in this cell.')

In [7]:
import sys, os
PROJECT_DIR=os.path.join(sys.path[0], '..')
%cd {PROJECT_DIR}

/home/jupyter/AIHub/nitroml


In [8]:
# Take the base pipeline name from the project's example configuration module.
from examples import config

PIPELINE_NAME = config.PIPELINE_NAME

In [9]:
# Display the base pipeline name that was read from `examples.config`.
PIPELINE_NAME

'examples'

## Step 5: Create the TFX pipeline

In [10]:
import getpass
import os

# Name of the environment variable that holds the OpenML API key.
_OPENML_API_KEY = 'OPENML_API_KEY'

# Never hard-code the key in the notebook: reuse the value already exported in
# the environment, otherwise prompt for it without echoing the input.
# NOTE(review): a key was previously committed in this cell; it should be
# treated as compromised and rotated.
if not os.environ.get(_OPENML_API_KEY):
    os.environ[_OPENML_API_KEY] = getpass.getpass('OpenML API key: ')

# Confirm that the key is available without displaying its value.
print(f'{_OPENML_API_KEY} is set.')

'<redacted>'

In [22]:
# Example to deploy; must be one of the keys of `_EXAMPLES` below.
example = 'metalearning'

# Maps each supported example to (pipeline definition path, pipeline-name suffix).
_EXAMPLES = {
    'titanic': ('examples/titanic_benchmark.py', 'titanic'),
    'openml_cc18': ('examples/openml_cc18_benchmark.py', 'openML'),
    'demo': ('examples/demo.py', 'demo'),
    'metalearning': ('examples/meta_learning_benchmark.py', 'metalearning'),
}

if example not in _EXAMPLES:
    # Without this check an unknown name would leave `pipeline_path` and
    # `pipeline_name` undefined, or silently stale from an earlier run.
    raise ValueError(
        f'Unknown example {example!r}; expected one of {sorted(_EXAMPLES)}.')

pipeline_path, _name_suffix = _EXAMPLES[example]
pipeline_name = f'{PIPELINE_NAME}_{_name_suffix}'
    

In [23]:
# Base image for the pipeline container build, taken from the example config.
TFX_IMAGE = config.TFX_IMAGE

In [24]:
# Display the target image reference built from the GCP project ID and IMAGE_NAME.
CUSTOM_TFX_IMAGE

'gcr.io/nitroml-brain-xgcp/nitroml_tfx_0130.dev'

In [25]:
!tfx pipeline create  \
--pipeline-path={pipeline_path} \
--endpoint={ENDPOINT} \
--build-target-image={CUSTOM_TFX_IMAGE} \
--build-base-image={TFX_IMAGE} \
--engine='kubeflow'

CLI
Creating pipeline
Reading build spec from build.yaml
Target image gcr.io/nitroml-brain-xgcp/nitroml_tfx_0130.dev is not used. If the build spec is provided, update the target image in the build spec file build.yaml.
Use skaffold to build the container image.
/home/jupyter/.local/bin/skaffold
New container image is built. Target image is available in the build spec file.
I0724 23:38:02.896994 139690520397184 openml_cc18.py:75] The directory gs://artifacts.nitroml-brain-xgcp.appspot.com/other-datasets/openML_datasets exists. 72 datasets found
I0724 23:38:13.718438 139690520397184 meta_learning_benchmark.py:82] Train: connect4
I0724 23:38:13.721756 139690520397184 meta_learning_benchmark.py:82] Train: creditapproval
I0724 23:38:13.722826 139690520397184 meta_learning_benchmark.py:82] Train: creditg
I0724 23:38:13.723702 139690520397184 meta_learning_benchmark.py:82] Train: cylinderbands
I0724 23:38:13.724680 139690520397184 meta_learning_benchmark.py:82] Train: diabetes
I0724 23:38:13

## Step 6: Run the created TFX pipeline

The run is started by the `tfx run create` cell further below, after the optional update step.

## Step 7 (Optional): Update the pipeline

If the pipeline source is updated, the pipeline registered at the endpoint has to be updated as well. The following cells update the pipeline and then run it.

In [20]:
# If we update the pipeline
!tfx pipeline update \
--pipeline-path={pipeline_path} \
--endpoint={ENDPOINT} \
--engine='kubeflow'

CLI
Updating pipeline
Reading build spec from build.yaml
Use skaffold to build the container image.
/home/jupyter/.local/bin/skaffold
New container image is built. Target image is available in the build spec file.
I0724 23:31:50.142108 140474966017408 dataset_info.py:361] Load dataset info from gs://artifacts.nitroml-brain-xgcp.appspot.com/tensorflow-datasets/titanic/2.0.0
I0724 23:31:50.853109 140474966017408 dataset_info.py:401] Field info.citation from disk and from code do not match. Keeping the one from code.
I0724 23:31:50.856588 140474966017408 tfds_task.py:47] Preparing dataset...
I0724 23:31:50.888455 140474966017408 dataset_builder.py:282] Reusing dataset titanic (gs://artifacts.nitroml-brain-xgcp.appspot.com/tensorflow-datasets/titanic/2.0.0)
I0724 23:31:50.888761 140474966017408 tfds_task.py:49] tfds.core.DatasetInfo(
    name='titanic',
    version=2.0.0,
    description='Dataset describing the survival status of individual passengers on the Titanic. Missing values in the 

In [19]:
# Show which pipeline the next cell will start a run for.
print(pipeline_name)

examples_titanic


In [26]:
!tfx run create --pipeline-name={pipeline_name}  --endpoint={ENDPOINT} --engine='kubeflow'

CLI
Creating a run for pipeline: examples_metalearning
Run created for pipeline: examples_metalearning
+-----------------------+--------------------------------------+----------+---------------------------+--------------------------------------------------------------------------------------------------------------------------+
| pipeline_name         | run_id                               | status   | created_at                | link                                                                                                                     |
| examples_metalearning | ea69c8f7-45cb-444c-a725-696094145821 |          | 2020-07-24T23:40:36+00:00 | http://70a793405e3e430c-dot-us-east1.pipelines.googleusercontent.com/#/runs/details/ea69c8f7-45cb-444c-a725-696094145821 |
+-----------------------+--------------------------------------+----------+---------------------------+-----------------------------------------------------------------------------------------------------------------

In [28]:
# !kfp --endpoint {ENDPOINT} --namespace kubeflow diagnose_me

In [29]:
import tensorflow_datasets