In [None]:
##### Copyright 2020 Google LLC.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Visualize the MetaLearning pipeline built on top of NitroML.
# We are using NitroML on Kubeflow: 

This notebook lets users analyze the results of NitroML metalearning pipelines.

In [None]:
# Step 1: Configure your cluster with gcloud
# `gcloud container clusters get-credentials <cluster_name> --zone <cluster-zone> --project <project-id>`

# Step 2: Get the port where the gRPC service is running on the cluster
# `kubectl get configmap metadata-grpc-configmap -o jsonpath={.data}`
# Use `METADATA_GRPC_SERVICE_PORT` in the next step. The default port used is 8080.

# Step 3: Port forwarding
# `kubectl port-forward deployment/metadata-grpc-deployment 9898:<METADATA_GRPC_SERVICE_PORT>`

# Troubleshooting
# If you get a Metadata-related error (for example, "Transaction already open"), try restarting the metadata gRPC service using:
# `kubectl rollout restart deployment metadata-grpc-deployment`  

In [None]:
import sys, os
PROJECT_DIR=os.path.join(sys.path[0], '..')
%cd {PROJECT_DIR}

In [None]:
import json

from examples import config as cloud_config
import examples.tuner_data_utils as tuner_utils
from ml_metadata.proto import metadata_store_pb2
from ml_metadata.metadata_store import metadata_store
from nitroml.benchmark import results
import seaborn as sns
import tensorflow as tf
import qgrid

# Apply seaborn's default styling globally (presumably picked up by the
# tuner_utils plots rendered later in this notebook — verify).
sns.set()

## Connect to the ML Metadata (MLMD) database

First we need to connect to our MLMD database which stores the results of our
benchmark runs.

In [None]:
# Point the MLMD client at the local end of the `kubectl port-forward` tunnel.
# The port must match the local port chosen in the port-forwarding step
# (9898 in the setup instructions at the top of this notebook).
connection_config = metadata_store_pb2.MetadataStoreClientConfig(
    host='localhost',
    port=9898,
)

store = metadata_store.MetadataStore(connection_config)

### Get trial summary data (used to plot Area under Learning Curve) stored as AugmentedTuner artifacts.

In [None]:
# Name of the dataset/sub-benchmark to analyze.
# It is passed as `test_dataset` to get_metalearning_data, which keeps only the
# AugmentedTuner executions whose component id contains this string.
testdata = 'ilpd' 

In [None]:
def get_metalearning_data(meta_algorithm: str = '', test_dataset: str = '', multiple_runs: bool = True, mlmd_store=None):
    """Collect the trial-summary plot directories of AugmentedTuner executions.

    Looks up every MLMD execution of type
    'nitroml.automl.metalearning.tuner.component.AugmentedTuner' and keeps the
    ones that pass all three filters described under Args.

    Args:
        meta_algorithm: Substring looked for in the execution's `pipeline_name`.
            An execution whose pipeline name is exactly
            'metalearning_benchmark' is kept regardless of this value. The
            default '' is contained in every name, so it disables this filter.
        test_dataset: Substring that must occur in the execution's
            `component_id`. The default '' disables this filter.
        multiple_runs: When True, keep only executions whose `component_id`
            contains '.run_'.
        mlmd_store: Optional object exposing `get_executions_by_type(name)`.
            When None (the default), the notebook-level `store` is used.

    Returns:
        A list of paths of the form
        `<pipeline_root>/<component_id>/trial_summary_plot/<execution id>`,
        in the order the executions were returned by the store.
    """
    # Resolve the store lazily so the global is only required when no explicit
    # store is injected.
    active_store = store if mlmd_store is None else mlmd_store
    execs = active_store.get_executions_by_type('nitroml.automl.metalearning.tuner.component.AugmentedTuner')

    d_list = []
    for tuner_exec in execs:
        props = tuner_exec.properties
        pipeline_root = props['pipeline_root'].string_value
        component_id = props['component_id'].string_value
        pipeline_name = props['pipeline_name'].string_value

        if multiple_runs and '.run_' not in component_id:
            continue

        if test_dataset not in component_id:
            continue

        # NOTE(review): a pipeline named exactly 'metalearning_benchmark' passes
        # for every meta_algorithm — confirm this is intended.
        if pipeline_name != 'metalearning_benchmark' and meta_algorithm not in pipeline_name:
            continue

        d_list.append(
            os.path.join(pipeline_root, component_id, 'trial_summary_plot', str(tuner_exec.id)))

    return d_list

In [None]:
# Set `example_plot` to one of the tuner plot directories.
# get_metalearning_data(multiple_runs=False) returns the list of candidates.
example_plot = ''
if not example_plot:
    raise ValueError('Please specify the path to the tuner plot dir.')

# The plot data is read as JSON from a fixed file name inside that directory.
tuner_plot_file = os.path.join(example_plot, 'tuner_plot_data.txt')
with tf.io.gfile.GFile(tuner_plot_file, mode='r') as fin:
    data = json.load(fin)

tuner_utils.display_tuner_data(data, save_plot=False)

## Majority Voting

In [None]:
# List the AugmentedTuner plot directories selected for the majority-voting
# algorithm and the `testdata` dataset.
algorithm = 'majority_voting'
d_list = get_metalearning_data(meta_algorithm=algorithm, test_dataset=testdata)

d_list

In [None]:
# Select the runs from `d_list` to visualize. 

In [None]:
# Read the JSON plot data of every directory in `d_list`, then plot all of the
# runs together with error bars.
data_list = []

for tuner_dir in d_list:
    tuner_plot_file = os.path.join(tuner_dir, 'tuner_plot_data.txt')
    with tf.io.gfile.GFile(tuner_plot_file, mode='r') as fin:
        data_list.append(json.load(fin))

tuner_utils.display_tuner_data_with_error_bars(data_list, save_plot=True)

## Nearest Neighbor

In [None]:
# List the AugmentedTuner plot directories selected for the nearest-neighbor
# algorithm and the `testdata` dataset.
algorithm = 'nearest_neighbor'
d_list = get_metalearning_data(meta_algorithm=algorithm, test_dataset=testdata)

d_list

In [None]:
# Select the runs from `d_list` to visualize. 

In [None]:
# Read the JSON plot data of every directory in `d_list`, then plot all of the
# runs together with error bars.
data_list = []

for tuner_dir in d_list:
    tuner_plot_file = os.path.join(tuner_dir, 'tuner_plot_data.txt')
    with tf.io.gfile.GFile(tuner_plot_file, mode='r') as fin:
        data_list.append(json.load(fin))

tuner_utils.display_tuner_data_with_error_bars(data_list, save_plot=True)