In [None]:
# model_test_pipeline.ipynb
import os
import datetime
import tensorflow as tf    
import argparse
from tensorflow.python.keras.callbacks import Callback


class MyModel(object):
    """Trains a small dense classifier on Fashion-MNIST and saves it to disk.

    ``model`` and ``model_path`` are ``None`` until ``train`` has reached the
    step that sets them.
    """

    def __init__(self):
        self.model_path = None  # set by train() right before the model is saved
        self.model = None       # set by train() once the network is built

    def get_model_path(self):
        """Return the saved-model path, or ``None`` if nothing was saved yet."""
        return self.model_path

    def get_model(self):
        """Return the Keras model, or ``None`` if ``train`` has not run."""
        return self.model

    def train(self):
        """Build, fit and save the classifier; return the fitted Keras model.

        Hyperparameters (``--node_amount``, ``--epoch``, ``--dropout_rate``,
        ``--optimizer``) are read from the command line only when the
        ``FAIRING_RUNTIME`` environment variable is set; otherwise the
        defaults declared below are used.

        Side effects: loads Fashion-MNIST through ``tf.keras.datasets``,
        writes TensorBoard logs under ``log/fit/<timestamp>`` (or
        ``/notebook/log/fit/<timestamp>`` when ``FAIRING_RUNTIME`` is set),
        saves the model to ``model/<timestamp>.<validation accuracy percent>``
        and stores that path in ``self.model_path``.
        """
        in_fairing = os.getenv('FAIRING_RUNTIME', None) is not None

        # (flag, type, default) for every tunable hyperparameter.
        hyperparameters = (
            ('--node_amount', int, 128),
            ('--epoch', int, 10),
            ('--dropout_rate', float, 0.2),
            ('--optimizer', str, "sgd"),
        )
        cli = argparse.ArgumentParser()
        for flag, value_type, default_value in hyperparameters:
            cli.add_argument(flag, required=False, type=value_type,
                             default=default_value)
        # Without FAIRING_RUNTIME, parse an empty argument list so the defaults
        # apply instead of whatever happens to be in sys.argv.
        args = cli.parse_args() if in_fairing else cli.parse_args(args=[])

        dataset = tf.keras.datasets.fashion_mnist
        train_split, test_split = dataset.load_data()
        x_train, y_train = train_split
        x_test, y_test = test_split

        print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)
        print("x_test shape:", x_test.shape, "y_test shape:", y_test.shape)

        # Divide by 255.0 (presumably scaling 8-bit pixel values into [0, 1]).
        x_train = x_train / 255.0
        x_test = x_test / 255.0

        layer_stack = [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(args.node_amount, activation='relu'),
            tf.keras.layers.Dropout(args.dropout_rate),
            tf.keras.layers.Dense(10, activation='softmax'),
        ]
        self.model = tf.keras.models.Sequential(layer_stack)
        self.model.compile(
            optimizer=args.optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['acc'],
        )

        # TensorBoard logs go to an absolute path when FAIRING_RUNTIME is set
        # and to a path relative to the working directory otherwise.
        date_folder = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        log_root = "/notebook/log/fit/" if in_fairing else "log/fit/"
        log_dir = log_root + date_folder

        print(f"tensorboard log dir : {log_dir}")

        tensorboard_cb = tf.keras.callbacks.TensorBoard(
            log_dir=log_dir,
            histogram_freq=1,
        )
        print(f"Total epochs {args.epoch}")
        training_callbacks = [LoggingTrain(), tensorboard_cb]
        history = self.model.fit(
            x_train,
            y_train,
            verbose=0,
            validation_data=(x_test, y_test),
            epochs=args.epoch,
            callbacks=training_callbacks,
        )

        # Digits-only timestamp (no separator characters) used as the version.
        model_ver = get_strftime('%Y%m%d%H%M%S')
        # Last epoch's validation accuracy as a whole-number percentage; int()
        # drops the fractional part.
        last_val_acc = float(history.history['val_acc'][-1])
        model_val_acc = int(last_val_acc * 100)
        self.model_path = f"model/{model_ver}.{model_val_acc}"
        self.model.save(self.model_path, save_format='tf')
        return self.model
    
def get_strftime(time_format):
    """Return the current local time rendered with ``time_format``.

    ``time_format`` is passed unchanged to ``datetime.strftime``.
    """
    return datetime.datetime.now().strftime(time_format)

def p(msg):
    """Print ``msg`` prefixed with the current UTC timestamp and flush stdout.

    The prefix has the form ``YYYY-MM-DDTHH:MM:SSZ``. The trailing ``Z``
    designates UTC, so the time is taken in UTC rather than in the host's
    local timezone (the two only coincide when the host itself runs in UTC).

    Args:
        msg: Text (or any object usable in an f-string) to print after the
            timestamp.
    """
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    stamp = utc_now.strftime('%Y-%m-%dT%H:%M:%SZ')
    # flush=True so each line is emitted immediately instead of being buffered.
    print(f"{stamp} {msg}", flush=True)
    
class LoggingTrain(Callback):
    """Keras callback that prints training progress through ``p``.

    Every 100th batch it prints the batch index plus the ``acc`` and ``loss``
    entries of ``logs``; at the start of each epoch it prints the epoch index;
    at the end of each epoch it prints the ``val_acc`` and ``val_loss``
    entries. Missing entries are printed as ``None``.
    """

    def on_batch_end(self, batch, logs=None):
        """Print batch index, accuracy and loss when ``batch`` is a multiple of 100."""
        # None instead of a shared mutable {} default; normalise before use.
        logs = logs or {}
        if batch % 100 == 0:
            p(f"batch: {batch}")
            p(f"accuracy={logs.get('acc')} loss={logs.get('loss')}")

    def on_epoch_begin(self, epoch, logs=None):
        """Print the index of the epoch that is about to start."""
        p(f"epoch: {epoch}")

    def on_epoch_end(self, epoch, logs=None):
        """Print the validation accuracy and loss found in ``logs``."""
        logs = logs or {}
        p(f"Validation-accuracy={logs.get('val_acc')}")
        p(f"Validation-loss={logs.get('val_loss')}")

In [None]:
# model_test_pipeline.ipynb
# Train the classifier in this kernel. train() returns the fitted Keras model
# and records where it was saved in my_model.model_path.
my_model = MyModel()
model = my_model.train()

In [None]:
# model_test_pipeline.ipynb
import kfp
from kfp import dsl
from kfp.components import func_to_container_op, OutputPath
from typing import NamedTuple


def test_model_component(model_version: str) -> NamedTuple('Outputs', [
    ('mlpipeline_metrics', 'Metrics'),
]):
    """Evaluate a saved model on the stored test set and report the metrics.

    Loads ``x_test``/``y_test`` from ``/notebook/new_dataset/test/new_test.npz``
    and the model from ``/notebook/model/<model_version>``, evaluates it, and
    returns the loss, accuracy and model version as a JSON metrics document.

    Args:
        model_version: Name of the saved-model directory under
            ``/notebook/model``. It must be parseable by ``float`` because it
            is also reported as a numeric metric.

    Returns:
        A namedtuple with one field, ``mlpipeline_metrics``, holding the JSON
        string of the metrics.

    Raises:
        ValueError: If ``model_version`` cannot be converted to ``float``.
    """
    # Imports stay inside the function: it is turned into a container
    # component (see func_to_container_op below), which needs the function
    # body to be self-contained.
    import json
    from collections import namedtuple

    import numpy as np
    import tensorflow as tf

    test_dataset = np.load("/notebook/new_dataset/test/new_test.npz")
    x_test = test_dataset['x_test']
    y_test = test_dataset['y_test']
    loaded_model = tf.keras.models.load_model(f"/notebook/model/{model_version}")
    # evaluate() yields [loss, accuracy] for the loss/metric the model was
    # compiled with.
    score = loaded_model.evaluate(x_test, y_test, verbose=0)
    print(f"test-accuracy = {score[1]}, test-loss={score[0]}")

    # Convert to a Python float BEFORE rounding: rounding a NumPy float32 and
    # widening it afterwards reintroduces float32 noise digits into the JSON.
    test_loss = round(float(score[0]), 4)
    test_accuracy = round(float(score[1]), 4)

    metrics = {
        'metrics': [{
            'name': 'val-acc',
            'numberValue': test_accuracy,
            'format': "PERCENTAGE",
        }, {
            'name': 'val-loss',
            'numberValue': test_loss,
            'format': "RAW",
        }, {
            'name': 'model-version',
            'numberValue': float(model_version),
            'format': "RAW",
        }]
    }

    result = namedtuple(
      'Outputs',
      ['mlpipeline_metrics'])
    return result(json.dumps(metrics))


def test_model_pipeline(model_version: str):
    # Single-step pipeline: runs test_model_component for `model_version` with
    # the "workspace-handson" PVC mounted at /notebook.
    # NOTE(review): documented with comments rather than a docstring on the
    # assumption that KFP may copy a docstring into the compiled pipeline's
    # description -- confirm before converting.
    evaluate_op = func_to_container_op(
        test_model_component,
        base_image="dudaji/cap-jupyterlab:tf2.0-cpu",
    )

    workspace_volume = dsl.PipelineVolume(pvc="workspace-handson")
    evaluate_task = evaluate_op(model_version).add_pvolumes(
        pvolumes={"/notebook": workspace_volume}
    )
    # "P0D" is a zero-length staleness window; presumably this keeps the step
    # from being served from cache -- confirm against the KFP caching docs.
    evaluate_task.execution_options.caching_strategy.max_cache_staleness = "P0D"
    


# Pipeline parameter: the model directory name under /notebook/model to
# evaluate. The value has the "<timestamp>.<val_acc percent>" shape that
# MyModel.train() uses for its save path.
# NOTE(review): hard-coded; presumably must match a model saved by an earlier
# training run -- confirm it exists on the mounted volume.
arguments = {'model_version': "20210812034159.85"}

# Submit a one-off run of the pipeline function under the "tutorial_model"
# experiment.
client = kfp.Client()
client.create_run_from_pipeline_func(test_model_pipeline, 
                                     experiment_name="tutorial_model",
                                     arguments=arguments)

In [None]:
# Compile the pipeline function into a YAML package, then upload that package
# as "test-model-pipeline". Relies on `kfp`, `test_model_pipeline` and
# `client` defined in the preceding cell.
kfp.compiler.Compiler().compile(pipeline_func=test_model_pipeline,
                                package_path='test_model_pipeline.yaml')

client.upload_pipeline(pipeline_name="test-model-pipeline",
                       pipeline_package_path="test_model_pipeline.yaml")