In [None]:
# train_pipeline.ipynb

from typing import NamedTuple


def get_best_model(goal_acc: float = 0.50,
                   experiment_name: str = "tutorial_model") \
        -> NamedTuple('BestModel', [('version', float), ('acc', float)]):
    """Pick the most accurate eligible run of a Kubeflow Pipelines experiment.

    Runs are ignored when they are archived, have no metrics, did not finish
    with status ``'Succeeded'`` or carry no pipeline parameters.  Among the
    remaining runs, the one whose accuracy is strictly greater than both
    ``goal_acc`` and every earlier candidate wins.

    Args:
        goal_acc: Accuracy a run has to exceed to be selected.
        experiment_name: Name of the experiment whose runs are inspected.

    Returns:
        A ``BestModel(version, acc)`` named tuple.  When no run beats
        ``goal_acc`` the result is ``(0.0, goal_acc)``; ``0.0`` is a
        "no model found" sentinel.

    Raises:
        KeyError: If ``KFP_PROXY_SERVICE_HOST`` or ``KFP_PROXY_SERVICE_PORT``
            is not set in the environment.
    """
    # Imports are kept local because this function is handed to
    # func_to_container_op elsewhere in the notebook.
    import kfp
    import os
    from collections import namedtuple

    os.environ["KF_PIPELINES_ENDPOINT"] = "http://ml-pipeline.kubeflow.svc.cluster.local:8888"

    kfp_proxy_host = os.environ["KFP_PROXY_SERVICE_HOST"]
    kfp_proxy_port = os.environ["KFP_PROXY_SERVICE_PORT"]

    client = kfp.Client(proxy=f"http://{kfp_proxy_host}:{kfp_proxy_port}")

    experiment = client.get_experiment(experiment_name=experiment_name)
    # NOTE(review): page_size=0 presumably falls back to the server default
    # page size, so only the first page of runs is inspected - confirm.
    list_runs = client.list_runs(experiment_id=experiment.id,
                                 page_size=0)
    # An experiment without runs yields None instead of an empty list.
    runs = list_runs.runs or []

    # The baseline must carry the same keys that are read when building the
    # result, otherwise "no run is good enough" ends in a KeyError.
    best_model = {'version': 0.0, 'acc': goal_acc}
    for run in runs:
        run_dict = run.to_dict()
        if run_dict['storage_state'] == 'STORAGESTATE_ARCHIVED':
            continue
        if run_dict['status'] != 'Succeeded':
            continue
        if run_dict['pipeline_spec']['parameters'] is None:
            continue

        metrics = run_dict['metrics']
        if metrics is None or len(metrics) < 2:
            continue

        # assumes metrics[0] is the model version and metrics[1] the
        # accuracy - TODO confirm the ordering returned by the API.
        model = {'version': metrics[0]['number_value'],
                 'acc': metrics[1]['number_value']}
        if best_model['acc'] < model['acc']:
            best_model = model
    print(best_model)

    result = namedtuple('BestModel', ['version', 'acc'])
    return result(best_model['version'], best_model['acc'])


In [None]:
def get_last_dataset(data_path: str) -> str:
    """Return the path of the most recently modified ``*.npz`` file.

    Args:
        data_path: Directory that is scanned (non-recursively) for datasets.

    Returns:
        ``data_path`` joined with the name of the ``.npz`` file that has the
        largest modification time.

    Raises:
        FileNotFoundError: If ``data_path`` contains no ``*.npz`` file.
    """
    import fnmatch
    import os

    files = fnmatch.filter(os.listdir(data_path), '*.npz')
    if not files:
        raise FileNotFoundError(f"no '*.npz' dataset found in {data_path!r}")
    # Ascending by mtime, so the newest dataset is the last element.
    files.sort(key=lambda fn: os.path.getmtime(os.path.join(data_path, fn)))
    return os.path.join(data_path, files[-1])

In [None]:
# train_pipeline.ipynb

from typing import NamedTuple


def train_from_best_model(model_version: float,
                          new_dataset_path: str) \
        -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics'),
                                  ('model_version', float)]):
    """Retrain from an existing model through a Katib experiment.

    Creates a Katib experiment named ``my-model-retrain-<timestamp>`` that
    random-searches the ``epoch`` parameter (10..30) over at most three
    trials, polls it once a minute, and on success copies the dataset into
    ``/notebook/new_dataset/retrain``.

    Args:
        model_version: Version of the model to start from; forwarded to the
            training script as ``--train_version``.
        new_dataset_path: Path of the dataset file, forwarded to the training
            script as ``--dataset_path``.

    Returns:
        An ``Outputs`` named tuple holding the JSON-encoded metrics
        (``val-acc`` and ``model-version``) and the unchanged input
        ``model_version``.

    Raises:
        RuntimeError: If the experiment ends in status ``"Failed"``, or if it
            succeeds without reporting the objective metric.
    """
    # Imports are kept local because this function is handed to
    # func_to_container_op elsewhere in the notebook.
    import datetime
    import time
    import json
    import os
    import shutil
    from collections import namedtuple
    from kubeflow.katib import KatibClient

    from kubernetes.client import V1ObjectMeta
    from kubeflow.katib import V1beta1Experiment
    from kubeflow.katib import V1beta1AlgorithmSpec
    from kubeflow.katib import V1beta1ObjectiveSpec
    from kubeflow.katib import V1beta1FeasibleSpace
    from kubeflow.katib import V1beta1ExperimentSpec
    from kubeflow.katib import V1beta1ParameterSpec
    from kubeflow.katib import V1beta1TrialTemplate
    from kubeflow.katib import V1beta1TrialParameterSpec

    def find_metrics(_list, name, _type):
        """Return field ``_type`` of the metric called ``name``, or None."""
        for metric in _list:
            if metric['name'] == name:
                return metric[_type]
        return None

    def move_dataset(orig_path):
        """Copy the dataset into the retrain directory (the source is kept)."""
        target_dir = "/notebook/new_dataset/retrain"
        os.makedirs(target_dir, exist_ok=True)
        npz = os.path.basename(orig_path)
        shutil.copy(orig_path, f'{target_dir}/{npz}')

    kclient = KatibClient()
    kclient.list_experiments()
    date_postfix = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    experiment_name = f"my-model-retrain-{date_postfix}"
    objective_metric_name = "Validation-accuracy"

    metadata = V1ObjectMeta(
        name=experiment_name
    )

    algorithm_spec = V1beta1AlgorithmSpec(
        algorithm_name="random"
    )

    objective_spec = V1beta1ObjectiveSpec(
        type="maximize",
        goal=0.99,
        objective_metric_name=objective_metric_name,
        additional_metric_names=["Validation-loss", "accuracy", "loss"]
    )

    parameters = [
        V1beta1ParameterSpec(
            name="epoch",
            parameter_type="int",
            feasible_space=V1beta1FeasibleSpace(
                min="10",
                max="30"
            ),
        ),
    ]

    dt_now = datetime.datetime.now()
    save_version = dt_now.strftime('%Y%m%d%H%M%S')
    # JSON template specification for the Trial's Worker Kubernetes Job.
    trial_spec = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "spec": {
            "template": {
                "metadata": {
                    "annotations": {
                        "sidecar.istio.io/inject": "false"
                    }
                },
                "spec": {
                    "containers": [
                        {
                            "name": "training-container",
                            "image": "yourID/fashion-mnist-retrain:handson",
                            "imagePullPolicy": "Always",
                            "volumeMounts": [
                                {"name": "notebook", "mountPath": "/notebook"},
                            ],
                            "command": [
                                "python",
                                "/app/my_model_retrain.py",
                                # NOTE(review): the "epoch" trial parameter is
                                # passed as --node_amount - confirm the flag.
                                "--node_amount=${trialParameters.epoch}",
                                f"--dataset_path={new_dataset_path}",
                                "--model_path=/notebook/model/",
                                f"--train_version={model_version}",
                                f"--save_version={save_version}"
                            ]
                        }
                    ],
                    "restartPolicy": "Never",
                    "volumes": [
                        {"name": "notebook",
                         "persistentVolumeClaim": {"claimName": "workspace-handson"}
                         }
                    ]
                }
            }
        }
    }

    # Configure parameters for the Trial template.
    trial_template = V1beta1TrialTemplate(
        primary_container_name="training-container",
        trial_parameters=[
            V1beta1TrialParameterSpec(
                name="epoch",
                description="train epoch",
                reference="epoch"
            ),
        ],
        trial_spec=trial_spec
    )

    # Experiment object.
    experiment = V1beta1Experiment(
        api_version="kubeflow.org/v1beta1",
        kind="Experiment",
        metadata=metadata,
        spec=V1beta1ExperimentSpec(
            max_trial_count=3,
            parallel_trial_count=3,
            max_failed_trial_count=3,
            algorithm=algorithm_spec,
            objective=objective_spec,
            parameters=parameters,
            resume_policy=None,
            trial_template=trial_template,
        )
    )
    kclient.create_experiment(experiment)

    val_acc = None
    while True:
        time.sleep(60)
        status = kclient.get_experiment_status(experiment_name)
        if status == "Running":
            print(f"{experiment_name} is running")
        elif status == "Failed":
            # Without this branch a failed experiment is polled forever.
            raise RuntimeError(f"Katib experiment {experiment_name} failed")
        elif status == "Succeeded":
            result = kclient.get_optimal_hyperparameters(experiment_name)
            if result:
                metrics = result['currentOptimalTrial']['observation']['metrics']
                val_acc = find_metrics(metrics, objective_metric_name, 'max')
            if val_acc is None:
                raise RuntimeError(
                    f"Katib experiment {experiment_name} succeeded but "
                    f"reported no '{objective_metric_name}' metric")
            move_dataset(new_dataset_path)
            break

    metrics = {
        'metrics': [{
            'name': 'val-acc',
            'numberValue': float(val_acc),
            'format': "PERCENTAGE",
        }, {
            'name': 'model-version',
            # Timestamp plus the accuracy cut to its first four characters.
            'numberValue': float(save_version) + float(str(val_acc)[:4]),
            'format': "RAW",
        }]
    }
    outputs = namedtuple('Outputs', ['mlpipeline_metrics', 'model_version'])
    return outputs(json.dumps(metrics), model_version)
          

In [None]:
# train_pipeline.ipynb

from kfp import dsl
from kfp.components import func_to_container_op


def disable_cache(op):
    """Turn off result caching for ``op`` by allowing zero cache staleness."""
    strategy = op.execution_options.caching_strategy
    # "P0D" is an ISO 8601 duration of zero days.
    strategy.max_cache_staleness = "P0D"

def container_op(func):
    """Wrap ``func`` with func_to_container_op using the notebook's base image."""
    image = "dudaji/cap-jupyterlab:tf2.0-cpu"
    return func_to_container_op(func, base_image=image)

def train_pipeline(goal_acc: float = 0.50,
                   dataset_path: str = "/notebook/new_dataset/train"):
    """Pipeline: find the best model, find the latest dataset, then retrain.

    Every step mounts the ``workspace-handson`` PVC at ``/notebook`` and has
    caching disabled.

    Args:
        goal_acc: Accuracy threshold passed to ``get_best_model``.
        dataset_path: Directory passed to ``get_last_dataset``.
    """
    workspace = dsl.PipelineVolume(pvc="workspace-handson")

    # Turn the three step functions into container-op factories.
    best_model_factory = container_op(get_best_model)
    latest_dataset_factory = container_op(get_last_dataset)
    retrain_factory = container_op(train_from_best_model)

    best_model_step = best_model_factory(goal_acc).add_pvolumes(
        pvolumes={"/notebook": workspace})

    latest_dataset_step = latest_dataset_factory(dataset_path).add_pvolumes(
        pvolumes={"/notebook": workspace})

    # The retrain step consumes the outputs of both earlier steps.
    retrain_step = retrain_factory(
        model_version=best_model_step.outputs['version'],
        new_dataset_path=latest_dataset_step.output,
    ).add_pvolumes(pvolumes={"/notebook": workspace})

    for step in (latest_dataset_step, best_model_step, retrain_step):
        disable_cache(step)

In [None]:
# train_pipeline.ipynb

import kfp
from kfp import dsl
from kfp.components import func_to_container_op
from typing import NamedTuple


def get_best_model(goal_acc: float = 0.50,
                   experiment_name: str = "tutorial_model") \
        -> NamedTuple('BestModel', [('version', float), ('acc', float)]):
    """Pick the most accurate eligible run of a Kubeflow Pipelines experiment.

    Runs are ignored when they are archived, have no metrics, did not finish
    with status ``'Succeeded'`` or carry no pipeline parameters.  Among the
    remaining runs, the one whose accuracy is strictly greater than both
    ``goal_acc`` and every earlier candidate wins.

    Args:
        goal_acc: Accuracy a run has to exceed to be selected.
        experiment_name: Name of the experiment whose runs are inspected.

    Returns:
        A ``BestModel(version, acc)`` named tuple.  When no run beats
        ``goal_acc`` the result is ``(0.0, goal_acc)``; ``0.0`` is a
        "no model found" sentinel.

    Raises:
        KeyError: If ``KFP_PROXY_SERVICE_HOST`` or ``KFP_PROXY_SERVICE_PORT``
            is not set in the environment.
    """
    # Imports are kept local because this function is handed to
    # func_to_container_op further down in this cell.
    import kfp
    import os
    from collections import namedtuple

    os.environ["KF_PIPELINES_ENDPOINT"] = "http://ml-pipeline.kubeflow.svc.cluster.local:8888"

    kfp_proxy_host = os.environ["KFP_PROXY_SERVICE_HOST"]
    kfp_proxy_port = os.environ["KFP_PROXY_SERVICE_PORT"]

    client = kfp.Client(proxy=f"http://{kfp_proxy_host}:{kfp_proxy_port}")

    experiment = client.get_experiment(experiment_name=experiment_name)
    # NOTE(review): page_size=0 presumably falls back to the server default
    # page size, so only the first page of runs is inspected - confirm.
    list_runs = client.list_runs(experiment_id=experiment.id,
                                 page_size=0)
    # An experiment without runs yields None instead of an empty list.
    runs = list_runs.runs or []

    # The baseline must carry the same keys that are read when building the
    # result, otherwise "no run is good enough" ends in a KeyError.
    best_model = {'version': 0.0, 'acc': goal_acc}
    for run in runs:
        run_dict = run.to_dict()
        if run_dict['storage_state'] == 'STORAGESTATE_ARCHIVED':
            continue
        if run_dict['status'] != 'Succeeded':
            continue
        if run_dict['pipeline_spec']['parameters'] is None:
            continue

        metrics = run_dict['metrics']
        if metrics is None or len(metrics) < 2:
            continue

        # assumes metrics[0] is the model version and metrics[1] the
        # accuracy - TODO confirm the ordering returned by the API.
        model = {'version': metrics[0]['number_value'],
                 'acc': metrics[1]['number_value']}
        if best_model['acc'] < model['acc']:
            best_model = model
    print(best_model)

    result = namedtuple('BestModel', ['version', 'acc'])
    return result(best_model['version'], best_model['acc'])


def get_last_dataset(dataset_path: str) -> str:
    """Return (and print) the path of the most recently modified ``*.npz`` file.

    Args:
        dataset_path: Directory that is scanned (non-recursively) for datasets.

    Returns:
        ``dataset_path`` joined with the name of the ``.npz`` file that has
        the largest modification time.

    Raises:
        FileNotFoundError: If ``dataset_path`` contains no ``*.npz`` file.
    """
    import fnmatch
    import os

    files = fnmatch.filter(os.listdir(dataset_path), '*.npz')
    if not files:
        raise FileNotFoundError(f"no '*.npz' dataset found in {dataset_path!r}")
    # Ascending by mtime, so the newest dataset is the last element.
    files.sort(key=lambda fn: os.path.getmtime(os.path.join(dataset_path, fn)))
    last_dataset = os.path.join(dataset_path, files[-1])
    print(last_dataset)
    return last_dataset

from typing import NamedTuple


from typing import NamedTuple


def train_from_best_model(model_version: float,
                          new_dataset_path: str) \
        -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics'),
                                  ('model_version', str)]):
    """Retrain from an existing model through a Katib experiment.

    Creates a Katib experiment named ``my-model-retrain-<timestamp>`` that
    random-searches the ``epoch`` parameter (10..30) over at most three
    trials, polls it once a minute, and on success copies the dataset into
    ``/notebook/new_dataset/retrain``.

    Args:
        model_version: Version of the model to start from; used to build the
            ``--model_path`` argument of the training script.
        new_dataset_path: Path of the dataset file, forwarded to the training
            script as ``--dataset_path``.

    Returns:
        An ``Outputs`` named tuple holding the JSON-encoded ``val-acc``
        metric and the unchanged input ``model_version``.

    Raises:
        RuntimeError: If the experiment ends in status ``"Failed"``, or if it
            succeeds without reporting the objective metric.
    """
    # Imports are kept local because this function is handed to
    # func_to_container_op further down in this cell.
    import datetime
    import time
    import json
    import os
    import shutil
    from collections import namedtuple
    from kubeflow.katib import KatibClient

    from kubernetes.client import V1ObjectMeta
    from kubeflow.katib import V1beta1Experiment
    from kubeflow.katib import V1beta1AlgorithmSpec
    from kubeflow.katib import V1beta1ObjectiveSpec
    from kubeflow.katib import V1beta1FeasibleSpace
    from kubeflow.katib import V1beta1ExperimentSpec
    from kubeflow.katib import V1beta1ParameterSpec
    from kubeflow.katib import V1beta1TrialTemplate
    from kubeflow.katib import V1beta1TrialParameterSpec

    def find_metrics(_list, name, _type):
        """Return field ``_type`` of the metric called ``name``, or None."""
        for metric in _list:
            if metric['name'] == name:
                return metric[_type]
        return None

    def move_dataset(orig_path):
        """Copy the dataset into the retrain directory (the source is kept)."""
        target_dir = "/notebook/new_dataset/retrain"
        os.makedirs(target_dir, exist_ok=True)
        npz = os.path.basename(orig_path)
        shutil.copy(orig_path, f'{target_dir}/{npz}')

    kclient = KatibClient()
    kclient.list_experiments()
    date_postfix = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    experiment_name = f"my-model-retrain-{date_postfix}"
    objective_metric_name = "Validation-accuracy"

    metadata = V1ObjectMeta(
        name=experiment_name
    )

    algorithm_spec = V1beta1AlgorithmSpec(
        algorithm_name="random"
    )

    objective_spec = V1beta1ObjectiveSpec(
        type="maximize",
        goal=0.99,
        objective_metric_name=objective_metric_name,
        additional_metric_names=["Validation-loss", "accuracy", "loss"]
    )

    parameters = [
        V1beta1ParameterSpec(
            name="epoch",
            parameter_type="int",
            feasible_space=V1beta1FeasibleSpace(
                min="10",
                max="30"
            ),
        ),
    ]

    # JSON template specification for the Trial's Worker Kubernetes Job.
    trial_spec = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "spec": {
            "template": {
                "metadata": {
                    "annotations": {
                        "sidecar.istio.io/inject": "false"
                    }
                },
                "spec": {
                    "containers": [
                        {
                            "name": "training-container",
                            "image": "yourID/fashion-mnist-retrain:handson.rc4",
                            "volumeMounts": [
                                {"name": "notebook", "mountPath": "/notebook"},
                            ],
                            "command": [
                                "python",
                                "/app/my_model_retrain.py",
                                # NOTE(review): the "epoch" trial parameter is
                                # passed as --node_amount - confirm the flag.
                                "--node_amount=${trialParameters.epoch}",
                                f"--dataset_path={new_dataset_path}",
                                f"--model_path=/notebook/model/{model_version}",
                            ]
                        }
                    ],
                    "restartPolicy": "Never",
                    "volumes": [
                        {"name": "notebook",
                         "persistentVolumeClaim": {"claimName": "workspace-handson"}
                         }
                    ]
                }
            }
        }
    }

    # Configure parameters for the Trial template.
    trial_template = V1beta1TrialTemplate(
        primary_container_name="training-container",
        trial_parameters=[
            V1beta1TrialParameterSpec(
                name="epoch",
                description="train epoch",
                reference="epoch"
            ),
        ],
        trial_spec=trial_spec
    )

    # Experiment object.
    experiment = V1beta1Experiment(
        api_version="kubeflow.org/v1beta1",
        kind="Experiment",
        metadata=metadata,
        spec=V1beta1ExperimentSpec(
            max_trial_count=3,
            # NOTE(review): parallel_trial_count exceeds max_trial_count -
            # confirm this is intended.
            parallel_trial_count=30,
            max_failed_trial_count=3,
            algorithm=algorithm_spec,
            objective=objective_spec,
            parameters=parameters,
            resume_policy=None,
            trial_template=trial_template,
        )
    )
    kclient.create_experiment(experiment)

    val_acc = None
    while True:
        time.sleep(60)
        status = kclient.get_experiment_status(experiment_name)
        if status == "Running":
            print(f"{experiment_name} is running")
        elif status == "Failed":
            # Without this branch a failed experiment is polled forever.
            raise RuntimeError(f"Katib experiment {experiment_name} failed")
        elif status == "Succeeded":
            result = kclient.get_optimal_hyperparameters(experiment_name)
            if result:
                metrics = result['currentOptimalTrial']['observation']['metrics']
                val_acc = find_metrics(metrics, objective_metric_name, 'max')
            if val_acc is None:
                raise RuntimeError(
                    f"Katib experiment {experiment_name} succeeded but "
                    f"reported no '{objective_metric_name}' metric")
            move_dataset(new_dataset_path)
            break

    metrics = {
        'metrics': [{
            'name': 'val-acc',
            'numberValue': float(val_acc),
            'format': "PERCENTAGE",
        }]
    }

    outputs = namedtuple('Outputs', ['mlpipeline_metrics', 'model_version'])
    return outputs(json.dumps(metrics), model_version)


def disable_cache(op):
    """Turn off result caching for ``op`` by allowing zero cache staleness."""
    strategy = op.execution_options.caching_strategy
    # "P0D" is an ISO 8601 duration of zero days.
    strategy.max_cache_staleness = "P0D"

def container_op(func):
    """Wrap ``func`` with func_to_container_op using the notebook's base image."""
    image = "dudaji/cap-jupyterlab:tf2.0-cpu"
    return func_to_container_op(func, base_image=image)

def train_pipeline(goal_acc: float = 0.50,
                   dataset_path: str = "/notebook/new_dataset/train"):
    """Pipeline: find the best model, find the latest dataset, then retrain.

    Every step mounts the ``workspace-handson`` PVC at ``/notebook`` and has
    caching disabled.

    Args:
        goal_acc: Accuracy threshold passed to ``get_best_model``.
        dataset_path: Directory passed to ``get_last_dataset``.
    """
    workspace = dsl.PipelineVolume(pvc="workspace-handson")

    # Turn the three step functions into container-op factories.
    best_model_factory = container_op(get_best_model)
    latest_dataset_factory = container_op(get_last_dataset)
    retrain_factory = container_op(train_from_best_model)

    best_model_step = best_model_factory(goal_acc).add_pvolumes(
        pvolumes={"/notebook": workspace})

    latest_dataset_step = latest_dataset_factory(dataset_path).add_pvolumes(
        pvolumes={"/notebook": workspace})

    # The retrain step consumes the outputs of both earlier steps.
    retrain_step = retrain_factory(
        model_version=best_model_step.outputs['version'],
        new_dataset_path=latest_dataset_step.output,
    ).add_pvolumes(pvolumes={"/notebook": workspace})

    for step in (latest_dataset_step, best_model_step, retrain_step):
        disable_cache(step)

# Arguments for the ad-hoc run started below.
arguments = dict(goal_acc=0.50,
                 dataset_path="/notebook/new_dataset/train")

# Start a run of the pipeline function in the "train_model" experiment.
client = kfp.Client()
client.create_run_from_pipeline_func(
    train_pipeline,
    experiment_name="train_model",
    arguments=arguments,
)

# Compile the pipeline to a package file, then upload that package.
kfp.compiler.Compiler().compile(
    pipeline_func=train_pipeline,
    package_path='train_pipeline.yaml',
)

client.upload_pipeline(
    pipeline_name="train_pipeline2",
    description="Train from best model",
    pipeline_package_path="train_pipeline.yaml",
)