In [93]:
from typing import NamedTuple

import kfp
import kfp.components as components

# Pipeline metrics: https://www.kubeflow.org/docs/components/pipelines/sdk/pipelines-metrics/
# Visualizing output in the KFP UI: https://elyra.readthedocs.io/en/latest/recipes/visualizing-output-in-the-kfp-ui.html

def get_data_batch() -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics')]):
    """Emit a sample accuracy metric as a pipeline metrics output.

    Returns:
        A one-field ``Outputs`` named tuple whose ``mlpipeline_metrics`` field
        holds the JSON-encoded metrics dictionary.
    """
    # Imports stay inside the body: this function is wrapped with
    # create_component_from_func, so it should not rely on module-level names.
    import json
    from collections import namedtuple

    print("getting data")

    accuracy = 0.9
    metrics = {
        'metrics': [{
            'name': 'accuracy-score',  # The name of the metric. Visualized as the column name in the runs table.
            'numberValue': accuracy,   # The value of the metric. Must be a numeric value.
            'format': "PERCENTAGE",    # Optional. Supported values are "RAW" and "PERCENTAGE".
        }]
    }

    # Return the declared named tuple (not a bare list) so the result matches
    # the signature and the other component functions in this file.
    outputs = namedtuple('Outputs', ['mlpipeline_metrics'])
    return outputs(json.dumps(metrics))


def get_latest_data() -> NamedTuple('VisualizationOutput', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Build an inline confusion-matrix visualization from hard-coded sample counts.

    Returns:
        A one-field ``VisualizationOutput`` named tuple whose
        ``mlpipeline_ui_metadata`` field holds the JSON-encoded UI metadata.
    """
    # Imports stay inside the body: this function is wrapped with
    # create_component_from_func, so it should not rely on module-level names.
    # The previously imported sklearn confusion_matrix was never used and has
    # been dropped, so scikit-learn is no longer needed to run this step.
    import json
    from collections import namedtuple

    import pandas as pd

    print("Getting latest data")

    # One row per (target, predicted) pair with its sample count.
    matrix = [
        ['yummy', 'yummy', 10],
        ['yummy', 'not yummy', 2],
        ['not yummy', 'yummy', 6],
        ['not yummy', 'not yummy', 7],
    ]

    df = pd.DataFrame(matrix, columns=['target', 'predicted', 'count'])

    metadata = {
        "outputs": [
            {
                "type": "confusion_matrix",
                "format": "csv",
                "schema": [
                    {"name": "target", "type": "CATEGORY"},
                    {"name": "predicted", "type": "CATEGORY"},
                    {"name": "count", "type": "NUMBER"},
                ],
                # The CSV is embedded directly (no header row, no index column).
                "source": df.to_csv(header=False, index=False),
                "storage": "inline",
                "labels": [
                    "yummy",
                    "not yummy",
                ],
            }
        ]
    }

    visualization_output = namedtuple('VisualizationOutput', ['mlpipeline_ui_metadata'])
    return visualization_output(json.dumps(metadata))
    

        
        
from typing import NamedTuple
def reshape_data() -> NamedTuple('MyDivmodOutput', [('mlpipeline_ui_metadata', 'UI_metadata'), ('mlpipeline_metrics', 'Metrics')]):
    """Produce two sample markdown visualizations and two sample metrics.

    Returns:
        A ``MyDivmodOutput`` named tuple with the JSON-encoded UI metadata in
        ``mlpipeline_ui_metadata`` and the JSON-encoded metrics in
        ``mlpipeline_metrics``.
    """
    import json
    from collections import namedtuple

    print("reshaping data")

    # Markdown that is hardcoded inline
    inline_markdown = {
        'storage': 'inline',
        'source': '# Inline Markdown\n* [Kubeflow official doc](https://www.kubeflow.org/).\n',
        'type': 'markdown',
    }

    # Markdown that is read from a file
    remote_markdown = {
        'source': 'https://raw.githubusercontent.com/kubeflow/pipelines/master/README.md',
        # Alternatively, use Google Cloud Storage for sample.
        # 'source': 'gs://jamxl-kfp-bucket/v2-compatible/markdown/markdown_example.md',
        'type': 'markdown',
    }

    ui_metadata = {'outputs': [inline_markdown, remote_markdown]}

    # Two sample metrics; values are stored as floats.
    sample_values = (('quotient', 2), ('remainder', 3))
    run_metrics = {
        'metrics': [
            {'name': metric_name, 'numberValue': float(raw_value)}
            for metric_name, raw_value in sample_values
        ]
    }

    result_type = namedtuple('MyDivmodOutput', ['mlpipeline_ui_metadata', 'mlpipeline_metrics'])
    return result_type(
        mlpipeline_ui_metadata=json.dumps(ui_metadata),
        mlpipeline_metrics=json.dumps(run_metrics),
    )


def model_building(no_epochs:int):
    """Print a banner, then the received ``no_epochs`` value and its type.

    Args:
        no_epochs: Value passed in by the pipeline; only echoed to stdout.
    """
    # One print per item, in the same order as before.
    for item in ("model building", no_epochs, type(no_epochs)):
        print(item)
    
    

# Every step uses the same container image, so it is named once here.
BASE_IMAGE = "public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:v1.4"

# Wrap each plain Python function as a pipeline component.
comp_get_data_batch = components.create_component_from_func(
    get_data_batch,
    base_image=BASE_IMAGE,
)
comp_get_latest_data = components.create_component_from_func(
    get_latest_data,
    base_image=BASE_IMAGE,
)
comp_reshape_data = components.create_component_from_func(
    reshape_data,
    base_image=BASE_IMAGE,
)
comp_model_building = components.create_component_from_func(
    model_building,
    base_image=BASE_IMAGE,
)


@kfp.dsl.pipeline(
   name='output_test',
   description='test'
)
def output_test(no_epochs:int):
    """Wire the four components into a pipeline.

    The two data steps have no dependencies; the reshape step is ordered after
    both of them, and model building is ordered after the reshape step.

    Args:
        no_epochs: Forwarded unchanged to the model-building component.
    """
    batch_task = comp_get_data_batch()
    latest_task = comp_get_latest_data()

    # No data is passed between steps, so ordering is declared explicitly.
    reshape_task = comp_reshape_data().after(batch_task, latest_task)

    comp_model_building(no_epochs).after(reshape_task)


if __name__ == "__main__":
    # Script entry point: either submit a run straight away, or compile the
    # pipeline to YAML and upload it as a new pipeline version.
    client = kfp.Client()

    arguments = {"no_epochs": 3}

    # 1 -> submit a run directly; any other value -> compile and upload.
    run_directly = 1

    if run_directly != 1:
        package_path = 'output_test.yaml'
        kfp.compiler.Compiler().compile(
            pipeline_func=output_test,
            package_path=package_path,
        )
        client.upload_pipeline_version(
            pipeline_package_path=package_path,
            pipeline_version_name="0.4",
            pipeline_name="pipeline test",
            description="just for testing",
        )
    else:
        client.create_run_from_pipeline_func(
            output_test,
            arguments=arguments,
            experiment_name="test",
        )

In [None]:
"""
Alternative for KFP v1: create the UI-metadata artifact by writing the JSON to an output path.

def get_data_batch(metadata_data_batch : kfp.components.OutputPath()):
    print("getting data")
    import json

    metadata = {
        'outputs' : [
        # Markdown that is hardcoded inline
        {
          'storage': 'inline',
          'source': '# Inline Markdown\n[A link](https://www.kubeflow.org/)',
          'type': 'markdown',
        }]
    }
    
    with open(metadata_data_batch, 'w') as metadata_file:
        json.dump(metadata, metadata_file)
"""