From e34aa33ce65e14df4cfecb3181b82d622184cbea Mon Sep 17 00:00:00 2001
From: eterna2 <eterna2@hotmail.com>
Date: Mon, 23 Mar 2020 12:22:17 +0800
Subject: [PATCH] v0.1.0a5 - add kfp_metric and kfp_metrics

---
 README.md                     | 74 ++++++++++++++++++++++++-----------
 kfx/dsl/__init__.py           |  4 +-
 kfx/dsl/_artifact_location.py | 23 ++++++++---
 kfx/vis/__init__.py           | 26 ++++++++++--
 kfx/vis/_helpers.py           | 43 +++++++++++++++++++-
 kfx/vis/_helpers_test.py      | 20 ++++++++++
 kfx/vis/enums.py              |  7 ++++
 kfx/vis/models.py             | 29 +++++++++++++-
 version.txt                   |  2 +-
 9 files changed, 190 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 52e8259..8cadda0 100644
--- a/README.md
+++ b/README.md
@@ -18,13 +18,14 @@ following sub-packages
 > - Repo: [https://github.com/e2fyi/kfx](https://github.com/e2fyi/kfx)
 
 > ### NOTE this is currently alpha
+>
 > There will likely to have breaking changes, and feel free to do a feature request
 >
 > ### Known issues
+>
 > - `kfx.vis.vega.vega_web_app` and `KfpArtifact` does not work well together (see example) because of CORs - the web app is hosted inside an iFrame which prevents it from accessing the `ml-pipeline-ui` API server.
 > - `kfx.vis.vega.vega_web_app` is only supported in the latest kubeflow pipeline UI (as inline is only supported after `0.2.5`)
 
-
 ## Quick start
 
 Installation
@@ -66,27 +67,23 @@ def test_op(
     import kfx.vis
     import kfx.vis.vega
 
-    data = [
-        {"a": "A", "b": 28},
-        {"a": "B", "b": 55},
-        {"a": "C", "b": 43},
-        {"a": "D", "b": 91},
-        {"a": "E", "b": 81},
-        {"a": "F", "b": 53},
-        {"a": "G", "b": 19},
-        {"a": "H", "b": 87},
-        {"a": "I", "b": 52},
-    ]
-    vega_data_file.write(json.dumps(data))
-
     # `KfpArtifact` provides the reference to data artifact created
     # inside this task
     spec = {
         "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
         "description": "A simple bar chart",
         "data": {
-            "url": kfx.dsl.KfpArtifact("vega_data_file"),
-            "format": {"type": "json"},
+            "values": [
+                {"a": "A", "b": 28},
+                {"a": "B", "b": 55},
+                {"a": "C", "b": 43},
+                {"a": "D", "b": 91},
+                {"a": "E", "b": 81},
+                {"a": "F", "b": 53},
+                {"a": "G", "b": 19},
+                {"a": "H", "b": 87},
+                {"a": "I", "b": 52},
+            ]
         },
         "mark": "bar",
         "encoding": {
@@ -140,14 +137,27 @@ Example: Using [pydantic](https://pydantic-docs.helpmanual.io/) data models to g
 import kfp.components
 import kfx.vis
 
-from kfx.vis.enums import KfpStorage
+from kfx.vis.enums import KfpStorage, KfpMetricFormat
 
 
 @func_to_container_op
-def some_op(mlpipeline_ui_metadata: OutputTextFile(str)):
-    "kfp operator that provides metadata for visualizations."
-
-    mlpipeline_ui_metadata = kfx.vis.kfp_ui_metadata(
+def some_op(
+    mlpipeline_metrics: OutputTextFile(str), mlpipeline_ui_metadata: OutputTextFile(str),
+):
+    "kfp operator that provides metadata and metrics for visualizations."
+
+    # create metrics
+    metrics = kfx.vis.kfp_metrics([
+        # override metric format with custom value
+        kfx.vis.kfp_metric(name="accuracy-score", value=0.8, metric_format="PERCENTAGE"),
+        # render recall as percent
+        kfx.vis.kfp_metric("recall-score", 0.9, percent=True),
+        # raw score
+        kfx.vis.kfp_metric("raw-score", 123.45),
+    ])
+
+    # create ui metadata for vis
+    ui_metadata = kfx.vis.kfp_ui_metadata(
         [
             # creates a confusion matrix vis
             kfx.vis.confusion_matrix(
@@ -180,11 +190,30 @@ def some_op(mlpipeline_ui_metadata: OutputTextFile(str)):
             kfx.vis.web_app(
                 "gs://your_project/your_bucket/your_html_file",
             ),
+            # creates a Vega-Lite vis as a web app
+            kfx.vis.vega_web_app(spec={
+                "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+                "description": "A simple bar chart with embedded data.",
+                "data": {
+                    "values": [
+                        {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43},
+                        {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53},
+                        {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}
+                    ]
+                },
+                "mark": "bar",
+                "encoding": {
+                    "x": {"field": "a", "type": "ordinal"},
+                    "y": {"field": "b", "type": "quantitative"}
+                }
+            })
         ]
     )
 
+    # write metrics to kubeflow pipelines UI
+    mlpipeline_metrics.write(kfx.vis.asjson(metrics))
     # write ui metadata so that kubeflow pipelines UI can render visualizations
-    mlpipeline_ui_metadata.write(kfx.vis.asjson(mlpipeline_ui_metadata))
+    mlpipeline_ui_metadata.write(kfx.vis.asjson(ui_metadata))
 ```
 
 ## Developer guide
@@ -208,6 +237,7 @@ The version of the package is read from `version.txt` - i.e. please update the
 appropriate semantic version (major -> breaking changes, minor -> new features, patch -> bug fix, postfix -> pre-release/post-release).
 
 ### `Makefile`:
+
 ```bash
 # autoformat codes with docformatter, isort, and black
 make format
diff --git a/kfx/dsl/__init__.py b/kfx/dsl/__init__.py
index b7433fd..8b45bd9 100644
--- a/kfx/dsl/__init__.py
+++ b/kfx/dsl/__init__.py
@@ -34,7 +34,6 @@ def test_op(
             {"a": "H", "b": 87},
             {"a": "I", "b": 52},
         ]
-        vega_data_file.write(json.dumps(data))
 
         # `KfpArtifact` provides the reference to data artifact created
         # inside this task
@@ -42,8 +41,7 @@ def test_op(
             "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
             "description": "A simple bar chart",
             "data": {
-                "url": kfx.dsl.KfpArtifact("vega_data_file"),
-                "format": {"type": "json"},
+                "values": data,
             },
             "mark": "bar",
             "encoding": {
diff --git a/kfx/dsl/_artifact_location.py b/kfx/dsl/_artifact_location.py
index 4892fe7..4ff5c1f 100644
--- a/kfx/dsl/_artifact_location.py
+++ b/kfx/dsl/_artifact_location.py
@@ -265,7 +265,17 @@ def set_workflow_envs(task: kfp.dsl.ContainerOp):
         return set_workflow_envs
 
 
-def _sanitize_artifact_name(name: str, sanitize: bool = True) -> str:
+def _handle_special_artifact_names(name: str) -> str:
+    """Sanitize only special artifact names (e.g. mlpipeline_ui_metadata); return others as-is."""
+    sanitized: str = sanitize_k8s_name(name)
+    return (
+        sanitized
+        if sanitized in {"mlpipeline-ui-metadata", "mlpipeline-metrics"}
+        else name
+    )
+
+
+def _sanitize_artifact_name(name: str, sanitize: bool = False) -> str:
     """Sanitize the artifact name based on k8s resource naming convention.
 
     Also remove suffixes "_path" and "_file". (See this `comment <https://github.com/kubeflow/pipelines/blob/4cb81ea047361ddce7ce8b0b68133b0a92724588/sdk/python/kfp/components/_python_op.py#L327>'_.)
@@ -273,7 +283,7 @@ def _sanitize_artifact_name(name: str, sanitize: bool = True) -> str:
 
     Args:
         name (str): [description]
-        sanitize (bool, optional): Whether to sanitize the name. Defaults to True.
+        sanitize (bool, optional): Whether to sanitize the name. Defaults to False.
 
     Returns:
         str: [description]
@@ -282,7 +292,10 @@ def _sanitize_artifact_name(name: str, sanitize: bool = True) -> str:
         name = name[0 : -len("_path")]
     elif name.endswith("_file"):
         name = name[0 : -len("_file")]
-    return sanitize_k8s_name(name) if sanitize else name  # type: ignore
+
+    return (  # type: ignore
+        sanitize_k8s_name(name) if sanitize else _handle_special_artifact_names(name)
+    )
 
 
 class KfpArtifact:
@@ -329,7 +342,6 @@ def test_op(
                     {"a": "H", "b": 87},
                     {"a": "I", "b": 52},
                 ]
-                vega_data_file.write(json.dumps(data))
 
                 # `KfpArtifact` provides the reference to data artifact created
                 # inside this task
@@ -337,8 +349,7 @@ def test_op(
                     "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
                     "description": "A simple bar chart",
                     "data": {
-                        "url": kfx.dsl.KfpArtifact("vega_data_file"),
-                        "format": {"type": "json"},
+                        "values": data,
                     },
                     "mark": "bar",
                     "encoding": {
diff --git a/kfx/vis/__init__.py b/kfx/vis/__init__.py
index fe55720..7c54852 100644
--- a/kfx/vis/__init__.py
+++ b/kfx/vis/__init__.py
@@ -5,14 +5,16 @@
     import kfp.components
     import kfx.vis
 
-    from kfx.vis.enums import KfpStorage
+    from kfx.vis.enums import KfpStorage, KfpMetricFormat
 
 
     @func_to_container_op
-    def some_op(mlpipeline_ui_metadata: OutputTextFile(str)):
+    def some_op(
+        mlpipeline_ui_metadata: OutputTextFile(str), mlpipeline_metrics: OutputTextFile(str)
+    ):
         "kfp operator that provides metadata for visualizations."
 
-        mlpipeline_ui_metadata = kfx.vis.kfp_ui_metadata(
+        ui_metadata = kfx.vis.kfp_ui_metadata(
             [
                 # creates a confusion matrix vis
                 kfx.vis.confusion_matrix(
@@ -66,7 +68,21 @@ def some_op(mlpipeline_ui_metadata: OutputTextFile(str)):
         )
 
         # write ui metadata so that kubeflow pipelines UI can render visualizations
-        mlpipeline_ui_metadata.write(kfx.vis.asjson(mlpipeline_ui_metadata))
+        mlpipeline_ui_metadata.write(kfx.vis.asjson(ui_metadata))
+
+
+        # create metrics
+        metrics = kfx.vis.kfp_metrics([
+            # override metric format with custom value
+            kfx.vis.kfp_metric(name="accuracy-score", value=0.8, metric_format="PERCENTAGE"),
+            # render recall as percent
+            kfx.vis.kfp_metric("recall-score", 0.9, percent=True),
+            # raw score
+            kfx.vis.kfp_metric("raw-score", 123.45),
+        ])
+
+        # write metrics to kubeflow pipelines UI
+        mlpipeline_metrics.write(kfx.vis.asjson(metrics))
 
 """
 from kfx.vis._helpers import (
@@ -76,6 +92,8 @@ def some_op(mlpipeline_ui_metadata: OutputTextFile(str)):
     asjson,
     web_app,
     markdown,
+    kfp_metric,
+    kfp_metrics,
     tensorboard,
     tolocalfile,
     kfp_ui_metadata,
diff --git a/kfx/vis/_helpers.py b/kfx/vis/_helpers.py
index 2fdac0c..fedfcd1 100644
--- a/kfx/vis/_helpers.py
+++ b/kfx/vis/_helpers.py
@@ -1,5 +1,5 @@
 """Helper functions for generating visualization in Kubeflow pipelines UI."""
-from typing import List, Union, Optional
+from typing import List, Union, Iterable, Optional
 
 from pydantic import BaseModel
 
@@ -9,10 +9,13 @@
     Table,
     WebApp,
     Markdown,
+    KfpMetric,
+    KfpMetrics,
     KfpStorage,
     Tensorboard,
     KfpUiMetadata,
     ConfusionMatrix,
+    KfpMetricFormat,
     KfpArtifactDataFormat,
 )
 
@@ -154,6 +157,44 @@ def kfp_ui_metadata(
     return KfpUiMetadata(version=version, outputs=outputs)
 
 
+def kfp_metric(
+    name: str,
+    value: Union[float, int],
+    percent: bool = False,
+    metric_format: Optional[Union[str, KfpMetricFormat]] = None,
+) -> KfpMetric:
+    """Describes a single kubeflow pipeline metric.
+
+    Args:
+        name (str): Name of the metric. Must be of the form `^[a-z]([-a-z0-9]{0,62}[a-z0-9])?$`.
+        value (Union[float, int]): Numerical value of the metric.
+        percent (bool, optional): Set to True to render value as percentage. Ignored when `metric_format` is provided. Defaults to False.
+        metric_format (Optional[Union[str, KfpMetricFormat]]): Format for the metric - "PERCENTAGE", "RAW" or None. Overrides "percent" flag if provided. Defaults to None.
+
+    Returns:
+        KfpMetric: an instance of KfpMetric to be passed to a KfpMetrics object.
+    """
+    if not metric_format and percent:  # an explicit metric_format wins over the percent flag
+        metric_format = KfpMetricFormat.PERCENTAGE
+    return KfpMetric(name=name, numberValue=value, format=metric_format)
+
+
+def kfp_metrics(
+    metrics: Union[
+        Iterable[KfpMetric], Iterable[dict], Iterable[Union[KfpMetric, dict]]
+    ]
+) -> KfpMetrics:
+    """Describes a list of kubeflow pipeline metrics.
+
+    Args:
+        metrics (Union[Iterable[KfpMetric], Iterable[dict], Iterable[Union[KfpMetric, dict]]]): Any iterable of dict or KfpMetric.
+
+    Returns:
+        KfpMetrics: an instance of KfpMetrics which can be serialized (e.g. with `asjson`) and written to the output.
+    """
+    return KfpMetrics(metrics=metrics)
+
+
 def asdict(obj: BaseModel) -> dict:
     """Returns the dict representations of the pydantic data object."""
     return obj.dict(exclude_none=True, by_alias=True)
diff --git a/kfx/vis/_helpers_test.py b/kfx/vis/_helpers_test.py
index 183ca64..3be4051 100644
--- a/kfx/vis/_helpers_test.py
+++ b/kfx/vis/_helpers_test.py
@@ -161,3 +161,23 @@ def test_ui_metadata():
         ]
     )
     assert kfxvis.asdict(data) == expected, "generates json for kfp ui metadata"
+
+
+def test_kfp_metrics():
+    expected = {
+        "metrics": [
+            {"name": "foo-bar1", "numberValue": 1.0, "format": "PERCENTAGE"},
+            {"name": "foo-bar2", "numberValue": 1000, "format": "RAW"},
+            {"name": "foo-bar3", "numberValue": 1000.0},  # no "format" key expected
+        ]
+    }
+    data = kfxvis.kfp_metrics(
+        [
+            kfxvis.kfp_metric("foo-bar1", 1.0, True),  # third positional arg is `percent`
+            kfxvis.kfp_metric(  # explicit metric_format given as an enum member
+                name="foo-bar2", value=1000, metric_format=kfxvis.KfpMetricFormat.RAW
+            ),
+            kfxvis.kfp_metric("foo-bar3", 1000.0),  # neither percent nor metric_format set
+        ]
+    )
+    assert kfxvis.asdict(data) == expected, "generates json for kfp metrics"
diff --git a/kfx/vis/enums.py b/kfx/vis/enums.py
index 1ba6d03..30cb4da 100644
--- a/kfx/vis/enums.py
+++ b/kfx/vis/enums.py
@@ -38,3 +38,10 @@ class KfpStorage(str, Enum):
     s3 = "s3"
     http = "http"
     https = "https"
+
+
+class KfpMetricFormat(str, Enum):
+    """Allowed values for the format of a kubeflow pipeline metric."""
+
+    PERCENTAGE = "PERCENTAGE"
+    RAW = "RAW"
diff --git a/kfx/vis/models.py b/kfx/vis/models.py
index 4c05104..540de1e 100644
--- a/kfx/vis/models.py
+++ b/kfx/vis/models.py
@@ -3,7 +3,13 @@
 
 from pydantic import Field, BaseModel
 
-from kfx.vis.enums import KfpStorage, KfpVisType, KfpDataType, KfpArtifactDataFormat
+from kfx.vis.enums import (
+    KfpStorage,
+    KfpVisType,
+    KfpDataType,
+    KfpMetricFormat,
+    KfpArtifactDataFormat,
+)
 
 
 class KfpArtifactSchema(BaseModel):
@@ -316,3 +322,24 @@ class KfpUiMetadata(BaseModel):
     ] = Field(
         [], description="List of objects describing the desired kfp visualizations."
     )
+
+
+class KfpMetric(BaseModel):
+    """Describes a single metric (name, numeric value, optional format) from a kubeflow pipeline task."""
+
+    name: str = Field(
+        ...,
+        description="Name of the metric. Must be format: ^[a-z]([-a-z0-9]{0,62}[a-z0-9])?$",
+    )
+    numberValue: Union[float, int] = Field(
+        ..., description="Numerical value of the metric."
+    )
+    format: Optional[KfpMetricFormat] = Field(
+        None, description="can only be PERCENTAGE, RAW, or not set"
+    )
+
+
+class KfpMetrics(BaseModel):
+    """Describes the metrics outputs of a kubeflow pipeline task as a list of KfpMetric objects."""
+
+    metrics: List[KfpMetric] = Field([], description="A list of KfpMetric objects.")
diff --git a/version.txt b/version.txt
index c32101a..c83b347 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.1.0a4
\ No newline at end of file
+0.1.0a5
\ No newline at end of file