From e34aa33ce65e14df4cfecb3181b82d622184cbea Mon Sep 17 00:00:00 2001 From: eterna2 Date: Mon, 23 Mar 2020 12:22:17 +0800 Subject: [PATCH] v0.1.0a5 - add kfp_metric and kfp_metrics --- README.md | 74 ++++++++++++++++++++++++----------- kfx/dsl/__init__.py | 4 +- kfx/dsl/_artifact_location.py | 23 ++++++++--- kfx/vis/__init__.py | 26 ++++++++++-- kfx/vis/_helpers.py | 43 +++++++++++++++++++- kfx/vis/_helpers_test.py | 20 ++++++++++ kfx/vis/enums.py | 7 ++++ kfx/vis/models.py | 29 +++++++++++++- version.txt | 2 +- 9 files changed, 190 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 52e8259..8cadda0 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,14 @@ following sub-packages > - Repo: [https://github.com/e2fyi/kfx](https://github.com/e2fyi/kfx) > ### NOTE this is currently alpha +> > There will likely to have breaking changes, and feel free to do a feature request > > ### Known issues +> > - `kfx.vis.vega.vega_web_app` and `KfpArtifact` does not work well together (see example) because of CORs - the web app is hosted inside an iFrame which prevents it from accessing the `ml-pipeline-ui` API server. 
> - `kfx.vis.vega.vega_web_app` is only supported in the latest kubeflow pipeline UI (as inline is only supported after `0.2.5`) - ## Quick start Installation @@ -66,27 +67,23 @@ def test_op( import kfx.vis import kfx.vis.vega - data = [ - {"a": "A", "b": 28}, - {"a": "B", "b": 55}, - {"a": "C", "b": 43}, - {"a": "D", "b": 91}, - {"a": "E", "b": 81}, - {"a": "F", "b": 53}, - {"a": "G", "b": 19}, - {"a": "H", "b": 87}, - {"a": "I", "b": 52}, - ] - vega_data_file.write(json.dumps(data)) - # `KfpArtifact` provides the reference to data artifact created # inside this task spec = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "description": "A simple bar chart", "data": { - "url": kfx.dsl.KfpArtifact("vega_data_file"), - "format": {"type": "json"}, + "values": [ + {"a": "A", "b": 28}, + {"a": "B", "b": 55}, + {"a": "C", "b": 43}, + {"a": "D", "b": 91}, + {"a": "E", "b": 81}, + {"a": "F", "b": 53}, + {"a": "G", "b": 19}, + {"a": "H", "b": 87}, + {"a": "I", "b": 52}, + ] }, "mark": "bar", "encoding": { @@ -140,14 +137,27 @@ Example: Using [pydantic](https://pydantic-docs.helpmanual.io/) data models to g import kfp.components import kfx.vis -from kfx.vis.enums import KfpStorage +from kfx.vis.enums import KfpStorage, KfpMetricFormat @func_to_container_op -def some_op(mlpipeline_ui_metadata: OutputTextFile(str)): - "kfp operator that provides metadata for visualizations." - - mlpipeline_ui_metadata = kfx.vis.kfp_ui_metadata( +def some_op( + mlpipeline_metrics: OutputTextFile(str), mlpipeline_ui_metadata: OutputTextFile(str), +): + "kfp operator that provides metadata and metrics for visualizations." 
+ + # create metrics + metrics = kfx.vis.kfp_metrics([ + # override metric format with custom value + kfx.vis.kfp_metric(name="accuracy-score", value=0.8, metric_format="PERCENTAGE"), + # render recall as percent + kfx.vis.kfp_metric("recall-score", 0.9, percent=True), + # raw score + kfx.vis.kfp_metric("raw-score", 123.45), + ]) + + # create ui metadata for vis + ui_metadata = kfx.vis.kfp_ui_metadata( [ # creates a confusion matrix vis kfx.vis.confusion_matrix( @@ -180,11 +190,30 @@ def some_op(mlpipeline_ui_metadata: OutputTextFile(str)): kfx.vis.web_app( "gs://your_project/your_bucket/your_html_file", ), + # creates a Vega-Lite vis as a web app + kfx.vis.vega_web_app(spec={ + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "description": "A simple bar chart with embedded data.", + "data": { + "values": [ + {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43}, + {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53}, + {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52} + ] + }, + "mark": "bar", + "encoding": { + "x": {"field": "a", "type": "ordinal"}, + "y": {"field": "b", "type": "quantitative"} + } + }) ] ) + # write metrics to kubeflow pipelines UI + mlpipeline_metrics.write(kfx.vis.asjson(metrics)) # write ui metadata so that kubeflow pipelines UI can render visualizations - mlpipeline_ui_metadata.write(kfx.vis.asjson(mlpipeline_ui_metadata)) + mlpipeline_ui_metadata.write(kfx.vis.asjson(ui_metadata)) ``` ## Developer guide @@ -208,6 +237,7 @@ The version of the package is read from `version.txt` - i.e. please update the appropriate semantic version (major -> breaking changes, minor -> new features, patch -> bug fix, postfix -> pre-release/post-release). 
### `Makefile`: + ```bash # autoformat codes with docformatter, isort, and black make format diff --git a/kfx/dsl/__init__.py b/kfx/dsl/__init__.py index b7433fd..8b45bd9 100644 --- a/kfx/dsl/__init__.py +++ b/kfx/dsl/__init__.py @@ -34,7 +34,6 @@ def test_op( {"a": "H", "b": 87}, {"a": "I", "b": 52}, ] - vega_data_file.write(json.dumps(data)) # `KfpArtifact` provides the reference to data artifact created # inside this task @@ -42,8 +41,7 @@ def test_op( "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "description": "A simple bar chart", "data": { - "url": kfx.dsl.KfpArtifact("vega_data_file"), - "format": {"type": "json"}, + "values": data, }, "mark": "bar", "encoding": { diff --git a/kfx/dsl/_artifact_location.py b/kfx/dsl/_artifact_location.py index 4892fe7..4ff5c1f 100644 --- a/kfx/dsl/_artifact_location.py +++ b/kfx/dsl/_artifact_location.py @@ -265,7 +265,17 @@ def set_workflow_envs(task: kfp.dsl.ContainerOp): return set_workflow_envs -def _sanitize_artifact_name(name: str, sanitize: bool = True) -> str: +def _handle_special_artifact_names(name: str) -> str: + """Always sanitize special artifact names (e.g. mlpipeline_ui_metadata)""" + sanitized: str = sanitize_k8s_name(name) + return ( + sanitized + if sanitized in {"mlpipeline-ui-metadata", "mlpipeline-metrics"} + else name + ) + + +def _sanitize_artifact_name(name: str, sanitize: bool = False) -> str: """Sanitize the artifact name based on k8s resource naming convention. Also remove suffixes "_path" and "_file". (See this `comment '_.) @@ -273,7 +283,7 @@ def _sanitize_artifact_name(name: str, sanitize: bool = True) -> str: Args: name (str): [description] - sanitize (bool, optional): Whether to sanitize the name. Defaults to True. + sanitize (bool, optional): Whether to sanitize the name. Defaults to False. 
Returns: str: [description] @@ -282,7 +292,10 @@ def _sanitize_artifact_name(name: str, sanitize: bool = True) -> str: name = name[0 : -len("_path")] elif name.endswith("_file"): name = name[0 : -len("_file")] - return sanitize_k8s_name(name) if sanitize else name # type: ignore + + return ( # type: ignore + sanitize_k8s_name(name) if sanitize else _handle_special_artifact_names(name) + ) class KfpArtifact: @@ -329,7 +342,6 @@ def test_op( {"a": "H", "b": 87}, {"a": "I", "b": 52}, ] - vega_data_file.write(json.dumps(data)) # `KfpArtifact` provides the reference to data artifact created # inside this task @@ -337,8 +349,7 @@ def test_op( "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "description": "A simple bar chart", "data": { - "url": kfx.dsl.KfpArtifact("vega_data_file"), - "format": {"type": "json"}, + "values": data, }, "mark": "bar", "encoding": { diff --git a/kfx/vis/__init__.py b/kfx/vis/__init__.py index fe55720..7c54852 100644 --- a/kfx/vis/__init__.py +++ b/kfx/vis/__init__.py @@ -5,14 +5,16 @@ import kfp.components import kfx.vis - from kfx.vis.enums import KfpStorage + from kfx.vis.enums import KfpStorage, KfpMetricFormat @func_to_container_op - def some_op(mlpipeline_ui_metadata: OutputTextFile(str)): + def some_op( + mlpipeline_ui_metadata: OutputTextFile(str), mlpipeline_metrics: OutputTextFile(str) + ): "kfp operator that provides metadata for visualizations." 
- mlpipeline_ui_metadata = kfx.vis.kfp_ui_metadata( + ui_metadata = kfx.vis.kfp_ui_metadata( [ # creates a confusion matrix vis kfx.vis.confusion_matrix( @@ -66,7 +68,21 @@ def some_op(mlpipeline_ui_metadata: OutputTextFile(str)): ) # write ui metadata so that kubeflow pipelines UI can render visualizations - mlpipeline_ui_metadata.write(kfx.vis.asjson(mlpipeline_ui_metadata)) + mlpipeline_ui_metadata.write(kfx.vis.asjson(ui_metadata)) + + + # create metrics + metrics = kfx.vis.kfp_metrics([ + # override metric format with custom value + kfx.vis.kfp_metric(name="accuracy-score", value=0.8, metric_format="PERCENTAGE"), + # render recall as percent + kfx.vis.kfp_metric("recall-score", 0.9, percent=True), + # raw score + kfx.vis.kfp_metric("raw-score", 123.45), + ]) + + # write metrics to kubeflow pipelines UI + mlpipeline_metrics.write(kfx.vis.asjson(metrics)) """ from kfx.vis._helpers import ( @@ -76,6 +92,8 @@ def some_op(mlpipeline_ui_metadata: OutputTextFile(str)): asjson, web_app, markdown, + kfp_metric, + kfp_metrics, tensorboard, tolocalfile, kfp_ui_metadata, diff --git a/kfx/vis/_helpers.py b/kfx/vis/_helpers.py index 2fdac0c..fedfcd1 100644 --- a/kfx/vis/_helpers.py +++ b/kfx/vis/_helpers.py @@ -1,5 +1,5 @@ """Helper functions for generating visualization in Kubeflow pipelines UI.""" -from typing import List, Union, Optional +from typing import List, Union, Iterable, Optional from pydantic import BaseModel @@ -9,10 +9,13 @@ Table, WebApp, Markdown, + KfpMetric, + KfpMetrics, KfpStorage, Tensorboard, KfpUiMetadata, ConfusionMatrix, + KfpMetricFormat, KfpArtifactDataFormat, ) @@ -154,6 +157,44 @@ def kfp_ui_metadata( return KfpUiMetadata(version=version, outputs=outputs) +def kfp_metric( + name: str, + value: Union[float, int], + percent: bool = False, + metric_format: Union[str, KfpMetricFormat] = None, +) -> KfpMetric: + """Describes a single kubeflow pipeline metric. + + Args: + name (str): Name of the metric. Must be of the form `^[a-z]([-a-z0-9]{0,62}[a-z0-9])?$`. 
+ value (Union[float, int]): Numerical value of the metric. + percent (bool, optional): Set to True to render value as percentage. Defaults to False. + metric_format (Union[str, KfpMetricFormat], optional): Format for the metric - "PERCENTAGE", "RAW" or None. Overrides "percent" flag if provided. Defaults to None. + + Returns: + KfpMetric: an instance of KfpMetric to be passed to a KfpMetrics object. + """ + if not metric_format and percent: + metric_format = KfpMetricFormat.PERCENTAGE + return KfpMetric(name=name, numberValue=value, format=metric_format) + + +def kfp_metrics( + metrics: Union[ + Iterable[KfpMetric], Iterable[dict], Iterable[Union[KfpMetric, dict]] + ] +) -> KfpMetrics: + """Describes a list of kubeflow pipeline metrics. + + Args: + metrics (Union[Iterable[KfpMetric], Iterable[dict], Iterable[Union[KfpMetric, dict]]]): Any iterable of dict or KfpMetric. + + Returns: + KfpMetrics: an instance of KfpMetrics which can be streamed to the output. + """ + return KfpMetrics(metrics=metrics) + + def asdict(obj: BaseModel) -> dict: """Returns the dict representations of the pydantic data object.""" return obj.dict(exclude_none=True, by_alias=True) diff --git a/kfx/vis/_helpers_test.py b/kfx/vis/_helpers_test.py index 183ca64..3be4051 100644 --- a/kfx/vis/_helpers_test.py +++ b/kfx/vis/_helpers_test.py @@ -161,3 +161,23 @@ def test_ui_metadata(): ] ) assert kfxvis.asdict(data) == expected, "generates json for kfp ui metadata" + + +def test_kfp_metrics(): + expected = { + "metrics": [ + {"name": "foo-bar1", "numberValue": 1.0, "format": "PERCENTAGE"}, + {"name": "foo-bar2", "numberValue": 1000, "format": "RAW"}, + {"name": "foo-bar3", "numberValue": 1000.0}, + ] + } + data = kfxvis.kfp_metrics( + [ + kfxvis.kfp_metric("foo-bar1", 1.0, True), + kfxvis.kfp_metric( + name="foo-bar2", value=1000, metric_format=kfxvis.KfpMetricFormat.RAW + ), + kfxvis.kfp_metric("foo-bar3", 1000.0), + ] + ) + assert kfxvis.asdict(data) == expected, "generates json for kfp metrics" 
diff --git a/kfx/vis/enums.py b/kfx/vis/enums.py index 1ba6d03..30cb4da 100644 --- a/kfx/vis/enums.py +++ b/kfx/vis/enums.py @@ -38,3 +38,10 @@ class KfpStorage(str, Enum): s3 = "s3" http = "http" https = "https" + + +class KfpMetricFormat(str, Enum): + """Metric format.""" + + PERCENTAGE = "PERCENTAGE" + RAW = "RAW" diff --git a/kfx/vis/models.py b/kfx/vis/models.py index 4c05104..540de1e 100644 --- a/kfx/vis/models.py +++ b/kfx/vis/models.py @@ -3,7 +3,13 @@ from pydantic import Field, BaseModel -from kfx.vis.enums import KfpStorage, KfpVisType, KfpDataType, KfpArtifactDataFormat +from kfx.vis.enums import ( + KfpStorage, + KfpVisType, + KfpDataType, + KfpMetricFormat, + KfpArtifactDataFormat, +) class KfpArtifactSchema(BaseModel): @@ -316,3 +322,24 @@ class KfpUiMetadata(BaseModel): ] = Field( [], description="List of objects describing the desired kfp visualizations." ) + + +class KfpMetric(BaseModel): + """Describes a single metric from a kubeflow pipeline task.""" + + name: str = Field( + ..., + description="Name of the metric. Must be format: ^[a-z]([-a-z0-9]{0,62}[a-z0-9])?$", + ) + numberValue: Union[float, int] = Field( + ..., description="Numerical value of the metric." + ) + format: Optional[KfpMetricFormat] = Field( + None, description="can only be PERCENTAGE, RAW, or not set" + ) + + +class KfpMetrics(BaseModel): + """Describes the metrics outputs of a kubeflow pipeline task.""" + + metrics: List[KfpMetric] = Field([], description="A list of KfpMetric objects.") diff --git a/version.txt b/version.txt index c32101a..c83b347 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.1.0a4 \ No newline at end of file +0.1.0a5 \ No newline at end of file