# Metadata management in Kubeflow

In [None]:
!pip install kubeflow-metadata --user

In [1]:
from kubeflow.metadata import metadata
from datetime import datetime
from uuid import uuid4

In [2]:
# Connection settings for the metadata store; both values are passed to
# metadata.Store (as grpc_host / grpc_port) when the workspace is created.
METADATA_STORE_HOST = "metadata-grpc-service.kubeflow" # default DNS of Kubeflow Metadata gRPC service.
METADATA_STORE_PORT = 8080

In [3]:
# Define the workspace that the run and execution created below attach to.
# It is backed by a Store pointing at the gRPC host/port configured above.
ws_tf = metadata.Workspace(
    name="kubeflow_dnn_tf",
    description="Simple DNN workspace",
    labels={"Execution_key_1": "value_1"},
    store=metadata.Store(
        grpc_host=METADATA_STORE_HOST,
        grpc_port=METADATA_STORE_PORT,
    ),
)

In [4]:
# Create a run inside the workspace. The name carries the current UTC
# timestamp (ISO 8601), so re-running this cell produces a new run name.
r = metadata.Run(
    name="run-" + datetime.utcnow().isoformat(sep="T"),
    description="A example run",
    workspace=ws_tf,
)

In [5]:
# Create an execution that belongs to both the workspace and the run above.
# NOTE: the variable name `exec` shadows Python's built-in exec(); it is kept
# because the logging cells below call exec.log_input / exec.log_output.
exec = metadata.Execution(
    workspace=ws_tf,
    run=r,
    name="execution" + datetime.utcnow().isoformat(sep="T"),
    description="DNN Execution example",
)

# Show the id reported on the newly created execution object.
print("An execution was created with id %s" % exec.id)

An execution was created with id 16


In [6]:
# Run the model here (placeholder -- the actual model code is omitted from this example).

In [7]:
# Log the input data set used by this execution.
# A fresh UUID-based version string is generated every time the cell runs.
data_set_version = "data_set_version_" + str(uuid4())

data_set = exec.log_input(
    metadata.DataSet(
        name="data-exraction",
        description="Sample dateset - fashion mnist",
        owner="luis@luis.com",
        uri="gs://...",
        query="SELECT * FROM table ...",
        version=data_set_version,
    )
)

# The object returned by log_input exposes the id and version printed here.
print("Data set id is {0.id} with version '{0.version}'".format(data_set))

Data set id is 11 with version 'data_set_version_cecc67e2-b908-457d-a627-7123570ea03f'


In [8]:
# Log the trained model as an output of this execution.
# As with the data set, the version string is a fresh UUID on every run.
model_version = "model_version_" + str(uuid4())

model = exec.log_output(
    metadata.Model(
        name="MNIST Fashion",
        description="model to recognize classify fashion",
        owner="luis@luis.com",
        uri="gs://...",
        version=model_version,
        model_type="neural network",
        training_framework={"name": "tensorflow", "version": "v1.0"},
        hyperparameters={"layers": [10, 3, 1], "early_stop": True},
        labels={"mylabel": "l1"},
    )
)

# First the full repr of the logged model, then just its id and version.
print(model)
print("\nModel id is {0.id} and version is {0.version}".format(model))

kubeflow.metadata.metadata.Model(workspace=None, name='MNIST Fashion', description='model to recognize classify fashion', owner='luis@luis.com', uri='gs://...', version='model_version_944e639b-db1f-45a5-b13b-c9ec282e7451', model_type='neural network', training_framework={'name': 'tensorflow', 'version': 'v1.0'}, hyperparameters={'layers': [10, 3, 1], 'early_stop': True}, labels={'mylabel': 'l1'}, id=12, create_time='2020-12-11T18:56:39.711894Z', kwargs={})

Model id is 12 and version is model_version_944e639b-db1f-45a5-b13b-c9ec282e7451


In [9]:
# Log evaluation metrics as another output of this execution.
# The metrics record references the data set and model logged in the
# previous cells through their ids (passed as strings).
metrics = exec.log_output(
    metadata.Metrics(
        name="Fashion MNIST-evaluation",
        description="validating the Fashion MNIST model to recognize fashion clothes",
        owner="luis@luis.com",
        uri="gs://...",
        data_set_id=str(data_set.id),
        model_id=str(model.id),
        metrics_type=metadata.Metrics.VALIDATION,
        values={"accuracy": 0.95},
        labels={"mylabel": "l1"},
    )
)

print("Metrics id is %s" % metrics.id)

Metrics id is 13
