In [1]:
try:
    from tfx import v1 as tfx
    
    if tfx.__version__ != "1.4.0":
        raise ModuleNotFoundError
except ModuleNotFoundError:
    !pip install tfx==1.4.0

In [4]:
import tensorflow as tf

from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

%load_ext tfx.orchestration.experimental.interactive.notebook_extensions.skip

print('TFX version: {}'.format(tfx.__version__))
print('TensorFlow version: {}'.format(tf.__version__))

The tfx.orchestration.experimental.interactive.notebook_extensions.skip extension is already loaded. To reload it, use:
  %reload_ext tfx.orchestration.experimental.interactive.notebook_extensions.skip
TFX version: 1.4.0
TensorFlow version: 2.6.2


In [5]:
import os
from absl import logging

# Show INFO-level messages from absl logging.
logging.set_verbosity(logging.INFO)

# Directory that holds the input CSV; created on first run.
DATA_ROOT = "data"
os.makedirs(DATA_ROOT, exist_ok=True)

# Pipeline name; it is also a path component of every location below.
PIPELINE_NAME = "iris"

# Root directory handed to the pipeline as `pipeline_root`.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
# SQLite file used for the pipeline's metadata connection.
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
# Filesystem destination the Pusher component writes the model to.
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)

# Retrieve data

In [6]:
import itertools
import urllib.request

_data_url = 'https://datahub.io/machine-learning/iris/r/iris.csv'
_data_filepath = os.path.join(DATA_ROOT, "data.csv")

# Download the CSV into DATA_ROOT; an existing copy at that path is overwritten.
urllib.request.urlretrieve(_data_url, _data_filepath)

# Preview the first 10 lines in pure Python instead of shelling out to `head`,
# so the cell does not depend on that command being available.
with open(_data_filepath) as _data_file:
    print(''.join(itertools.islice(_data_file, 10)), end='')

sepallength,sepalwidth,petallength,petalwidth,class
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa


# Create TFX pipeline and run it

In [7]:
# Python file with the user code for the pipeline; it is passed as `module_file`
# to both the Transform and the Trainer components when the pipeline is built.
_module_file = 'iris_utils.py'

In [8]:
def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     module_file: str, serving_model_dir: str,
                     metadata_path: str, train_steps: int = 2000,
                     eval_steps: int = 5) -> tfx.dsl.Pipeline:
    """Build the TFX pipeline: ingest CSV, validate, transform, train and push.

    Args:
        pipeline_name: Name given to the pipeline.
        pipeline_root: Root directory passed to the pipeline as `pipeline_root`.
        data_root: Directory used as `input_base` of CsvExampleGen.
        module_file: Python file with the user code, passed to both the
            Transform and the Trainer components.
        serving_model_dir: Base directory the Pusher writes the model to.
        metadata_path: Path of the SQLite file used for the metadata connection.
        train_steps: Number of steps passed to `TrainArgs` (default 2000).
        eval_steps: Number of steps passed to `EvalArgs` (default 5).

    Returns:
        The assembled `tfx.dsl.Pipeline`.
    """
    # Brings data into the pipeline or otherwise joins/converts training data.
    example_gen = tfx.components.CsvExampleGen(input_base=data_root)
    examples = example_gen.outputs['examples']

    # Computes statistics over data for visualization and example validation.
    statistics_gen = tfx.components.StatisticsGen(examples=examples)
    statistics = statistics_gen.outputs['statistics']

    # Generate a schema based on your data statistics.
    # A schema defines the expected bounds, types, and properties of the features in your dataset.
    schema_gen = tfx.components.SchemaGen(
        statistics=statistics, infer_feature_shape=True)
    schema = schema_gen.outputs['schema']

    # Performs anomaly detection based on statistics and data schema.
    example_validator = tfx.components.ExampleValidator(
        statistics=statistics,
        schema=schema)

    # Transforms input data using preprocessing_fn in the 'module_file'.
    transform = tfx.components.Transform(
        examples=examples,
        schema=schema,
        materialize=False,
        module_file=module_file)

    # Uses user-provided Python function that trains a model.
    # The Trainer receives the raw examples together with the transform graph.
    trainer = tfx.components.Trainer(
        module_file=module_file,
        examples=examples,
        schema=schema,
        transform_graph=transform.outputs['transform_graph'],
        train_args=tfx.proto.TrainArgs(num_steps=train_steps),
        eval_args=tfx.proto.EvalArgs(num_steps=eval_steps))

    # Pushes the model to a filesystem destination.
    push_destination = tfx.proto.PushDestination(
        filesystem=tfx.proto.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    pusher = tfx.components.Pusher(
        model=trainer.outputs['model'],
        push_destination=push_destination)

    components = [
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        trainer,
        pusher,
    ]

    return tfx.dsl.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        metadata_connection_config=tfx.orchestration.metadata.sqlite_metadata_connection_config(metadata_path),
        components=components)

In [9]:
# Assemble the pipeline from the notebook-level configuration constants.
pipeline = _create_pipeline(
    pipeline_name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
    data_root=DATA_ROOT,
    module_file=_module_file,
    serving_model_dir=SERVING_MODEL_DIR,
    metadata_path=METADATA_PATH
)

# Execute the pipeline with the local DAG runner.
tfx.orchestration.LocalDagRunner().run(pipeline)

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying iris_utils.py -> build/lib
installing to /tmp/tmpma4kdwul
running install
running install_lib
copying build/lib/iris_utils.py -> /tmp/tmpma4kdwul
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
Copying tfx_user_code_Transform.egg-info to /tmp/tmpma4kdwul/tfx_user_code_Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7-py3.8.egg-info
running install_scripts
creating /tmp/tmpma4kdwul/tfx_user_code_Transform-0.0+12d3fc749eb533579



running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying iris_utils.py -> build/lib
installing to /tmp/tmp0osxlvzj
running install
running install_lib
copying build/lib/iris_utils.py -> /tmp/tmp0osxlvzj
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
Copying tfx_user_code_Trainer.egg-info to /tmp/tmp0osxlvzj/tfx_user_code_Trainer-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7-py3.8.egg-info
running install_scripts
creating /tmp/tmp0osxlvzj/tfx_user_code_Trainer-0.0+12d3fc749eb5335791029dbaf090874cd8450

I1117 11:06:11.575806 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:11.595890 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:11.615383 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:11.635632 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:11.676694 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type


I1117 11:06:12.700177 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:14.899238 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:14.938690 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:14.983157 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:15.136801 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type


Processing ./pipelines/iris/_wheels/tfx_user_code_Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7
Processing ./pipelines/iris/_wheels/tfx_user_code_Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7
Processing ./pipelines/iris/_wheels/tfx_user_code_Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7
Instructions for updating:
Use ref() instead.


2021-11-17 11:06:18.732832: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-17 11:06:18.745466: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-17 11:06:18.746425: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-17 11:06:18.747518: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/3b357c41360f4c278f4662f5a8aad575/assets


INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/3b357c41360f4c278f4662f5a8aad575/assets


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/203ca96a2d7542bf95958a2af8e3dd6f/assets


INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/203ca96a2d7542bf95958a2af8e3dd6f/assets


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.
I1117 11:06:26.897055 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:26.924156 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:26.968791 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type


Processing ./pipelines/iris/_wheels/tfx_user_code_Trainer-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7-py3-none-any.whl
Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+12d3fc749eb5335791029dbaf090874cd84509177f1f61319fac201421b132e7
INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.
2021-11-17 11:06:29.049883: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-11-17 11:06:29.049927: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-11-17 11:06:29.049948: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1614] Profiler found 1 GPUs
2021-11-17 11:06:29.303451: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-11-17 11:06:29.305120: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1748] CUPTI activity buffer flushed


   1/2000 [..............................] - ETA: 23:04 - loss: 1.1805 - sparse_categorical_accuracy: 0.3500

2021-11-17 11:06:30.099749: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-11-17 11:06:30.099792: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.


  37/2000 [..............................] - ETA: 24s - loss: 0.6431 - sparse_categorical_accuracy: 0.7270

2021-11-17 11:06:30.378906: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-11-17 11:06:30.379857: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1748] CUPTI activity buffer flushed
2021-11-17 11:06:30.401350: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:673]  GpuTracer has collected 86 callback api events and 83 activity events. 
2021-11-17 11:06:30.403387: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-11-17 11:06:30.406448: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: pipelines/iris/Trainer/model_run/6/train/plugins/profile/2021_11_17_11_06_30

2021-11-17 11:06:30.408397: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to pipelines/iris/Trainer/model_run/6/train/plugins/profile/2021_11_17_11_06_30/EkE-Legion5.trace.json.gz
2021-11-17 11:06:30.412833: I tensorflow/core/profiler/rpc/client

INFO:tensorflow:Assets written to: pipelines/iris/Trainer/model/6/Format-Serving/assets


INFO:tensorflow:Assets written to: pipelines/iris/Trainer/model/6/Format-Serving/assets
I1117 11:06:38.235312 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:38.244904 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:38.283003 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1117 11:06:38.331022 14100 rdbms_metadata_access_object.cc:686] No property is defined for the Type


# Visualize pipeline

In [12]:
from tfx.orchestration.portable.mlmd import execution_lib
from tfx.orchestration.metadata import Metadata
from tfx.orchestration.experimental.interactive import visualizations
from ml_metadata.proto import metadata_store_pb2


def get_latest_artifact(component_id):
    """Return the output artifacts of the most recent execution of a component.

    Looks up the 'node' context named '<PIPELINE_NAME>.<component_id>' in the
    SQLite metadata store at METADATA_PATH, selects the execution with the
    greatest `last_update_time_since_epoch`, and returns what
    `execution_lib.get_artifacts_dict` yields for that execution's OUTPUT events.

    Args:
        component_id: Id of the pipeline component, e.g. "StatisticsGen".

    Returns:
        The value returned by `execution_lib.get_artifacts_dict`; callers in
        this notebook index it by output name and then by position.

    Raises:
        ValueError: If no context is recorded for the component, or the
            context has no executions.
    """
    metadata_connection_config = tfx.orchestration.metadata.sqlite_metadata_connection_config(METADATA_PATH)
    context_name = f'{PIPELINE_NAME}.{component_id}'

    with Metadata(metadata_connection_config) as metadata_handler:
        context = metadata_handler.store.get_context_by_type_and_name('node', context_name)
        if context is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                f"No 'node' context named {context_name!r} found in {METADATA_PATH!r}; "
                "check the component id and that the pipeline has been run.")

        executions = metadata_handler.store.get_executions_by_context(context.id)
        if not executions:
            # max() on an empty sequence would raise an uninformative error.
            raise ValueError(f"No executions recorded for context {context_name!r}.")

        latest_execution = max(executions, key=lambda e: e.last_update_time_since_epoch)

        return execution_lib.get_artifacts_dict(metadata_handler, latest_execution.id, [metadata_store_pb2.Event.OUTPUT])


def visualize(artifact):
    """Display `artifact` with the visualization registered for its type.

    Args:
        artifact: Object exposing a `type_name` attribute, which is used to
            look up the visualization in the registry.

    Raises:
        ValueError: If the registry lookup returns None for the artifact's type.
    """
    visualization = visualizations.get_registry().get_visualization(artifact.type_name)
    if visualization is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f"No visualization is registered for artifact type {artifact.type_name!r}.")
    visualization.display(artifact)


from tfx.orchestration.experimental.interactive import standard_visualizations

# Register TFX's standard visualizations; visualize() looks visualizations up
# by artifact type name in the registry.
standard_visualizations.register_standard_visualizations()

## StatisticsGen

In [13]:
# First 'statistics' output artifact of the latest StatisticsGen execution.
statistic_artifact = get_latest_artifact("StatisticsGen")['statistics'][0]

# Render it with the visualization registered for its artifact type.
visualize(statistic_artifact)

## SchemaGen

In [14]:
# First 'schema' output artifact of the latest SchemaGen execution.
schema_artifact = get_latest_artifact("SchemaGen")['schema'][0]

# Render it with the visualization registered for its artifact type.
visualize(schema_artifact)

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'class',STRING,required,,'class'
'petallength',FLOAT,required,,-
'petalwidth',FLOAT,required,,-
'sepallength',FLOAT,required,,-
'sepalwidth',FLOAT,required,,-


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
'class',"'Iris-setosa', 'Iris-versicolor', 'Iris-virginica'"


## ExampleValidator

In [15]:
# First 'anomalies' output artifact of the latest ExampleValidator execution.
example_validator_artifact = get_latest_artifact("ExampleValidator")['anomalies'][0]

# Render it with the visualization registered for its artifact type.
visualize(example_validator_artifact)

## Transform

In [16]:
# TODO: add a visualization for the outputs of the Transform component.

## Trainer

In [17]:
# URI of the first 'model_run' output artifact of the latest Trainer execution.
model_run_artifact_dir = get_latest_artifact("Trainer")['model_run'][0].uri

# Load the TensorBoard notebook extension and point it at that directory.
%load_ext tensorboard
%tensorboard --logdir {model_run_artifact_dir}