In [15]:
import tensorflow as tf
from tfx import v1 as tfx

# Report the library versions this notebook is running against.
print(f'TFX version: {tfx.__version__}')
print(f'TensorFlow version: {tf.__version__}')

TFX version: 1.3.1
TensorFlow version: 2.6.0


In [16]:
import os
from absl import logging

# Name of the pipeline; every derived location below is keyed on it.
PIPELINE_NAME = "iris"

# Directory holding the input CSV handed to the pipeline as `data_root`.
DATA_ROOT = "data"

# Locations derived from the pipeline name.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)          # pipeline_root
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)  # Pusher destination
METADATA_PATH = os.path.join(                                     # SQLite metadata file
    'metadata', PIPELINE_NAME, 'metadata.db')

# Show INFO-level absl messages; the pipeline run further down reports its
# progress through `INFO:absl:` log lines.
logging.set_verbosity(logging.INFO)

In [17]:
import csv
import urllib.request

# Source of the raw iris CSV and the two local paths used while preparing it:
# the file is first downloaded to `_download_filepath`, rewritten with a fixed
# header into `_data_filepath`, and the raw download is then deleted so that
# only `data.csv` remains inside DATA_ROOT.
_data_url = 'https://forge.scilab.org/index.php/p/rdataset/source/file/master/csv/datasets/iris.csv'
_download_filepath = os.path.join(DATA_ROOT, "iris.csv")
_data_filepath = os.path.join(DATA_ROOT, "data.csv")

# urlretrieve does not create missing parent directories, so make sure the
# data directory exists; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(DATA_ROOT, exist_ok=True)

urllib.request.urlretrieve(_data_url, _download_filepath)

# The csv module requires files to be opened with newline='' so that it controls
# line endings itself (avoids '\r\r\n' rows on Windows and mis-split quoted fields).
with open(_download_filepath, 'r', newline='') as infile, \
        open(_data_filepath, 'w', newline='') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)

    # Drop the downloaded header (if any) and write an explicit one instead.
    # The first column keeps an empty name, matching the original notebook.
    next(reader, None)
    writer.writerow(["", "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"])

    # Copy every remaining data row unchanged.
    writer.writerows(reader)

# Remove the raw download so DATA_ROOT contains only the prepared data.csv.
os.remove(_download_filepath)

In [18]:
# Preview the first lines of the prepared CSV (header plus a few rows).
!head {_data_filepath}

,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5,3.6,1.4,0.2,setosa
6,5.4,3.9,1.7,0.4,setosa
7,4.6,3.4,1.4,0.3,setosa
8,5,3.4,1.5,0.2,setosa
9,4.4,2.9,1.4,0.2,setosa


In [19]:
# Path of the user module file; it is passed as `module_file` to
# `_create_pipeline`, which hands it to both the Transform and Trainer components.
_module_file = 'iris_utils.py'

In [20]:
def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     module_file: str, serving_model_dir: str,
                     metadata_path: str) -> tfx.dsl.Pipeline:
    """Builds the iris TFX pipeline definition.

    The pipeline chains CsvExampleGen -> StatisticsGen -> SchemaGen ->
    ExampleValidator -> Transform -> Trainer -> Pusher.

    Args:
        pipeline_name: Name given to the resulting pipeline.
        pipeline_root: Passed through as the pipeline's `pipeline_root`.
        data_root: Input base directory handed to CsvExampleGen.
        module_file: User module file given to both Transform and Trainer.
        serving_model_dir: Filesystem base directory the Pusher pushes to.
        metadata_path: Path used to build the SQLite metadata connection config.

    Returns:
        A `tfx.dsl.Pipeline` containing the seven components listed above.
    """
    # --- Ingestion -----------------------------------------------------------
    # Brings the CSV data found under `data_root` into the pipeline.
    example_gen = tfx.components.CsvExampleGen(input_base=data_root)
    examples = example_gen.outputs['examples']

    # --- Statistics, schema and validation -----------------------------------
    # Statistics over the ingested examples.
    statistics_gen = tfx.components.StatisticsGen(examples=examples)
    statistics = statistics_gen.outputs['statistics']

    # Schema generated from those statistics (feature shapes inferred as well).
    schema_gen = tfx.components.SchemaGen(
        statistics=statistics,
        infer_feature_shape=True)
    schema = schema_gen.outputs['schema']

    # Anomaly detection based on the statistics and the schema.
    example_validator = tfx.components.ExampleValidator(
        statistics=statistics,
        schema=schema)

    # --- Feature engineering -------------------------------------------------
    # Runs the user code from `module_file`; materialization is switched off.
    transform = tfx.components.Transform(
        examples=examples,
        schema=schema,
        materialize=False,
        module_file=module_file)

    # --- Training ------------------------------------------------------------
    # Step budgets for the training and evaluation phases.
    train_args = tfx.proto.TrainArgs(num_steps=100)
    eval_args = tfx.proto.EvalArgs(num_steps=5)

    # The trainer receives the examples produced by CsvExampleGen together
    # with the transform graph produced by Transform.
    trainer = tfx.components.Trainer(
        module_file=module_file,
        examples=examples,
        transform_graph=transform.outputs['transform_graph'],
        train_args=train_args,
        eval_args=eval_args)

    # --- Deployment ----------------------------------------------------------
    # Pushes the trained model to a filesystem destination.
    filesystem_destination = tfx.proto.PushDestination.Filesystem(
        base_directory=serving_model_dir)
    pusher = tfx.components.Pusher(
        model=trainer.outputs['model'],
        push_destination=tfx.proto.PushDestination(
            filesystem=filesystem_destination))

    # --- Assembly ------------------------------------------------------------
    metadata_config = (
        tfx.orchestration.metadata.sqlite_metadata_connection_config(
            metadata_path))

    return tfx.dsl.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        metadata_connection_config=metadata_config,
        components=[
            example_gen,
            statistics_gen,
            schema_gen,
            example_validator,
            transform,
            trainer,
            pusher,
        ])

In [21]:
# Build the pipeline definition from the notebook-level settings ...
_iris_pipeline = _create_pipeline(
    pipeline_name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
    data_root=DATA_ROOT,
    module_file=_module_file,
    serving_model_dir=SERVING_MODEL_DIR,
    metadata_path=METADATA_PATH,
)

# ... and execute it with the local DAG runner.
tfx.orchestration.LocalDagRunner().run(_iris_pipeline)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Generating ephemeral wheel package for '/home/ekrem/Workplace/Python/tfx_seminar/iris_utils.py' (including modules: ['tfx_visualize', 'iris_constants', 'iris_trainer', 'iris_transform', 'iris_utils']).
INFO:absl:User module package has hash fingerprint version add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb.
INFO:absl:Executing: ['/home/ekrem/.pyenv/versions/3.8.7/envs/tfx/bin/python3.8', '/tmp/tmpoj8z24xe/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmpiy0o39f6', '--dist-dir', '/tmp/tmpprhd49sw']
INFO:absl:Successfully built user code wheel distribution at 'pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'; target user module is 'iris_utils'.
INFO:absl:Full user modul

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying tfx_visualize.py -> build/lib
copying iris_constants.py -> build/lib
copying iris_trainer.py -> build/lib
copying iris_transform.py -> build/lib
copying iris_utils.py -> build/lib
installing to /tmp/tmpiy0o39f6
running install
running install_lib
copying build/lib/tfx_visualize.py -> /tmp/tmpiy0o39f6
copying build/lib/iris_constants.py -> /tmp/tmpiy0o39f6
copying build/lib/iris_trainer.py -> /tmp/tmpiy0o39f6
copying build/lib/iris_transform.py -> /tmp/tmpiy0o39f6
copying build/lib/iris_utils.py -> /tmp/tmpiy0o39f6
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_u

INFO:absl:Successfully built user code wheel distribution at 'pipelines/iris/_wheels/tfx_user_code_Trainer-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'; target user module is 'iris_utils'.
INFO:absl:Full user module path is 'iris_utils@pipelines/iris/_wheels/tfx_user_code_Trainer-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'
INFO:absl:Using deployment config:
 executor_specs {
  key: "CsvExampleGen"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.example_gen.csv_example_gen.executor.Executor"
      }
    }
  }
}
executor_specs {
  key: "ExampleValidator"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.example_validator.executor.Executor"
    }
  }
}
executor_specs {
  key: "Pusher"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.pusher.executor.Executor"
    }
  }
}
executor_specs {
  key: 

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying tfx_visualize.py -> build/lib
copying iris_constants.py -> build/lib
copying iris_trainer.py -> build/lib
copying iris_transform.py -> build/lib
copying iris_utils.py -> build/lib
installing to /tmp/tmp48teut1l
running install
running install_lib
copying build/lib/tfx_visualize.py -> /tmp/tmp48teut1l
copying build/lib/iris_constants.py -> /tmp/tmp48teut1l
copying build/lib/iris_trainer.py -> /tmp/tmp48teut1l
copying build/lib/iris_transform.py -> /tmp/tmp48teut1l
copying build/lib/iris_utils.py -> /tmp/tmp48teut1l
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_T

INFO:absl:Going to run a new execution: ExecutionInfo(execution_id=1, input_dict={}, output_dict=defaultdict(<class 'list'>, {'examples': [Artifact(artifact: uri: "pipelines/iris/CsvExampleGen/examples/1"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:4360,xor_checksum:1635260718,sum_checksum:1635260718"
  }
}
custom_properties {
  key: "name"
  value {
    string_value: "iris:2021-10-26T17:05:19.130464:CsvExampleGen:examples:0"
  }
}
custom_properties {
  key: "span"
  value {
    int_value: 0
  }
}
, artifact_type: name: "Examples"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
properties {
  key: "version"
  value: INT
}
)]}), exec_properties={'input_base': 'data', 'output_data_format': 6, 'output_config': '{\n  "split_config": {\n    "splits": [\n      {\n        "hash_buckets": 2,\n        "name": "train"\n      },\n      {\n        "hash_buckets": 1,\n        "na

Processing ./pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl


INFO:absl:Successfully installed 'pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'.
INFO:absl:udf_utils.get_fn {'module_file': None, 'module_path': 'iris_utils@pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl', 'stats_options_updater_fn': None} 'stats_options_updater_fn'
INFO:absl:Installing 'pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/home/ekrem/.pyenv/versions/3.8.7/envs/tfx/bin/python3.8', '-m', 'pip', 'install', '--target', '/tmp/tmpbh6n1msg', 'pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl']


Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb
Processing ./pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl


INFO:absl:Successfully installed 'pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'.
INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.


Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb


INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTe

Processing ./pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl


INFO:absl:Successfully installed 'pipelines/iris/_wheels/tfx_user_code_Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'.


Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb


INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTe

INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/95a22d0c6af943bc9012ac13245570cf/assets


INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/95a22d0c6af943bc9012ac13245570cf/assets


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/4df9847faa11473987b1bb1fb0ed402a/assets


INFO:tensorflow:Assets written to: pipelines/iris/Transform/transform_graph/5/.temp_path/tftransform_tmp/4df9847faa11473987b1bb1fb0ed402a/assets


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 5 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'pre_transform_schema': [Artifact(artifact: uri: "pipelines/iris/Transform/pre_transform_schema/5"
custom_properties {
  key: "name"
  value {
    string_value: "iris:2021-10-26T17:05:19.130464:Transform:pre_transform_schema:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.3.1"
  }
}
, artifact_type: name: "Schema"
)], 'updated_analyzer_cache': [Artifact(artifact: uri: "pipelines/iris/Transform/updated_analyzer_cache/5"
custom_properties {
  key: "name"
  value {
    string_value: "iris:2021-10-26T17:05:19.130464:Transform:updated_analyzer_cache:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.3.1"
  }
}
, artifact_type: name: "TransformCache"
)], 'post_transform_stats': [Artifact(art

Processing ./pipelines/iris/_wheels/tfx_user_code_Trainer-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl


INFO:absl:Successfully installed 'pipelines/iris/_wheels/tfx_user_code_Trainer-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb-py3-none-any.whl'.
INFO:absl:Training model.
INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.


Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+add2b00587a6c723d91da23e140fb454fa347dc098328b4d3a22d4a889760fcb
INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.
INFO:absl:Feature Species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature  has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Petal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Sepal.Width has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Model: "model_1"
INFO:absl:__________________________________________________________________________________________________
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:Sepal.Length_xf (InputLayer)    [(None, 1)]          0                                            
INFO:absl:_____________________________________________________________________________________

INFO:tensorflow:Assets written to: pipelines/iris/Trainer/model/6/Format-Serving/assets


INFO:tensorflow:Assets written to: pipelines/iris/Trainer/model/6/Format-Serving/assets
INFO:absl:Training complete. Model written to pipelines/iris/Trainer/model/6/Format-Serving. ModelRun written to pipelines/iris/Trainer/model_run/6
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 6 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "pipelines/iris/Trainer/model/6"
custom_properties {
  key: "name"
  value {
    string_value: "iris:2021-10-26T17:05:19.130464:Trainer:model:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.3.1"
  }
}
, artifact_type: name: "Model"
)], 'model_run': [Artifact(artifact: uri: "pipelines/iris/Trainer/model_run/6"
custom_properties {
  key: "name"
  value {
    string_value: "iris:2021-10-26T17:05:19.130464:Trainer:model_run:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: 