### Chapter 10 - Setting up data and deployment pipelines

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
import numpy as np

In [None]:
# Load the Fashion MNIST dataset as separate (images, labels) pairs for the
# training split and the test split
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

In [None]:
# Define the TFRecord file paths. Both files live under /content/data/, the
# directory that is later handed to ImportExampleGen as its input base.
train_filename = '/content/data/fashion_mnist_train.tfrecord'
test_filename = '/content/data/fashion_mnist_test.tfrecord'

In [None]:
# Serialize one (image, label) pair into a tf.train.Example byte string
def serialize_example(image, label):
    """Pack one image/label pair into a serialized tf.train.Example.

    Args:
      image: Tensor holding the image; its raw bytes are read via `.numpy()`.
      label: Scalar tensor holding the class label.

    Returns:
      The serialized Example as a byte string, with a float-list feature
      'image' and an int64-list feature 'label'.
    """
    pixel_bytes = image.numpy().tobytes()

    # Reinterpret the raw byte buffer as float32 values (no numeric
    # conversion of the individual pixels takes place here).
    # NOTE(review): if the incoming images are uint8 (presumably the case for
    # Fashion MNIST - confirm), every float packs four pixel bytes, so a
    # 28x28 image yields 196 values. The trainer's input_shape=(196,) appears
    # to rely on this; switching to a value conversion such as
    # astype(np.float32) would need a coordinated change there.
    float_values = np.frombuffer(pixel_bytes, dtype=np.float32).tolist()

    # Build each feature separately, then assemble the Example
    image_feature = tf.train.Feature(
        float_list=tf.train.FloatList(value=float_values))
    label_feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=[label.numpy()]))
    features = tf.train.Features(
        feature={'image': image_feature, 'label': label_feature})

    return tf.train.Example(features=features).SerializeToString()

In [None]:
# Write a collection of images and labels to a TFRecord file
def tfrecordWriter(filename, images, labels):
    """Write paired images and labels to a TFRecord file.

    Each (image, label) pair is converted to tensors, serialized with
    `serialize_example` and appended to the file as one record.

    Args:
      filename: Destination path of the TFRecord file. Missing parent
        directories are created first.
      images: Sequence (or array) of images, aligned with `labels`.
      labels: Sequence (or array) of labels, one per image.

    Raises:
      ValueError: If `images` and `labels` do not have the same length.
    """
    # Local import: the notebook only imports `os` in a later cell.
    import os

    if len(images) != len(labels):
        raise ValueError(
            "images and labels must have the same length, got "
            f"{len(images)} and {len(labels)}")

    # TFRecordWriter does not create missing directories, so make sure the
    # destination folder exists before the file is opened.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        tf.io.gfile.makedirs(parent_dir)

    # Write every example of the given split to the TFRecord file
    with tf.io.TFRecordWriter(filename) as writer:
        for image_array, label_value in zip(images, labels):
            image = tf.convert_to_tensor(image_array)
            label = tf.convert_to_tensor(label_value)
            writer.write(serialize_example(image, label))

In [None]:
# Serialize the training split to its TFRecord file
tfrecordWriter(train_filename, train_images, train_labels)
# Serialize the test split to its TFRecord file
tfrecordWriter(test_filename, test_images, test_labels)

#### TensorFlow Extended (TFX)

In [None]:
!pip install tfx
!pip install tensorflow_model_analysis

In [None]:
import os
from typing import List
import tensorflow as tf
import absl
import tensorflow_model_analysis as tfma
from tfx.components import Evaluator
from tfx.components import ExampleValidator
from tfx.components import ImportExampleGen
from tfx.components import Pusher
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.components import Transform
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

In [None]:
# Root directory given to the interactive context as its pipeline root
_tfx_root = '/content/tfx_root/'
# Directory containing the TFRecord files written earlier in the notebook
_data_root = '/content/data/'

In [None]:
# Interactive context used below to run the components one at a time,
# with _tfx_root as its pipeline root
context = InteractiveContext(pipeline_root=_tfx_root)

##### Data Ingestion

In [None]:
# Define the example gen component, which reads the TFRecord files found
# under _data_root, and run it in the interactive context
example_gen = ImportExampleGen(input_base=_data_root)
context.run(example_gen)

##### Data Validation

In [None]:
# Computes statistics over the ingested examples; the result is used below
# for schema generation and example validation.
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)

In [None]:
# Generates a schema from the computed statistics; infer_feature_shape=True
# requests that feature shapes be inferred as part of the schema.
schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'], infer_feature_shape=True)
context.run(schema_gen)

In [None]:
# Checks the computed statistics against the generated schema to detect
# anomalies in the data.
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'],
)
context.run(example_validator)

#### Data Transformations

In [None]:
%%writefile /content/mnist_transform.py

import absl
import tensorflow as tf
import tensorflow_transform as tft

# Defining the keys to access data and labels

IMAGE_KEY = 'image'
LABEL_KEY = 'label'


def transformed_name(key):
    """Return the feature name used for the transformed version of `key`."""
    suffix = '_xf'
    return key + suffix

# TFX Transform will call this function.
def preprocessing_fn(inputs):
    """Preprocessing callback invoked by tf.transform.

    Args:
      inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
      Map from transformed feature name (original key plus the '_xf'
      suffix) to the transformed feature operation.
    """
    # Rescale the image values into the [-0.5, 0.5] range
    scaled_images = tft.scale_by_min_max(inputs[IMAGE_KEY], -0.5, 0.5)

    return {
        transformed_name(IMAGE_KEY): scaled_images,
        # Labels are passed through unchanged, only renamed
        transformed_name(LABEL_KEY): inputs[LABEL_KEY],
    }

In [None]:
# Path of the transform module file written by the %%writefile cell above
_module_file = '/content/mnist_transform.py'

In [None]:
# Transform component: applies the preprocessing_fn from the module file to
# the ingested examples, using the generated schema.
transform = Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file=_module_file,
)

In [None]:
# Run the transform component in the interactive context
context.run(transform)

#### Training

In [None]:
%%writefile /content/mnist_train.py

import tensorflow as tf
import tensorflow_transform as tft
from tfx.components.trainer.fn_args_utils import FnArgs
from typing import List
from tfx.components.trainer.fn_args_utils import DataAccessor
from tfx_bsl.tfxio import dataset_options
import absl

IMAGE_KEY = 'image'
LABEL_KEY = 'label'

def transformed_name(key):
    """Return the feature name used for the transformed version of `key`."""
    suffix = '_xf'
    return key + suffix


def _get_serve_tf_examples_fn(model, tf_transform_output):
    """Build the serving function that accepts serialized tf.Examples.

    Args:
      model: The Keras model to serve. The transform layer is stored on it
        as `model.tft_layer`.
      tf_transform_output: Object providing `transform_features_layer()` and
        `raw_feature_spec()` for the transform graph.

    Returns:
      A `tf.function` that parses serialized examples, applies the transform
      layer and returns the model output.
    """
    # Keep the transform layer as a model attribute; the serving function
    # below reads it back from the model.
    tft_layer = tf_transform_output.transform_features_layer()
    model.tft_layer = tft_layer

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        """Returns the output to be used in the serving signature."""
        raw_spec = tf_transform_output.raw_feature_spec()
        # The label is dropped from the spec used to parse serving requests
        raw_spec.pop(LABEL_KEY)
        raw_features = tf.io.parse_example(serialized_tf_examples, raw_spec)
        return model(model.tft_layer(raw_features))

    return serve_tf_examples_fn

# Function to generate features and labels for training
def input_fn(file_pattern: List[str],
             data_accessor: DataAccessor,
             tf_transform_output: tft.TFTransformOutput,
             batch_size: int = 128) -> tf.data.Dataset:
    """Builds the batched, repeating dataset used for training/evaluation.

    Args:
      file_pattern: List of paths or patterns of input tfrecord files.
      data_accessor: DataAccessor whose dataset factory reads the files.
      tf_transform_output: A TFTransformOutput; its transformed schema is
        used to read the examples.
      batch_size: Number of consecutive elements combined into one batch.

    Returns:
      A dataset built with the transformed label feature as the label key.
      The dataset is repeated indefinitely, so callers must bound iteration
      (for example with a step count).
    """
    options = dataset_options.TensorFlowDatasetOptions(
        batch_size=batch_size, label_key=transformed_name(LABEL_KEY))
    transformed_schema = tf_transform_output.transformed_metadata.schema

    dataset = data_accessor.tf_dataset_factory(
        file_pattern, options, transformed_schema)
    return dataset.repeat()

# Model building function
def build_keras_model() -> tf.keras.Model:
    """Creates a DNN Keras model for classifying Fashion MNIST data.

    The network is one 64-unit ReLU hidden layer with dropout, followed by a
    10-unit output layer that emits raw logits (the loss is configured with
    `from_logits=True`).

    Returns:
      A compiled Keras Model.
    """
    model = tf.keras.Sequential()
    # The input layer carries the name of the transformed image feature.
    # NOTE(review): 196 must equal the per-example length of that feature;
    # confirm it against the data written to the TFRecord files.
    model.add(
        tf.keras.layers.InputLayer(
            input_shape=(196,), name=transformed_name(IMAGE_KEY)))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(10))
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        # `learning_rate` is the supported keyword; the older `lr` alias is
        # deprecated and is not accepted by newer Keras releases.
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
        metrics=['sparse_categorical_accuracy'])
    # Send the layer summary to the absl log instead of stdout
    model.summary(print_fn=absl.logging.info)
    return model

# Main function for trainer. TFX Trainer will call this function.
def run_fn(fn_args: FnArgs):
    """Train the model based on given args and export it for serving.

    Args:
      fn_args: Holds args used to train the model as name/value pairs. This
        function reads `transform_output`, `train_files`, `eval_files`,
        `data_accessor`, `train_steps`, `eval_steps`, `model_run_dir` and
        `serving_model_dir` from it.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    # The same batch size is used for the training and evaluation datasets.
    batch_size = 40
    train_dataset = input_fn(fn_args.train_files, fn_args.data_accessor,
                             tf_transform_output, batch_size)
    eval_dataset = input_fn(fn_args.eval_files, fn_args.data_accessor,
                            tf_transform_output, batch_size)

    # The model has to be built inside the strategy scope, so the
    # construction call is the (indented) body of the `with` statement.
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = build_keras_model()

    # Write logs to path
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=fn_args.model_run_dir, update_freq='epoch')

    # Both datasets repeat indefinitely (see input_fn), so the step counts
    # determine how much data each training/validation pass consumes.
    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback])

    # Export with a serving signature that accepts a batch of serialized
    # tf.Example strings.
    signatures = {
        'serving_default':
            _get_serve_tf_examples_fn(
                model, tf_transform_output).get_concrete_function(
                    tf.TensorSpec(shape=[None], dtype=tf.string, name='examples'))
    }
    model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

In [None]:
# Component ID and path to the training module file
component_id = 'Trainer.mnist'
train_module_file = '/content/mnist_train.py'
# Initialise and run the trainer component on the transformed examples
trainer = Trainer(
    module_file=train_module_file,
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(num_steps=500),
    eval_args=trainer_pb2.EvalArgs(num_steps=500),
).with_id(component_id)
context.run(trainer)

#### Model Evaluation

In [None]:
# Lower bound on accuracy used in the metric threshold below
accuracy_threshold: float = 0.8
# Evaluation configuration: a single overall slice and one metric
# (sparse categorical accuracy) with the lower bound defined above
eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=[
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(
                    class_name='SparseCategoricalAccuracy',
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': accuracy_threshold},
                        ),
                    ),
                ),
            ],
        ),
    ],
)


In [None]:
# Initialise the evaluator with the ingested examples, the trained model
# and the evaluation configuration
evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    eval_config=eval_config,
).with_id('Evaluator.mnist')
# Run the evaluator
context.run(evaluator)

#### Model Deployment

In [None]:
# Create the directory if its not created
!mkdir /content/serving_model

In [None]:
# Define the path for the model serving directory; it is passed to the
# Pusher below as the base directory of its filesystem push destination
_serving_model_dir = '/content/serving_model'

In [None]:
# Initialise the pusher with the trained model, the evaluator's blessing
# output and the filesystem destination
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=_serving_model_dir,
        ),
    ),
).with_id('Pusher.mnist')
# Run the pusher
context.run(pusher)

#### Creating the pipeline

In [None]:
from tfx.orchestration import pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

In [None]:
def create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                    train_module_file: str,
                    transform_module_file: str,
                    serving_model_dir: str,
                    metadata_path: str,
                    beam_pipeline_args: List[str],
                    accuracy_threshold: float = 0.8) -> pipeline.Pipeline:
    """Assemble the full TFX pipeline from its individual components.

    A progress message is printed right after each component object (and
    finally the pipeline object) has been constructed.

    Args:
      pipeline_name: Name given to the pipeline.
      pipeline_root: Root directory passed to the pipeline.
      data_root: Directory with the input TFRecord files.
      train_module_file: Path to the module file used by the Trainer.
      transform_module_file: Path to the module file used by Transform.
      serving_model_dir: Base directory the Pusher writes to.
      metadata_path: Path of the SQLite ML-metadata database.
      beam_pipeline_args: Arguments forwarded to the Beam-powered components.
      accuracy_threshold: Lower bound for the accuracy metric threshold in
        the evaluation configuration.

    Returns:
      The configured `pipeline.Pipeline`.
    """
    # Data ingestion
    example_gen = ImportExampleGen(input_base=data_root)
    print("Completed example_gen")
    raw_examples = example_gen.outputs['examples']

    # Statistics over the ingested data
    statistics_gen = StatisticsGen(examples=raw_examples)
    print("Completed statistics_gen")
    statistics = statistics_gen.outputs['statistics']

    # Schema generated from the statistics
    schema_gen = SchemaGen(statistics=statistics, infer_feature_shape=True)
    print("Completed schema_gen")
    schema = schema_gen.outputs['schema']

    # Anomaly detection based on statistics and schema
    example_validator = ExampleValidator(statistics=statistics, schema=schema)
    print("Completed example_validator")

    # Transformations and feature engineering
    transform = Transform(
        examples=raw_examples,
        schema=schema,
        module_file=transform_module_file)
    print("Completed transform")

    # Trainer component, fed with the transformed examples
    component_id = 'Trainer.mnist'
    trainer = Trainer(
        module_file=train_module_file,
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema,
        train_args=trainer_pb2.TrainArgs(num_steps=500),
        eval_args=trainer_pb2.EvalArgs(num_steps=500),
    ).with_id(component_id)
    print("Completed trainer")
    trained_model = trainer.outputs['model']

    # Evaluation configuration: one accuracy metric with a lower bound
    accuracy_metric = tfma.MetricConfig(
        class_name='SparseCategoricalAccuracy',
        threshold=tfma.MetricThreshold(
            value_threshold=tfma.GenericValueThreshold(
                lower_bound={'value': accuracy_threshold})))
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='label')],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[tfma.MetricsSpec(metrics=[accuracy_metric])])

    # Evaluator for the trained model
    evaluator = Evaluator(
        examples=raw_examples,
        model=trained_model,
        eval_config=eval_config,
    ).with_id('Evaluator.mnist')
    print("Completed evaluator")

    # Pusher writing the model to the destination folder
    destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    pusher = Pusher(
        model=trained_model,
        model_blessing=evaluator.outputs['blessing'],
        push_destination=destination,
    ).with_id('Pusher.mnist')
    print("Completed pusher")

    # Assemble the pipeline from all components
    components = [
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        trainer,
        evaluator,
        pusher,
    ]
    pipeline_mnist = pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        beam_pipeline_args=beam_pipeline_args)
    print("Completed pipeline_mnist")

    return pipeline_mnist

In [None]:
!mkdir serving_model
!mkdir pipelines
!mkdir metadata

In [None]:
# Name of the pipeline
pipeline_name = 'mnist_pipeline'
# Path to the TFX root (not passed to create_pipeline in the run cell below)
tfx_root = '/content/tfx_root/'
# Path to the directory holding the .tfrecord files of the dataset
data_root = '/content/data/'
# Separate module files for the trainer and for the transform component
train_module_file = '/content/mnist_train.py'
transform_module_file = '/content/mnist_transform.py'

# Path which can be listened to by the model server. Pusher will output the
# trained model here.
serving_model_dir = '/content/serving_model/'
# Root path for this pipeline
pipeline_root = '/content/pipelines/mnist_pipeline'
# Sqlite ML-metadata db path.
metadata_path = '/content/metadata/metadata.db'
# Pipeline arguments for Beam powered Components.
beam_pipeline_args = [
    '--direct_running_mode=multi_processing',
    # 0 means auto-detect based on the number of CPUs available
    # during execution time.
    '--direct_num_workers=0',
]

In [None]:
# Build the pipeline from the configuration above and run it with the
# Beam DAG runner
BeamDagRunner().run(
    create_pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        data_root=data_root,
        train_module_file=train_module_file,
        transform_module_file=transform_module_file,
        serving_model_dir=serving_model_dir,
        metadata_path=metadata_path,
        beam_pipeline_args=beam_pipeline_args,
    )
)