##### Copyright 2022 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License").
<!--
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
-->


# RunInference with the Sentence-T5 Model from TensorFlow Hub


In this notebook, we walk through using the RunInference transform with a [sentence encoder built on the T5 model](https://tfhub.dev/google/sentence-t5/st5-base/1) and test it locally with the Interactive Runner.


## Install and import packages.

In [None]:
# Install pinned versions of Apache Beam (GCP and interactive extras),
# TensorFlow, TensorFlow Text, Keras, TFX-BSL and Pillow.
# NOTE(review): tensorflow_hub is imported by this notebook but is not
# installed here — presumably it is already present in the runtime; confirm.
!pip install apache_beam[gcp]==2.41.0
!pip install apache-beam[interactive]==2.41.0
!pip install tensorflow==2.10.0
!pip install tensorflow_text==2.10.0
!pip install keras==2.10.0
!pip install tfx_bsl==1.10.0
!pip install pillow==8.4.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting apache_beam[gcp]==2.41.0
  Downloading apache_beam-2.41.0-cp38-cp38-manylinux2010_x86_64.whl (13.1 MB)
[K     |████████████████████████████████| 13.1 MB 3.2 MB/s 
[?25hCollecting cloudpickle<3,>=2.1.0
  Downloading cloudpickle-2.2.0-py3-none-any.whl (25 kB)
Collecting pyarrow<8.0.0,>=0.15.1
  Downloading pyarrow-7.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)
[K     |████████████████████████████████| 26.7 MB 98.6 MB/s 
Collecting orjson<4.0
  Downloading orjson-3.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (278 kB)
[K     |████████████████████████████████| 278 kB 62.5 MB/s 
Collecting dill<0.3.2,>=0.3.1.1
  Downloading dill-0.3.1.1.tar.gz (151 kB)
[K     |████████████████████████████████| 151 kB 61.0 MB/s 
[?25hCollecting requests<3.0.0,>=2.24.0
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |██████████████████████

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting facets-overview<2,>=1.0.0
  Downloading facets_overview-1.0.0-py2.py3-none-any.whl (24 kB)
Collecting ipython<9,>=8
  Downloading ipython-8.7.0-py3-none-any.whl (761 kB)
[K     |████████████████████████████████| 761 kB 7.1 MB/s 
Collecting timeloop<2,>=1.0.2
  Downloading timeloop-1.0.2.tar.gz (2.9 kB)
Collecting google-cloud-dataproc<3.2.0,>=3.0.0
  Downloading google_cloud_dataproc-3.1.1-py2.py3-none-any.whl (186 kB)
[K     |████████████████████████████████| 186 kB 60.0 MB/s 
[?25hCollecting ipykernel<7,>=6
  Downloading ipykernel-6.18.1-py3-none-any.whl (143 kB)
[K     |████████████████████████████████| 143 kB 53.5 MB/s 
Collecting tornado>=6.1
  Downloading tornado-6.2-cp37-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (423 kB)
[K     |████████████████████████████████| 423 kB 57.4 MB/s 
Collecting nest-asyncio
  Downloading 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.10.0
  Downloading tensorflow-2.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (578.1 MB)
[K     |████████████████████████████████| 578.1 MB 7.5 kB/s 
Collecting tensorflow-estimator<2.11,>=2.10.0
  Downloading tensorflow_estimator-2.10.0-py2.py3-none-any.whl (438 kB)
[K     |████████████████████████████████| 438 kB 68.4 MB/s 
Collecting keras<2.11,>=2.10.0
  Downloading keras-2.10.0-py2.py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 44.3 MB/s 
[?25hCollecting tensorboard<2.11,>=2.10
  Downloading tensorboard-2.10.1-py3-none-any.whl (5.9 MB)
[K     |████████████████████████████████| 5.9 MB 40.1 MB/s 
Collecting flatbuffers>=2.0
  Downloading flatbuffers-22.11.23-py2.py3-none-any.whl (26 kB)
Installing collected packages: tensorflow-estimator, tensorboard, keras, flatbuffers, tensorflow
  Attempting uninstall: tensorf

In [None]:
import os
import importlib

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text

from tensorflow import keras

from typing import Any
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Sequence

import apache_beam as beam
import apache_beam.runners.interactive.interactive_beam as ib

from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.inference.base import ModelHandler
from apache_beam.ml.inference.base import PredictionResult
from apache_beam.internal import pickler
from apache_beam.runners.runner import PipelineResult
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner

from tfx_bsl.public.beam.run_inference import CreateModelHandler
from tfx_bsl.public.proto import model_spec_pb2

2022-11-28 21:06:38.334716: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-28 21:06:38.619452: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-28 21:06:39.677456: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2022-11-28 21:06:39.678648: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinf

## Create a Keras Model from the TensorFlow Hub Model

In [None]:
# Authenticate the Colab user.
# NOTE(review): presumably required so the notebook can write the SavedModel
# to the GCS bucket configured below — confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Replace the placeholders below before running.
# NOTE(review): PROJECT_ID is not referenced by any other visible cell.
PROJECT_ID = '<Project Id>'
# NOTE(review): presumably a full bucket URI (e.g. starting with gs://), since
# MODEL_PATH is built from it without adding a scheme — confirm.
GCS_BUCKET = '<GCS Bucket>'

# Location the SavedModel is written to and later loaded from.
MODEL_PATH = f'{GCS_BUCKET}/st5-base/1'

In [None]:
# String-valued Keras input named 'input'; shape=[] means one scalar string per example.
inp = tf.keras.layers.Input(shape=[], dtype=tf.string, name='input')
hub_url = "https://tfhub.dev/google/sentence-t5/st5-base/1"
# Wrap the TF Hub module as a Keras layer and wire it into a functional model.
imported = hub.KerasLayer(hub_url)
outp = imported(inp)
model = tf.keras.Model(inp, outp)

# The Sentence-T5 model returns a 768-dimensional vector for an English text input.
# Note the 'input' name: it is also the feature key used when building and parsing examples.
model.summary()

2022-11-28 21:06:57.673124: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-11-28 21:06:57.673231: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c005624a5bf6): /proc/driver/nvidia/version does not exist
2022-11-28 21:06:57.674772: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None,)]                 0         
                                                                 
 keras_layer (KerasLayer)    [(None, 768)]             0         
                                                                 
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


## Save the model with a TF function definition for RunInference()

In [None]:
# Feature spec used to parse serialized examples: a single scalar string
# feature stored under the key 'input'.
RAW_DATA_PREDICT_SPEC = {
    'input': tf.io.FixedLenFeature([], tf.string),
}

@tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def call(serialized_examples):
    """Parse a batch of serialized examples and run them through the model.

    Args:
        serialized_examples: 1-D string tensor, as fixed by the
            ``input_signature``; each element is parsed with
            ``RAW_DATA_PREDICT_SPEC``.

    Returns:
        Whatever ``model`` returns for the parsed feature dictionary.
    """
    parsed_features = tf.io.parse_example(
        serialized=serialized_examples,
        features=RAW_DATA_PREDICT_SPEC,
    )
    return model(parsed_features)

# Export the model to MODEL_PATH with `call` registered as the
# 'serving_default' signature.
tf.saved_model.save(model, MODEL_PATH, signatures={'serving_default': call})

## Create and test a RunInference pipeline locally

In [None]:
# Creates TensorFlow Example to feed to the ModelHandler.
class ExampleProcessor:
    """Builds ``tf.train.Example`` protos from raw sentences.

    Each sentence is stored as a single bytes value under the ``'input'``
    feature key, the same key declared in ``RAW_DATA_PREDICT_SPEC``.
    """

    def create_example(self, feature: str) -> tf.train.Example:
        """Wrap one sentence in a ``tf.train.Example``.

        Args:
            feature: The sentence to embed, as a Python string.

        Returns:
            An Example whose ``'input'`` feature holds the encoded sentence.
        """
        return tf.train.Example(
            features=tf.train.Features(
                feature={'input': self.create_feature(feature)}))

    def create_feature(self, element: str) -> tf.train.Feature:
        """Encode a Python string as a single-value bytes feature.

        Args:
            element: The text to store. It is UTF-8 encoded because the
                value is placed in a ``BytesList``.

        Returns:
            A ``tf.train.Feature`` with a one-element ``bytes_list``.
        """
        # 'utf-8' is what str.encode() uses by default; spelled out for clarity.
        return tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[element.encode('utf-8')]))


In [None]:
# Describe the SavedModel exported to MODEL_PATH and build a model handler from it.
saved_model_spec = model_spec_pb2.SavedModelSpec(model_path=MODEL_PATH)
# NOTE(review): 'inferece_spec_type' is presumably a misspelling of 'inference_spec_type'.
inferece_spec_type = model_spec_pb2.InferenceSpecType(saved_model_spec=saved_model_spec)
model_handler = CreateModelHandler(inferece_spec_type)

# Sample sentences to embed.
questions = [
    'what is the official slogan for the 2018 winter olympics?',
]

# Build the pipeline on the InteractiveRunner.
pipeline = beam.Pipeline(InteractiveRunner())

# Sentences -> tf.train.Example protos -> RunInference with the model handler.
inference = (pipeline | 'CreateSentences' >> beam.Create(questions)
               | 'Convert input to Tensor' >> beam.Map(lambda x: ExampleProcessor().create_example(x))
               | 'RunInference with T5' >> RunInference(model_handler))

In [None]:
# Display the contents of the `inference` PCollection in the notebook.
# NOTE(review): presumably this call is what triggers pipeline execution — confirm.
ib.show(inference)