In [None]:
# @title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the "License")

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License

# Use RunInference in Apache Beam
You can use Apache Beam versions 2.40.0 and later with the [RunInference API](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.RunInference) for local and remote inference with batch and streaming pipelines.
The RunInference API builds on Apache Beam concepts, such as the `BatchElements` transform and the `Shared` class, so that you can use models in your pipelines through transforms that are optimized for machine learning inference.

For more information about the RunInference API, see [Machine Learning](https://beam.apache.org/documentation/sdks/python-machine-learning) in the Apache Beam documentation.

This example demonstrates how to use the RunInference API with three popular ML frameworks: PyTorch, TensorFlow, and scikit-learn. The three pipelines use a text classification model for generating predictions.

Follow these steps to build a pipeline:
* Read the input text.
* If needed, preprocess the text.
* Run inference with the PyTorch, TensorFlow, or scikit-learn model.
* If needed, postprocess the output.

## RunInference with a PyTorch model

This section demonstrates how to use the RunInference API with a PyTorch model.

### Install dependencies

First, download and install the dependencies.

In [None]:
!pip install --upgrade pip
!pip install apache_beam[gcp]>=2.40.0
!pip install transformers
!pip install google-api-core==1.32

### Install the model

This example uses a pretrained text classification model, [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english?text=I+like+you.+I+love+you). This model is a checkpoint of `DistilBERT-base-uncased`, fine-tuned on the SST-2 dataset.


In [None]:
! git lfs install
! git clone https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english
! ls

Error: Failed to call git rev-parse --git-dir --show-toplevel: "fatal: not a git repository (or any of the parent directories): .git\n"
Git LFS initialized.
fatal: destination path 'distilbert-base-uncased-finetuned-sst-2-english' already exists and is not an empty directory.
'=2.40.0'   distilbert-base-uncased-finetuned-sst-2-english   sample_data


### Install helper functions

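The pipeline also needs a few helper classes: a model wrapper that reshapes the model output, a `DoFn` that tokenizes the text, and a `DoFn` that postprocesses the predictions.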

In [None]:
from collections import defaultdict

import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, DistilBertConfig

import apache_beam as beam
from apache_beam.ml.inference import RunInference
from apache_beam.ml.inference.base import PredictionResult, KeyedModelHandler
from apache_beam.ml.inference.pytorch_inference import PytorchModelHandlerKeyedTensor


class HuggingFaceStripBatchingWrapper(DistilBertForSequenceClassification):
  """DistilBERT classifier whose ``forward`` returns one dict per batch element.

  The parent ``forward`` returns a mapping from output name to a batched value
  (a dict of lists), while RunInference requires a batch as a list of dicts.
  This wrapper transposes the parent's result accordingly. An alternative
  workaround that disables batching instead can be found here:
  https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py
  """
  def forward(self, **kwargs):
    batched = super().forward(**kwargs)
    field_names = list(batched)
    per_example = []
    # Each zipped row holds the values of every output field for one example.
    for row in zip(*batched.values()):
      per_example.append(dict(zip(field_names, row)))
    return per_example



class Tokenize(beam.DoFn):
  """Tokenizes a text element into fixed-length DistilBERT input tensors.

  Each input string is emitted as a ``(text_input, tokens)`` tuple, where
  ``tokens`` maps each tokenizer output name to a tensor.

  Args:
    model_name: Name or path passed to ``DistilBertTokenizer.from_pretrained``.
  """
  def __init__(self, model_name: str):
    self._model_name = model_name

  def setup(self):
    # The tokenizer is created here rather than in __init__, so only the model
    # name is stored on the DoFn when it is constructed.
    self._tokenizer = DistilBertTokenizer.from_pretrained(self._model_name)

  def process(self, text_input: str):
    # Pad AND truncate the token tensors to max length to make sure that all of
    # the tensors are of the same length and stack-able by the RunInference API.
    # Padding alone leaves texts longer than 512 tokens at their original
    # length, which makes the batch impossible to stack.
    # Normally, you would batch first then tokenize the batch, padding each
    # tensor the max length in the batch.
    # See: https://beam.apache.org/documentation/sdks/python-machine-learning/#unable-to-batch-tensor-elements
    tokens = self._tokenizer(
        text_input,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=512)
    # Tokenizing one element at a time adds a leading batch dimension of size
    # one; remove exactly that dimension and nothing else.
    tokens = {key: torch.squeeze(val, dim=0) for key, val in tokens.items()}
    return [(text_input, tokens)]

class PostProcessor(beam.DoFn):
  """Converts the logits of a keyed prediction into softmax probabilities.

  Takes a ``(text_input, prediction_result)`` tuple and emits a dict with the
  keys ``"input"`` (the original text) and ``"softmax"`` (a NumPy array of
  probabilities computed over the last dimension of the logits).
  """
  def process(self, tuple_):
    text_input, prediction_result = tuple_
    logits = prediction_result.inference['logits']
    # .cpu() is a no-op for tensors already in host memory and is required
    # before .numpy() when the model ran on an accelerator.
    softmax = torch.softmax(logits, dim=-1).detach().cpu().numpy()
    return [{"input": text_input, "softmax": softmax}]

### Run the pipeline

This section demonstrates how to create and run the RunInference pipeline.

In [None]:
# Sample sentences to classify with the PyTorch sentiment model.
inputs = [
    "This is the worst food I have ever eaten",
    "In my soul and in my heart, I’m convinced I’m wrong!",
    "Be with me always—take any form—drive me mad! only do not leave me in this abyss, where I cannot find you!",
    "Do I want to live? Would you like to live with your soul in the grave?",
    "Honest people don’t hide their deeds.",
    "Nelly, I am Heathcliff!  He’s always, always in my mind: not as a pleasure, any more than I am always a pleasure to myself, but as my own being.",
]

In [None]:
# The Beam PyTorch model handler selects a GPU only when `device` is exactly
# 'GPU'; any other value (for example 'cuda:0') runs the model on the CPU.
# The device is therefore named explicitly. If you switch to 'GPU', make sure
# that downstream steps move the predictions to the CPU before converting them
# to NumPy arrays.
model_handler = PytorchModelHandlerKeyedTensor(
    state_dict_path="./distilbert-base-uncased-finetuned-sst-2-english/pytorch_model.bin",
    model_class=HuggingFaceStripBatchingWrapper,
    model_params={"config": DistilBertConfig.from_pretrained("./distilbert-base-uncased-finetuned-sst-2-english/config.json")},
    device='CPU')

# The Tokenize step emits (text, tokens) tuples, so wrap the handler to carry
# the text key alongside each prediction.
keyed_model_handler = KeyedModelHandler(model_handler)

with beam.Pipeline() as pipeline:
  _ = (pipeline | "Create inputs" >> beam.Create(inputs)
                | "Tokenize" >> beam.ParDo(Tokenize("distilbert-base-uncased-finetuned-sst-2-english"))
                | "Inference" >> RunInference(model_handler=keyed_model_handler)
                | "Postprocess" >> beam.ParDo(PostProcessor())
                | "Print" >> beam.Map(lambda x: print(f"Input: {x['input']} -> negative={100 * x['softmax'][0]:.4f}%/positive={100 * x['softmax'][1]:.4f}%"))
  )

  # Remove the CWD from sys.path while we load stuff.


  obj = StockUnpickler.load(self)
  obj = StockUnpickler.load(self)


Input: This is the worst food I have ever eaten -> negative=99.9777%/positive=0.0223%
Input: In my soul and in my heart, I’m convinced I’m wrong! -> negative=1.6313%/positive=98.3687%
Input: Be with me always—take any form—drive me mad! only do not leave me in this abyss, where I cannot find you! -> negative=62.1188%/positive=37.8812%
Input: Do I want to live? Would you like to live with your soul in the grave? -> negative=73.6841%/positive=26.3159%
Input: Honest people don’t hide their deeds. -> negative=0.2377%/positive=99.7623%
Input: Nelly, I am Heathcliff!  He’s always, always in my mind: not as a pleasure, any more than I am always a pleasure to myself, but as my own being. -> negative=0.0672%/positive=99.9328%


## RunInference with a TensorFlow model

This section demonstrates how to use the RunInference API with a TensorFlow model.

Note: TensorFlow models are supported through `tfx-bsl`.

### Install dependencies

First, download and install the dependencies.

In [None]:
# Install the packages used by the TensorFlow section.
# Beam, TensorFlow, and tensorflow-text are pinned to specific versions.
# NOTE(review): tfx_bsl is not pinned; consider pinning a release that is
# compatible with the TensorFlow and Beam versions below.
!pip install --upgrade pip
!pip install google-api-core==1.32
!pip install apache_beam[gcp]==2.41.0
!pip install tensorflow==2.8
!pip install tfx_bsl
!pip install tensorflow-text==2.8.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting apache_beam[gcp]==2.41.0
  Downloading apache_beam-2.41.0-cp37-cp37m-manylinux2010_x86_64.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: apache_beam
  Attempting uninstall: apache_beam
    Found existing installation: apache-beam 2.42.0
    Uninstalling apache-beam-2.42.0:
      Successfully uninstalled apache-beam-2.42.0
Successfully installed apache_beam-2.41.0
[0m

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.8
  Downloading https://us-python.pkg.dev/colab-wheels/public/tensorflow/tensorflow-2.8.0%2Bzzzcolab20220506162203-cp37-cp37m-linux_x86_64.whl (668.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m668.3/668.3 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting keras<2.9,>=2.8.0rc0
  Downloading keras-2.8.0-py2.py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
Collecting tf-estimator-nightly==2.8.0.dev2021122109
  Downloading tf_estimator_nightly-2.8.0.dev2021122109-py2.py3-none-any.whl (462 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.5/462.5 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorboard<2.9,>=2.8
  Downloading tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-text==2.8.1
  Downloading tensorflow_text-2.8.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow<2.9,>=2.8.0
  Downloading tensorflow-2.8.3-cp37-cp37m-manylinux2010_x86_64.whl (497.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m497.9/497.9 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.9,>=2.8
  Using cached tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
Collecting tensorflow-estimator<2.9,>=2.8
  Downloading tensorflow_estimator-2.8.0-py2.py3-none-any.whl (462 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.3/462.3 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
Collecting keras<2.9,>=2.8.0rc0
  Using cached keras-2.8.0-py2.py3-none-any.whl (1.4 M

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_text as text
from scipy.special import expit

import apache_beam as beam
import tfx_bsl
from tfx_bsl.public.beam import RunInference
from tfx_bsl.public import tfxio
from tfx_bsl.public.proto import model_spec_pb2
from tfx_bsl.public.tfxio import TFExampleRecord
from tensorflow_serving.apis import prediction_log_pb2

### Install the model

Use a pretrained binary classifier, stored in Google Cloud Storage, that performs sentiment analysis on IMDB movie reviews.
This model was trained by following this [TensorFlow text classification tutorial](https://www.tensorflow.org/tutorials/keras/text_classification).

In [None]:
# Cloud Storage location of the saved TensorFlow model used in this section.
model_dir = "gs://apache-beam-testing-ml-examples/imdb_bert"

### Install helper functions

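The pipeline also needs two helper classes: one that converts raw text into `tf.Example` protos, and a `DoFn` that formats the prediction logs.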

In [None]:
class ExampleProcessor:
  """
  Converts raw text input into the tf.Example format used with RunInference.
  The TensorFlow model handler expects a serialized tf.Example as input;
  serializing the returned proto is left to the caller.
  """
  def create_example(self, feature):
    """Wraps ``feature`` in a tf.train.Example under the feature key 'x'."""
    feature_map = {'x': self.create_feature(feature)}
    features = tf.train.Features(feature=feature_map)
    return tf.train.Example(features=features)

  def create_feature(self, element):
    """Builds a tf.train.Feature whose bytes_list holds the single ``element``."""
    bytes_list = tf.train.BytesList(value=[element])
    return tf.train.Feature(bytes_list=bytes_list)

class PredictionProcessor(beam.DoFn):
  """
  Formats a RunInference prediction log as a string that contains the input
  text and the sigmoid (``expit``) of the classifier output.
  """
  def process(self, element: prediction_log_pb2.PredictionLog):
    predict_log = element.predict_log
    # The request carries the serialized tf.Example under the 'text' input.
    serialized_example = predict_log.request.inputs['text'].string_val[0]
    input_value = tf.train.Example.FromString(serialized_example)
    output_value = predict_log.response.outputs
    yield (f"input is [{input_value.features.feature['x'].bytes_list.value}] output is {expit(output_value['classifier'].float_val)}")

### Prepare the input

This section demonstrates how to prepare the input for your model.

In [None]:
# Sample movie reviews for the TensorFlow model. They are byte strings because
# each one is stored in a tf.train.BytesList when the tf.Example is built.
inputs = np.array([
    b"this is such an amazing movie",
    b"The movie was great",
    b"The movie was okish",
    b"The movie was terrible"
])

In [None]:
input_strings_file = 'input_strings.tfrecord'

# RunInference expects serialized tf.Example records as input, so convert every
# raw input and write the serialized result to a TFRecord file.
# The same preprocessing can also run as a pipeline step by using beam.Map().
example_processor = ExampleProcessor()

with tf.io.TFRecordWriter(input_strings_file) as writer:
  for raw_text in inputs:
    serialized = example_processor.create_example(feature=raw_text).SerializeToString()
    writer.write(serialized)

### Run the pipeline

This section demonstrates how to create and run the RunInference pipeline.

In [None]:
# Describe the saved model once and reuse the spec in the pipeline below.
saved_model_spec = model_spec_pb2.SavedModelSpec(model_path=model_dir)
inference_spec_type = model_spec_pb2.InferenceSpecType(saved_model_spec=saved_model_spec)

# A Beam I/O that reads a file of serialized tf.Examples.
# Reuse the path that the records were written to instead of repeating the literal.
tfexample_beam_record = TFExampleRecord(file_pattern=input_strings_file)

with beam.Pipeline() as pipeline:
  _ = (pipeline | "Create Input PCollection" >> tfexample_beam_record.RawRecordBeamSource()
                | "Do Inference" >> RunInference(inference_spec_type)
                | "Post Process" >> beam.ParDo(PredictionProcessor())
                | "Print" >> beam.Map(print)
  )


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


input is [[b'this is such an amazing movie']] output is [0.99906057]
input is [[b'The movie was great']] output is [0.99307914]
input is [[b'The movie was okish']] output is [0.03274685]
input is [[b'The movie was terrible']] output is [0.00680008]


## RunInference with scikit-learn

This section demonstrates how to use the RunInference API with scikit-learn.

### Install dependencies

First, download and install the dependencies.

In [None]:
# Install the packages used by the scikit-learn section.
# NOTE(review): scikit-learn itself is not installed here, so the runtime is
# presumably expected to provide it; confirm that its version can load the
# pickled model.
!pip install --upgrade pip
!pip install google-api-core==1.32
!pip install apache_beam[gcp]==2.41.0

In [None]:
import pickle

import apache_beam as beam
from apache_beam.ml.inference import RunInference
from apache_beam.ml.inference.sklearn_inference import SklearnModelHandlerNumpy, ModelFileType

### Install the model

To classify movie reviews as either positive or negative, this example uses a pretrained scikit-learn sentiment analysis pipeline that was saved as a pickle file in Google Cloud Storage.

This model was trained by following this [scikit-learn tutorial](https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html#exercise-2-sentiment-analysis-on-movie-reviews)

In [None]:
# Cloud Storage path of the pickled scikit-learn pipeline.
# Note: this reuses the name `model_dir` from the TensorFlow section, so the
# value depends on which section ran last.
model_dir = "gs://apache-beam-testing-ml-examples/sklearn-text-classification/sklearn_sentiment_analysis_pipeline.pkl"

### Run the pipeline

This section demonstrates how to create and run the RunInference pipeline.

In [None]:
# Sample sentences to classify with the scikit-learn sentiment pipeline.
inputs = [
    "In my soul and in my heart, I’m convinced I’m wrong!",
    "Be with me always—take any form—drive me mad! only do not leave me in this abyss, where I cannot find you!",
    "Do I want to live? Would you like to live with your soul in the grave?",
    "Honest people don’t hide their deeds.",
    "Nelly, I am Heathcliff!  He’s always, always in my mind: not as a pleasure, any more than I am always a pleasure to myself, but as my own being.",
]

In [None]:
# Choose an sklearn model handler based on the input data type:
# 1. SklearnModelHandlerNumpy: For using numpy arrays as input.
# 2. SklearnModelHandlerPandas: For using pandas dataframes as input.

# The sklearn model handler supports loading two serialized formats:
# 1. ModelFileType.PICKLE: For models saved using pickle.
# 2. ModelFileType.JOBLIB: For models saved using Joblib.

model_handler = SklearnModelHandlerNumpy(model_uri=model_dir, model_file_type=ModelFileType.PICKLE)

# Label mapping: the referenced scikit-learn tutorial loads the reviews with
# load_files, which numbers the class folders alphabetically (0 = 'neg',
# 1 = 'pos'), so class 1 is reported as positive.
# NOTE(review): confirm that the pickled model was trained with this encoding.
with beam.Pipeline() as pipeline:
  _ = (pipeline | "Create inputs" >> beam.Create(inputs)
                | "Inference" >> RunInference(model_handler=model_handler)
                | "Print" >> beam.Map(lambda x: print(f"input: {x.example} -> {'positive' if x.inference == 1 else 'negative'}"))
  )

input: In my soul and in my heart, I’m convinced I’m wrong! -> negative
input: Be with me always—take any form—drive me mad! only do not leave me in this abyss, where I cannot find you! -> positive
input: Do I want to live? Would you like to live with your soul in the grave? -> positive
input: Honest people don’t hide their deeds. -> negative
input: Nelly, I am Heathcliff!  He’s always, always in my mind: not as a pleasure, any more than I am always a pleasure to myself, but as my own being. -> negative
