In [5]:
import mlflow
import mlflow.keras
from pyspark.sql.types import ArrayType, StringType
from pyspark.sql.functions import col, struct

from tensorflow import keras
import tensorflow as tf

 ### Move model from TesnorFlow to MLFlow registry

In [None]:
model_path =  "/home/jovyan/dist-tf-model/"

In [None]:
restored_keras_model = tf.keras.models.load_model(model_path)

In [None]:
with mlflow.start_run() as run:
    mlflow.keras.log_model(restored_keras_model, "models")

In [9]:
run_id ="425438f8a7b0471d9413684deeb63deb"
experiment_id = "0"

In [6]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
import pyspark.sql.functions 
from pyspark.sql.types import *

spark = SparkSession \
    .builder \
    .appName("Model inference") \
    .getOrCreate()

### Define mlfloyw.pyfunc wrapper for the Model: 

In [7]:
# TIP: Create custom Python pyfunc model that transforms and predicts on inference data
# Allows the inference pipeline to be independent of the model framework used in training pipeline
class KerasCNNModelWrapper(mlflow.pyfunc.PythonModel):
  def __init__(self, model_path):
    self.model_path = model_path

  def load_context(self, context):
    # Load the Keras-native representation of the MLflow
    # model
    print(self.model_path)
    self.model = mlflow.keras.load_model(
        model_uri=self.model_path)

  def predict(self, context, model_input):
    import tensorflow as tf
    import json

    class_def = {
      0: '212.teapot', 
      1: '234.tweezer', 
      2: '196.spaghetti', 
      3: '249.yo-yo', 
    }

    model_input['origin'] = model_input['origin'].str.replace("dbfs:","/dbfs")
    images = model_input['origin']

    rtn_df = model_input.iloc[:,0:1]
    rtn_df['prediction'] = None
    rtn_df['probabilities'] = None

    for index, row in model_input.iterrows():
      image = np.round(np.array(Image.open(row['origin']).resize((224,224)),dtype=np.float32))
      img = tf.reshape(image, shape=[-1, 224, 224, 3])
      class_probs = self.model.predict(img)
      classes = np.argmax(class_probs, axis=1)
      class_prob_dict = dict()
      for key, val in class_def.items():
        class_prob_dict[val] = np.round(np.float(class_probs[0][int(key)]), 3).tolist()
      rtn_df.loc[index,'prediction'] = classes[0]
      rtn_df.loc[index,'probabilities'] = json.dumps(class_prob_dict)

    return rtn_df[['prediction', 'probabilities']].values.tolist()


In [10]:
model_path = f"file:/home/jovyan/mlruns/{experiment_id}/{run_id}/artifacts/models"
wrappedModel = KerasCNNModelWrapper(model_path)
mlflow.pyfunc.log_model("pyfunc_model_v2", python_model=wrappedModel)
print(f"Inside MLflow Run with run_id `{run_id}` and experiment_id `{experiment_id}`")

Inside MLflow Run with run_id `425438f8a7b0471d9413684deeb63deb` and experiment_id `0`


#### Test the model with mlflow.pyfunc

In [12]:
# Test data. Using the same dataframe in this example
images_df = spark.read.parquet( "images_data/silver/augmented")
model_path = f"file:/home/jovyan/mlruns/{experiment_id}/{run_id}/artifacts/models"

# Always use the Production version of the model from the registry
mlflow_model_path = model_path

# Load model as a Spark UDF.
loaded_model = mlflow.pyfunc.spark_udf(spark, mlflow_model_path, result_type=ArrayType(StringType()))

# Predict on a Spark DataFrame.
scored_df = (images_df
             .withColumn('origin', col("content"))
             .withColumn('my_predictions', loaded_model(struct("origin")))
             .drop("origin"))


In [13]:
scored_df.show(5, truncate=False)

PythonException: 
  An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 604, in main
    process()
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 596, in process
    serializer.dump_stream(out_iter, outfile)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 273, in dump_stream
    return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 81, in dump_stream
    for batch in iterator:
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 266, in init_stream_yield_batches
    for series in iterator:
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper
    result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr>
    result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 105, in <lambda>
    verify_result_type(f(*a)), len(a[0])), arrow_return_type)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/util.py", line 73, in wrapper
    return f(*args, **kwargs)
  File "/opt/conda/lib/python3.9/site-packages/mlflow/pyfunc/__init__.py", line 856, in predict
    result = model.predict(pdf)
  File "/opt/conda/lib/python3.9/site-packages/mlflow/pyfunc/__init__.py", line 608, in predict
    return self._model_impl.predict(data)
  File "/opt/conda/lib/python3.9/site-packages/mlflow/keras.py", line 498, in predict
    predicted = _predict(data)
  File "/opt/conda/lib/python3.9/site-packages/mlflow/keras.py", line 485, in _predict
    predicted = pd.DataFrame(self.keras_model.predict(data.values))
  File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1751, in predict
    tmp_batch_outputs = self.predict_function(iterator)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py", line 885, in __call__
    result = self._call(*args, **kwds)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py", line 933, in _call
    self._initialize(args, kwds, add_initializers_to=initializers)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py", line 759, in _initialize
    self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/function.py", line 3066, in _get_concrete_function_internal_garbage_collected
    graph_function, _ = self._maybe_define_function(args, kwargs)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/function.py", line 3463, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/function.py", line 3298, in _create_graph_function
    func_graph_module.func_graph_from_py_func(
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py", line 1007, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py", line 668, in wrapped_fn
    out = weak_wrapped_fn().__wrapped__(*args, **kwds)
  File "/opt/conda/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py", line 994, in wrapper
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    /opt/conda/lib/python3.9/site-packages/keras/engine/training.py:1586 predict_function  *
        return step_function(self, iterator)
    /opt/conda/lib/python3.9/site-packages/keras/engine/training.py:1576 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/conda/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/conda/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/conda/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/conda/lib/python3.9/site-packages/keras/engine/training.py:1569 run_step  **
        outputs = model.predict_step(data)
    /opt/conda/lib/python3.9/site-packages/keras/engine/training.py:1537 predict_step
        return self(x, training=False)
    /opt/conda/lib/python3.9/site-packages/keras/engine/base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /opt/conda/lib/python3.9/site-packages/keras/engine/sequential.py:369 call
        return super(Sequential, self).call(inputs, training=training, mask=mask)
    /opt/conda/lib/python3.9/site-packages/keras/engine/functional.py:414 call
        return self._run_internal_graph(
    /opt/conda/lib/python3.9/site-packages/keras/engine/functional.py:550 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /opt/conda/lib/python3.9/site-packages/keras/engine/base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /opt/conda/lib/python3.9/site-packages/keras/engine/functional.py:414 call
        return self._run_internal_graph(
    /opt/conda/lib/python3.9/site-packages/keras/engine/functional.py:550 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /opt/conda/lib/python3.9/site-packages/keras/engine/base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /opt/conda/lib/python3.9/site-packages/keras/engine/input_spec.py:229 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer Conv1 is incompatible with the layer: : expected min_ndim=4, found ndim=2. Full shape received: (None, 1)

