In [1]:
import tensorflow as tf
import numpy as np
import config
import os

In [2]:
# Load MNIST through Keras and unpack it into the (images, labels) pairs
# that the rest of the notebook refers to.
(training_images, training_labels), (validation_images, validation_labels) = (
  tf.keras.datasets.mnist.load_data()
)

In [3]:
"""
  Pre-process the numpy array of images.
  This method scales pixels between [0.0, 1.0], sample-wise.

  Args:
    input_images: numpy.ndarray
      The input images to be pre-processed.

  Returns:  
    numpy.ndarray
      The pre-processed images scaled in the [0.0, 1.0] range.
"""
def pre_process_fp32_images(input_images):
  if not isinstance(input_images, np.ndarray):
    raise TypeError("dataset images expected to be of type numpy.ndarray")
  
  preprocessed_images = tf.pad(input_images, [[0, 0], [2, 2], [2, 2]]) / 255
  preprocessed_images = tf.expand_dims(preprocessed_images, axis=3, name=None)
  return preprocessed_images

"""
  Pre-process the numpy array of images (scaling pixels between [0.0, 1.0]) sample-wise.
  Eventually, the pre-processed images are quantized to the specified quantization type
  and saved in the specified path.

  Args:
    input_images: numpy.ndarray
      The input images to be pre-processed.
    model: tf.lite.Interpreter
      The model to be used for retrieving quantization parameters.
    quantization_type: str ["int8", "uint8", "int16"]
      The quantization type to be used for quantizing the pre-processed images.

  Returns:  
    numpy.ndarray
      The pre-processed images quantized to the specified quantization type.
"""
def pre_process_images_for_quantized_models(input_images, 
                                            model: tf.lite.Interpreter, 
                                            quantization_type: str):
  if not isinstance(input_images, np.ndarray):
    raise TypeError("dataset images expected to be of type numpy.ndarray")
  
  input_details = model.get_input_details()[0]
  input_scale, input_zero_point = input_details["quantization"]
  
  if quantization_type == 'int8':
    quantized_images = tf.cast(pre_process_fp32_images(input_images) / input_scale + input_zero_point, tf.int8)
  elif quantization_type == 'uint8':
    quantized_images = tf.cast(pre_process_fp32_images(input_images) / input_scale + input_zero_point, tf.uint8)
  elif quantization_type == 'int16':
    quantized_images = pre_process_fp32_images(input_images)
  else:
    raise ValueError("quantization type not supported")
  
  return quantized_images

In [4]:
# Create the parent directory of every upper-case string attribute of
# `config` that contains a path separator.
for attr in dir(config):
  # Only upper-case names are considered.
  if not attr.isupper():
    continue
  value = getattr(config, attr)
  if not isinstance(value, str):
    continue
  # A value is treated as a path only if it contains a separator.
  if "/" not in value and "\\" not in value:
    continue
  dir_path = os.path.dirname(value)
  if dir_path:
    os.makedirs(dir_path, exist_ok=True)


In [5]:
"""
  Save the MNIST labels in the specified path.
"""
def save_mnist_labels():
  np.save(config.TRAIN_LABELS_PATH, training_labels)
  np.save(config.VALIDATION_LABELS_PATH, validation_labels)

"""
  Save the first 2000 MNIST labels in the specified path.
"""
def save_first_2k_mnist_labels():
  np.save(config.VALIDATION_LABELS_2K_PATH, validation_labels[:2000])

"""
  Save the first 500 MNIST labels in the specified path.
"""
def save_first_500_mnist_labels():
  np.save(config.VALIDATION_LABELS_500_PATH, validation_labels[:500])

"""
  Save the MNIST preprocessed images (for any fp32 models) in the specified path.
"""
def save_fp32_mnist_data():
  train_X = pre_process_fp32_images(training_images)
  valid_X = pre_process_fp32_images(validation_images)
  np.save(config.FP32_TRAIN_SET_PREPROCESSED_PATH, train_X)
  np.save(config.FP32_VALIDATION_SET_PREPROCESSED_PATH, valid_X)

"""
  Save the MNIST uint8 preprocessed images for lenet5 model in the specified path.
"""
def save_uint8_mnist_x_lenet5_data():
  model = tf.lite.Interpreter(model_path=config.LENET5_U8_MODEL_PATH)
  model.allocate_tensors()
  _train_X = pre_process_images_for_quantized_models(training_images, model, 'uint8')
  valid_X = pre_process_images_for_quantized_models(validation_images, model, 'uint8')
  np.save(config.LENET5_U8_VALIDATION_SET_PREPROCESSED_PATH, valid_X)
  np.save(config.LENET5_U8_2K_VALIDATION_SET_PREPROCESSED_PATH, valid_X[:2000])
  
"""
  Save the MNIST int8 preprocessed images for lenet5 model in the specified path.
"""
def save_int8_mnist_x_lenet5_data():
  model = tf.lite.Interpreter(model_path=config.LENET5_I8_MODEL_PATH)
  model.allocate_tensors()
  _train_X = pre_process_images_for_quantized_models(training_images, model, 'int8')
  valid_X = pre_process_images_for_quantized_models(validation_images, model, 'int8')
  np.save(config.LENET5_I8_VALIDATION_SET_PREPROCESSED_PATH, valid_X)
  np.save(config.LENET5_I8_2K_VALIDATION_SET_PREPROCESSED_PATH, valid_X[:2000])

# Run every export step, in the same order as before: fp32 images first,
# then the label files, then the quantized lenet5 validation sets.
for export_step in (
  save_fp32_mnist_data,
  save_mnist_labels,
  save_first_500_mnist_labels,
  save_first_2k_mnist_labels,
  save_uint8_mnist_x_lenet5_data,
  save_int8_mnist_x_lenet5_data,
):
  export_step()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
