In [6]:
import tensorflow as tf
import numpy as np
from imagenet2012_utils import ImageNetDataset
import config
from huggingface_hub import hf_hub_download
import shutil
import os
import tarfile

In [None]:
# download ImageNet-2012 dataset

repo_id = "jack-perlo/ILSVRC-2012"
filename = "ILSVRC2012.tar.gz"
local_dir = "./datasets_data/imagenet2012_compressed"
# Derived from local_dir and filename so the path that is opened for extraction
# can never drift from the path the archive is copied to.
images_tar_path = os.path.join(local_dir, filename)
imgs_extract_dir = './datasets_data/' # 50k validation images and respective labels

os.makedirs(local_dir, exist_ok=True)
os.makedirs(imgs_extract_dir, exist_ok=True)

# Look for the archive itself rather than "any entry in local_dir": a stray file
# (e.g. a .gitkeep or an editor checkpoint) must not cause the download to be skipped.
if os.path.isfile(images_tar_path):
  print(f"✅ {local_dir} already contains data. Skipping download and extraction.")
else: 
  print(f"🚧 Downloading 50k compressed validation images from Imagenet2012 dataset from Hugging Face Hub into {local_dir} ...")
  # Download the dataset archive; hf_hub_download returns the path of the downloaded file
  local_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset")
  # Copy to the target directory
  shutil.copy(local_path, images_tar_path)
  print(f"✅ 50k compressed validation images from ImageNet2012 saved to: {local_dir}/{filename}")

  print(f"\n🚧 Starting decompression from {local_dir}/{filename} into {imgs_extract_dir} ")
  with tarfile.open(images_tar_path, 'r:gz') as imgs_tar:
    # The archive comes from a third-party repository: when this Python version
    # supports extraction filters, use 'data' so members cannot be written
    # outside imgs_extract_dir. Older interpreters fall back to plain extraction.
    if hasattr(tarfile, 'data_filter'):
      imgs_tar.extractall(path=imgs_extract_dir, filter='data')
    else:
      imgs_tar.extractall(path=imgs_extract_dir)
  print(f"✅ Decompression complete. Extracted images and labels saved in: {imgs_extract_dir}")

🚧 Downloading 50k compressed validation images from Imagenet2012 dataset from Hugging Face Hub into ./datasets_data/imagenet2012_compressed ...


ILSVRC2012.tar.gz:   0%|          | 0.00/6.67G [00:00<?, ?B/s]

✅ 50k compressed validation images from ImageNet2012 saved to: ./datasets_data/imagenet2012_compressed/ILSVRC2012.tar.gz

🚧 Starting decompression from ./datasets_data/imagenet2012_compressed/ILSVRC2012.tar.gz into ./datasets_data/imagenet2012/ 
✅ Decompression complete. Extracted images and labels saved in: ./datasets_data/imagenet2012/


In [7]:
def pre_process_images_for_quantized_models(input_images, 
                                            model: "tf.lite.Interpreter", 
                                            quantization_type: str):
  """
  Quantize images to the integer input type of a quantized TFLite model.

  The scale and zero point are read from the first input tensor of `model`.
  For 'int8' and 'uint8' every value is mapped to
  round(value / scale + zero_point) and saturated to the range of the target
  type, so out-of-range values clamp instead of wrapping around.

  Args:
    input_images: numpy.ndarray
      The input images to be processed. The array itself is not modified.
    model: tf.lite.Interpreter
      The model to be used for retrieving quantization parameters.
    quantization_type: str ["int8", "uint8", "int16", "fp32"]
      The quantization type to be used for quantizing the processed images.
      'int16' and 'fp32' return `input_images` unchanged.

  Returns:
    numpy.ndarray
      The images quantized to the specified quantization type, or the
      original array for 'int16' / 'fp32'.

  Raises:
    TypeError: if `input_images` is not a numpy.ndarray.
    ValueError: if `quantization_type` is not supported, or if the model input
      reports a scale of 0 while an integer quantization was requested.
  """
  if not isinstance(input_images, np.ndarray):
    raise TypeError("dataset images expected to be of type numpy.ndarray")

  input_details = model.get_input_details()[0]
  input_scale, input_zero_point = input_details["quantization"]

  if quantization_type == 'int16' or quantization_type == 'fp32':
    return input_images
  if quantization_type == 'int8':
    target_dtype = np.int8
  elif quantization_type == 'uint8':
    target_dtype = np.uint8
  else:
    raise ValueError("quantization type not supported")

  # A scale of 0 would turn the division below into inf/nan garbage.
  if input_scale == 0:
    raise ValueError("model input tensor reports a quantization scale of 0")

  limits = np.iinfo(target_dtype)
  # The division allocates a fresh array; the remaining steps reuse it in place
  # so that only one temporary of the dataset's size is alive at a time.
  scaled = np.asarray(input_images / input_scale)
  scaled += input_zero_point
  # Round to nearest instead of truncating toward zero, then saturate to the
  # representable range before narrowing the dtype.
  np.rint(scaled, out=scaled)
  np.clip(scaled, limits.min, limits.max, out=scaled)
  return scaled.astype(target_dtype)

In [3]:
# Create the parent directory of every upper-case string constant in `config`
# that contains a path separator.
for constant_name in filter(str.isupper, dir(config)):
  constant_value = getattr(config, constant_name)
  if not isinstance(constant_value, str):
    continue
  looks_like_path = "/" in constant_value or "\\" in constant_value
  if not looks_like_path:
    continue
  parent_dir = os.path.dirname(constant_value)
  # dirname is empty for a bare file name; there is nothing to create then.
  if parent_dir:
    os.makedirs(parent_dir, exist_ok=True)

In [4]:
def save_imagenet2012_labels():
  """
  Save the module-level `validation_labels` with np.save to
  config.VALIDATION_LABELS_PATH.
  """
  destination = config.VALIDATION_LABELS_PATH
  np.save(destination, validation_labels)

def save_first_2k_imagenet2012_labels():
  """
  Save the first 2000 entries of the module-level `validation_labels` with
  np.save to config.VALIDATION_LABELS_2K_PATH.
  """
  destination = config.VALIDATION_LABELS_2K_PATH
  np.save(destination, validation_labels[:2000])

def save_first_500_imagenet2012_labels():
  """
  Save the first 500 entries of the module-level `validation_labels` with
  np.save to config.VALIDATION_LABELS_500_PATH.
  """
  destination = config.VALIDATION_LABELS_500_PATH
  np.save(destination, validation_labels[:500])


# Load with mode='only_labels' and keep just the validation labels; the other
# three returned values are discarded.
(
  (_, _),
  (_, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='only_labels')

# Write the full label set, then its 2000- and 500-entry prefixes.
save_imagenet2012_labels()
save_first_2k_imagenet2012_labels()
save_first_500_imagenet2012_labels()

In [None]:
"""
  Save the Imagenet2012 pre-processed and quantized images uint8, int8 for
  ResNet50 quantized models, in the specified paths.
"""

def save_uint8_imagenet2012_x_resnet50_data():
  """
  Quantize the module-level `validation_images` as 'uint8' using the TFLite
  model at config.RESNET50_U8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.RESNET50_U8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'uint8')
  full_set_path = config.RESNET50_U8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.RESNET50_U8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
def save_int8_imagenet2012_x_resnet50_data():
  """
  Quantize the module-level `validation_images` as 'int8' using the TFLite
  model at config.RESNET50_I8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.RESNET50_I8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'int8')
  full_set_path = config.RESNET50_I8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.RESNET50_I8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
# Load the data with mode='resnet50'. The two save functions called below read
# the module-level `validation_images` assigned here.
(
  (training_images, training_labels),
  (validation_images, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='resnet50')

# uint8 export first, then int8.
save_uint8_imagenet2012_x_resnet50_data()
save_int8_imagenet2012_x_resnet50_data()

In [None]:
"""
  Save the Imagenet2012 pre-processed and quantized images uint8, int8 for
  ResNet152 quantized models, in the specified paths.
"""

def save_uint8_imagenet2012_x_resnet152_data():
  """
  Quantize the module-level `validation_images` as 'uint8' using the TFLite
  model at config.RESNET152_U8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.RESNET152_U8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'uint8')
  full_set_path = config.RESNET152_U8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.RESNET152_U8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
def save_int8_imagenet2012_x_resnet152_data():
  """
  Quantize the module-level `validation_images` as 'int8' using the TFLite
  model at config.RESNET152_I8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.RESNET152_I8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'int8')
  full_set_path = config.RESNET152_I8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.RESNET152_I8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
# Load the data with mode='resnet152'. The two save functions called below read
# the module-level `validation_images` assigned here.
(
  (training_images, training_labels),
  (validation_images, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='resnet152')

# uint8 export first, then int8.
save_uint8_imagenet2012_x_resnet152_data()
save_int8_imagenet2012_x_resnet152_data()

I0000 00:00:1738592138.501825 3116630 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14408 MB memory:  -> device: 0, name: NVIDIA RTX A4000, pci bus id: 0000:01:00.0, compute capability: 8.6
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
RESNET152 uint8 evaluation: 100%|██████████| 10000/10000 [07:25<00:00, 22.45it/s]
RESNET152 uint8 evaluation: 100%|██████████| 2000/2000 [01:29<00:00, 22.43it/s]
RESNET152 uint8 evaluation: 100%|██████████| 500/500 [00:22<00:00, 22.45it/s]
RESNET152 int8 evaluation: 100%|██████████| 10000/10000 [07:24<00:00, 22.50it/s]
RESNET152 int8 evaluation: 100%|██████████| 2000/2000 [01:28<00:00, 22.51it/s]
RESNET152 int8 evaluation: 100%|██████████| 500/500 [00:22<00:00, 22.48it/s]
RESNET152 int16 evaluation: 100%|██████████| 500/500 [15:37<00:00,  1.88s/it]


In [None]:

"""
  Save the Imagenet2012 pre-processed and quantized images uint8, int8 for
  VGG16 quantized models, in the specified paths.
"""

def save_uint8_imagenet2012_x_vgg16_data():
  """
  Quantize the module-level `validation_images` as 'uint8' using the TFLite
  model at config.VGG16_U8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.VGG16_U8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'uint8')
  full_set_path = config.VGG16_U8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.VGG16_U8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])

def save_int8_imagenet2012_x_vgg16_data():
  """
  Quantize the module-level `validation_images` as 'int8' using the TFLite
  model at config.VGG16_I8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.VGG16_I8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'int8')
  full_set_path = config.VGG16_I8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.VGG16_I8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])

# Load the data with mode='vgg16'. The two save functions called below read
# the module-level `validation_images` assigned here.
(
  (training_images, training_labels),
  (validation_images, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='vgg16')

# uint8 export first, then int8.
save_uint8_imagenet2012_x_vgg16_data()
save_int8_imagenet2012_x_vgg16_data()

VGG16 int16 evaluation: 100%|██████████| 500/500 [21:04<00:00,  2.53s/it]


In [None]:

"""
  Save the imagenet2012 pre-processed and quantized images uint8, int8 for
  MobileNetV1 quantized models, in the specified paths.
"""
def save_uint8_imagenet2012_x_mobilenet_data():
  """
  Quantize the module-level `validation_images` as 'uint8' using the TFLite
  model at config.MOBILENET_U8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.MOBILENET_U8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'uint8')
  full_set_path = config.MOBILENET_U8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.MOBILENET_U8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
 
def save_int8_imagenet2012_x_mobilenet_data():
  """
  Quantize the module-level `validation_images` as 'int8' using the TFLite
  model at config.MOBILENET_I8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.MOBILENET_I8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'int8')
  full_set_path = config.MOBILENET_I8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.MOBILENET_I8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
# Load the data with mode='mobilenet'. The two save functions called below read
# the module-level `validation_images` assigned here.
(
  (training_images, training_labels),
  (validation_images, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='mobilenet')

# uint8 export first, then int8.
save_uint8_imagenet2012_x_mobilenet_data()
save_int8_imagenet2012_x_mobilenet_data()

2025-02-04 13:46:02.839176: W tensorflow/core/kernels/data/prefetch_autotuner.cc:52] Prefetch autotuner tried to allocate 6021160192 bytes after encountering the first element of size 6021160192 bytes.This already causes the autotune ram budget to be exceeded. To stay within the ram budget, either increase the ram budget or reduce element size
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
MOBILENET uint8 evaluation: 100%|██████████| 500/500 [00:07<00:00, 63.75it/s]


In [None]:
"""
  Save the imagenet2012 pre-processed and quantized images uint8, int8 for
  MobilenetV2 quantized models, in the specified paths.
"""
def save_uint8_imagenet2012_x_mobilenetV2_data():
  """
  Quantize the module-level `validation_images` as 'uint8' using the TFLite
  model at config.MOBILENETV2_U8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.MOBILENETV2_U8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'uint8')
  full_set_path = config.MOBILENETV2_U8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.MOBILENETV2_U8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
def save_int8_imagenet2012_x_mobilenetV2_data():
  """
  Quantize the module-level `validation_images` as 'int8' using the TFLite
  model at config.MOBILENETV2_I8_MODEL_PATH, then save the full result and its
  first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.MOBILENETV2_I8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'int8')
  full_set_path = config.MOBILENETV2_I8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.MOBILENETV2_I8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
  
# Load the data with mode='mobilenetv2'. The two save functions called below read
# the module-level `validation_images` assigned here.
(
  (training_images, training_labels),
  (validation_images, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='mobilenetv2')

# uint8 export first, then int8.
save_uint8_imagenet2012_x_mobilenetV2_data()
save_int8_imagenet2012_x_mobilenetV2_data()

I0000 00:00:1738335752.090661 3085311 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14308 MB memory:  -> device: 1, name: NVIDIA RTX A4000, pci bus id: 0000:05:00.0, compute capability: 8.6
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
MOBILENETV2 uint8 evaluation: 100%|██████████| 10000/10000 [00:30<00:00, 329.59it/s]
MOBILENETV2 uint8 evaluation: 100%|██████████| 2000/2000 [00:06<00:00, 330.46it/s]
MOBILENETV2 int8 evaluation: 100%|██████████| 10000/10000 [00:29<00:00, 343.11it/s]
MOBILENETV2 int8 evaluation: 100%|██████████| 2000/2000 [00:05<00:00, 345.42it/s]
MOBILENETV2 int16 evaluation: 100%|██████████| 2000/2000 [02:57<00:00, 11.25it/s]


In [None]:
"""
  Save the imagenet2012 pre-processed and quantized images uint8, int8 for
  EfficientnetB0 quantized models, in the specified paths.
"""
def save_uint8_imagenet2012_x_efficientnetB0_data():
  """
  Quantize the module-level `validation_images` as 'uint8' using the TFLite
  model at config.EFFICIENTNETB0_U8_MODEL_PATH, then save the full result and
  its first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.EFFICIENTNETB0_U8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'uint8')
  full_set_path = config.EFFICIENTNETB0_U8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.EFFICIENTNETB0_U8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
def save_int8_imagenet2012_x_efficientnetB0_data():
  """
  Quantize the module-level `validation_images` as 'int8' using the TFLite
  model at config.EFFICIENTNETB0_I8_MODEL_PATH, then save the full result and
  its first 2000 entries with np.save.
  """
  interpreter = tf.lite.Interpreter(model_path=config.EFFICIENTNETB0_I8_MODEL_PATH)
  interpreter.allocate_tensors()
  quantized_images = pre_process_images_for_quantized_models(
    validation_images, interpreter, 'int8')
  full_set_path = config.EFFICIENTNETB0_I8_VALIDATION_SET_PREPROCESSED_PATH
  np.save(full_set_path, quantized_images)
  first_2k_path = config.EFFICIENTNETB0_I8_2K_VALIDATION_SET_PREPROCESSED_PATH
  np.save(first_2k_path, quantized_images[:2000])
  
# Load the data with mode='efficientnetb0'. The two save functions called below
# read the module-level `validation_images` assigned here.
(
  (training_images, training_labels),
  (validation_images, validation_labels),
) = ImageNetDataset.load_validation_dataset(mode='efficientnetb0')

# uint8 export first, then int8.
save_uint8_imagenet2012_x_efficientnetB0_data()
save_int8_imagenet2012_x_efficientnetB0_data()

I0000 00:00:1738751859.385158 3185753 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14370 MB memory:  -> device: 1, name: NVIDIA RTX A4000, pci bus id: 0000:05:00.0, compute capability: 8.6
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
EFFICIENTNETB0 uint8 evaluation: 100%|██████████| 500/500 [00:10<00:00, 46.67it/s]
EFFICIENTNETB0 int8 evaluation: 100%|██████████| 500/500 [00:08<00:00, 58.03it/s]
EFFICIENTNETB0 int16 evaluation: 100%|██████████| 500/500 [02:13<00:00,  3.76it/s]
