# Image Preprocessing

Extracts image features from a directory of images using ResNet50,
applies preprocessing compatible with ImageNet-trained models,
and saves the features in compressed NumPy format (.npz).

## Setup and Imports

In [1]:
# Confirm environment
!conda info


     active environment : northeastern
    active env location : /home/curtis/anaconda3/envs/northeastern
            shell level : 2
       user config file : /home/curtis/.condarc
 populated config files : /home/curtis/anaconda3/.condarc
          conda version : 24.9.2
    conda-build version : 24.9.0
         python version : 3.12.7.final.0
                 solver : libmamba (default)
       virtual packages : __archspec=1=skylake
                          __conda=24.9.2=0
                          __glibc=2.39=0
                          __linux=6.6.87.2=0
                          __unix=0=0
       base environment : /home/curtis/anaconda3  (writable)
      conda av data dir : /home/curtis/anaconda3/etc/conda
  conda av metadata url : None
           channel URLs : https://repo.anaconda.com/pkgs/main/linux-64
                          https://repo.anaconda.com/pkgs/main/noarch
                          https://repo.anaconda.com/pkgs/r/linux-64
                          https://r

In [2]:
# Setup autoreload
%load_ext autoreload
%autoreload 2

In [3]:
import os
import numpy as np
from tqdm.notebook import tqdm
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model
from vtt.config import IMAGENET_MEAN, IMAGENET_STD, IMAGE_SIZE
from vtt.utils import detect_and_set_device

2025-07-09 16:14:50.263618: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-09 16:14:50.273192: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752092090.289496   92687 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752092090.294728   92687 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752092090.305893   92687 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [4]:
# Detect and set up GPU or use CPU
device_used = detect_and_set_device()
print(f"TensorFlow is configured to use: {device_used}")

No GPU devices found despite TensorFlow being built with CUDA. Using CPU.
TensorFlow is configured to use: CPU


2025-07-09 16:14:52.356394: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


## Process Images to Extract Features

In [5]:
def preprocess_image(img_path: str) -> np.ndarray:
    """
    Preprocess a single image for ResNet50.

    Args:
        img_path (str): Path to the image file.

    Returns:
        np.ndarray: Preprocessed image tensor with shape (1, 224, 224, 3).
    """
    img = image.load_img(img_path, target_size=IMAGE_SIZE)
    x = image.img_to_array(img) / 255.0
    mean = np.array(IMAGENET_MEAN)
    std = np.array(IMAGENET_STD)
    x = (x - mean) / std
    return np.expand_dims(x, axis=0)

In [6]:
def extract_features_from_directory(image_dir: str) -> dict:
    """
    Extract ResNet50 features for all images in a directory.

    Args:
        image_dir (str): Path to directory containing images.

    Returns:
        dict: Mapping from image filename to 2048-dim feature vector.
    """
    print(f"Extracting features from images in: {image_dir}")

    # Load ResNet50 model (pretrained on ImageNet)
    base_model = ResNet50(weights="imagenet")
    feature_extractor = Model(
        inputs=base_model.input,
        outputs=base_model.get_layer("avg_pool").output
    )

    features = {}
    image_names = sorted(os.listdir(image_dir))

    for img_name in tqdm(image_names, desc="Processing images"):
        img_path = os.path.join(image_dir, img_name)
        try:
            img_tensor = preprocess_image(img_path)
            feature = feature_extractor.predict(img_tensor, verbose=0)
            features[img_name] = feature.squeeze()
        except Exception as e:
            print(f"Skipping {img_name} due to error: {e}")

    return features

In [7]:
def save_features(features: dict, output_path: str) -> None:
    """
    Save extracted features to a compressed .npz file.

    Args:
        features (dict): Dictionary of image features.
        output_path (str): Path to save the .npz file.
    """
    np.savez_compressed(output_path, **features)
    print(f"Saved features to {output_path}")

In [8]:
# Define paths
# images_dir = "../../data/flickr8k_images/"        # Full directory, only run on GPU
images_dir = "../../data/flickr8k_images/Images"  # Subset of 10 images for proof-of-concept
output_file = "../../data/flickr8k_features.npz"

# Extract and save features
features = extract_features_from_directory(images_dir)
save_features(features, output_file)

Extracting features from images in: ../../data/flickr8k_images/Images


Processing images:   0%|          | 0/10 [00:00<?, ?it/s]

Saved features to ../../data/flickr8k_features.npz


In [9]:
# Load saved features
loaded_features = np.load(output_file)
features = {k: loaded_features[k] for k in loaded_features.files}
features

{'10815824_2997e03d76.jpg': array([0.01160281, 0.        , 0.00789151, ..., 6.9319086 , 0.35592726,
        0.        ], dtype=float32),
 '12830823_87d2654e31.jpg': array([0.2208504 , 0.0161181 , 0.24042735, ..., 6.5869427 , 0.        ,
        0.        ], dtype=float32),
 '17273391_55cfc7d3d4.jpg': array([0.        , 0.38793966, 0.23146243, ..., 7.3355575 , 0.7991518 ,
        0.        ], dtype=float32),
 '19212715_20476497a3.jpg': array([0.       , 0.       , 0.       , ..., 3.5574658, 0.       ,
        0.       ], dtype=float32),
 '23445819_3a458716c1.jpg': array([0.00965334, 0.        , 0.50003254, ..., 3.7650876 , 0.5753617 ,
        0.        ], dtype=float32),
 '27782020_4dab210360.jpg': array([0.02738906, 0.08970343, 0.28756386, ..., 7.796936  , 0.5479751 ,
        0.        ], dtype=float32),
 '33108590_d685bfe51c.jpg': array([0.        , 0.01866938, 0.1363944 , ..., 7.4394717 , 0.53011143,
        0.        ], dtype=float32),
 '35506150_cbdb630f4f.jpg': array([0.10631111, 