In [1]:
# Optional environment bootstrap: the google.colab import is used purely as a
# probe (it is never referenced afterwards, hence the noqa). When the import
# succeeds, the notebook's dependencies are installed into the running kernel.
try:
    import google.colab  # noqa: F401

    # %pip (rather than !pip) targets the environment of the active kernel.
    %pip install -q dataeval[onnx] maite-datasets opencv-python-headless
except Exception:
    # Best-effort by design: if the probe import fails, the packages are
    # assumed to be installed already and the cell does nothing.
    pass

In [2]:
import os

import cv2
import numpy as np
import requests
from maite_datasets.object_detection import VOCDetection

from dataeval import Embeddings
from dataeval.encoders import OnnxEncoder
from dataeval.selection import Limit, Select
from dataeval.utils.onnx import to_encoding_model

In [3]:
def download_onnx_model(url, save_path, timeout=60):
    """Download the ONNX model to ``save_path`` unless it already exists locally.

    The payload is streamed into a temporary ``<save_path>.part`` file and only
    moved to ``save_path`` once the transfer has finished, so an interrupted
    download can never be mistaken for a complete model on the next run.

    Parameters
    ----------
    url : str
        Location of the ONNX file to fetch.
    save_path : str or os.PathLike
        Destination file. Missing parent directories are created.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection fails instead of
        hanging the notebook forever. Defaults to 60 seconds.

    Raises
    ------
    Exception
        If the server answers with a status code other than 200.
        Errors raised by ``requests`` or by the file system propagate unchanged.
    """
    if os.path.exists(save_path):
        print(f"Model already exists at {save_path}")
        return

    # On a fresh checkout the target directory may not exist yet.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print(f"Downloading model from {url}...")
    partial_path = f"{save_path}.part"
    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download model. Status code: {response.status_code}")
        try:
            with open(partial_path, "wb") as f:
                # 1 MiB chunks keep the Python-level loop short for large models.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
            # Atomic rename: save_path only ever holds a fully written file.
            os.replace(partial_path, save_path)
        except BaseException:
            # Drop the truncated file so the next run starts a clean download.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
    print("Download complete.")

In [4]:
# Download and prepare the model
# The ResNet50-v2 ONNX file comes from the onnx/models GitHub repository and is
# stored under data/; download_onnx_model returns early when the file is already there.
model_url = "https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx"
model_path = "data/resnet50-v2-7.onnx"

download_onnx_model(model_url, model_path)

# Find the embedding layer and create an in-memory model that outputs it
# to_encoding_model yields a pair: the model object (its len() is reported as a
# byte count below) and the name of the layer used as the embedding output.

encoding_model, embedding_layer = to_encoding_model(model_path)
print(f"Embedding layer: {embedding_layer}")
print(f"In-memory encoding model: ({len(encoding_model):,} bytes)")

Downloading model from https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx...


Download complete.


Embedding layer: resnetv24_flatten0_reshape0
In-memory encoding model: (102,452,438 bytes)


In [5]:
# Define transforms for ResNet50 input requirements
def preprocess(image: np.ndarray) -> np.ndarray:
    """Turn a channels-first image into a normalized 224x224 channels-first array.

    Steps: move channels last (CHW -> HWC) so OpenCV can resize the image,
    resize to 224x224, scale pixel values by 1/255, standardize each channel
    with a fixed mean/std, then move channels first again (HWC -> CHW).

    Parameters
    ----------
    image : np.ndarray
        Image with axes ordered (channel, height, width) and three channels.

    Returns
    -------
    np.ndarray
        float32 array with axes ordered (channel, height, width).
    """
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # cv2.resize works on height x width x channel data and takes (width, height);
    # its default interpolation is bilinear.
    pixels = cv2.resize(np.transpose(image, (1, 2, 0)), (224, 224))

    # The trailing channel axis broadcasts against the per-channel constants.
    scaled = pixels.astype(np.float32) / 255.0
    standardized = (scaled - channel_mean) / channel_std

    return np.transpose(standardized, (2, 0, 1))

In [6]:
# Load VOC dataset
# Uses the 2012 "val" image set rooted at ./data; download=True presumably lets the
# loader fetch the data when it is not present locally (confirm in maite-datasets docs).
dataset = VOCDetection(root="./data", image_set="val", year="2012", download=True)
print(f"Dataset size: {len(dataset)} images")

Dataset size: 5823 images


In [7]:
# Create the encoder with our in-memory embedding model
# `encoding_model` and `embedding_layer` are the pair returned by to_encoding_model;
# `preprocess` is the ResNet50 input transform defined earlier in this notebook.
encoder = OnnxEncoder(
    model=encoding_model,
    batch_size=16,
    transforms=preprocess,
    output_name=embedding_layer,  # Select the embedding layer's output by name
)

# The printed repr summarizes the configuration (model size, batch size, output name).
print(encoder)

OnnxEncoder(model=<102452438 bytes>, batch_size=16, output_name='resnetv24_flatten0_reshape0')


In [8]:
# Generate embeddings using the Embeddings class
# We'll use a subset for demonstration: Limit(100) restricts the selection to
# 100 images, which keeps the run short.
subset = Select(dataset, Limit(100))
embeddings = Embeddings(subset, encoder=encoder)

# Expect one row per selected image and one column per embedding dimension.
print(f"Embeddings shape: {embeddings.shape}")

[0;93m2026-02-04 08:16:24.630077382 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card0/device/vendor"[m


Embeddings shape: (100, 2048)


In [9]:
### TEST ASSERTION CELL ###
# Sanity checks on the generated embeddings: one row per selected image (the
# subset is limited to 100) and 2048 features per row. The messages report the
# actual shape so a failing run is diagnosable from the log alone.
assert embeddings.shape[0] == 100, f"expected 100 embeddings, got shape {embeddings.shape}"
assert embeddings.shape[1] == 2048, f"expected 2048-dimensional embeddings, got shape {embeddings.shape}"