In [2]:
from itertools import islice

from datasets import load_from_disk
from PIL import Image
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Everything a reader may want to tune for this run lives in these constants.
DATASET_DIR = "/workspaces/ASE-Model-Retrieval/data/imagenet/.cache/hf_datasets"
MODEL_ID = "timm/mobilenetv3_small_100.lamb_in1k"
GPU_INDEX = 0  # pin the pipeline to the first CUDA device

# Mapping of dataset name -> dataset, read back from disk.
hf_dataset = load_from_disk(DATASET_DIR)

# Image-classification pipeline shared by every inference call below.
classifier = pipeline(task="image-classification", model=MODEL_ID, device=GPU_INDEX)

def classify_batch(batch, inference_batch_size=None):
    """Run the image classifier over one batch handed in by `Dataset.map`.

    Args:
        batch: Mapping with an "image" key; each value is passed to
            `PIL.Image.open` (presumably file paths -- confirm against the
            dataset schema).
        inference_batch_size: Number of images the pipeline sends through the
            model per forward pass. Defaults to the whole batch, so the GPU
            sees one batched call instead of one call per image.

    Returns:
        A dict with a single "predictions" key holding the classifier output
        for the images, in input order.
    """
    images = []
    for source in batch["image"]:
        # `convert` returns an independent RGB copy, so the source file can be
        # closed deterministically as soon as the copy exists.
        with Image.open(source) as opened:
            images.append(opened.convert("RGB"))

    # Nothing to classify: skip the pipeline call (and a batch size of 0).
    if not images:
        return {"predictions": []}

    # Without an explicit batch_size the pipeline processes a list one item at
    # a time, which under-utilises the GPU.
    per_pass = inference_batch_size or len(images)
    return {"predictions": classifier(images, batch_size=per_pass)}

# Accumulator filled by the inference loop: dataset name -> dataset with predictions.
all_results = dict()

Device set to use cuda:0


In [4]:
# Upper bound on how many datasets are processed in this run.
MAX_DATASETS = 10
# Number of rows `Dataset.map` hands to classify_batch per call.
MAP_BATCH_SIZE = 128

# islice stops after MAX_DATASETS entries, replacing the manual counter/break.
for dataset_name, dataset in islice(hf_dataset.items(), MAX_DATASETS):
    print(f"Running inference on {dataset_name} with {len(dataset)} images...")

    # Batched `map` calls classify_batch on chunks of rows; the resulting
    # dataset (including the returned "predictions") is stored under its name.
    all_results[dataset_name] = dataset.map(
        classify_batch, batched=True, batch_size=MAP_BATCH_SIZE
    )

Running inference on subclass_EntleBucher-German_shepherd-bluetick-croquet_ball-tench with 250 images...
Running inference on subclass_Border_terrier-Lakeland_terrier-Lhasa-Norfolk_terrier-Sussex_spaniel with 250 images...
Running inference on subclass_Border_collie-Pomeranian-cairn-kelpie-terrapin with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 64.11 examples/s]


Running inference on subclass_English_foxhound-Scottie-baseball-giant_schnauzer-minibus with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 79.66 examples/s]


Running inference on subclass_Norwich_terrier-banana-bloodhound-kite-minivan with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 79.38 examples/s]


Running inference on subclass_Norwich_terrier-Scottie-dingo-minivan-red_wolf with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 71.53 examples/s]


Running inference on subclass_Bouvier-Hungarian_pointer-jackfruit-leopard-miniature_poodle with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 78.58 examples/s]


Running inference on subclass_custard_apple-flat_coated_retriever-minibus-red_wolf-sports_car with 250 images...


Map:   0%|          | 0/250 [00:00<?, ? examples/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Map: 100%|██████████| 250/250 [00:03<00:00, 73.35 examples/s]


Running inference on subclass_Italian_greyhound-Labrador_retriever-Welsh_springer_spaniel-bloodhound-snow_leopard with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 73.76 examples/s]


Running inference on subclass_beaker-brown_bear-groenendael-keeshond-stingray with 250 images...


Map: 100%|██████████| 250/250 [00:03<00:00, 78.43 examples/s]


In [5]:
# List the name of every dataset that has stored inference results.
for result_name in all_results.keys():
    print(result_name)

subclass_EntleBucher-German_shepherd-bluetick-croquet_ball-tench
subclass_Border_terrier-Lakeland_terrier-Lhasa-Norfolk_terrier-Sussex_spaniel
subclass_Border_collie-Pomeranian-cairn-kelpie-terrapin
subclass_English_foxhound-Scottie-baseball-giant_schnauzer-minibus
subclass_Norwich_terrier-banana-bloodhound-kite-minivan
subclass_Norwich_terrier-Scottie-dingo-minivan-red_wolf
subclass_Bouvier-Hungarian_pointer-jackfruit-leopard-miniature_poodle
subclass_custard_apple-flat_coated_retriever-minibus-red_wolf-sports_car
subclass_Italian_greyhound-Labrador_retriever-Welsh_springer_spaniel-bloodhound-snow_leopard
subclass_beaker-brown_bear-groenendael-keeshond-stingray
