In [1]:
from datasets import load_from_disk
import pandas as pd
from transformers import pipeline
from tqdm import tqdm
import os
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import json

In [2]:
HF_DATASET_DIR = "/workspaces/ASE-Model-Retrieval/data/imagenet/.cache/hf_datasets"
MODELS_CSV_PATH = "/workspaces/ASE-Model-Retrieval/models/model-list.csv"
LABEL_MAP_PATH = "/workspaces/ASE-Model-Retrieval/data/imagenet/label_mapping.json"

In [3]:
hf_dataset_dict = load_from_disk(HF_DATASET_DIR)
hf_dataset_dict

DatasetDict({
    subclass_Appenzeller-Ibizan_hound-Scottish_deerhound-West_Highland_white_terrier-baseball: Dataset({
        features: ['image', 'label'],
        num_rows: 250
    })
    subclass_Sealyham_terrier-bloodhound-dipper-tench-white_wolf: Dataset({
        features: ['image', 'label'],
        num_rows: 250
    })
    subclass_German_short_haired_pointer-Leonberg-Maltese-husky-pickup_truck: Dataset({
        features: ['image', 'label'],
        num_rows: 250
    })
    subclass_beer_bottle-giant_schnauzer-golden_retriever-tiger-white_wolf: Dataset({
        features: ['image', 'label'],
        num_rows: 250
    })
    subclass_Norwegian_elkhound-Rhodesian_ridgeback-fire_engine-indigo-toy_terrier: Dataset({
        features: ['image', 'label'],
        num_rows: 250
    })
    subclass_Irish_terrier-Shetland_sheepdog-dingo-miniature_pinscher-softcoated_wheaten_terrier: Dataset({
        features: ['image', 'label'],
        num_rows: 250
    })
    subclass_Scottie-golden

In [4]:
models_df = pd.read_csv(MODELS_CSV_PATH)
models_df

Unnamed: 0,model-id,hf-link,base-model,dataset
0,timm/mobilenetv3_small_100.lamb_in1k,https://huggingface.co/timm/mobilenetv3_small_...,,
1,timm/resnet50.a1_in1k,https://huggingface.co/timm/resnet50.a1_in1k,,
2,timm/vit_base_patch16_clip_224.openai,https://huggingface.co/timm/vit_base_patch16_c...,,
3,timm/inception_v3.tv_in1k,https://huggingface.co/timm/inception_v3.tv_in1k,,
4,timm/swinv2_large_window12to16_192to256.ms_in2...,https://huggingface.co/timm/swinv2_large_windo...,,
...,...,...,...,...
95,timm/focalnet_small_lrf.ms_in1k,https://huggingface.co/timm/focalnet_small_lrf...,,
96,timm/darknetaa53.c2ns_in1k,https://huggingface.co/timm/darknetaa53.c2ns_in1k,,
97,timm/tiny_vit_21m_384.dist_in22k_ft_in1k,https://huggingface.co/timm/tiny_vit_21m_384.d...,,
98,timm/regnetz_c16.ra3_in1k,https://huggingface.co/timm/regnetz_c16.ra3_in1k,,


In [11]:
def batch_classification(images, model_id, gpu_id, batch_size=128):
    print(f"Batch classification for {model_id}.")
    classifier = pipeline(model=model_id, device=gpu_id)

    with open(LABEL_MAP_PATH, "r") as f:
        label_map = json.load(f)

    predictions = []

    for img_path, pred in zip(
        images, tqdm(classifier(images, batch_size=batch_size), total=len(images))
    ):
        print(f"Image Path: {img_path}")  # Debugging print
        print(f"Raw Prediction: {pred}")  # Debugging print
        corrected_pred = [
            {"label": label_map.get(p["label"], p["label"]), "score": p["score"]}
            for p in pred
        ]
        predictions.append(
            {"image": os.path.basename(img_path), "prediction": corrected_pred}
        )

    return predictions

In [12]:
test_images = hf_dataset_dict["subclass_Afghan_hound-Egyptian-Lhasa-groenendael-mud"][
    "image"
]
model_id = "timm/vit_base_patch16_clip_224.openai"

predictions = batch_classification(test_images, model_id, gpu_id=0)
predictions


Batch classification for timm/vit_base_patch16_clip_224.openai.


Some weights of TimmWrapperModel were not initialized from the model checkpoint at timm/vit_base_patch16_clip_224.openai and are newly initialized: ['timm_model.blocks.0.attn.proj.bias', 'timm_model.blocks.0.attn.proj.weight', 'timm_model.blocks.0.attn.qkv.bias', 'timm_model.blocks.0.attn.qkv.weight', 'timm_model.blocks.0.mlp.fc1.bias', 'timm_model.blocks.0.mlp.fc1.weight', 'timm_model.blocks.0.mlp.fc2.bias', 'timm_model.blocks.0.mlp.fc2.weight', 'timm_model.blocks.0.norm1.bias', 'timm_model.blocks.0.norm1.weight', 'timm_model.blocks.0.norm2.bias', 'timm_model.blocks.0.norm2.weight', 'timm_model.blocks.1.attn.proj.bias', 'timm_model.blocks.1.attn.proj.weight', 'timm_model.blocks.1.attn.qkv.bias', 'timm_model.blocks.1.attn.qkv.weight', 'timm_model.blocks.1.mlp.fc1.bias', 'timm_model.blocks.1.mlp.fc1.weight', 'timm_model.blocks.1.mlp.fc2.bias', 'timm_model.blocks.1.mlp.fc2.weight', 'timm_model.blocks.1.norm1.bias', 'timm_model.blocks.1.norm1.weight', 'timm_model.blocks.1.norm2.bias', 'ti

Image Path: /workspaces/ASE-Model-Retrieval/data/imagenet/.cache/subclass_datasets/Afghan_hound-Egyptian-Lhasa-groenendael-mud/ILSVRC2012_val_00044644_n02088094.JPEG
Raw Prediction: [[[0.3550671935081482, -0.008811945095658302, 2.3144185543060303, 1.452605962753296, -2.941840171813965, -0.10323367267847061, -0.07401008903980255, 0.4826471507549286, -0.29685357213020325, -0.7688724398612976, -0.5645765066146851, 0.015666181221604347, -0.6953496932983398, -0.7283363342285156, 0.8738304376602173, -1.0661613941192627, 0.22611495852470398, 0.019671980291604996, -0.5307816863059998, -0.4820336401462555, -0.2602290213108063, -0.6556744575500488, -0.5446066856384277, -0.5066070556640625, -1.3017550706863403, -0.36254385113716125, -0.06081005558371544, 0.020424237474799156, 0.13680414855480194, -0.012482898309826851, 2.1670238971710205, -0.04915987700223923, 0.6250429749488831, 1.282014012336731, 0.4632066786289215, -2.1047801971435547, 0.43582314252853394, 0.8966407179832458, 0.179470777511596

TypeError: list indices must be integers or slices, not str

In [12]:
def evaluate_model(predictions, dataset):
    ground_truth = {os.path.basename(item["image"]): item["label"] for item in dataset}
    y_true = []
    y_pred = []
    y_pred_top5 = []

    for entry in predictions:
        img_name = entry["image"]
        if img_name in ground_truth:
            true_label = ground_truth[img_name]
            pred_labels = [pred["label"] for pred in entry["prediction"]]

            y_true.append(true_label)
            y_pred.append(pred_labels[0])
            y_pred_top5.append(pred_labels)

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    top1_accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted"
    )

    top5_correct = sum(1 for true, preds in zip(y_true, y_pred_top5) if true in preds)
    top5_accuracy = top5_correct / len(y_true)

    class_report = classification_report(y_true, y_pred, output_dict=True)

    return {
        "Top-1 Accuracy": top1_accuracy,
        "Top-5 Accuracy": top5_accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1,
        "Classification Report": class_report,
    }

In [2]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

In [13]:
def process_model(model_id):
    results = []
    for dataset_name, dataset in tqdm(hf_dataset_dict.items(), desc=f"Processing {model_id}", leave=True):
        print(f"Evaluating {model_id} on {dataset_name}")
        predictions = batch_classification(dataset["image"], model_id, gpu_id=0)
        result = evaluate_model(predictions, dataset)
        results_df = pd.DataFrame([{
            "model_id": model_id,
            "dataset": dataset_name,
            **result
        }])

        file_path = f"evaluation_results/{dataset_name}.csv"
        results_df.to_csv(file_path, index=False)
        print(f"Results saved: {file_path}")

    return results

os.makedirs("evaluation_results", exist_ok=True)
models_df["evaluation_results"] = models_df["model-id"].apply(process_model)
log_results = [entry for sublist in models_df["evaluation_results"] for entry in sublist]
results_df = pd.DataFrame(log_results)
results_df.to_csv("model_evaluation_log.csv", index=False)

Processing timm/mobilenetv3_small_100.lamb_in1k:   0%|          | 0/312 [00:00<?, ?it/s]

Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_Appenzeller-Ibizan_hound-Scottish_deerhound-West_Highland_white_terrier-baseball
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
100%|█████████▉| 249/250 [00:00<00:00, 577325.43it/s]
Processing timm/mobilenetv3_small_100.lamb_in1k:   0%|          | 1/312 [00:01<06:03,  1.17s/it]

Results saved: evaluation_results/subclass_Appenzeller-Ibizan_hound-Scottish_deerhound-West_Highland_white_terrier-baseball.csv
Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_Sealyham_terrier-bloodhound-dipper-tench-white_wolf
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
100%|█████████▉| 249/250 [00:00<00:00, 608614.04it/s]
Processing timm/mobilenetv3_small_100.lamb_in1k:   1%|          | 2/312 [00:08<24:04,  4.66s/it]

Results saved: evaluation_results/subclass_Sealyham_terrier-bloodhound-dipper-tench-white_wolf.csv
Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_German_short_haired_pointer-Leonberg-Maltese-husky-pickup_truck
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
100%|█████████▉| 249/250 [00:00<00:00, 522713.56it/s]
Processing timm/mobilenetv3_small_100.lamb_in1k:   1%|          | 3/312 [00:10<17:53,  3.48s/it]

Results saved: evaluation_results/subclass_German_short_haired_pointer-Leonberg-Maltese-husky-pickup_truck.csv
Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_beer_bottle-giant_schnauzer-golden_retriever-tiger-white_wolf
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
100%|█████████▉| 249/250 [00:00<00:00, 453684.49it/s]
Processing timm/mobilenetv3_small_100.lamb_in1k:   1%|▏         | 4/312 [00:12<15:13,  2.97s/it]

Results saved: evaluation_results/subclass_beer_bottle-giant_schnauzer-golden_retriever-tiger-white_wolf.csv
Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_Norwegian_elkhound-Rhodesian_ridgeback-fire_engine-indigo-toy_terrier
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
100%|█████████▉| 249/250 [00:00<00:00, 494498.91it/s]
Processing timm/mobilenetv3_small_100.lamb_in1k:   2%|▏         | 5/312 [00:14<13:51,  2.71s/it]

Results saved: evaluation_results/subclass_Norwegian_elkhound-Rhodesian_ridgeback-fire_engine-indigo-toy_terrier.csv
Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_Irish_terrier-Shetland_sheepdog-dingo-miniature_pinscher-softcoated_wheaten_terrier
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
100%|█████████▉| 249/250 [00:00<00:00, 582477.24it/s]
Processing timm/mobilenetv3_small_100.lamb_in1k:   2%|▏         | 6/312 [00:16<12:50,  2.52s/it]

Results saved: evaluation_results/subclass_Irish_terrier-Shetland_sheepdog-dingo-miniature_pinscher-softcoated_wheaten_terrier.csv
Evaluating timm/mobilenetv3_small_100.lamb_in1k on subclass_Scottie-golden_retriever-leatherback-miniature_pinscher-mud
Batch classification for timm/mobilenetv3_small_100.lamb_in1k.


Device set to use cuda:0
Processing timm/mobilenetv3_small_100.lamb_in1k:   2%|▏         | 6/312 [00:19<16:14,  3.18s/it]


KeyboardInterrupt: 

In [1]:
import os
import pandas as pd

In [2]:
def clean_up_partial_results(model_id):
    eval_results_dir = "/workspaces/ASE-Model-Retrieval/models/evaluation_results"
    for file_name in os.listdir(eval_results_dir):
        file_path = os.path.join(eval_results_dir, file_name)
        if file_name.endswith(".csv"):
            df = pd.read_csv(file_path)
            if "model_id" in df.columns and model_id in df["model_id"].values:
                print(f"Removing previous results for {model_id} in {file_path}")
                df = df[df["model_id"] != model_id]
                df.to_csv(file_path, index=False)
                print(f"Cleaned: {file_path}")

In [6]:
clean_up_partial_results("timm/mobilenetv2_050.lamb_in1k")

Removing previous results for timm/mobilenetv2_050.lamb_in1k in /workspaces/ASE-Model-Retrieval/models/evaluation_results/subclass_Sealyham_terrier-bloodhound-dipper-tench-white_wolf.csv
Cleaned: /workspaces/ASE-Model-Retrieval/models/evaluation_results/subclass_Sealyham_terrier-bloodhound-dipper-tench-white_wolf.csv
Removing previous results for timm/mobilenetv2_050.lamb_in1k in /workspaces/ASE-Model-Retrieval/models/evaluation_results/subclass_Appenzeller-Ibizan_hound-Scottish_deerhound-West_Highland_white_terrier-baseball.csv
Cleaned: /workspaces/ASE-Model-Retrieval/models/evaluation_results/subclass_Appenzeller-Ibizan_hound-Scottish_deerhound-West_Highland_white_terrier-baseball.csv
Removing previous results for timm/mobilenetv2_050.lamb_in1k in /workspaces/ASE-Model-Retrieval/models/evaluation_results/subclass_beer_bottle-giant_schnauzer-golden_retriever-tiger-white_wolf.csv
Cleaned: /workspaces/ASE-Model-Retrieval/models/evaluation_results/subclass_beer_bottle-giant_schnauzer-gol