In [1]:
import numpy as np
import torch
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, classification_report, f1_score
from transformers import ViTForImageClassification, ViTImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
dataset = load_dataset("uoft-cs/cifar10", split="test")

## ViT classification performance

In [4]:
model_id = "nateraw/vit-base-patch16-224-cifar10"
model = ViTForImageClassification.from_pretrained(model_id)
model.to(device)
model.eval()

processor = ViTImageProcessor.from_pretrained(model_id)

In [5]:
def preprocess_function(examples):
    images = [img.convert("RGB") for img in examples["img"]]
    inputs = processor(images, return_tensors="pt")
    inputs["labels"] = examples["label"]
    return inputs

tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    batch_size=32,
    remove_columns=dataset.column_names,
)

In [6]:
tokenized_dataset.set_format("torch")
test_dataloader = DataLoader(tokenized_dataset, batch_size=64, shuffle=False)

In [7]:
all_preds = []
all_labels = []

for batch in tqdm(test_dataloader):
    pixel_values = batch['pixel_values'].to(device)
    labels = batch['labels']

    with torch.no_grad():
        outputs = model(pixel_values)
        logits = outputs.logits

    predictions = torch.argmax(logits, dim=-1).cpu().numpy()

    all_preds.extend(predictions)
    all_labels.extend(labels.numpy())

  0%|          | 0/157 [00:00<?, ?it/s]

In [8]:
y_true = np.array(all_labels)
y_pred = np.array(all_preds)

accuracy = accuracy_score(y_true, y_pred)
print(f'accuracy: {accuracy}')

f1 = f1_score(y_true, y_pred, average='weighted')
print(f"weighted f1: {f1}")

target_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
report = classification_report(y_true, y_pred, target_names=target_names, digits=4)
print(report)

accuracy: 0.9852
weighted f1: 0.9851943763781306
              precision    recall  f1-score   support

    airplane     0.9939    0.9830    0.9884      1000
  automobile     0.9783    0.9910    0.9846      1000
        bird     0.9910    0.9940    0.9925      1000
         cat     0.9735    0.9550    0.9642      1000
        deer     0.9920    0.9870    0.9895      1000
         dog     0.9607    0.9790    0.9698      1000
        frog     0.9901    0.9980    0.9940      1000
       horse     0.9950    0.9910    0.9930      1000
        ship     0.9930    0.9930    0.9930      1000
       truck     0.9849    0.9810    0.9830      1000

    accuracy                         0.9852     10000
   macro avg     0.9852    0.9852    0.9852     10000
weighted avg     0.9852    0.9852    0.9852     10000



## MobileNetv2 classification performance

In [9]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

In [10]:
model_id = "AiresPucrs/Mobilenet-v2-CIFAR-10"
model = AutoModelForImageClassification.from_pretrained(model_id)
model.to(device)
model.eval()

processor = AutoImageProcessor.from_pretrained(model_id)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [11]:
def preprocess_function(examples):
    images = [img.convert("RGB") for img in examples["img"]]
    inputs = processor(images, return_tensors="pt")
    inputs["labels"] = examples["label"]
    return inputs

tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    batch_size=32,
    remove_columns=dataset.column_names,
)

In [12]:
tokenized_dataset.set_format("torch")
test_dataloader = DataLoader(tokenized_dataset, batch_size=64, shuffle=False)

In [13]:
all_preds = []
all_labels = []

for batch in tqdm(test_dataloader):
    pixel_values = batch['pixel_values'].to(device)
    labels = batch['labels']

    with torch.no_grad():
        outputs = model(pixel_values)
        logits = outputs.logits

    predictions = torch.argmax(logits, dim=-1).cpu().numpy()

    all_preds.extend(predictions)
    all_labels.extend(labels.numpy())

  0%|          | 0/157 [00:00<?, ?it/s]

In [14]:
y_true = np.array(all_labels)
y_pred = np.array(all_preds)

accuracy = accuracy_score(y_true, y_pred)
print(f'accuracy: {accuracy}')

f1 = f1_score(y_true, y_pred, average='weighted')
print(f"weighted f1: {f1}")

target_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
report = classification_report(y_true, y_pred, target_names=target_names, digits=4)
print(report)

accuracy: 0.9185
weighted f1: 0.918531871509154
              precision    recall  f1-score   support

    airplane     0.9456    0.9040    0.9243      1000
  automobile     0.9267    0.9740    0.9498      1000
        bird     0.8557    0.9370    0.8945      1000
         cat     0.8478    0.8520    0.8499      1000
        deer     0.9442    0.8800    0.9110      1000
         dog     0.8862    0.8800    0.8831      1000
        frog     0.9361    0.9520    0.9440      1000
       horse     0.9600    0.9360    0.9478      1000
        ship     0.9172    0.9750    0.9452      1000
       truck     0.9803    0.8950    0.9357      1000

    accuracy                         0.9185     10000
   macro avg     0.9200    0.9185    0.9185     10000
weighted avg     0.9200    0.9185    0.9185     10000

