In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import timm
from torch.utils.data import DataLoader
import time

# ImageNet Sketch dataset (Wang, Ge et al., 2019)

In [None]:
# Download ImageNet-Sketch from Google Drive
!pip install -q -U gdown
!gdown --id 1Mj0i5HBthqH1p_yeXzsg22gZduvgoNeA

In [None]:
!unzip -q ImageNet-Sketch.zip -d .

data_dir = "sketch"

In [None]:
# Shared evaluation preprocessing: resize to 256, centre-crop to 224, convert
# to a tensor and normalise each channel with the mean/std values below.
# NOTE(review): torchvision documents the EfficientNet-B3 IMAGENET1K_V1 weights
# with a 320-resize / 300-crop bicubic pipeline — confirm whether this shared
# 224 pipeline understates that model's accuracy.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Assumes the archive was extracted into `data_dir`, one sub-folder per class.
sketch_dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Sequential (unshuffled) batches of 64, loaded by two worker processes.
sketch_loader = DataLoader(
    dataset=sketch_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

# Quick sanity check on what was picked up from disk.
print("Classes:", len(sketch_dataset.classes))
print("Total images:", len(sketch_dataset))

Classes: 1000
Total images: 50889


Three ImageNet-pretrained models from torchvision

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torchvision constructors for the three architectures under comparison,
# keyed by the display name used throughout the notebook.
model_builders = {
    "ResNet50": torchvision.models.resnet50,
    "ViT-B/16": torchvision.models.vit_b_16,
    "EffNet-B3": torchvision.models.efficientnet_b3,
}

# Load each network with its IMAGENET1K_V1 weights, move it to `device`
# and switch it to eval mode.
models = {
    display_name: build(weights='IMAGENET1K_V1').to(device).eval()
    for display_name, build in model_builders.items()
}

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 153MB/s]


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth


100%|██████████| 330M/330M [00:05<00:00, 65.9MB/s]


Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth


100%|██████████| 47.2M/47.2M [00:00<00:00, 165MB/s]


In [None]:
try:
    from tqdm.auto import tqdm
except ImportError:
    # The progress bar is purely cosmetic; without tqdm just iterate the loader.
    def tqdm(iterable, *args, **kwargs):
        return iterable


@torch.no_grad()
def evaluate(model, loader):
    """Compute the top-1 accuracy of ``model`` over every batch in ``loader``.

    Args:
        model: A ``torch.nn.Module`` whose forward pass returns one row of
            class scores per input sample.
        loader: An iterable yielding ``(imgs, labels)`` batches of tensors.

    Returns:
        The fraction of samples (a float in ``[0, 1]``) whose highest-scoring
        class equals the label.

    Raises:
        ValueError: If ``loader`` yields no samples, since accuracy is then
            undefined.
    """
    # Run on the device the model's weights live on instead of relying on a
    # notebook-level global; a parameter-free module is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Force inference behaviour (dropout off, batch-norm using running stats)
    # for the duration of the call, then restore the caller's mode.
    was_training = model.training
    model.eval()

    correct, total = 0, 0
    try:
        for imgs, labels in tqdm(loader, desc="Evaluating"):
            imgs, labels = imgs.to(target_device), labels.to(target_device)
            preds = model(imgs).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    finally:
        model.train(was_training)

    if total == 0:
        raise ValueError("loader yielded no samples; accuracy is undefined")
    return correct / total

In [None]:
# Accuracy (as a fraction) of each model on ImageNet-Sketch, keyed by model name.
results = {}
for name in models:
    model = models[name]
    print(f"Evaluating {name}...")
    # One full pass over the sketch loader per model.
    acc = results[name] = evaluate(model, sketch_loader)
    print(f"{name}: {acc*100:.2f}%\n")

# Base Model performance

To compare these results against each model's baseline on the original ImageNet dataset, I used the ImageNet-1K top-1 accuracies reported in the PyTorch documentation [[link]](https://docs.pytorch.org/vision/main/models.html#table-of-all-available-classification-weights).

In [None]:
# ImageNet-1K accuracies (in percent) taken from the PyTorch documentation,
# keyed by the same model names used in `results`.
imagenet_baseline = {
    "ResNet50": 76.13,
    "ViT-B/16": 81.072,
    "EffNet-B3": 82.008,
}

print("\n--- Results ---")
for name, sketch_fraction in results.items():
    # `results` stores fractions; convert to percent before comparing.
    sketch_pct = sketch_fraction * 100
    baseline_pct = imagenet_baseline[name]
    drop = baseline_pct - sketch_pct
    print(f"{name}: Sketch Acc = {sketch_pct:.2f}%, "
          f"Baseline = {baseline_pct:.1f}%, "
          f"Drop = {drop:.2f}%")


--- Results ---
ResNet50: Sketch Acc = 24.09%, Baseline = 76.1%, Drop = 52.04%
ViT-B/16: Sketch Acc = 29.41%, Baseline = 81.1%, Drop = 51.66%
EffNet-B3: Sketch Acc = 34.09%, Baseline = 82.0%, Drop = 47.92%


**For each of these models, to what extent does the performance drop due to the shift in distribution?**

Compared with the baseline accuracies from the PyTorch documentation, every model loses roughly $50$ percentage points of accuracy on ImageNet-Sketch: ResNet50 drops by $52.04$ points, ViT-B/16 by $51.66$ points, and EffNet-B3 by $47.92$ points.