In [1]:
!pip install --upgrade datasets
!pip install --upgrade transformers
!pip install patool
!pip install ptflops

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
from urllib.request import urlretrieve

url = (
    "https://zenodo.org/records/2235448/files/blur.tar?download=1"
)

filename = "blur.tar"

urlretrieve(url, filename)

('blur.tar', <http.client.HTTPMessage at 0x7c07a1e45490>)

In [3]:
import patoolib
patoolib.extract_archive("blur.tar", outdir="/content")

INFO patool: Extracting blur.tar ...
INFO:patool:Extracting blur.tar ...
INFO patool: running /usr/bin/tar --extract --gzip --force-local --file blur.tar --directory /content
INFO:patool:running /usr/bin/tar --extract --gzip --force-local --file blur.tar --directory /content
INFO patool: ... blur.tar extracted to `/content'.
INFO:patool:... blur.tar extracted to `/content'.


'/content'

In [4]:
import numpy as np
import os
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as trn
import torchvision.transforms.functional as trnF
import torchvision.models as models
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
from torch.autograd import Variable as V
#import calibration_tools

from copy import copy
from tqdm import tqdm

from transformers import AutoImageProcessor, AutoFeatureExtractor
from transformers import ViTForImageClassification, PvtForImageClassification, SwinForImageClassification, CvtForImageClassification, LevitForImageClassification, ResNetForImageClassification, EfficientNetForImageClassification

In [16]:
model_name = 'microsoft/swin-base-patch4-window7-224' #@param ["google/vit-base-patch16-224", "Zetatech/pvt-large-224", "google/vit-large-patch16-224", "microsoft/swin-base-patch4-window7-224", "microsoft/swin-large-patch4-window7-224", "facebook/levit-256", "facebook/levit-384", "microsoft/cvt-21-384-22k", "microsoft/resnet-152", "google/efficientnet-b7", "google/efficientnet-b4"]

if model_name == 'google/vit-base-patch16-224':
  model_id = 'vit-base-patch16-224'
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = ViTForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.size["height"]

elif model_name == 'google/vit-large-patch16-224':
  model_id = 'vit-large-patch16-224'
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = ViTForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.size["height"]

elif model_name == 'microsoft/swin-base-patch4-window7-224':
  model_id = 'swin-base-patch4-window7-224'
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = SwinForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.size["height"]

elif model_name == 'microsoft/swin-large-patch4-window7-224':
  model_id = 'swin-large-patch4-window7-224'
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = SwinForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.size["height"]

elif model_name == 'facebook/levit-256':
  model_id = 'levit-256'
  processor = AutoFeatureExtractor.from_pretrained(model_name)
  net = LevitForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.crop_size["height"]

elif model_name == 'facebook/levit-384':
  model_id = 'levit-384'
  processor = AutoFeatureExtractor.from_pretrained(model_name)
  net = LevitForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.crop_size["height"]

elif model_name == 'microsoft/cvt-21-384-22k':
  model_id = 'cvt-21-384-22k'
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = CvtForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,384,384)
  size = processor.size["shortest_edge"]

elif model_name == 'microsoft/resnet-152':
  model_id = 'resnet-152'
  image_res = (3,224,224)
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = ResNetForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  size = processor.size["shortest_edge"]

elif model_name == 'google/efficientnet-b4':
  model_id = 'efficientnet-b4'
  image_res = (3,600,600)
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = EfficientNetForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  size = processor.size["height"]

elif model_name == 'google/efficientnet-b7':
  model_id = 'efficientnet-b7'
  image_res = (3,600,600)
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = EfficientNetForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  size = processor.size["height"]

elif model_name == 'Zetatech/pvt-large-224':
  model_id = 'pvt-large-224'
  processor = AutoImageProcessor.from_pretrained(model_name)
  net = PvtForImageClassification.from_pretrained(model_name, ignore_mismatched_sizes=True)
  image_res = (3,224,224)
  size = processor.size["height"]

# Define transformations based on the processor
image_mean, image_std = processor.image_mean, processor.image_std


test_transform = trn.Compose(
    [
        trn.Resize(size),
        trn.CenterCrop(size),
        trn.ToTensor(),
        trn.Normalize(mean=image_mean, std=image_std),
    ]
)

net.cuda()
net.eval()

SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0): SwinLayer(
              (layernorm_before): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=128, out_features=128, bias=True)
                  (key): Linear(in_features=128, out_features=128, bias=True)
                  (value): Linear(in_features=128, out_features=128, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfOutput(

In [19]:
from tqdm import tqdm

def show_performance(distortion_name):
    errs = []

    for severity in range(1, 6):
        distorted_dataset = dset.ImageFolder(
            root='/content/' + distortion_name + '/' + str(severity),
            transform=test_transform)

        distorted_dataset_loader = torch.utils.data.DataLoader(
            distorted_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)

        correct = 0
        # Convert mask to a tensor
        #if mask is not None:
          #mask = torch.tensor(mask, dtype=torch.bool).cuda()  # Ensure mask is on the same device

        with torch.no_grad():
          for batch_idx, (data, target) in tqdm(enumerate(distorted_dataset_loader), total=len(distorted_dataset_loader), desc="Processing Batches"):
            data = data.cuda()  # Move data to GPU
            target = target.cuda()  # Move target to GPU
            output = net(data)  # Forward pass
            pred = output.logits.max(1)[1]  # Predicted classes
            correct += pred.eq(target).sum().item()  # Count correct predictions

        errs.append(1 - 1.*correct / len(distorted_dataset))

    print('\n=Average', tuple(errs))
    return np.mean(errs)

In [21]:
import collections

print('\nUsing ImageNet data')

distortions = [
    #'gaussian_noise', 'shot_noise', 'impulse_noise',
    #'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur',
    'motion_blur'
    #'snow', 'frost', 'fog', 'brightness',
    #'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression',
    #'speckle_noise', 'gaussian_blur', 'spatter', 'saturate'
]

error_rates = []
for distortion_name in distortions:
    rate = show_performance(distortion_name)
    error_rates.append(rate)
    print('Distortion: {:15s}  | CE (unnormalized) (%): {:.2f}'.format(distortion_name, 100 * rate))


#print('mCE (%): {:.2f}'.format(100 * np.mean(error_rates)))


Using ImageNet data


Processing Batches: 100%|██████████| 782/782 [10:24<00:00,  1.25it/s]
Processing Batches: 100%|██████████| 782/782 [10:46<00:00,  1.21it/s]
Processing Batches: 100%|██████████| 782/782 [10:46<00:00,  1.21it/s]
Processing Batches: 100%|██████████| 782/782 [10:45<00:00,  1.21it/s]
Processing Batches: 100%|██████████| 782/782 [10:45<00:00,  1.21it/s]


=Average (0.20694, 0.2481, 0.32702, 0.43942000000000003, 0.52206)
Distortion: motion_blur      | CE (unnormalized) (%): 34.87
mCE (unnormalized by AlexNet errors) (%): 34.87



