In [None]:
!pip install --upgrade transformers
!pip install --upgrade datasets
!pip install --upgrade huggingface_hub
!pip install evaluate
!pip install accelerate
!pip install ptflops

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [None]:
from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoImageProcessor, AutoFeatureExtractor
from torchvision.transforms import (CenterCrop,
                                    Compose,
                                    Normalize,
                                    RandomHorizontalFlip,
                                    RandomResizedCrop,
                                    Resize,
                                    ToTensor)
from torch.utils.data import DataLoader
import torch
from transformers import ViTForImageClassification, PvtForImageClassification, SwinForImageClassification, CvtForImageClassification, LevitForImageClassification, ResNetForImageClassification, EfficientNetForImageClassification
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
# Hub repository of the corrupted validation images; the Colab form field
# switches between the five severity levels of the shot-noise corruption.
dataset = "Tsomaros/ImageNet-C-shot_noise-severity_5"  #@param ["Tsomaros/ImageNet-C-shot_noise-severity_1", "Tsomaros/ImageNet-C-shot_noise-severity_2", "Tsomaros/ImageNet-C-shot_noise-severity_3", "Tsomaros/ImageNet-C-shot_noise-severity_4", "Tsomaros/ImageNet-C-shot_noise-severity_5"]

# No split is requested here, so `val_ds` holds everything the repository
# provides rather than a single named split.
val_ds = load_dataset(path=dataset)

README.md:   0%|          | 0.00/27.3k [00:00<?, ?B/s]

validation-00000-of-00004.parquet:   0%|          | 0.00/464M [00:00<?, ?B/s]

validation-00001-of-00004.parquet:   0%|          | 0.00/462M [00:00<?, ?B/s]

validation-00002-of-00004.parquet:   0%|          | 0.00/459M [00:00<?, ?B/s]

validation-00003-of-00004.parquet:   0%|          | 0.00/460M [00:00<?, ?B/s]

Generating validation split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [None]:
def val_transforms(examples):
    """Attach model-ready tensors to a batch of examples.

    Every entry of ``examples['image']`` is converted to RGB and passed through
    the module-level ``_val_transforms`` pipeline; the results are stored under
    ``examples['pixel_values']``. The same (mutated) batch object is returned.
    """
    rgb_images = (picture.convert("RGB") for picture in examples['image'])
    examples['pixel_values'] = list(map(_val_transforms, rgb_images))
    return examples

def compute_metrics(eval_pred):
    """Compute top-1 accuracy from a ``(predictions, labels)`` pair.

    Args:
        eval_pred: Two-item sequence ``(predictions, labels)``. ``predictions``
            holds per-class scores with the class axis last, or a tuple whose
            first element holds them (as happens when a model returns more
            than one output array). ``labels`` holds the integer class ids.

    Returns:
        dict: ``{"accuracy": fraction of samples whose highest-scoring class
        equals the label}``, as a plain Python float.
    """
    predictions, labels = eval_pred
    # Only the first array is used when several outputs are handed over together.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predicted_ids = np.argmax(predictions, axis=-1)
    # Fraction of exact matches; identical to the accuracy score of
    # (y_true=labels, y_pred=predicted_ids).
    matches = predicted_ids == np.asarray(labels)
    return dict(accuracy=float(np.mean(matches)))

def collate_fn(examples):
    """Merge individual examples into one batch dictionary.

    Each example must provide a ``"pixel_values"`` tensor and a ``"label"``.
    The tensors are stacked along a new leading dimension and the labels are
    gathered into a single tensor.

    Returns:
        dict: ``{"pixel_values": stacked tensor, "labels": label tensor}``.
    """
    images, targets = [], []
    for sample in examples:
        images.append(sample["pixel_values"])
        targets.append(sample["label"])
    return {"pixel_values": torch.stack(images), "labels": torch.tensor(targets)}

In [None]:
model_name = 'microsoft/cvt-21-384-22k' #@param ["google/vit-base-patch16-224", "Zetatech/pvt-large-224", "google/vit-large-patch16-224", "microsoft/swin-base-patch4-window7-224", "microsoft/swin-large-patch4-window7-224", "facebook/levit-256", "facebook/levit-384", "microsoft/cvt-21-384-22k", "microsoft/resnet-152", "google/efficientnet-b7", "google/efficientnet-b4"]

# Supported checkpoints -> (classification model class,
#                           processor attribute that stores the target resolution,
#                           key inside that attribute).
# This table replaces a long if/elif chain whose branches differed only in
# these three values.
MODEL_SPECS = {
    'google/vit-base-patch16-224': (ViTForImageClassification, 'size', 'height'),
    'google/vit-large-patch16-224': (ViTForImageClassification, 'size', 'height'),
    'microsoft/swin-base-patch4-window7-224': (SwinForImageClassification, 'size', 'height'),
    'microsoft/swin-large-patch4-window7-224': (SwinForImageClassification, 'size', 'height'),
    'facebook/levit-256': (LevitForImageClassification, 'crop_size', 'height'),
    'facebook/levit-384': (LevitForImageClassification, 'crop_size', 'height'),
    'microsoft/cvt-21-384-22k': (CvtForImageClassification, 'size', 'shortest_edge'),
    'microsoft/resnet-152': (ResNetForImageClassification, 'size', 'shortest_edge'),
    'google/efficientnet-b4': (EfficientNetForImageClassification, 'size', 'height'),
    'google/efficientnet-b7': (EfficientNetForImageClassification, 'size', 'height'),
    'Zetatech/pvt-large-224': (PvtForImageClassification, 'size', 'height'),
}

# Fail immediately on an unsupported checkpoint instead of hitting a NameError
# further down when `model` / `processor` turn out to be undefined.
if model_name not in MODEL_SPECS:
    raise ValueError(
        f"Unsupported model_name {model_name!r}; expected one of {sorted(MODEL_SPECS)}"
    )

model_cls, size_attr, size_key = MODEL_SPECS[model_name]

# Short identifier (checkpoint name without the organisation prefix) used for
# output folders and the results file.
model_id = model_name.split('/')[-1]

# All checkpoints, LeViT included, are loaded through AutoImageProcessor.
processor = AutoImageProcessor.from_pretrained(model_name)
model = model_cls.from_pretrained(model_name, ignore_mismatched_sizes=True)

# Side length (pixels) the validation images are resized/cropped to.
size = getattr(processor, size_attr)[size_key]

# (channels, height, width) later used for the complexity measurement. It is
# derived from `size` so MACs are counted at the resolution that is actually
# evaluated; the previous hand-written values could drift from the processor
# (both EfficientNet variants were fixed at 600).
image_res = (3, size, size)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)

image_mean, image_std = processor.image_mean, processor.image_std

normalize = Normalize(mean=image_mean, std=image_std)

# Deterministic evaluation pipeline: resize, centre crop, tensor conversion,
# then normalisation with the processor's statistics.
_val_transforms = Compose(
    [
        Resize(size),
        CenterCrop(size),
        ToTensor(),
        normalize,
    ]
)

# Apply the preprocessing lazily, whenever examples are accessed.
val_ds.set_transform(val_transforms)

In [None]:
# One label shared by the output directory and the run name.
# NOTE(review): the "shot_noise_5" suffix is fixed text and does not follow the
# severity chosen in the `dataset` form field — confirm before evaluating
# other severities, otherwise results land in a mislabeled folder.
run_label = f'{model_id}_ImageNet-C_shot_noise_5'

args = TrainingArguments(
    output_dir=run_label,
    run_name=run_label,
    eval_strategy="epoch",
    per_device_eval_batch_size=32,
    logging_dir='logs',
    # Keep columns that are not model inputs: the on-the-fly transform reads 'image'.
    remove_unused_columns=False,
    do_eval=True,
)

# Evaluation-only Trainer (no training dataset is supplied).
# `processing_class` replaces the deprecated `tokenizer` argument of
# Trainer.__init__, which emitted a FutureWarning when this cell was run.
trainer = Trainer(
    model=model,
    args=args,
    eval_dataset=val_ds,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    processing_class=processor,
)

  trainer = Trainer(


In [None]:
# Evaluate once, then pass the same metrics to both Trainer reporters
# (console log and results file) under the "eval" split name.
metrics = trainer.evaluate()
for report in (trainer.log_metrics, trainer.save_metrics):
    report("eval", metrics)

***** eval metrics *****
  eval_validation_accuracy               =     0.3238
  eval_validation_loss                   =     3.7499
  eval_validation_model_preparation_time =     0.0108
  eval_validation_runtime                = 0:23:39.12
  eval_validation_samples_per_second     =     35.233
  eval_validation_steps_per_second       =      1.101


In [None]:
from ptflops import get_model_complexity_info

# Raw multiply-accumulate and parameter counts for one input of shape `image_res`.
raw_macs, raw_params = get_model_complexity_info(
    model,
    image_res,
    as_strings=False,
    print_per_layer_stat=False,
    verbose=False,
)

# Scale to billions of MACs and millions of parameters, one decimal place each.
macs = round(int(raw_macs) / 1_000_000_000, 1)
params = round(int(raw_params) / 1_000_000, 1)

for caption, value in (
    ('Computational complexity: ', macs),
    ('Number of parameters: ', params),
):
    print(f'{caption:<30}  {value:<8}')

Computational complexity:       19.2    
Number of parameters:           31.6    


In [None]:
import json
import os

# The metrics saved for the "eval" split live in `eval_results.json` inside the
# Trainer's output directory. Build the path from `args.output_dir` so it
# follows the selected model instead of a hard-coded checkpoint name.
filename = os.path.abspath(os.path.join(args.output_dir, 'eval_results.json'))

with open(filename) as fp:
  dictObj = json.load(fp)

# The notebook falls back to CPU when CUDA is unavailable; querying the CUDA
# device name in that case would raise, so record "cpu" instead.
device_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

# Enrich the saved metrics with model complexity and run metadata.
# NOTE(review): the key "Datase" looks like a typo for "Dataset"; it is kept
# unchanged because previously saved result files may already use it.
dictObj.update({"parameters": params,
                "GMacs": macs,
                "Device": device_name,
                "Model Id": model_id,
                "Datase": dataset})

In [None]:
# Overwrite the metrics file with the augmented dictionary, pretty-printed
# with a 4-space indent.
with open(filename, 'w') as json_file:
    json_file.write(json.dumps(dictObj, indent=4, separators=(',', ': ')))

In [None]:
import os

from google.colab import drive

drive.mount('/content/drive')

# Per-model results folder on Drive. os.makedirs creates missing parent
# folders and does not complain when the folder already exists, so this cell
# can be re-run safely (a plain `mkdir` fails in both situations).
os.makedirs(f'/content/drive/MyDrive/imagenet-c/Noise/shot_noise/{model_id}', exist_ok=True)

In [None]:
import os
import shutil

# Copy the augmented metrics file into the per-model Drive folder.
# The folder is created if needed so the copy can never end up as a plain file
# named after the model, and any failure raises instead of being printed and
# ignored by the shell.
results_dir = f'/content/drive/MyDrive/imagenet-c/Noise/shot_noise/{model_id}'
os.makedirs(results_dir, exist_ok=True)
shutil.copy(filename, results_dir)