# Exploration and Comparison of Transformers for Image Classification

## Swin (Swin Transformer)


### Prerequisites

In [5]:
import os

# The project-local imports in this cell (`src.*`, `utils.*`) are resolved after
# stepping one directory up -- presumably to the repository root (TODO confirm
# the notebook's location). Only step up when `src` is not already visible from
# the current directory, so re-running this cell (without restarting the kernel)
# does not climb one level further each time and break those imports.
if not os.path.isdir('src'):
    os.chdir('..')

import torch
import torch.nn as nn

from transformers import AutoImageProcessor
from datasets import load_dataset, concatenate_datasets

from src.dataset_builder import ImageDataset
from src.models import Backbone
from src.train import train_model, evaluate_model

from utils.config import Config
from utils.train_utils import *
from utils.models_utils import *

### GPU

In [6]:
# List every CUDA device PyTorch can see together with its total memory,
# or state that CUDA cannot be used at all.
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    for device_index in range(torch.cuda.device_count()):
        print(f"GPU {device_index}: {torch.cuda.get_device_name(device_index)}")
        device_props = torch.cuda.get_device_properties(device_index)
        # total_memory is reported in bytes; 2 ** 30 bytes per displayed "GB".
        total_gib = device_props.total_memory / 2 ** 30
        print(f"  VRAM: {total_gib:.2f} GB")

GPU 0: NVIDIA A100 80GB PCIe MIG 1g.10gb
  VRAM: 9.50 GB


### Data preparation

In [7]:
# Request the train / validation / test splits of RESISC45 in one call; the
# result is unpacked in the same order as the names passed to `split`.
train, val, test = load_dataset(
    'timm/resisc45',
    split=['train', 'validation', 'test'],
)

In [8]:
# Load the image processor that matches the Swin checkpoint.
# NOTE(review): `model_names` is not defined in any visible cell -- presumably it
# arrives through one of the `from utils... import *` lines; confirm and import
# it explicitly.
processor = AutoImageProcessor.from_pretrained(model_names['Swin'])

preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

In [9]:
# Wrap each raw split in the project's ImageDataset, all sharing one processor.
# The generator is consumed in order, so the datasets are built train -> val -> test.
train_split, val_split, test_split = (
    ImageDataset(dataset=raw_split, processor=processor)
    for raw_split in (train, val, test)
)

In [10]:
# Number of target classes as reported by the training split; passed to
# `Backbone` below as `num_classes`.
num_classes = train_split.get_num_classes()

### Model

In [11]:
config = Config()

# The classification head cannot be loaded from the checkpoint (1000 classes
# there vs. `num_classes` here) and is therefore randomly initialised. Seed the
# torch RNG first so that initialisation is repeatable across kernel restarts.
# NOTE(review): if `Config` / `train_model` already seed the RNGs this is
# redundant but harmless -- confirm.
SEED = 42
torch.manual_seed(SEED)

model = Backbone(model_name=model_names['Swin'], num_classes=num_classes)

config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/113M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([45]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([45, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Report the model's parameter count.
# NOTE(review): `get_model_params` is not defined in any visible cell --
# presumably provided by one of the `from utils... import *` lines; confirm.
get_model_params(model)

Parameters: 27.55M


### Linear probing

In [13]:
# Train on the training split and validate on the validation split.
# NOTE(review): `fine_tune=False` presumably keeps the backbone frozen so only
# the classifier head is trained (linear probing) -- confirm in src.train.
train_model(model, train_split, val_split, config, architecture='swin', fine_tune=False)

Train: 100%|██████████| 2363/2363 [03:19<00:00, 11.86it/s]
Val: 100%|██████████| 788/788 [01:07<00:00, 11.65it/s]


Epochs: 1/2 | train_loss: 2.5979 | train_acc: 0.4962 | val_loss: 1.6962 | val_acc: 0.7090


Train: 100%|██████████| 2363/2363 [03:25<00:00, 11.52it/s]
Val: 100%|██████████| 788/788 [01:05<00:00, 12.04it/s]

Epochs: 2/2 | train_loss: 1.4358 | train_acc: 0.7276 | val_loss: 1.1213 | val_acc: 0.7663





### Evaluation

In [14]:
# Score the trained model on the held-out test split.
evaluate_model(model, test_split, config)

Test: 100%|██████████| 788/788 [01:03<00:00, 12.36it/s]

test_loss: 1.1740 | test_acc: 0.7484



