In [1]:
# Notebook-level settings, kept together so they are easy to find and change.
model_checkpoint = "microsoft/swin-tiny-patch4-window7-224" # pre-trained model from which to fine-tune
batch_size = 32 # batch size for training and evaluation

In [2]:
from huggingface_hub import notebook_login

# Render the Hugging Face login widget (shown in the cell output) so that
# credentials are entered interactively rather than hard-coded in the notebook.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Loading the data

In [7]:
from datasets import load_dataset
import os

In [9]:
# Build the dataset from the local `datasets` folder using the generic
# "imagefolder" loader. `os.path.abspath` resolves the folder relative to the
# current working directory, so the notebook must be run from the directory
# that contains `datasets/`.
dataset = load_dataset("imagefolder", data_dir=os.path.abspath('datasets'))

Resolving data files:   0%|          | 0/2000 [00:00<?, ?it/s]

Downloading data files:   0%|          | 0/2000 [00:00<?, ?it/s]

Downloading data files: 0it [00:00, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [10]:
# Inspect the loaded splits, their features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 2000
    })
})

### Preprocessing the data

In [12]:
from transformers import AutoImageProcessor

# Load the preprocessing configuration associated with the checkpoint. Its
# image_mean / image_std / size fields drive the torchvision transforms that
# are built further down in this notebook.
image_processor  = AutoImageProcessor.from_pretrained(model_checkpoint)
# Display the resolved processor configuration.
image_processor 

2023-11-07 17:27:16.912109: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


ViTImageProcessor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ViTImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}

In [17]:
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

# Normalise with the per-channel statistics declared by the image processor so
# that inputs match what the pre-trained checkpoint was configured for.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Derive the resize / crop geometry from the processor's `size` dict. Two
# layouts are handled: an explicit (height, width) pair, or a "shortest_edge"
# value with an optional "longest_edge" cap.
if "height" in image_processor.size:
    size = (image_processor.size["height"], image_processor.size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
    crop_size = (size, size)
    max_size = image_processor.size.get("longest_edge")
else:
    # Fail here with an explicit message instead of letting the transforms
    # below die with a NameError on `size` / `crop_size`.
    raise ValueError(
        f"Unsupported image processor size specification: {image_processor.size!r}"
    )
# NOTE: `max_size` is recorded for reference but is not consumed by the
# transforms defined in this cell.

# Training pipeline: random crop + horizontal flip as data augmentation.
train_transforms = Compose(
    [
        RandomResizedCrop(crop_size),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ]
)

# Validation pipeline: deterministic resize + centre crop, no augmentation.
val_transforms = Compose(
    [
        Resize(size),
        CenterCrop(crop_size),
        ToTensor(),
        normalize,
    ]
)

def preprocess_train(example_batch):
    """Add augmented training tensors to a batch.

    Every image under ``example_batch["image"]`` is converted to RGB and passed
    through ``train_transforms``; the results are stored, in the same order,
    under ``example_batch["pixel_values"]``. The batch is modified in place and
    also returned.
    """
    # Lazy generator: each image is converted right before it is transformed.
    rgb_images = (img.convert("RGB") for img in example_batch["image"])
    example_batch["pixel_values"] = [train_transforms(img) for img in rgb_images]
    return example_batch

def preprocess_val(example_batch):
    """Add validation tensors to a batch.

    Every image under ``example_batch["image"]`` is converted to RGB and passed
    through ``val_transforms``; the results are stored, in the same order,
    under ``example_batch["pixel_values"]``. The batch is modified in place and
    also returned.
    """
    pixel_values = []
    for img in example_batch["image"]:
        pixel_values.append(val_transforms(img.convert("RGB")))
    example_batch["pixel_values"] = pixel_values
    return example_batch

In [14]:
# Split the single "train" split into training (90%) and validation (10%).
# A fixed seed keeps the held-out set identical across kernel restarts, so
# results from different runs are measured on the same validation images.
splits = dataset["train"].train_test_split(test_size=0.1, seed=42)
train_ds = splits['train']
val_ds = splits['test']

In [18]:
# Register the preprocessing functions on each split. The transform runs when
# rows are accessed, which is why a fetched example carries a `pixel_values`
# entry alongside the original `image` and `label`.
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)

In [19]:
# Sanity check: a fetched training row should now include a `pixel_values`
# tensor next to `image` and `label`.
train_ds[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=400x400>,
 'label': 0,
 'pixel_values': tensor([[[0.5707, 0.5707, 0.5707,  ..., 0.5707, 0.5707, 0.5707],
          [0.5707, 0.5707, 0.5707,  ..., 0.5707, 0.5707, 0.5707],
          [0.5707, 0.5707, 0.5707,  ..., 0.5707, 0.5707, 0.5707],
          ...,
          [0.5707, 0.5707, 0.5707,  ..., 0.5707, 0.5707, 0.5707],
          [0.5707, 0.5707, 0.5707,  ..., 0.5707, 0.5707, 0.5707],
          [0.5707, 0.5707, 0.5707,  ..., 0.5707, 0.5707, 0.5707]],
 
         [[0.6954, 0.6954, 0.6954,  ..., 0.6954, 0.6954, 0.6954],
          [0.6954, 0.6954, 0.6954,  ..., 0.6954, 0.6954, 0.6954],
          [0.6954, 0.6954, 0.6954,  ..., 0.6954, 0.6954, 0.6954],
          ...,
          [0.6954, 0.6954, 0.6954,  ..., 0.6954, 0.6954, 0.6954],
          [0.6954, 0.6954, 0.6954,  ..., 0.6954, 0.6954, 0.6954],
          [0.6954, 0.6954, 0.6954,  ..., 0.6954, 0.6954, 0.6954]],
 
         [[0.8448, 0.8448, 0.8448,  ..., 0.8448, 0.8448, 0.8448],
  

### Training the model