In [1]:
!pip install -q transformers datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os

import torch
from sklearn.model_selection import StratifiedKFold
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
from transformers import ViTImageProcessor, ViTForImageClassification

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
pretrain_model = "google/vit-base-patch16-224-in21k"
dataset_path = "/content/drive/MyDrive/database/FBG/"
size = 224

epochs = 100
batch_size = 64
learning_rate = 0.0001
num_labels = len(os.listdir(dataset_path))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
# Define the transformation
transform = transforms.Compose([
    transforms.Resize((size, size)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=image_mean, std=image_std),
])

# Load the dataset from the root folder
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

In [6]:
feature_extractor = ViTImageProcessor.from_pretrained(pretrain_model)

model = ViTForImageClassification.from_pretrained(pretrain_model, num_labels=num_labels)
model.to(device)

optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()

Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Initialize the KFold class
k_folds = 5
kfold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=22)

# K-Fold Cross Validation
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset, dataset.targets)):
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_subsampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_subsampler)

    # Epochs loop
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}\n")

        model.train()
        total_train = 0
        correct_train = 0
        total_loss_train = 0.0

        for data, targets in tqdm(train_loader):
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            loss = criterion(outputs, targets)

            total_loss_train += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += targets.size(0)
            correct_train += (predicted == targets).sum().item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_accuracy = 100 * correct_train / total_train
        avg_train_loss = total_loss_train / len(train_loader)

        model.eval()
        correct_eval = 0
        total_eval = 0
        total_loss_eval = 0.0

        with torch.no_grad():
            for data, targets in tqdm(test_loader):
                data, targets = data.to(device), targets.to(device)
                outputs = model(data)
                loss = criterion(outputs, targets)

                total_loss_eval += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_eval += targets.size(0)
                correct_eval += (predicted == targets).sum().item()

        eval_accuracy = 100 * correct_eval / total_eval
        avg_eval_loss = total_loss_eval / len(test_loader)

        print(
            f"train_loss: {avg_train_loss:.4f}, train_acc: {train_accuracy:.2f}%, val_loss: {avg_eval_loss:.4f}, val_acc: {eval_accuracy:.2f}%")


FOLD 0
--------------------------------
Epoch 1



  0%|          | 0/73 [01:12<?, ?it/s]


TypeError: ignored