In [None]:
import os 
import torch 
from torch.utils.data import Dataset 
from torchvision.utils import make_grid 
import numpy as np
from transformers import (
    ViTForImageClassification,
    ViTImageProcessor, 
    Trainer, 
    TrainingArguments,
    EarlyStoppingCallback,  
)
from PIL import Image 
import matplotlib.pyplot as plt 


ModuleNotFoundError: No module named 'transformers'

In [7]:
class CustomDataset(Dataset):
    """Two-class image dataset read from ``<data_dir>/<split>/<class_name>/``.

    The class folders are fixed to ``NORMAL`` (label 0) and ``PNEUMONIA``
    (label 1). Each item is a dict with ``pixel_values`` (the processor
    output for one RGB image) and ``labels`` (a ``torch.long`` scalar).
    """

    # Extensions accepted as images; compared against the lower-cased file
    # name so that e.g. ``.JPG`` / ``.JPEG`` files are not silently skipped.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_dir, processor, split = 'train'):
        """Index every image file of one split.

        Args:
            data_dir: Root folder that contains one sub-folder per split.
            processor: Callable applied to each PIL image in ``__getitem__``;
                it is called as ``processor(image, return_tensors='pt')`` and
                must return a mapping with a ``'pixel_values'`` entry.
            split: Name of the split sub-folder (default ``'train'``).

        Raises:
            FileNotFoundError: If a class folder is missing (raised by
                ``os.listdir``).
        """
        self.data_dir = os.path.join(data_dir, split)
        self.processor = processor
        self.classes = ['NORMAL','PNEUMONIA']
        self.class_to_index = {cls: i for i, cls in enumerate(self.classes)}

        self.images = []
        self.labels = []
        for class_name in self.classes:
            class_path = os.path.join(self.data_dir, class_name)
            # os.listdir gives entries in arbitrary order; sorting keeps the
            # index -> sample mapping stable between runs and machines.
            for file in sorted(os.listdir(class_path)):
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.images.append(os.path.join(class_path, file))
                    self.labels.append(self.class_to_index[class_name])

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, preprocess and label the sample at position ``idx``.

        Returns:
            dict: ``{'pixel_values': tensor, 'labels': torch.long scalar}``.
        """
        image_path = self.images[idx]
        label = self.labels[idx]

        # Close the underlying file as soon as the pixels are decoded instead
        # of leaving the handle to the garbage collector.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        encoding = self.processor(image, return_tensors = 'pt')
        # Drop only the leading axis (presumably a batch of one, since a
        # single image is passed in); a bare squeeze() would also collapse
        # any other axis that happens to have size 1.
        pixel_values = encoding['pixel_values'].squeeze(0)

        return {
            'pixel_values': pixel_values,
            'labels': torch.tensor(label, dtype = torch.long),
        }
    


In [None]:
# --- Configuration ---------------------------------------------------------
data_dir = 'data'          # root folder; the 'train' and 'val' splits are read below
model_name = 'google/vit-base-patch16-224-in21k'
out_dir = './vit-finetuned2'
# Not referenced again in this cell; kept for any later cell that reads it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Data --------------------------------------------------------------------
processor = ViTImageProcessor.from_pretrained(model_name)

train_dataset = CustomDataset(data_dir, processor, 'train')
val_dataset = CustomDataset(data_dir, processor, 'val')

# `x` is the training split as well: reuse the dataset that was just built
# rather than walking the training directory a second time.
x = train_dataset

# --- Model -------------------------------------------------------------------
# Take the label mapping from the dataset so the classification head and the
# labels produced by CustomDataset cannot drift apart.
label2id = dict(train_dataset.class_to_index)
id2label = {index: name for name, index in label2id.items()}

model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

# --- Training ----------------------------------------------------------------
training_args = TrainingArguments(
    output_dir=out_dir,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_steps=10,
    # Evaluate every 200 steps and checkpoint every 400: with
    # load_best_model_at_end=True the save interval has to be a round
    # multiple of the evaluation interval.
    eval_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=400,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    logging_strategy='steps',
    logging_steps=100,
    report_to='none',
    data_seed=42,
    # Mixed precision only when a CUDA device is present.
    fp16=torch.cuda.is_available(),
    # CustomDataset already returns exactly the keys that are fed to the
    # model, so the Trainer must not try to prune columns.
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # Stop once the tracked metric has not improved for 3 evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
