In [None]:
# !git clone https://github.com/ankitsunil530/blood-cell-classification.git
# %cd blood-cell-classification

fatal: destination path 'blood-cell-classification' already exists and is not an empty directory.
/content/blood-cell-classification/blood-cell-classification


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Blood Cell Classification**

## Data loading and preprocessing

### Subtask:
Load the blood cell dataset and preprocess the images for use with the vision transformer and Performer models. This will involve resizing, normalization, and splitting the data into training, validation, and test sets.


In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


In [3]:
DATASET_PATH = "/content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset"
IMG_HEIGHT = 224
IMG_WIDTH = 224

def load_images_and_labels(dataset_path):
    images = []
    labels = []
    class_names = sorted(os.listdir(dataset_path))
    class_to_idx = {class_name: i for i, class_name in enumerate(class_names)}

    for class_name in class_names:
        class_dir = os.path.join(dataset_path, class_name)
        if os.path.isdir(class_dir):
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                img = cv2.imread(img_path)
                if img is not None:
                    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
                    images.append(img)
                    labels.append(class_to_idx[class_name])

    return np.array(images), np.array(labels), class_names


In [4]:
for root, dirs, files in os.walk(DATASET_PATH):
    level = root.replace(DATASET_PATH, '').count(os.sep)
    indent = ' ' * 4 * (level)
    print(f'{indent}{os.path.basename(root)}/')



Blood_Cells_Dataset/
    dataset2-master/
        dataset2-master/
            images/
                TEST_SIMPLE/
                    MONOCYTE/
                    NEUTROPHIL/
                    LYMPHOCYTE/
                    EOSINOPHIL/
                TEST/
                    NEUTROPHIL/
                    MONOCYTE/
                    LYMPHOCYTE/
                    EOSINOPHIL/
                TRAIN/
                    NEUTROPHIL/
                    MONOCYTE/
                    LYMPHOCYTE/
                    EOSINOPHIL/
    dataset-master/
        dataset-master/
            Annotations/
            JPEGImages/


In [5]:
def load_images_and_labels(dataset_path):
    images = []
    labels = []
    # Update base directories to reflect the actual structure
    base_image_dirs = [
        os.path.join('dataset2-master', 'dataset2-master', 'images', 'TRAIN'),
        os.path.join('dataset2-master', 'dataset2-master', 'images', 'TEST'),
        os.path.join('dataset2-master', 'dataset2-master', 'images', 'TEST_SIMPLE')
    ]
    class_names = set()

    # First pass to collect all unique class names
    for base_img_dir_name in base_image_dirs:
        base_img_dir_path = os.path.join(dataset_path, base_img_dir_name)
        if os.path.isdir(base_img_dir_path):
            for class_name in os.listdir(base_img_dir_path):
                class_dir_path = os.path.join(base_img_dir_path, class_name)
                if os.path.isdir(class_dir_path):
                    class_names.add(class_name)

    class_names = sorted(list(class_names))
    class_to_idx = {class_name: i for i, class_name in enumerate(class_names)}

    print(f"Found classes: {class_names}")

    # Second pass to load images
    for base_img_dir_name in base_image_dirs:
        base_img_dir_path = os.path.join(dataset_path, base_img_dir_name)
        if os.path.isdir(base_img_dir_path):
            for class_name in os.listdir(base_img_dir_path):
                class_dir_path = os.path.join(base_img_dir_path, class_name)
                print(f"Processing directory: {class_dir_path}")
                if os.path.isdir(class_dir_path) and class_name in class_to_idx:
                    for img_name in os.listdir(class_dir_path):
                        img_path = os.path.join(class_dir_path, img_name)
                        if os.path.isfile(img_path) and img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                            img = cv2.imread(img_path)
                            if img is not None:
                                img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
                                images.append(img)
                                labels.append(class_to_idx[class_name])
                            else:
                                print(f"Could not load image: {img_path}")

    print(f"Loaded {len(images)} images and {len(labels)} labels.")
    return np.array(images), np.array(labels), class_names

images, labels, class_names = load_images_and_labels(DATASET_PATH)

# Split the data - combining images from TRAIN, TEST, TEST_SIMPLE before splitting
# Using a fixed random state for reproducibility
X_train, X_temp, y_train, y_temp = train_test_split(images, labels, test_size=0.2, stratify=labels, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")
print(f"Class names: {class_names}")


Found classes: ['EOSINOPHIL', 'LYMPHOCYTE', 'MONOCYTE', 'NEUTROPHIL']
Processing directory: /content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset/dataset2-master/dataset2-master/images/TRAIN/NEUTROPHIL
Processing directory: /content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset/dataset2-master/dataset2-master/images/TRAIN/MONOCYTE
Processing directory: /content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset/dataset2-master/dataset2-master/images/TRAIN/LYMPHOCYTE
Processing directory: /content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset/dataset2-master/dataset2-master/images/TRAIN/EOSINOPHIL
Processing directory: /content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset/dataset2-master/dataset2-master/images/TEST/NEUTROPHIL
Processing directory: /content/drive/MyDrive/Deep Learning Lab/Blood_Cells_Dataset/Blood_Cells_Dataset/dataset2-master/dataset2-master/ima

## Data loading and preprocessing

### Subtask:
Load the blood cell dataset and preprocess the images for use with the vision transformer and Performer models. This will involve resizing, normalization, and splitting the data into training, validation, and test sets.


In [8]:
class BloodCellDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        """
        Args:
            images (numpy array): Array of image data (H, W, C).
            labels (numpy array): Array of corresponding labels.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)

        return image, label


In [9]:
# Define transformations
# Using common mean and std dev for ImageNet as a starting point
# A more accurate approach would be to calculate these from the dataset
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.ToPILImage(), # Convert numpy array to PIL Image for transforms
    transforms.RandomResizedCrop(IMG_HEIGHT),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

val_test_transform = transforms.Compose([
    transforms.ToPILImage(), # Convert numpy array to PIL Image for transforms
    transforms.Resize(IMG_HEIGHT),
    transforms.CenterCrop(IMG_HEIGHT),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Create Dataset instances
train_dataset = BloodCellDataset(X_train, y_train, transform=train_transform)
val_dataset = BloodCellDataset(X_val, y_val, transform=val_test_transform)
test_dataset = BloodCellDataset(X_test, y_test, transform=val_test_transform)

# Create DataLoader instances
batch_size = 32 # Using the already defined batch_size
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Number of training batches: {len(train_dataloader)}")
print(f"Number of validation batches: {len(val_dataloader)}")
print(f"Number of test batches: {len(test_dataloader)}")


Number of training batches: 313
Number of validation batches: 40
Number of test batches: 40


## Vision transformer model implementation

### Subtask:
Implement the Vision Transformer model for blood cell classification.


In [10]:
import torch.nn as nn
from torchvision.models import vit_b_16, ViT_B_16_Weights

In [28]:
class BloodCellViT(nn.Module):
    def __init__(self, num_classes=4):
        super(BloodCellViT, self).__init__()
        # Load pre-trained ViT-Base/16
        weights = ViT_B_16_Weights.DEFAULT
        self.vit = vit_b_16(weights=None)
        num_ftrs = self.vit.heads.head.in_features
        self.vit.heads.head = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        return self.vit(x)


num_classes = 4 # Based on the previous data loading step
model = BloodCellViT(num_classes=num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print(f"Model initialized and moved to {device}")
print(model)

Model initialized and moved to cuda
BloodCellViT(
  (vit): VisionTransformer(
    (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (encoder): Encoder(
      (dropout): Dropout(p=0.0, inplace=False)
      (layers): Sequential(
        (encoder_layer_0): EncoderBlock(
          (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (self_attention): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (dropout): Dropout(p=0.0, inplace=False)
          (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): MLPBlock(
            (0): Linear(in_features=768, out_features=3072, bias=True)
            (1): GELU(approximate='none')
            (2): Dropout(p=0.0, inplace=False)
            (3): Linear(in_features=3072, out_features=768, bias=True)
            (4): Dropout(p=0.0, inplace=False)
          )
        )
        (encoder_layer_1): 

## Performer model implementation

### Subtask:
Implement the Performer model for blood cell classification.


In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
def linear_attention(q, k, v):
    attention_weights = torch.matmul(q, k.transpose(-2, -1))
    attention_weights = F.softmax(attention_weights, dim=-1)
    output = torch.matmul(attention_weights, v)
    return output

class PatchEmbedding(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = (img_size // patch_size) ** 2
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        x = self.proj(x)
        x = x.flatten(2)
        x = x.transpose(1, 2)
        return x

class PerformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, mlp_ratio=4., drop_rate=0., attn_drop_rate=0.):
        super().__init__()
        self.norm1 = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=attn_drop_rate, batch_first=True) # Using standard MultiheadAttention for now, would replace with efficient attention
        self.norm2 = nn.LayerNorm(embed_dim)
        mlp_hidden_dim = int(embed_dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, mlp_hidden_dim),
            nn.GELU(),
            nn.Dropout(drop_rate),
            nn.Linear(mlp_hidden_dim, embed_dim),
            nn.Dropout(drop_rate)
        )

    def forward(self, x):
        x = x + self.attn(self.norm1(x), self.norm1(x), self.norm1(x))[0]
        x = x + self.mlp(self.norm2(x))
        return x

class PerformerModel(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768, num_layers=12, num_heads=12, mlp_ratio=4., num_classes=4, drop_rate=0., attn_drop_rate=0.):
        super().__init__()
        self.num_classes = num_classes
        self.embed_dim = embed_dim

        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        num_patches = self.patch_embed.num_patches

        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        self.blocks = nn.ModuleList([
            PerformerBlock(embed_dim, num_heads, mlp_ratio, drop_rate, attn_drop_rate)
            for _ in range(num_layers)
        ])

        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        # Initialize weights
        nn.init.trunc_normal_(self.pos_embed, std=.02)
        nn.init.trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward_features(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        x = x + self.pos_embed
        x = self.pos_drop(x)

        for blk in self.blocks:
            x = blk(x)

        x = self.norm(x)
        return x[:, 0]

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)
        return x


In [17]:

img_size = IMG_HEIGHT
patch_size = 16
in_channels = 3
embed_dim = 768
num_layers = 12
num_heads = 12
mlp_ratio = 4
num_classes = num_classes

performer_model = PerformerModel(
    img_size=img_size,
    patch_size=patch_size,
    in_channels=in_channels,
    embed_dim=embed_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    mlp_ratio=mlp_ratio,
    num_classes=num_classes
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
performer_model.to(device)

print(f"Performer Model initialized and moved to {device}")
print(performer_model)

Performer Model initialized and moved to cuda
PerformerModel(
  (patch_embed): PatchEmbedding(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0-11): 12 x PerformerBlock(
      (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): Sequential(
        (0): Linear(in_features=768, out_features=3072, bias=True)
        (1): GELU(approximate='none')
        (2): Dropout(p=0.0, inplace=False)
        (3): Linear(in_features=3072, out_features=768, bias=True)
        (4): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (head): Linear(in_features=768, out_features=4, bias=True)
)


## Model training

### Subtask:
Train both the Vision Transformer and Performer models on the training data.


# Newly Initialized ViT Model

In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vit_b_16, ViT_B_16_Weights
from tqdm import tqdm

# class BloodCellViT(nn.Module):
#     def __init__(self, num_classes=4):
#         super(BloodCellViT, self).__init__()
#         # Pretrained ViT model
#         weights = ViT_B_16_Weights.DEFAULT
#         self.vit = vit_b_16(weights=weights)
#         num_ftrs = self.vit.heads.head.in_features
#         self.vit.heads.head = nn.Linear(num_ftrs, num_classes)

#     def forward(self, x):
#         return self.vit(x)

def train_vit(model, train_loader, val_loader, criterion, optimizer, device, epochs=5):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}] → Loss: {running_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%")

        val_acc = test_vit(model, val_loader, device)
        print(f"Validation Accuracy: {val_acc:.2f}%")

    return model

def test_vit(model, test_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return 100 * correct / total


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
criterion = nn.CrossEntropyLoss()

vit_model = BloodCellViT(num_classes)
optimizer = optim.AdamW(vit_model.parameters(), lr=1e-4)

vit_model = train_vit(vit_model, train_dataloader, val_dataloader, criterion, optimizer, device, epochs=5)
vit_test_acc = test_vit(vit_model, test_dataloader, device)
print(f"\n✅ Final ViT Test Accuracy: {vit_test_acc:.2f}%")


Epoch 1/5: 100%|██████████| 313/313 [06:24<00:00,  1.23s/it]


Epoch [1/5] → Loss: 1.4277, Train Acc: 28.13%
Validation Accuracy: 56.12%


Epoch 2/5: 100%|██████████| 313/313 [06:24<00:00,  1.23s/it]


Epoch [2/5] → Loss: 0.8736, Train Acc: 62.50%
Validation Accuracy: 83.21%


Epoch 3/5: 100%|██████████| 313/313 [06:24<00:00,  1.23s/it]


Epoch [3/5] → Loss: 0.6239, Train Acc: 72.64%
Validation Accuracy: 84.73%


Epoch 4/5: 100%|██████████| 313/313 [06:25<00:00,  1.23s/it]


Epoch [4/5] → Loss: 0.7036, Train Acc: 69.95%
Validation Accuracy: 86.89%


Epoch 5/5: 100%|██████████| 313/313 [06:28<00:00,  1.24s/it]


Epoch [5/5] → Loss: 0.5512, Train Acc: 75.38%
Validation Accuracy: 86.49%

✅ Final ViT Test Accuracy: 84.74%


# **PreTrained ViT Model**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vit_b_16, ViT_B_16_Weights
from tqdm import tqdm

class BloodCellViT(nn.Module):
    def __init__(self, num_classes=4):
        super(BloodCellViT, self).__init__()
        # Pretrained ViT model
        weights = ViT_B_16_Weights.DEFAULT
        self.vit = vit_b_16(weights=weights)
        num_ftrs = self.vit.heads.head.in_features
        self.vit.heads.head = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        return self.vit(x)

def train_vit(model, train_loader, val_loader, criterion, optimizer, device, epochs=5):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}] → Loss: {running_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%")

        val_acc = test_vit(model, val_loader, device)
        print(f"Validation Accuracy: {val_acc:.2f}%")

    return model

def test_vit(model, test_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return 100 * correct / total


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
criterion = nn.CrossEntropyLoss()

vit_model = BloodCellViT(num_classes)
optimizer = optim.AdamW(vit_model.parameters(), lr=1e-4)

vit_model = train_vit(vit_model, train_dataloader, val_dataloader, criterion, optimizer, device, epochs=5)
vit_test_acc = test_vit(vit_model, test_dataloader, device)
print(f"\n✅ Final ViT Test Accuracy: {vit_test_acc:.2f}%")


Epoch 1/5: 100%|██████████| 313/313 [06:29<00:00,  1.24s/it]


Epoch [1/5] → Loss: 0.5570, Train Acc: 75.55%
Validation Accuracy: 91.93%


Epoch 2/5: 100%|██████████| 313/313 [06:27<00:00,  1.24s/it]


Epoch [2/5] → Loss: 0.3417, Train Acc: 85.24%
Validation Accuracy: 96.56%


Epoch 3/5: 100%|██████████| 313/313 [06:28<00:00,  1.24s/it]


Epoch [3/5] → Loss: 0.3072, Train Acc: 87.06%
Validation Accuracy: 96.24%


Epoch 4/5: 100%|██████████| 313/313 [06:27<00:00,  1.24s/it]


Epoch [4/5] → Loss: 0.2820, Train Acc: 87.87%
Validation Accuracy: 96.64%


Epoch 5/5: 100%|██████████| 313/313 [06:28<00:00,  1.24s/it]


Epoch [5/5] → Loss: 0.2511, Train Acc: 89.58%
Validation Accuracy: 97.04%

✅ Final ViT Test Accuracy: 97.12%


In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm


def train_performer(model, train_loader, val_loader, criterion, optimizer, device, epochs=5):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}] → Loss: {running_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%")

        val_acc = test_performer(model, val_loader, device)
        print(f"Validation Accuracy: {val_acc:.2f}%")

    return model


def test_performer(model, test_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return 100 * correct / total


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(performer_model.parameters(), lr=1e-4)


performer_model = train_performer(performer_model, train_dataloader, val_dataloader, criterion, optimizer, device, epochs=5)

performer_test_acc = test_performer(performer_model, test_dataloader, device)
print(f"\n✅ Final Performer Test Accuracy: {performer_test_acc:.2f}%")


Epoch 1/5: 100%|██████████| 313/313 [06:19<00:00,  1.21s/it]


Epoch [1/5] → Loss: 1.4087, Train Acc: 30.07%
Validation Accuracy: 46.52%


Epoch 2/5: 100%|██████████| 313/313 [06:19<00:00,  1.21s/it]


Epoch [2/5] → Loss: 0.9530, Train Acc: 57.67%
Validation Accuracy: 77.30%


Epoch 3/5: 100%|██████████| 313/313 [06:19<00:00,  1.21s/it]


Epoch [3/5] → Loss: 0.6807, Train Acc: 70.94%
Validation Accuracy: 83.37%


Epoch 4/5: 100%|██████████| 313/313 [06:19<00:00,  1.21s/it]


Epoch [4/5] → Loss: 0.5925, Train Acc: 74.37%
Validation Accuracy: 87.77%


Epoch 5/5: 100%|██████████| 313/313 [06:21<00:00,  1.22s/it]


Epoch [5/5] → Loss: 0.5276, Train Acc: 76.98%
Validation Accuracy: 88.09%

✅ Final Performer Test Accuracy: 88.90%
