# **Ανάλυση Ιατρικών Εικόνων MedMNIST με CNN, Transfer Learning & Vision Transformers (PyTorch)**

Σκλαβενίτης Γεώργιος 10708 gsklaven@ece.auth.gr

# 0. Περιγραφή του Dataset

In [None]:
# Install the MedMNIST library
!pip install medmnist

In [None]:
# All imports needed
import numpy as np
import torch
import torchvision
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torchvision.models as models
from collections import Counter
from torch import nn as nn
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Import dataset
from medmnist import BloodMNIST

# Build the train / validation / test partitions in one pass.
# download=True fetches the data when it is not already available locally;
# size=28 selects the 28x28 resolution listed on medmnist.com.
train_dataset, val_dataset, test_dataset = (
    BloodMNIST(split=split_name, download=True, size=28)
    for split_name in ('train', 'val', 'test')
)

In [None]:
# Probe whether the dataset can also be loaded at another resolution (32x32).
try:
    dataset_32 = BloodMNIST(split="train", download=True, size=32)
    img, _ = dataset_32[0]
except Exception as e:
    # Show the real cause: a download/network failure would otherwise be
    # indistinguishable from "this resolution does not exist".
    print("The 32x32 version is not available for this dataset "
          f"({type(e).__name__}: {e}).")
else:
    print(f"Test 32x32: Success. The images have size: {img.size}")

In [None]:
# Inspect a single training sample to learn the image layout.
first_image, first_label = train_dataset[0]
image_array = np.array(first_image)

print("Image Information")
print(f"Resolution: {image_array.shape}")

# A 3-D array whose last axis has length 3 is reported as RGB;
# every other layout is reported as grayscale.
is_rgb = image_array.ndim == 3 and image_array.shape[2] == 3
print("Type: RGB" if is_rgb else "Type: Grayscale")

In [None]:
# Metadata dictionary attached to the dataset object.
info = train_dataset.info

description = info['description']
print("Description:", description)

# Label id -> class name lookup, reused by the later cells.
labels_map = info['label']

print(f"\nClasses: {len(labels_map)}")
for key, value in labels_map.items():
    print("Label", f"{key}:", value)

In [None]:
# Report how many images each split holds.
print("Size of Splits")
for split_title, split_data in (("Training", train_dataset),
                                ("Validation", val_dataset),
                                ("Test", test_dataset)):
    print(f"{split_title} set: {len(split_data)} images")

In [None]:
# Class distribution of the training split (imbalance check).
# Read the stored label array directly: iterating the dataset would load
# every image only to throw it away.
# NOTE(review): relies on the MedMNIST dataset exposing its targets as the
# `labels` array attribute — confirm for the installed medmnist version.
train_labels = np.asarray(train_dataset.labels).reshape(-1).tolist()
counts = Counter(train_labels)

print("--- Class Distribution (Imbalance Check) ---")
print("Number of images per class in Training Set:")
for label_id, count in sorted(counts.items()):
    class_name = labels_map[str(label_id)]
    print(f" - {class_name}: {count}")

In [None]:
# Draw five distinct random training images and show them side by side.
fig, axes = plt.subplots(1, 5, figsize=(15, 5))
indices = np.random.choice(len(train_dataset), 5, replace=False)

for ax, idx in zip(axes, indices):
    img, label = train_dataset[idx]
    class_name = labels_map[str(label[0])]  # label is indexed to get its single id

    ax.imshow(img)
    ax.set_title(class_name)
    ax.axis('off')  # no ticks or frame around the picture

plt.suptitle("Sample Images from BloodMNIST")  # figure-level title
plt.show()

# Global Functions

In [None]:
def train_loop(model, dataloader, optimizer, criterion, device):
    """
    Trains a model for one epoch.

    Args:
        model (torch.nn.Module): The model to be trained.
        dataloader (torch.utils.data.DataLoader): The DataLoader for the training set.
            Labels may be shaped [B, 1] or [B].
        optimizer (torch.optim.Optimizer): The optimizer.
        criterion (torch.nn.Module): The loss function. It is assumed to return
            the mean loss of the batch (the default reduction of
            nn.CrossEntropyLoss).
        device (torch.device): The device (CPU or GPU) on which training will be executed.

    Returns:
        tuple: (average loss per sample, accuracy) over the epoch.
               (0.0, 0.0) when the dataloader yields no samples.
    """

    # Set the model to training mode
    model.train()

    loss_sum = 0.0  # Sum of per-sample losses (batch mean * batch size)
    correct = 0
    total = 0

    for batch, (images, labels) in enumerate(dataloader):

        # Transfer images and labels to the specified device
        images = images.to(device)
        labels = labels.to(device)
        # Only drop the extra axis when it exists: squeeze(1) raises on 1-D labels
        if labels.dim() > 1:
            labels = labels.squeeze(1)
        labels = labels.long()  # Class-index targets must be int64
        batch_size = labels.size(0)

        output = model(images)  # Forward pass
        loss = criterion(output, labels)

        optimizer.zero_grad()  # Zero out previous gradients
        loss.backward()  # Backpropagation (gradient calculation)
        optimizer.step()  # Update parameters

        # Weight the batch mean by the batch size so that a smaller last batch
        # does not count as much as a full one.
        loss_sum += loss.item() * batch_size
        preds = output.argmax(dim=1)  # Index of the largest logit
        correct += (preds == labels).sum().item()
        total += batch_size

        if batch % 100 == 0:
            # `total` is the number of samples processed so far in this epoch
            print(f"loss: {loss.item():>7f}  [{total:>5d}/{len(dataloader.dataset):>5d}]")

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, correct / total

In [None]:
def test_loop(model, dataloader, criterion, device):
    """
    Evaluates the performance of a model on a dataset.

    Args:
        model (torch.nn.Module): The model to be evaluated.
        dataloader (torch.utils.data.DataLoader): The DataLoader for the validation/test set.
        criterion (torch.nn.Module): The loss function.
        device (torch.device): The device (CPU or GPU) on which the evaluation will be executed.

    Returns:
        tuple: A tuple containing the average loss, accuracy, all predictions
               and all true labels.
    """

    # Set the model to evaluation mode (disables dropout/batchnorm updates)
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    all_preds = [] # List to store all predictions
    all_labels = [] # List to store all true labels

    # Disable gradient calculation to save memory and speed
    with torch.no_grad():
        for images, labels in dataloader:

            # Transfer images and labels to the specified device
            images = images.to(device)
            labels = labels.to(device)
            labels = labels.squeeze(1) # Convert labels to 1D (if necessary)

            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.extend(preds.detach().cpu().numpy()) # Store predictions on CPU
            all_labels.extend(labels.detach().cpu().numpy()) # Store labels on CPU

    return total_loss / len(dataloader), correct / total, np.array(all_preds), np.array(all_labels)

In [None]:
def run_training(model, train_loader, val_loader, optimizer, criterion, device, epochs):
    """
    Alternates one training pass and one validation pass per epoch.

    Args:
        model (torch.nn.Module): The model to be trained.
        train_loader (torch.utils.data.DataLoader): DataLoader for the training set.
        val_loader (torch.utils.data.DataLoader): DataLoader for the validation set.
        optimizer (torch.optim.Optimizer): The optimizer.
        criterion (torch.nn.Module): The loss function.
        device (torch.device): The device (CPU or GPU).
        epochs (int): The number of epochs.

    Returns:
        dict: Per-epoch lists under the keys 'train_loss', 'train_acc',
              'val_loss' and 'val_acc'.
    """
    history = {
        metric: []
        for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')
    }

    print(f"Starting training on {device} for {epochs} epochs...")

    epoch = 0
    while epoch < epochs:
        # One optimisation pass over the training data
        t_loss, t_acc = train_loop(model, train_loader, optimizer, criterion, device)

        # Validation metrics; predictions and labels are not needed here
        v_loss, v_acc, _, _ = test_loop(model, val_loader, criterion, device)

        # Append this epoch's numbers to the matching history lists
        for metric, value in (('train_loss', t_loss), ('train_acc', t_acc),
                              ('val_loss', v_loss), ('val_acc', v_acc)):
            history[metric].append(value)

        # One summary line per epoch
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {t_loss:.4f} | Val Loss: {v_loss:.4f} | Val Acc: {v_acc:.4f}")
        epoch += 1

    return history

In [None]:
def evaluate_and_visualize(model, test_loader, criterion, device, history, class_names):
    """
    Plots the training curves, evaluates the model on the test set and shows
    the confusion matrix and the classification report.

    Args:
        model (torch.nn.Module): The trained model.
        test_loader (torch.utils.data.DataLoader): DataLoader for the test set.
        criterion (torch.nn.Module): The loss function.
        device (torch.device): The device (CPU or GPU).
        history (dict): Training history with the keys 'train_loss',
            'train_acc', 'val_loss' and 'val_acc'.
        class_names (list): Class names, where class_names[i] names label id i.
    """

    # --- Loss and accuracy curves ---
    epochs_range = range(1, len(history['train_loss']) + 1)

    plt.figure(figsize=(14, 5))

    # Loss curve (left panel)
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, history['train_loss'], label='Training Loss')
    plt.plot(epochs_range, history['val_loss'], label='Validation Loss', linestyle='--')
    plt.title('Training & Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Accuracy curve (right panel)
    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, history['val_acc'], label='Validation Accuracy', color='green')
    plt.plot(epochs_range, history['train_acc'], label='Training Accuracy', color='blue')
    plt.title('Training & Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    plt.show()

    # --- Test-set evaluation ---
    print("\nEvaluating on Test Set...")
    test_loss, test_acc, y_pred, y_true = test_loop(model, test_loader, criterion, device)
    print(f"Final Test Loss: {test_loss:.4f}")
    print(f"Final Test Accuracy: {test_acc*100:.2f}%")

    # Pin the label ids so the matrix and the report always have one entry per
    # name in class_names. Without this, a class that is absent from both
    # y_true and y_pred shrinks the matrix (misaligning the tick labels) and
    # makes classification_report reject target_names.
    label_ids = list(range(len(class_names)))

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

    # Classification Report
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

# 1. CNN από την αρχή

MODELS

In [None]:
# Training pipeline: light random augmentation followed by tensor conversion.
train_augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),  # mirror half of the images
    transforms.RandomRotation(degrees=10),   # rotate by a random angle in [-10, 10] degrees
    transforms.ToTensor(),                   # convert to a tensor
]
train_transform = transforms.Compose(train_augmentations)

# Validation / test pipeline: no augmentation, only tensor conversion,
# so evaluation always sees the unmodified images.
eval_transform = transforms.Compose([transforms.ToTensor()])

# Re-create the three splits, each bound to its transform.
train_dataset, val_dataset, test_dataset = (
    BloodMNIST(split=split_name, download=True, size=28, transform=split_transform)
    for split_name, split_transform in (
        ('train', train_transform),
        ('val', eval_transform),
        ('test', eval_transform),
    )
)

In [None]:
BATCH_SIZE = 64

# One loader per split. Only the training loader shuffles; validation and
# test keep a fixed order so evaluation is consistent.
train_dataloader, val_dataloader, test_dataloader = (
    DataLoader(dataset=split_data, batch_size=BATCH_SIZE, shuffle=reshuffle)
    for split_data, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print("DataLoaders are ready!")

SIMPLE

In [None]:
class SimpleCNN(nn.Module):
    """
    Three-block convolutional baseline for 28x28 RGB images.

    Every block is Conv(3x3, padding=1) -> ReLU -> MaxPool(2x2, stride=2).
    A 3x3 convolution with padding 1 keeps the spatial size
    ((n - 3 + 2 * 1) / 1 + 1 = n) and each pooling halves it, rounding down:

        input    [Batch,   3, 28, 28]
        block 1  [Batch,  32, 14, 14]
        block 2  [Batch,  64,  7,  7]
        block 3  [Batch, 128,  3,  3]   (7 / 2 = 3.5 is floored to 3)

    The result is flattened to 128 * 3 * 3 = 1152 features and mapped to the
    class logits by a single linear layer, which is therefore sized for
    28x28 inputs.

    Args:
        num_classes (int): Number of output logits.
    """

    def __init__(self, num_classes=8):
        super().__init__()

        # Block 1: 3 input channels (RGB) -> 32 feature maps
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)

        # The same parameter-free pooling layer is reused after every block
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 2: 32 -> 64 feature maps
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        # Block 3: 64 -> 128 feature maps
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        # Classifier on the flattened [128, 3, 3] feature map
        self.flatten_dim = 128 * 3 * 3  # = 1152
        self.fc = nn.Linear(self.flatten_dim, num_classes)

    def forward(self, x):
        # Block 1
        x = self.pool(F.relu(self.conv1(x)))

        # Block 2
        x = self.pool(F.relu(self.conv2(x)))

        # Block 3
        x = self.pool(F.relu(self.conv3(x)))

        # Keep the batch axis, flatten everything else for the linear layer
        x = x.flatten(1)

        # Classification logits
        return self.fc(x)

# Transfer the model to GPU if available, otherwise to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed before building the model: the layers draw their initial weights from
# the global RNG at construction time, so seeding only in the training cell
# leaves the initialization irreproducible.
torch.manual_seed(42)
model = SimpleCNN(num_classes=8).to(device)
print(model)

In [None]:
# Training configuration for the baseline CNN

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 50            # number of passes over the training set
learning_rate = 1e-3   # Adam step size

# Optimizer over all model parameters, and the classification loss
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train and keep the per-epoch metrics
history = run_training(
    model=model,
    train_loader=train_dataloader,
    val_loader=val_dataloader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)

# Class names ordered by label id 0..7, used for the plots and the report
class_names = [labels_map[str(label_id)] for label_id in range(8)]

# Plot the curves and report the test-set results
evaluate_and_visualize(
    model=model,
    test_loader=test_dataloader,
    criterion=criterion,
    device=device,
    history=history,
    class_names=class_names,
)

BATCH NORMALIZATION

In [None]:
class CNNWithBatch(nn.Module):
    """
    Three-block CNN with Batch Normalization for 28x28 RGB images.

    Every block is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2),
    with 32, 64 and 128 output channels. The pooled [128, 3, 3] feature map is
    flattened to 1152 features and fed to one linear layer.

    Args:
        num_classes (int): Number of output logits.
    """

    def __init__(self, num_classes=8):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)  # normalizes the output of conv1
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # reused after every block

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)  # normalizes the output of conv2

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)  # normalizes the output of conv3

        self.flatten_dim = 128 * 3 * 3  # = 1152
        self.fc = nn.Linear(self.flatten_dim, num_classes)

    def forward(self, x):
        # Block 1
        x = self.pool(F.relu(self.bn1(self.conv1(x))))

        # Block 2
        x = self.pool(F.relu(self.bn2(self.conv2(x))))

        # Block 3
        x = self.pool(F.relu(self.bn3(self.conv3(x))))

        # Keep the batch axis, flatten everything else for the linear layer
        x = x.flatten(1)

        # Classification logits
        return self.fc(x)

# Transfer the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed before building the model: the layers draw their initial weights from
# the global RNG at construction time, so seeding only in the training cell
# leaves the initialization irreproducible.
torch.manual_seed(42)
model_with_batch = CNNWithBatch(num_classes=8).to(device)
print(model_with_batch)

In [None]:
# Training configuration for the Batch Normalization CNN

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 45            # number of passes over the training set
learning_rate = 5e-4   # Adam step size

# Optimizer over all model parameters, and the classification loss
optimizer = torch.optim.Adam(model_with_batch.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train and keep the per-epoch metrics
history = run_training(
    model=model_with_batch,
    train_loader=train_dataloader,
    val_loader=val_dataloader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)
# Class names ordered by label id 0..7
class_names = [labels_map[str(label_id)] for label_id in range(8)]
# Plot the curves and report the test-set results
evaluate_and_visualize(
    model=model_with_batch,
    test_loader=test_dataloader,
    criterion=criterion,
    device=device,
    history=history,
    class_names=class_names,
)

LAYER NORMALIZATION

In [None]:
class CNNWithLayerNorm(nn.Module):
    """
    Three-block CNN with Layer Normalization for 28x28 RGB images.

    Every block is Conv(3x3, padding=1) -> ReLU -> MaxPool(2x2) -> LayerNorm.
    Each LayerNorm is built with the full (C, H, W) shape of the pooled
    feature map it normalizes, which ties the model to 28x28 inputs:

        block 1  [Batch,  32, 14, 14]
        block 2  [Batch,  64,  7,  7]
        block 3  [Batch, 128,  3,  3]

    The final map is flattened to 128 * 3 * 3 features and fed to one linear
    layer.

    Args:
        num_classes (int): Number of output logits.
    """

    def __init__(self, num_classes=8):
        super().__init__()

        # Block 1 (input: [Batch, 3, 28, 28])
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # reused after every block
        self.ln1 = nn.LayerNorm([32, 14, 14])

        # Block 2
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.ln2 = nn.LayerNorm([64, 7, 7])

        # Block 3
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.ln3 = nn.LayerNorm([128, 3, 3])

        # Flatten and fully connected classifier
        self.flatten_dim = 128 * 3 * 3
        self.fc = nn.Linear(self.flatten_dim, num_classes)

    def forward(self, x):
        # Block 1: Layer Norm is applied at the end of the block
        x = self.ln1(self.pool(F.relu(self.conv1(x))))

        # Block 2
        x = self.ln2(self.pool(F.relu(self.conv2(x))))

        # Block 3
        x = self.ln3(self.pool(F.relu(self.conv3(x))))

        # Flatten and classify
        x = x.flatten(1)
        return self.fc(x)

# Transfer the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed before building the model: the layers draw their initial weights from
# the global RNG at construction time, so seeding only in the training cell
# leaves the initialization irreproducible.
torch.manual_seed(42)
# Initialize model
model_ln = CNNWithLayerNorm(num_classes=8).to(device)

In [None]:
# Training configuration for the Layer Normalization CNN

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 45            # number of passes over the training set
learning_rate = 5e-4   # Adam step size

# Optimizer over all model parameters, and the classification loss
optimizer = torch.optim.Adam(model_ln.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train and keep the per-epoch metrics
history_ln = run_training(
    model=model_ln,
    train_loader=train_dataloader,
    val_loader=val_dataloader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)
class_names = [labels_map[str(label_id)] for label_id in range(8)]
# Plot the curves and report the test-set results
evaluate_and_visualize(
    model=model_ln,
    test_loader=test_dataloader,
    criterion=criterion,
    device=device,
    history=history_ln,
    class_names=class_names,
)

DROPOUT

In [None]:
class CNNWithDropout(nn.Module):
    """
    Batch-normalized three-block CNN with dropout in front of the classifier.

    Every block is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> MaxPool(2x2);
    for a 28x28 input the spatial size goes 28 -> 14 -> 7 -> 3. The flattened
    128 * 3 * 3 features pass through dropout and then one linear layer.

    Args:
        num_classes (int): Number of output logits.
        drop_percent (float): Dropout probability used before the linear layer.
    """

    def __init__(self, num_classes=8, drop_percent=0.5):
        super().__init__()

        # Stage 1: 28x28 -> 14x14 after pooling
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        # Stage 2: 14x14 -> 7x7 after pooling
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        # Stage 3: 7x7 -> 3x3 after pooling
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        # Pooling layer shared by the three stages
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Regularization applied to the flattened features
        self.dropout = nn.Dropout(p=drop_percent)

        # Linear classifier on the flattened feature map
        self.flatten_dim = 128 * 3 * 3
        self.fc = nn.Linear(self.flatten_dim, num_classes)

    def forward(self, x):
        # The three stages share one structure, so run them in a loop
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        # Flatten, drop features, then classify
        features = self.dropout(x.flatten(1))
        return self.fc(features)

In [None]:
# Dropout sweep: train and evaluate one fresh CNNWithDropout per rate.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seeded once, before the whole sweep
torch.manual_seed(42)
EPOCHS = 35            # passes over the training set, per rate
learning_rate = 1e-3   # Adam step size

# Dropout probabilities to compare
dropout_rates = [0.2, 0.5, 0.7]

for dp in dropout_rates:
    print(f"Training with Dropout: {dp}")

    # Every rate starts from a newly initialized model with its own optimizer
    model_with_dropout = CNNWithDropout(num_classes=8, drop_percent=dp).to(device)
    optimizer = torch.optim.Adam(model_with_dropout.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    history = run_training(
        model=model_with_dropout,
        train_loader=train_dataloader,
        val_loader=val_dataloader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        epochs=EPOCHS,
    )
    class_names = [labels_map[str(label_id)] for label_id in range(8)]
    evaluate_and_visualize(
        model=model_with_dropout,
        test_loader=test_dataloader,
        criterion=criterion,
        device=device,
        history=history,
        class_names=class_names,
    )

WEIGHT DECAY

In [None]:
# Weight-decay sweep: train and evaluate one fresh model per decay value.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seeded once, before the whole sweep
torch.manual_seed(42)
EPOCHS = 35            # passes over the training set, per value
learning_rate = 1e-4   # Adam step size

# Weight-decay coefficients to compare
weight_decay_values = [1e-4, 1e-3, 1e-2]

for wd in weight_decay_values:
    print(f"Training with Weight Decay: {wd}")

    # Dropout is held at 0.2 so that only the weight decay varies
    model_with_weight_decay = CNNWithDropout(num_classes=8, drop_percent=0.2).to(device)

    # Weight decay is configured on the optimizer
    adam_settings = {'lr': learning_rate, 'weight_decay': wd}
    optimizer = torch.optim.Adam(model_with_weight_decay.parameters(), **adam_settings)

    criterion = nn.CrossEntropyLoss()

    history = run_training(
        model=model_with_weight_decay,
        train_loader=train_dataloader,
        val_loader=val_dataloader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        epochs=EPOCHS,
    )
    class_names = [labels_map[str(label_id)] for label_id in range(8)]
    evaluate_and_visualize(
        model=model_with_weight_decay,
        test_loader=test_dataloader,
        criterion=criterion,
        device=device,
        history=history,
        class_names=class_names,
    )

MIX IT UP

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# 1. CNN combining Layer Normalization and Dropout
class CNNLayerNormDropout(nn.Module):
    """
    Three-block CNN with Layer Normalization and dropout before the classifier.

    Every block is Conv(3x3, padding=1) -> ReLU -> MaxPool(2x2) -> LayerNorm.
    The LayerNorm shapes ([32, 14, 14], [64, 7, 7], [128, 3, 3]) match the
    pooled feature maps of a 28x28 input. The flattened 128 * 3 * 3 features
    pass through dropout and then one linear layer.

    Args:
        num_classes (int): Number of output logits.
        drop_percent (float): Dropout probability used before the linear layer.
    """

    def __init__(self, num_classes=8, drop_percent=0.2):
        super().__init__()

        # Stage 1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.ln1 = nn.LayerNorm([32, 14, 14])

        # Stage 2
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.ln2 = nn.LayerNorm([64, 7, 7])

        # Stage 3
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.ln3 = nn.LayerNorm([128, 3, 3])

        # Classifier head: flatten -> dropout -> linear
        self.flatten_dim = 128 * 3 * 3
        self.fc = nn.Linear(self.flatten_dim, num_classes)
        self.dropout = nn.Dropout(p=drop_percent)

    def forward(self, x):
        # Each stage applies conv -> ReLU -> pool -> LayerNorm
        stages = (
            (self.conv1, self.pool1, self.ln1),
            (self.conv2, self.pool2, self.ln2),
            (self.conv3, self.pool3, self.ln3),
        )
        for conv, pool, norm in stages:
            x = norm(pool(F.relu(conv(x))))

        # Flatten, drop features, then classify
        features = self.dropout(x.flatten(1))
        return self.fc(features)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)  # seeded once, before the whole sweep

# Settings held fixed while the learning rate varies
EPOCHS = 40
BEST_WD = 1e-4    # weight decay chosen from the earlier experiments
BEST_DROP = 0.2   # dropout chosen from the earlier experiments

# Learning rates to compare
learning_rate_values = [1e-4, 1e-3, 5e-3]

# --- Experiment loop: one fresh model per learning rate ---
for lr in learning_rate_values:
    print(f" Training with Learning Rate: {lr} ")
    print(f" (Config: LayerNorm, Drop={BEST_DROP}, WD={BEST_WD})")

    # Newly initialized model for this learning rate
    model = CNNLayerNormDropout(num_classes=8, drop_percent=BEST_DROP).to(device)

    # Adam with the current learning rate and the fixed weight decay
    adam_settings = {'lr': lr, 'weight_decay': BEST_WD}
    optimizer = torch.optim.Adam(model.parameters(), **adam_settings)

    criterion = nn.CrossEntropyLoss()

    # Training
    history = run_training(
        model=model,
        train_loader=train_dataloader,
        val_loader=val_dataloader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        epochs=EPOCHS,
    )

    # Evaluation
    print(f"Evaluating LR: {lr}...")
    class_names = [labels_map[str(label_id)] for label_id in range(8)]
    evaluate_and_visualize(
        model=model,
        test_loader=test_dataloader,
        criterion=criterion,
        device=device,
        history=history,
        class_names=class_names,
    )

# 2. Transfer Learning με CNN

Feature Extraction

In [None]:
BATCH_SIZE_FE = 64

# Loaders for the feature-extraction stage; only the training split is shuffled.
train_dataloader_fe, val_dataloader_fe, test_dataloader_fe = (
    DataLoader(dataset=split_data, batch_size=BATCH_SIZE_FE, shuffle=reshuffle)
    for split_data, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print("DataLoaders are ready!")

In [None]:
# Load ResNet18 with its default pre-trained weights
feature_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze every parameter of the pre-trained network
for param in feature_model.parameters():
    param.requires_grad = False

# Seed before creating the new head: nn.Linear draws its initial weights from
# the global RNG at construction time, so seeding only in the training cell
# leaves the head's initialization irreproducible.
torch.manual_seed(42)

# Replace the classifier head with a new layer for the 8 classes
num_features = feature_model.fc.in_features  # input width of the original FC layer
feature_model.fc = nn.Linear(num_features, 8)

# Make sure the new head is trainable
for param in feature_model.fc.parameters():
    param.requires_grad = True

# NOTE(review): requires_grad=False only stops gradient updates. train_loop
# still calls model.train(), so any BatchNorm layers in the frozen part keep
# updating their running statistics — confirm this is intended.

# Transfer the model to the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
feature_model.to(device)

In [None]:
# Training configuration for the feature-extraction stage

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 20            # number of passes over the training set
learning_rate = 1e-3   # Adam step size

# Only the parameters of the new head (fc) are handed to the optimizer
optimizer = torch.optim.Adam(feature_model.fc.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training (Feature Extraction)
print("Starting Feature Extraction")
history_fe = run_training(
    model=feature_model,
    train_loader=train_dataloader_fe,
    val_loader=val_dataloader_fe,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)

class_names = [labels_map[str(label_id)] for label_id in range(8)]
evaluate_and_visualize(
    model=feature_model,
    test_loader=test_dataloader_fe,
    criterion=criterion,
    device=device,
    history=history_fe,
    class_names=class_names,
)

Fine Tuning

In [None]:
BATCH_SIZE_FT = 32  # fine-tuning uses a smaller batch than feature extraction

# Loaders for the fine-tuning stage; only the training split is shuffled.
train_dataloader_fine, val_dataloader_fine, test_dataloader_fine = (
    DataLoader(dataset=split_data, batch_size=BATCH_SIZE_FT, shuffle=reshuffle)
    for split_data, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print("DataLoaders are ready!")

In [None]:
# Fine-tuning starts from the model produced by the feature-extraction stage
# (same object, second name).
transfer_model = feature_model

# Start from a fully frozen network ...
for param in transfer_model.parameters():
    param.requires_grad = False

# ... then re-enable training for the last two residual stages (layer3,
# layer4) and for the classifier head.
for trainable_part in (transfer_model.layer3, transfer_model.layer4, transfer_model.fc):
    for param in trainable_part.parameters():
        param.requires_grad = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transfer_model.to(device)

In [None]:
# Training configuration for the fine-tuning stage

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 15            # fewer epochs: the model already went through feature extraction
learning_rate = 1e-4   # Adam step size

# Hand the optimizer only the parameters that are currently trainable
trainable_params = [p for p in transfer_model.parameters() if p.requires_grad]
optimizer_fine = torch.optim.Adam(trainable_params, lr=learning_rate)
criterion = nn.CrossEntropyLoss()

print("Starting Fine Tuning")
history_tr = run_training(
    model=transfer_model,
    train_loader=train_dataloader_fine,
    val_loader=val_dataloader_fine,
    optimizer=optimizer_fine,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)

class_names = [labels_map[str(label_id)] for label_id in range(8)]
evaluate_and_visualize(
    model=transfer_model,
    test_loader=test_dataloader_fine,
    criterion=criterion,
    device=device,
    history=history_tr,
    class_names=class_names,
)

RESIZE BEFORE

In [None]:
# Normalization with the ImageNet channel means and standard deviations,
# shared by the training and evaluation pipelines.
imagenet_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])

# 1. Training pipeline: resize to 224, light random augmentation, tensor
#    conversion, normalization.
train_transform_new = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(p=0.5),  # mirror half of the images
    transforms.RandomRotation(degrees=10),   # rotate by a random angle in [-10, 10] degrees
    transforms.ToTensor(),
    imagenet_normalize,
])

# 2. Validation / test pipeline: same resize and normalization, no augmentation.
eval_transform_new = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    imagenet_normalize,
])

# 3. Re-create the splits. The data is still loaded at 28x28; the transform
#    performs the resize.
train_dataset, val_dataset, test_dataset = (
    BloodMNIST(split=split_name, download=True, size=28, transform=split_transform)
    for split_name, split_transform in (
        ('train', train_transform_new),
        ('val', eval_transform_new),
        ('test', eval_transform_new),
    )
)

In [None]:
BATCH_SIZE_FE = 64

# Feature-extraction loaders over the resized datasets; only training is shuffled.
train_dataloader_fe2, val_dataloader_fe2, test_dataloader_fe2 = (
    DataLoader(dataset=split_data, batch_size=BATCH_SIZE_FE, shuffle=reshuffle)
    for split_data, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print("DataLoaders are ready!")

In [None]:
# Load a fresh ResNet18 with its default pre-trained weights
feature_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze every parameter of the pre-trained network
for param in feature_model.parameters():
    param.requires_grad = False

# Seed before creating the new head: nn.Linear draws its initial weights from
# the global RNG at construction time, so seeding only in the training cell
# leaves the head's initialization irreproducible.
torch.manual_seed(42)

# Replace the classifier head with a new layer for the 8 classes
num_features = feature_model.fc.in_features
feature_model.fc = nn.Linear(num_features, 8)

# Make sure the new head is trainable
for param in feature_model.fc.parameters():
    param.requires_grad = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
feature_model.to(device)

In [None]:
# Training configuration for feature extraction on the resized images

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 20            # number of passes over the training set
learning_rate = 1e-3   # Adam step size

# Only the parameters of the new head (fc) are handed to the optimizer
optimizer = torch.optim.Adam(feature_model.fc.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training (Feature Extraction)
print("Starting Feature Extraction")
history_fe = run_training(
    model=feature_model,
    train_loader=train_dataloader_fe2,
    val_loader=val_dataloader_fe2,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)

class_names = [labels_map[str(label_id)] for label_id in range(8)]
evaluate_and_visualize(
    model=feature_model,
    test_loader=test_dataloader_fe2,
    criterion=criterion,
    device=device,
    history=history_fe,
    class_names=class_names,
)

In [None]:
BATCH_SIZE_FT = 32

# Fine-tuning loaders over the resized datasets; only training is shuffled.
train_dataloader_fine2, val_dataloader_fine2, test_dataloader_fine2 = (
    DataLoader(dataset=split_data, batch_size=BATCH_SIZE_FT, shuffle=reshuffle)
    for split_data, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print("DataLoaders are ready!")

In [None]:
# Fine-tuning starts from the model produced by the feature-extraction stage
# (same object, second name).
transfer_model = feature_model

# Start from a fully frozen network ...
for param in transfer_model.parameters():
    param.requires_grad = False

# ... then re-enable training for the last two residual stages (layer3,
# layer4) and for the classifier head.
for trainable_part in (transfer_model.layer3, transfer_model.layer4, transfer_model.fc):
    for param in trainable_part.parameters():
        param.requires_grad = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transfer_model.to(device)

In [None]:
# Training configuration for fine-tuning on the resized images

# Fix the global RNG state so repeated runs draw the same random numbers
torch.manual_seed(42)
EPOCHS = 15            # number of passes over the training set
learning_rate = 1e-4   # Adam step size

# Hand the optimizer only the parameters that are currently trainable
trainable_params = [p for p in transfer_model.parameters() if p.requires_grad]
optimizer_fine = torch.optim.Adam(trainable_params, lr=learning_rate)
criterion = nn.CrossEntropyLoss()

print("Starting Fine Tuning")
history_tr = run_training(
    model=transfer_model,
    train_loader=train_dataloader_fine2,
    val_loader=val_dataloader_fine2,
    optimizer=optimizer_fine,
    criterion=criterion,
    device=device,
    epochs=EPOCHS,
)

class_names = [labels_map[str(label_id)] for label_id in range(8)]
evaluate_and_visualize(
    model=transfer_model,
    test_loader=test_dataloader_fine2,
    criterion=criterion,
    device=device,
    history=history_tr,
    class_names=class_names,
)

# 3. Μικρός Vision Transformer (DeiT)


In [None]:
# Independent run for easy use
import timm # Import the timm library for Vision Transformers
from medmnist import BloodMNIST

# Build the three BloodMNIST splits in train/val/test order.
# 'size=28' selects the 28x28 image resolution; 'download=True' is passed for every split.
train_dataset, val_dataset, test_dataset = (
    BloodMNIST(split=split_name, download=True, size=28)
    for split_name in ('train', 'val', 'test')
)

In [None]:
# Shared preprocessing constants.
# Normalization uses the ImageNet channel statistics because the DeiT weights
# loaded later are pre-trained on ImageNet.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
# Explicit (height, width): an int passed to Resize only matches the *shorter* edge,
# which yields 224x224 solely for square inputs. The tuple enforces the fixed input
# size DeiT needs regardless of the source aspect ratio.
DEIT_INPUT_SIZE = (224, 224)

# 1. Training Transformation (with Data Augmentation)
train_transform_new = transforms.Compose([
    transforms.Resize(DEIT_INPUT_SIZE),      # Resize to 224x224 pixels, necessary for DeiT
    transforms.RandomHorizontalFlip(p=0.5),  # 50% probability for horizontal flip
    transforms.RandomRotation(degrees=10),   # Random rotation +/- 10 degrees
    transforms.ToTensor(),                   # Convert to Tensor
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# 2. Validation & Test Transformation (WITHOUT Augmentation)
# Same resize and normalization as training, but no random operations,
# so evaluation is deterministic.
eval_transform_new = transforms.Compose([
    transforms.Resize(DEIT_INPUT_SIZE),      # Resize to 224x224 pixels
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# 3. Define Datasets with appropriate transformations
# Only the training split receives the augmenting transform.
train_dataset = BloodMNIST(
    split='train',
    download=True,
    size=28,
    transform=train_transform_new
)

val_dataset = BloodMNIST(
    split='val',
    download=True,
    size=28,
    transform=eval_transform_new
)

test_dataset = BloodMNIST(
    split='test',
    download=True,
    size=28,
    transform=eval_transform_new
)

In [None]:
# Smaller batch size for ViT
BATCH_SIZE_TIM = 32

# One loader per split, created in train/val/test order; only the training split is shuffled.
train_dataloader_tim, val_dataloader_tim, test_dataloader_tim = (
    DataLoader(dataset=split, batch_size=BATCH_SIZE_TIM, shuffle=reshuffle)
    for split, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print("DataLoaders are ready!")

In [None]:
# Load pre-trained DeiT (Tiny version); pretrained=True loads weights from ImageNet
vit_model = timm.create_model('deit_tiny_patch16_224', pretrained=True)

# "Freeze" all parameters of the base model
vit_model.requires_grad_(False)

# Adapt Classifier Head
# The replacement layer is created after freezing, so its parameters keep the
# default requires_grad=True and stay trainable.
num_in_features = vit_model.head.in_features    # Number of incoming features to the head
vit_model.head = nn.Linear(num_in_features, 8)  # 8 classes for BloodMNIST

# Select the device here instead of relying on a `device` variable left over from an
# earlier section, so this section can be run on its own (the other sections do the same).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transfer the model to the device
vit_model.to(device)

In [None]:
# Hyperparameters for the head-only (feature extraction) phase
EPOCHS_FE = 15  # Fewer epochs for feature extraction
LR_FE = 1e-3

criterion = nn.CrossEntropyLoss()
# Only the classifier head's parameters are handed to the optimizer
optimizer_fe = torch.optim.Adam(vit_model.head.parameters(), lr=LR_FE)

print(f"Starting Feature Extraction (Only Head) for {EPOCHS_FE} epochs...")
history_vit_fe = run_training(
    vit_model,
    train_dataloader_tim,
    val_dataloader_tim,
    optimizer_fe,
    criterion,
    device,
    EPOCHS_FE,
)

# Class names looked up in labels_map by the string keys "0".."7", then test-set evaluation
class_names = [labels_map[str(class_idx)] for class_idx in range(8)]
evaluate_and_visualize(
    vit_model,
    test_dataloader_tim,
    criterion,
    device,
    history_vit_fe,
    class_names,
)

In [None]:
# Fine-tuning hyperparameters
BATCH_SIZE_FT = 16
EPOCHS_FT = 10
LR_FT = 1e-4

# Loaders for the fine-tuning phase, created in train/val/test order; only training is shuffled.
train_loader_ft, val_loader_ft, test_loader_ft = (
    DataLoader(dataset=split, batch_size=BATCH_SIZE_FT, shuffle=reshuffle)
    for split, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

# Re-enable gradients for the last 2 transformer blocks, the norm layer and the head
for trainable_part in (vit_model.blocks[-2:], vit_model.norm, vit_model.head):
    trainable_part.requires_grad_(True)

# The optimizer only receives the parameters that are currently trainable
optimizer_ft = torch.optim.Adam(
    [weight for weight in vit_model.parameters() if weight.requires_grad],
    lr=LR_FT,
)
criterion = nn.CrossEntropyLoss()
history_vit_ft = run_training(
    vit_model,
    train_loader_ft,
    val_loader_ft,
    optimizer_ft,
    criterion,
    device,
    EPOCHS_FT,
)

# Evaluation on the test loader
class_names = [labels_map[str(class_idx)] for class_idx in range(8)]
evaluate_and_visualize(
    vit_model,
    test_loader_ft,
    criterion,
    device,
    history_vit_ft,
    class_names,
)

# Βελτιώσεις

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Model Definition (CNN with Dropout & Batch Norm)
class CNNWithDropout(nn.Module):
    """Three-stage convolutional classifier with batch normalization and dropout.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> 2x2 max-pool,
    with 32, 64 and 128 output channels respectively. The pooled feature map is
    flattened, passed through dropout and projected to class logits by a single
    linear layer.

    The linear layer is sized for a 3x3 final feature map, i.e. for 3-channel
    28x28 inputs (28 -> 14 -> 7 -> 3 after the three pools).

    Args:
        num_classes: Number of output logits.
        drop_percent: Dropout probability applied before the linear layer.
    """

    def __init__(self, num_classes=8, drop_percent=0.5):
        super().__init__()
        # Layers are registered in a fixed order (conv/bn pairs, pool, dropout, fc)
        # so that state_dict keys and seeded weight initialisation stay stable.

        # Stage 1: 28x28 -> 14x14 after pooling
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        # Stage 2: 14x14 -> 7x7 after pooling
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        # Stage 3: 7x7 -> 3x3 after pooling
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        # Parameter-free layers shared by all stages / the head
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(drop_percent)

        # 128 channels on a 3x3 map
        self.flatten_dim = 128 * 3 * 3
        self.fc = nn.Linear(self.flatten_dim, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))
        features = torch.flatten(x, 1)
        return self.fc(self.dropout(features))

# Advanced experiment: CNNWithDropout trained with a class-weighted, label-smoothed
# loss, Adam with weight decay and a cosine-annealed learning rate.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)  # seed set before the model is built, so weight init is reproducible

EPOCHS = 40
LR = 1e-3
WEIGHT_DECAY = 1e-4
DROP_PERCENT = 0.2  # Optimal values (as marked by the author)

# Model Initialization
model = CNNWithDropout(num_classes=8, drop_percent=DROP_PERCENT).to(device)

# Calculate weights for Class Imbalance
# Hard-coded per-class sample counts (presumably the BloodMNIST training split -
# verify against the dataset if the split changes).
class_counts = torch.tensor([852, 2181, 1085, 2026, 849, 993, 2330, 1643], dtype=torch.float)
weights = 1.0 / class_counts      # Inverse frequency: rarer classes get larger weights
weights = weights / weights.sum() # Normalize so the weights sum to 1
weights = weights.to(device)

# Define Loss with Label Smoothing
criterion = nn.CrossEntropyLoss(weight=weights, label_smoothing=0.1)

# Optimizer with Weight Decay
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

# Learning Rate Scheduler (Cosine Annealing over the whole run)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

print("Starting Advanced Experiment: Weighted Loss + Scheduler + WD")

history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

for epoch in range(EPOCHS):
    # Learning rate in effect for THIS epoch. It must be read before scheduler.step();
    # reading it afterwards would log the next epoch's rate (and the annealed
    # minimum on the final epoch).
    current_lr = optimizer.param_groups[0]['lr']

    # Training Step
    train_loss, train_acc = train_loop(model, train_dataloader, optimizer, criterion, device)

    # Validation Step
    val_loss, val_acc, _, _ = test_loop(model, val_dataloader, criterion, device)

    # Scheduler Step (once per epoch, after the optimizer updates of this epoch)
    scheduler.step()

    # Logging
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | LR: {current_lr:.6f}")

# Final Evaluation
# NOTE(review): the same weighted, label-smoothed criterion is passed here, so any loss
# reported for the test set is presumably not directly comparable with runs that
# used a plain CrossEntropyLoss - confirm in evaluate_and_visualize.
print("Evaluating on Test Set...")
class_names = [labels_map[str(i)] for i in range(8)]
evaluate_and_visualize(model, test_dataloader, criterion, device, history, class_names)