**Neural Network for Duck Bone Image Classification Based on Typology**

**Author**: Elisa Paperini, Nevio Dubbini

**License**: CC-BY-SA 4.0

**Year**: 2024 (last version)

**Description**

This script, executed on Google Colab, uses PyTorch to fine-tune a pre-trained VGG16 model with IMAGENET1K_V1 weights for a 5-class classification task. A custom fully connected layer replaces the original classifier's last layer.

The model's performance is evaluated using a training-validation-test set split, balancing computational cost and accuracy. Hyperparameters are optimized via grid search. The Adam optimizer and the CrossEntropyLoss loss function are employed. Each grid-search configuration is trained for up to 100 epochs and the final training run for up to 70 epochs; early stopping may end a run sooner.


### Set the environment

In [None]:
# Load libraries

# OS and file handling libraries
import os
import glob
import pathlib
from PIL import Image
import regex as re

# Numerical computing and data manipulation
import numpy as np
import itertools # Useful for iterating over multiple variables
import matplotlib.pyplot as plt # Plotting utilities

# PyTorch for deep learning
import torch
import torch.nn as nn # Neural network modules
import torch.nn.functional as F # Functional API for layers and activation functions
from torch.utils.data import Dataset, DataLoader, Subset # Data loading utilities
from torch.optim import Adam, lr_scheduler # Optimizer and learning rate scheduler

# Torchvision for image processing and pre-trained models
import torchvision
from torchvision import datasets, models, transforms # Datasets, pre-trained models, and data transformations
import torchvision.utils # Utility functions for visualization
import torchvision.datasets as dsets # Alternative dataset module
from torchvision.transforms import v2  # Advanced image transformations (newer API in torchvision)

# Computer vision utilities (OpenCV)
import cv2 # Used for image processing

# Scikit-learn for model evaluation metrics and k-fold cross-validation
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import KFold

# Google Colab-specific library for mounting Google Drive (needed for loading data from Drive)
from google.colab import drive # Only required when using Google Colab

# Library for extracting .zip archives
!pip install pyunpack  # Install pyunpack if not already installed
from pyunpack import Archive  # Used for extracting compressed files

Collecting pyunpack
  Downloading pyunpack-0.3-py2.py3-none-any.whl.metadata (863 bytes)
Collecting easyprocess (from pyunpack)
  Downloading EasyProcess-1.1-py3-none-any.whl.metadata (855 bytes)
Collecting entrypoint2 (from pyunpack)
  Downloading entrypoint2-1.1-py2.py3-none-any.whl.metadata (1.0 kB)
Downloading pyunpack-0.3-py2.py3-none-any.whl (4.1 kB)
Downloading EasyProcess-1.1-py3-none-any.whl (8.7 kB)
Downloading entrypoint2-1.1-py2.py3-none-any.whl (9.9 kB)
Installing collected packages: entrypoint2, easyprocess, pyunpack
Successfully installed easyprocess-1.1 entrypoint2-1.1 pyunpack-0.3


In [None]:
# Pick the compute device: the CUDA GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Ask PyTorch to release any GPU memory it is still caching from earlier work.
torch.cuda.empty_cache()
print(device)

cpu


In [None]:
# Import Colab's Google Drive helper. It is also imported in the setup cell;
# repeating it here lets this cell run on its own.
from google.colab import drive
# Mount Google Drive at '/content/drive' so the dataset archive stored there
# can be read through the regular file system.
drive.mount('/content/drive')

In [None]:
# Unpack the dataset archive from Google Drive (replace with your path) into '/tmp',
# the directory the following cells read the images from.
# '/tmp' is deleted when the Colab session ends, so this cell must be re-run in every session.
bones_archive = Archive('/content/drive/MyDrive/bones_detection_tipology.zip')
bones_archive.extractall('/tmp')

### Data preprocessing

In [None]:
# Preprocessing and light augmentation applied to training images.
# The random rotation and random-strength blur are augmentation and therefore
# belong to the training pipeline only.
train_transform = v2.Compose([
    v2.Resize(size=(224, 224), antialias=True),  # Resize to the 224x224 input size, with antialiasing
    v2.RandomRotation(degrees=(-2, 2)),  # Randomly rotate by an angle in [-2, 2] degrees
    v2.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 0.5)),  # Blur with a sigma drawn from [0.1, 0.5]
    v2.ToImage(),  # Convert the PIL image to a tensor image (replaces the deprecated v2.ToTensor)
    v2.ToDtype(torch.float32, scale=True),  # Convert to float32 and rescale pixel values from 0-255 to 0-1
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize with the given per-channel mean/std
])

# Preprocessing applied to validation (and test) images.
# This pipeline is deterministic: no random rotation or blur, so the same image
# always yields the same tensor and the reported metrics are reproducible.
val_transform = v2.Compose([
    v2.Resize(size=(224, 224), antialias=True),  # Resize to the 224x224 input size, with antialiasing
    v2.ToImage(),  # Convert the PIL image to a tensor image (replaces the deprecated v2.ToTensor)
    v2.ToDtype(torch.float32, scale=True),  # Convert to float32 and rescale pixel values from 0-255 to 0-1
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Same normalization as training
])



In [None]:
# Locations of the extracted images (replace with your paths).
data_dir = '/tmp'  # Temporary directory the archive was extracted into
train_path, val_path = (
    os.path.join(data_dir, split_dir)
    for split_dir in ('bones_train', 'bones_validation')
)

# ImageFolder reads the images and takes each label from the sub-folder name;
# each split gets its own preprocessing pipeline.
train_data = dsets.ImageFolder(train_path, transform=train_transform)
val_data = dsets.ImageFolder(val_path, transform=val_transform)

# Both loaders share the same settings: small shuffled batches served by two
# worker processes that are kept alive between epochs.
batch_size_real = 8  # Batch size
loader_options = dict(
    batch_size=batch_size_real,
    shuffle=True,
    num_workers=2,
    persistent_workers=True,
)
train_loader = torch.utils.data.DataLoader(train_data, **loader_options)  # Training data loader
val_loader = torch.utils.data.DataLoader(val_data, **loader_options)  # Validation data loader

# Class names as discovered by ImageFolder in each split
class_names_tr, class_names_va = train_data.classes, val_data.classes

In [None]:
# Summarise the validation dataset: sample count and dataset class,
# followed by a separator line.
print(
    'Validation Dataset:',
    f'Total samples: {len(val_loader.dataset)}',
    f'Dataset type: {type(val_loader.dataset)}',
    '------',
    sep='\n',
)

# Same summary for the training dataset.
print(
    'Training Dataset:',
    f'Total samples: {len(train_loader.dataset)}',
    f'Dataset type: {type(train_loader.dataset)}',
    sep='\n',
)

Validation
Dataset ImageFolder
    Number of datapoints: 367
    Root location: /tmp/bones_validation
    StandardTransform
Transform: Compose(
                 ToImage()
                 Resize(size=[224, 224], interpolation=InterpolationMode.BILINEAR, antialias=True)
                 RandomRotation(degrees=[-2.0, 2.0], interpolation=InterpolationMode.NEAREST, expand=False, fill=0)
                 GaussianBlur(kernel_size=(5, 5), sigma=[0.1, 0.5])
                 ToDtype(scale=True)
                 Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=False)
           )
------
Training
Dataset ImageFolder
    Number of datapoints: 1283
    Root location: /tmp/bones_train
    StandardTransform
Transform: Compose(
                 Resize(size=[224, 224], interpolation=InterpolationMode.BILINEAR, antialias=True)
                 RandomRotation(degrees=[-2.0, 2.0], interpolation=InterpolationMode.NEAREST, expand=False, fill=0)
                 GaussianBlur(kernel_si

In [None]:
# Count the JPG images of each split.
# The pattern means "<split>/<class folder>/<any depth>/*.jpg"; `recursive=True`
# is required for '**' to actually descend into nested folders (without it
# '**' behaves like a single '*').
train_count = len(glob.glob(os.path.join(train_path, '*', '**', '*.jpg'), recursive=True))
val_count = len(glob.glob(os.path.join(val_path, '*', '**', '*.jpg'), recursive=True))

print('Number of images in train dataset:', train_count)
print('Number of images in validation dataset:', val_count)

# Retrieve the class categories (labels) from the training folder names.
# Only directories are class folders; stray files in the training root
# (e.g. hidden OS files) must not be counted as classes.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

print('Class labels:', classes)
print('Total number of classes:', len(classes))

1283 367
['CMC', 'COR', 'HUM', 'TMT', 'TT']
5


### Define a Convolutional Neural Network

In [None]:
# Start from VGG16 with its ImageNet-trained weights.
model_ft = models.vgg16(weights='IMAGENET1K_V1')

# Freeze every pre-trained parameter (convolutional layers and the fully
# connected block alike); only the output layer created below will be trained.
model_ft.requires_grad_(False)

# Swap the last classifier layer for a new fully connected layer with
# 5 outputs, one per class of the new task. It keeps the input width of the
# layer it replaces.
n_inputs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(n_inputs, 5)

# Place the model on the selected device (CPU or GPU).
model_ft = model_ft.to(device)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:03<00:00, 158MB/s]


### Early Stop Function

In [None]:
# Helper class implementing early stopping.
class EarlyStopper:
    """
    Track the validation loss across epochs and signal when training should stop.

    Attributes:
      patience (int): Number of worsening epochs tolerated before stopping.
      min_delta (float): Margin above the best loss that a loss must exceed to count as worsening.
      counter (int): Worsening epochs counted since the last improvement.
      min_validation_loss (float): Lowest validation loss seen so far.
    """

    def __init__(self, patience=1, min_delta=0):
        """
        Set up the stopper.

        Args:
          patience (int): Number of epochs with worsening validation loss to tolerate before stopping training.
          min_delta (float): Margin added to the best validation loss; only losses above
            `best + min_delta` are counted as worsening.
        """
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0  # No worsening epochs observed yet
        self.min_validation_loss = float('inf')  # Any first loss counts as an improvement

    def early_stop(self, validation_loss):
        """
        Update the stopper with the current epoch's validation loss.

        Args:
          validation_loss (float): The validation loss of the current epoch.
        Returns:
          bool: True if training should be stopped, False otherwise.
        """
        # New best loss: remember it and start counting from zero again.
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # A loss within the tolerated margin is neither an improvement nor a
        # worsening: the counter is left untouched.
        worsening_threshold = self.min_validation_loss + self.min_delta
        if not (validation_loss > worsening_threshold):
            return False

        # Worsening epoch: stop once `patience` of them have accumulated.
        self.counter += 1
        return self.counter >= self.patience

### Hyperparameters Grid

In [None]:
# Define a grid of hyperparameters.
# The candidate lists use plural names so that the loop variables below do not
# overwrite them; otherwise re-running this cell would fail.
learning_rates = [0.001, 0.0001]  # Different learning rates to test
batch_sizes = [32, 64]  # Different batch sizes to test
weight_decays = [0.001, 0.0001]  # Different weight decay values to test
step_sizes = [3, 10]  # Step size values for the learning rate scheduler

# Initialize variables to track the best model configuration
best_accuracy = 0.0  # Highest validation accuracy achieved over the whole search
best_hyperparams = {}  # Hyperparameters that produced `best_accuracy`
num_epochs = 100  # Maximum number of epochs per configuration

# One entry per configuration:
# (lr, batch_size, num_epochs, last-epoch validation accuracy, weight_decay, step_size).
# The two extra fields are appended at the end so the first four keep their positions.
results = []

# Iterate over all possible combinations of hyperparameters
for lr, batch_size, weight_decay, step_size in itertools.product(
        learning_rates, batch_sizes, weight_decays, step_sizes):
    print(f"Training with lr={lr}, batch_size={batch_size}, weight_decay={weight_decay}, step_size={step_size}")

    # Start every configuration from a freshly initialised output layer.
    # It is the only trainable layer (all others are frozen), so resetting it
    # prevents a configuration from inheriting the training of the previous ones.
    model_ft.classifier[6].reset_parameters()

    # Create data loaders with the current batch size
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)  # Training data loader
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)  # Validation data loader

    # Fresh optimizer, loss, scheduler and early stopper for this configuration
    optimizer_ft = Adam(model_ft.classifier.parameters(), lr=lr, weight_decay=weight_decay)
    loss_function_ft = nn.CrossEntropyLoss()  # Loss function for multi-class classification
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=step_size, gamma=0.1)  # Multiply lr by 0.1 every `step_size` epochs
    # NOTE(review): with min_delta=10 the validation loss must exceed its best
    # value by more than 10 to count as worsening, which effectively disables
    # early stopping for cross-entropy losses of this size -- confirm the intended value.
    early_stopper = EarlyStopper(patience=5, min_delta=10)

    # Training loop
    for epoch in range(num_epochs):
        model_ft.train()  # Set model to training mode
        train_accuracy = 0.0
        train_loss = 0.0

        for images, labels in train_loader:  # Iterate over training batches
            images, labels = images.to(device), labels.to(device)  # Move data to GPU if available
            optimizer_ft.zero_grad()  # Reset gradients
            outputs = model_ft(images)  # Forward pass
            loss = loss_function_ft(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagate
            optimizer_ft.step()  # Update weights

            # The loss is a batch mean: weight it by the batch size so the
            # final division by the dataset size gives a per-sample average.
            train_loss += loss.item() * images.size(0)
            _, prediction = torch.max(outputs, 1)  # Get predicted class labels
            train_accuracy += int(torch.sum(prediction == labels))  # Count correct predictions

        train_accuracy /= len(train_data)  # Compute training accuracy
        train_loss /= len(train_data)  # Compute average training loss

        # Advance the learning rate schedule once per epoch, after the
        # optimizer updates; without this call `step_size` has no effect.
        exp_lr_scheduler.step()

        # Evaluate the model on validation set
        model_ft.eval()  # Set model to evaluation mode
        val_accuracy = 0.0
        val_loss = 0.0

        with torch.no_grad():  # Disable gradient computation for validation
            for images, labels in val_loader:  # Iterate over validation batches
                images, labels = images.to(device), labels.to(device)  # Move data to GPU if available
                outputs = model_ft(images)  # Forward pass
                loss = loss_function_ft(outputs, labels)  # Compute validation loss
                val_loss += loss.item() * images.size(0)  # Accumulate validation loss
                _, prediction = torch.max(outputs, 1)  # Get predicted class labels
                val_accuracy += int(torch.sum(prediction == labels))  # Count correct predictions

        val_accuracy /= len(val_data)  # Compute validation accuracy
        val_loss /= len(val_data)  # Compute average validation loss

        # Print training and validation metrics
        print(f'Epoch: {epoch} Train Accuracy: {train_accuracy} Train Loss: {train_loss} Val. Accuracy: {val_accuracy} Val. Loss: {val_loss}')

        # Save the model if it achieves the best validation accuracy so far.
        # This runs before the early-stopping check so that the epoch which
        # triggers the stop can still be checkpointed.
        if val_accuracy > best_accuracy:
            torch.save(model_ft.state_dict(), 'best_checkpoint_ft.model')  # Save the model state
            best_accuracy = val_accuracy  # Update best accuracy
            best_hyperparams = {
                'lr': lr,
                'batch_size': batch_size,
                'num_epochs': num_epochs,
                'weight_decay': weight_decay,
                'step_size': step_size,
            }  # Store the full best configuration

        # Check early stopping condition
        if early_stopper.early_stop(val_loss):
            break  # Stop training this configuration

    # Append results for the current combination of hyperparameters
    results.append((lr, batch_size, num_epochs, val_accuracy, weight_decay, step_size))

# Print the best hyperparameter configuration found
print(f'Best hyperparameters: {best_hyperparams} with validation accuracy: {best_accuracy}')

Training with lr=0.001, batch_size=32, weight_decay=0.001, step_size=3
Epoch: 0 Train Accuracy: 0.7872174590802806 Train Loss: 0.6266056299209595 Val. Accuracy: 0.9809264305177112 Val. Loss: 0.1496143341064453
Epoch: 1 Train Accuracy: 0.9431021044427124 Train Loss: 0.19932575523853302 Val. Accuracy: 0.9727520435967303 Val. Loss: 0.11325333267450333
Epoch: 2 Train Accuracy: 0.9532346063912704 Train Loss: 0.17263907194137573 Val. Accuracy: 0.9727520435967303 Val. Loss: 0.10012005269527435
Epoch: 3 Train Accuracy: 0.9540140296180826 Train Loss: 0.15043985843658447 Val. Accuracy: 0.9700272479564033 Val. Loss: 0.09694067388772964
Epoch: 4 Train Accuracy: 0.9579111457521434 Train Loss: 0.12537121772766113 Val. Accuracy: 0.989100817438692 Val. Loss: 0.05828523635864258
Epoch: 5 Train Accuracy: 0.9649259547934529 Train Loss: 0.107463039457798 Val. Accuracy: 0.9809264305177112 Val. Loss: 0.06496129184961319
Epoch: 6 Train Accuracy: 0.9633671083398285 Train Loss: 0.11636640131473541 Val. Accurac

### Training

In [None]:
# Per-epoch metric histories for the final training run (one value appended per epoch)
summary_loss_train = []  # Training loss
summary_acc_train = []  # Training accuracy
summary_loss_val = []  # Validation loss
summary_acc_val = []  # Validation accuracy
summary_precision_train = []  # Training precision (macro-averaged)
summary_recall_train = []  # Training recall (macro-averaged)
summary_f1_train = []  # Training F1-score (macro-averaged)
summary_precision_val = []  # Validation precision (macro-averaged)
summary_recall_val = []  # Validation recall (macro-averaged)
summary_f1_val = []  # Validation F1-score (macro-averaged)

# Load the pre-trained VGG16 model with ImageNet weights
model_ft = models.vgg16(weights='IMAGENET1K_V1')

# Freeze all layers of the pre-trained model
for param in model_ft.parameters():
    param.requires_grad = False  # Prevents updates to pre-trained weights

# Modify the final fully connected layer for the classification task
n_inputs = model_ft.classifier[6].in_features  # Get the input size of the final layer
model_ft.classifier[6] = nn.Linear(in_features=n_inputs, out_features=5)  # Replace last layer with 5 output neurons
model_ft = model_ft.to(device)  # Move model to GPU if available

# Store results for each combination
results = []

# Create DataLoaders with the chosen batch size
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)  # Training data loader
val_loader = torch.utils.data.DataLoader(val_data, batch_size=32, shuffle=False)  # Validation data loader

# Optimizer, loss, scheduler and early stopper for the final run.
# NOTE(review): lr=0.01 is not one of the learning rates explored by the grid
# search ([0.001, 0.0001]) -- confirm this is the intended value.
optimizer_ft = Adam(model_ft.classifier.parameters(), lr=0.01, weight_decay=0.001)  # Adam optimizer
loss_function_ft = nn.CrossEntropyLoss()  # Cross-entropy loss function for classification
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=8, gamma=0.1)  # Multiply lr by 0.1 every 8 epochs
# NOTE(review): min_delta=10 effectively disables early stopping for losses of
# this size -- confirm the intended value.
early_stopper = EarlyStopper(patience=5, min_delta=10)  # Early stopping mechanism

best_accuracy = 0.0  # Track the best validation accuracy

# Training loop for (at most) 70 epochs
for epoch in range(70):
    model_ft.train()  # Set model to training mode
    train_accuracy = 0.0
    train_loss = 0.0
    all_train_labels = []
    all_train_preds = []

    for images, labels in train_loader:  # Iterate through training batches
        images, labels = images.to(device), labels.to(device)  # Move data to GPU if available
        optimizer_ft.zero_grad()  # Reset gradients
        outputs = model_ft(images)  # Forward pass
        loss = loss_function_ft(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer_ft.step()  # Update weights

        # The loss is a batch mean: weight it by the batch size so the final
        # division by the dataset size gives a per-sample average.
        train_loss += loss.item() * images.size(0)
        _, prediction = torch.max(outputs, 1)  # Get predicted class
        train_accuracy += int(torch.sum(prediction == labels))  # Count correct predictions
        all_train_labels.extend(labels.cpu().numpy())  # Store true labels
        all_train_preds.extend(prediction.cpu().numpy())  # Store predicted labels

    train_accuracy /= len(train_data)  # Compute training accuracy
    train_loss /= len(train_data)  # Compute average training loss

    # Advance the learning rate schedule once per epoch, after the optimizer
    # updates; without this call the scheduler never changes the learning rate.
    exp_lr_scheduler.step()

    # Compute precision, recall, and F1-score for training data.
    # zero_division=0 scores a class with no predictions/samples as 0 instead
    # of emitting a warning; it is applied uniformly to every metric.
    train_precision = precision_score(all_train_labels, all_train_preds, average='macro', zero_division=0)
    train_recall = recall_score(all_train_labels, all_train_preds, average='macro', zero_division=0)
    train_f1 = f1_score(all_train_labels, all_train_preds, average='macro', zero_division=0)

    # Validation phase
    model_ft.eval()  # Set model to evaluation mode
    val_accuracy = 0.0
    val_loss = 0.0
    all_val_labels = []
    all_val_preds = []

    with torch.no_grad():  # Disable gradient computation for validation
        for images, labels in val_loader:  # Iterate through validation batches
            images, labels = images.to(device), labels.to(device)  # Move data to GPU if available
            outputs = model_ft(images)  # Forward pass
            loss = loss_function_ft(outputs, labels)  # Compute validation loss
            val_loss += loss.item() * images.size(0)  # Accumulate loss
            _, prediction = torch.max(outputs, 1)  # Get predicted class
            val_accuracy += int(torch.sum(prediction == labels))  # Count correct predictions
            all_val_labels.extend(labels.cpu().numpy())  # Store true labels
            all_val_preds.extend(prediction.cpu().numpy())  # Store predicted labels

    val_accuracy /= len(val_data)  # Compute validation accuracy
    val_loss /= len(val_data)  # Compute average validation loss

    # Compute precision, recall, and F1-score for validation data
    val_precision = precision_score(all_val_labels, all_val_preds, average='macro', zero_division=0)
    val_recall = recall_score(all_val_labels, all_val_preds, average='macro', zero_division=0)
    val_f1 = f1_score(all_val_labels, all_val_preds, average='macro', zero_division=0)

    # Store training and validation metrics (losses are plain Python floats)
    summary_loss_train.append(train_loss)
    summary_acc_train.append(train_accuracy)
    summary_precision_train.append(train_precision)
    summary_recall_train.append(train_recall)
    summary_f1_train.append(train_f1)

    summary_loss_val.append(val_loss)
    summary_acc_val.append(val_accuracy)
    summary_precision_val.append(val_precision)
    summary_recall_val.append(val_recall)
    summary_f1_val.append(val_f1)

    # Print training and validation results for the current epoch
    print(f'Epoch: {epoch} Train Accuracy: {train_accuracy:.4f} Train Loss: {train_loss:.4f} '
          f'Val. Accuracy: {val_accuracy:.4f} Val. Loss: {val_loss:.4f} '
          f'Train Precision: {train_precision:.4f} Train Recall: {train_recall:.4f} Train F1: {train_f1:.4f} '
          f'Val Precision: {val_precision:.4f} Val Recall: {val_recall:.4f} Val F1: {val_f1:.4f}')

    # Save the model if it achieves the best validation accuracy so far.
    # This runs before the early-stopping check so that the epoch which
    # triggers the stop can still be checkpointed.
    if val_accuracy > best_accuracy:
        torch.save(model_ft.state_dict(), 'best_checkpoint_ft.model')  # Save model state
        best_accuracy = val_accuracy  # Update best validation accuracy

    # Apply early stopping if the validation loss keeps worsening
    if early_stopper.early_stop(val_loss):
        break  # Stop training

# Print the highest validation accuracy achieved
print(f'Best validation accuracy: {best_accuracy}')

### Plots

In [None]:
# Validation losses as a CPU tensor and as a plain list of Python floats
summary_loss_val_cpu = torch.tensor(summary_loss_val, device='cpu')
summary_loss_val_cpu_lt = [float(value) for value in summary_loss_val]
print(summary_loss_val_cpu_lt)  # Print the validation loss history

# Create a figure with two subplots side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# x axis: one point per epoch that was actually run. The histories are shorter
# than the maximum epoch count whenever training stops early, and matplotlib
# requires x and y to have the same length.
x = list(range(len(summary_loss_train)))

# Plain-list copies of the accuracy histories
sommario_acc_train_array = list(summary_acc_train)
sommario_acc_val_array = list(summary_acc_val)

# Plot training and validation loss on the first subplot (ax1)
ax1.set_title("Loss")  # Set title for loss plot
ax1.plot(x, summary_loss_train, label='Training Loss')  # Plot training loss
ax1.plot(x, summary_loss_val_cpu_lt, label='Validation Loss')  # Plot validation loss
ax1.legend()  # Add legend to distinguish the curves

# Plot training and validation accuracy on the second subplot (ax2)
ax2.set_title("Accuracy")  # Set title for accuracy plot
ax2.plot(x, sommario_acc_train_array, label='Training Accuracy')  # Plot training accuracy
ax2.plot(x, sommario_acc_val_array, label='Validation Accuracy')  # Plot validation accuracy
ax2.legend()  # Add legend

# Display the plots
plt.show()

### Inference
We need to evaluate the trained network on the test dataset to determine how well it has learned. Although the model has undergone multiple training iterations, we must verify its effectiveness. This is done by making predictions using the trained network and comparing the predicted class labels with the actual ground-truth labels. If the prediction matches the correct label, the sample is counted as a correct prediction. This evaluation helps assess the model's generalization ability on unseen data.

In [None]:
# Importing test dataset
class ImageDataset(Dataset):
    """
    Custom dataset for a flat directory of images whose class is encoded in the filename.

    Attributes:
      root (str): Directory where the images are stored.
      transform (callable, optional): Optional transform to be applied on a sample.
      images (list): File paths of all images in the directory, sorted by filename.
      labels (list): Integer labels, aligned index-by-index with `images`.
    """

    # Class-name tokens searched for in the filename and the label index of each.
    _CLASS_TO_IDX = {'CMC': 0, 'COR': 1, 'HUM': 2, 'TMT': 3, 'TT': 4}
    # Compiled once for the class instead of on every `get_label` call.
    _LABEL_PATTERN = re.compile(r'CMC|COR|HUM|TMT|TT')

    def __init__(self, root, transform=None):
        """
        Initialize the dataset with the directory containing images and optional transformations.

        Args:
          root (str): Directory containing image files.
          transform (callable, optional): A function/transform that takes in an image and returns a transformed version.

        Raises:
          ValueError: If a file name in `root` contains none of the known class names.
        """
        self.root = root  # Store the root directory
        self.transform = transform  # Store the transformation function

        # List the directory once and sort it, so that paths and labels are
        # built from the same sequence and the sample order is reproducible.
        # Sub-directories are skipped: they are not images.
        filenames = sorted(
            name for name in os.listdir(root)
            if os.path.isfile(os.path.join(root, name))
        )
        self.images = [os.path.join(root, name) for name in filenames]  # Image file paths
        self.labels = [self.get_label(name) for name in filenames]  # Labels extracted from filenames

    def __len__(self):
        """
        Return the total number of images in the dataset.

        Returns:
          int: Number of images.
        """
        return len(self.images)

    def __getitem__(self, idx):
        """
        Retrieve an image and its label at the given index, and optionally apply transformations.

        Args:
          idx (int): Index of the image to retrieve.

        Returns:
          tuple: Transformed image and its label as a tensor.
        """
        image = Image.open(self.images[idx]).convert("RGB")  # Open image and force 3-channel RGB

        if self.transform:
            image = self.transform(image)  # Apply transformations if specified

        return image, torch.tensor(self.labels[idx])  # Return the image and label as a tensor

    def get_label(self, filename):
        """
        Extract the label from the filename based on predefined class names.

        The first occurrence of any class name anywhere in the filename wins,
        so a name that contains e.g. 'TT' before its real class token would be
        labelled as 'TT'.

        Args:
          filename (str): Name of the image file.

        Returns:
          int: Numeric label corresponding to the class name.

        Raises:
          ValueError: If no class name is found in the filename.
        """
        match = self._LABEL_PATTERN.search(filename)
        if match is None:
            raise ValueError("Class name not found in the filename")
        return self._CLASS_TO_IDX[match.group(0)]

# Loading dataset for testing

# Path to test directory (replace with your path); used for both the dataset
# and the image count below so the two can never point at different folders.
test_path = os.path.join(data_dir, 'bones_test')

# If images are divided into different folders, use ImageFolder
# test_data = dsets.ImageFolder(root=test_path, transform=val_transform)
# test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=2)

# If images are all in a single directory without class subfolders, use the custom ImageDataset
test_dataset = ImageDataset(root=test_path, transform=val_transform)
# Evaluation does not depend on sample order, so the test set is not shuffled.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2, persistent_workers=True)

# Count the number of JPG test images
test_count = len(glob.glob(os.path.join(test_path, '*.jpg')))
print(test_count)  # Print the total number of test images

In [None]:
# Load the saved model weights from the checkpoint file.
# map_location places the tensors on the current device, so a checkpoint saved
# on a GPU can also be loaded in a CPU-only session.
checkpoint = torch.load('best_checkpoint_ft.model', map_location=device)

# Build the VGG16 architecture without downloading the ImageNet weights:
# every parameter is overwritten by the checkpoint below.
model_ft = models.vgg16(weights=None)

# Modify the classifier to have 5 output classes, matching the saved model
n_inputs = model_ft.classifier[6].in_features  # Get the number of input features for the last layer
model_ft.classifier[6] = nn.Linear(n_inputs, 5)  # Replace the last layer to match the number of classes

# Restore the saved parameters, then move the model to the selected device
model_ft.load_state_dict(checkpoint)
model_ft = model_ft.to(device)

In [None]:
# Calculate overall metrics for the test set

model_ft = model_ft.to(device)  # Move the model to the device once, not once per batch
model_ft.eval()  # Set the model to evaluation mode
test_accuracy = 0.0  # Running count of correct predictions, turned into accuracy below
test_loss = 0.0  # Running sum of per-sample losses, averaged below
all_test_labels = []  # List to store all ground-truth labels
all_test_preds = []  # List to store all predicted labels

with torch.no_grad():  # Disable gradient computation for efficiency
    for images, labels in test_loader:  # Iterate over test dataset batches
        images, labels = images.to(device), labels.to(device)  # Move data to GPU/CPU

        outputs = model_ft(images)  # Perform forward pass

        loss = loss_function_ft(outputs, labels)  # Compute loss
        # The loss is a batch mean: weight it by the batch size so the final
        # division by the dataset size gives a per-sample average.
        test_loss += loss.item() * images.size(0)

        _, prediction = torch.max(outputs, 1)  # Get the class with the highest score
        test_accuracy += int(torch.sum(prediction == labels))  # Count correct predictions

        all_test_labels.extend(labels.cpu().numpy())  # Store true labels
        all_test_preds.extend(prediction.cpu().numpy())  # Store predicted labels

# Compute average accuracy and loss over the entire test dataset
test_accuracy /= len(test_dataset)
test_loss /= len(test_dataset)

# Compute additional classification metrics.
# zero_division=0 scores a class with no predictions/samples as 0 instead of warning.
test_precision = precision_score(all_test_labels, all_test_preds, average='macro', zero_division=0)  # Compute precision
test_recall = recall_score(all_test_labels, all_test_preds, average='macro', zero_division=0)  # Compute recall
test_f1 = f1_score(all_test_labels, all_test_preds, average='macro', zero_division=0)  # Compute F1-score

# Print test performance metrics
print(f'Test Accuracy: {test_accuracy:.4f} Test Loss: {test_loss:.4f} '
      f'Test Precision: {test_precision:.4f} Test Recall: {test_recall:.4f} Test F1: {test_f1:.4f}')

# Pin the label set to one index per class name, so the report and the matrix
# keep a row for every class even if one is absent from the test data.
class_indices = list(range(len(classes)))

# Print the classification report and confusion matrix for the test set
print('Classification Report:')
print(classification_report(all_test_labels, all_test_preds, labels=class_indices,
                            target_names=classes, zero_division=0))  # Generate detailed report

print('Confusion Matrix:')
print(confusion_matrix(all_test_labels, all_test_preds, labels=class_indices))  # Print confusion matrix

In [None]:
# Confusion matrix plot

classes = ['CMC', 'COR', 'HUM', 'TMT', 'TT']  # Class labels, in label-index order (0-4)
# Pin the label set so the matrix always has one row/column per class name,
# even if a class never appears among the test labels or predictions.
cm = confusion_matrix(all_test_labels, all_test_preds, labels=list(range(len(classes))))

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion Matrix', cmap=plt.cm.Greens):
    """
    Print a short header and plot the confusion matrix as an annotated heat map.

    Args:
      cm (array-like): Confusion matrix data (rows: true labels, columns: predicted labels).
      classes (list): List of class labels, used as tick labels on both axes.
      normalize (bool): Whether to normalize each row so that it sums to 1.
      title (str): Title of the plot.
      cmap: Colormap for the plot.

    If `normalize=True`, each cell shows the fraction of its row (true class),
    formatted with two decimals. Rows without any sample are shown as zeros.
    """
    cm = np.asarray(cm)  # Accept nested lists as well as arrays

    if normalize:
        # Normalize by row (true labels). Rows whose total is 0 are left at 0
        # instead of producing NaN from a division by zero.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized Confusion Matrix")
    else:
        print("Confusion Matrix, without Normalization")

    plt.figure(figsize=(7, 5))  # Set figure size
    plt.imshow(cm, interpolation='nearest', cmap=cmap)  # Display confusion matrix as an image
    plt.title(title)  # Set title
    plt.colorbar()  # Add color bar to indicate values

    tick_marks = np.arange(len(classes))  # Get tick positions for class labels
    plt.xticks(tick_marks, classes, rotation=45)  # Set class names on x-axis
    plt.yticks(tick_marks, classes)  # Set class names on y-axis

    # Define text format and threshold for text color contrast
    fmt = '.2f' if normalize else 'd'  # Decimals if normalized, otherwise integers
    thresh = cm.max() / 2.  # Cells above half the maximum get white text

    # Write each value into its cell
    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")  # Contrasting color for readability

    plt.ylabel('True Label')  # Label for y-axis
    plt.xlabel('Predicted Label')  # Label for x-axis
    plt.tight_layout()  # Called after the labels are set so they are included in the layout
    plt.show()  # Display the plot

# Draw the confusion matrix with raw counts (no row normalization)
plot_confusion_matrix(cm=cm, classes=classes, normalize=False)