In [None]:
# Program Outline
# ===============

# 1. Read CSV file of only benign and malignant images
# 2. Train-test split
# 3. Instantiate VGG16 model
# 4. Prepare for model training
# 5. Train and save model, printing metrics/results
# 6. Test model
# 7. Print results


In [None]:
# Import all relevant packages here
# =================================

from google.colab import auth
from google.colab import drive
import pandas as pd
import numpy as np
import os
from PIL import Image
from sklearn import preprocessing
import torch
from sklearn.model_selection import train_test_split
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import threading
import itertools
import sys
import time

In [None]:
# Standalone functions
# ====================

def load_images(path, names):
  '''Load all images from fitz17k denoted by a list of image names.

  Args:
    path: Directory that contains the image files.
    names: Sequence of image names without extension; '.jpg' is appended
      to each name to build the file name.

  Returns:
    A numpy array built from the list of loaded images, in the same order
    as `names`. Every image with a non-zero value range is min-max scaled
    into [0, 1]; an image whose pixels are all equal is left as loaded.
  '''
  image_list = []
  # Iterate over the `names` argument itself, so the function does not depend
  # on any notebook-level variable being defined.
  for name in names:
    img_file = os.path.join(path, name + '.jpg')
    # The context manager closes the file handle once the pixels are copied
    with Image.open(img_file) as img:
      image = np.array(img)

    # Normalise image into range 0-1 (skipped for constant images to avoid
    # dividing by zero)
    lowest, highest = np.min(image), np.max(image)
    if highest - lowest != 0:
      image = (image - lowest) / (highest - lowest)

    # Append to list of images
    image_list.append(image)

  # Convert image list to np array, and return
  # NOTE(review): this assumes every image has the same shape; differently
  # sized images cannot be stacked into one regular array - TODO confirm
  # against the dataset.
  return np.asarray(image_list)

In [None]:
# Authenticate user
# Runs google.colab's authentication flow for the current Colab session.
# NOTE(review): presumably required before the Drive access in the cells
# below - confirm whether drive.mount() alone would be sufficient.
auth.authenticate_user()

In [None]:
# Unmount first if previously mounted, so re-running this cell starts clean
try:
    drive.flush_and_unmount()
except Exception as exc:
    # Best-effort: a failed unmount must not stop the notebook, but it is
    # reported rather than hidden. Catching `Exception` instead of using a
    # bare `except` lets KeyboardInterrupt / SystemExit propagate.
    print(f"Could not unmount Google Drive (continuing): {exc}")

# Mount Google drive
drive.mount('/content/drive/')

# Project directory on the mounted drive; later cells build file paths from it
dp = '/content/drive/My Drive/Colab Notebooks/AKproject'

Drive not mounted, so nothing to flush and unmount.
Mounted at /content/drive/


In [None]:
# 1. Read CSV file of only benign and malignant images
# ====================================================

# Load the benign/malignant-only table from the project directory
csv_name = os.path.join(dp, 'fitzpatrick17k_B&Monly.csv')
df = pd.read_csv(csv_name)
data = df.to_numpy()

# First column: image identifiers; sixth column: string labels
X, y_string = data[:, 0], data[:, 5]
for column_preview in (X, y_string):
    print(column_preview[:5])

# Encode the string labels as integer class ids
label_encoder = preprocessing.LabelEncoder()
y = label_encoder.fit_transform(y_string)
print(y[:5])

# Wrap the encoded labels in a torch tensor
y = torch.from_numpy(y)

# Report how many samples and labels were read
for array_name, array in (("X", X), ("y", y)):
    print(f"Size of {array_name}: {array.shape[0]}")

['d2bac3c9e4499032ca8e9b07c7d3bc40' '45f7fe0e10214e32e890cad9d29d4811'
 'b87804452f60aa162a6d29c0f66a2466' '4c3f795cf8eb72b946f9bd2642cf23c1'
 '99247c9fe486aa9ab71686c8e676c135']
['benign' 'malignant' 'malignant' 'malignant' 'benign']
[0 1 1 1 0]
Size of X: 4497
Size of y: 4497


In [None]:
# 2. Train-test split
# ===================

# Fixed seed so the split (and therefore every metric reported later) is
# reproducible after Restart & Run All
split_seed = 42

# Stratify train-test split by labels, 25% test set size
stratify = y
test_size = 0.25
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=stratify, test_size=test_size, random_state=split_seed)

print(f"Size of X_train: {X_train.shape[0]}")
print(f"Size of X_test: {X_test.shape[0]}")
print(f"Size of y_train: {y_train.shape[0]}")
print(f"Size of y_test: {y_test.shape[0]}")

Size of X_train: 3372
Size of X_test: 1125
Size of y_train: 3372
Size of y_test: 1125


# Instantiating a Model + Image Transforms
We're going to follow the architecture outlined in the paper that introduced Fitzpatrick 17k: https://arxiv.org/pdf/2104.09957


The model used in the paper is VGG16 with all gradients frozen bar the final dense layer, which was modified from

- nn.Linear(4096, 1000, bias=True)

to

- nn.Sequential(nn.Linear(4096, 256), nn.ReLU(), nn.Dropout(0.4), nn.Linear(256, len(label_codes)), nn.LogSoftmax(dim=1))

They also make use of the Adam optimizer and the Negative Log-Likelihood loss function. This loss expects log-probabilities as input, which is why the final dense layer ends with an explicit LogSoftmax activation.

As for the image transforms for the purpose of data augmentation:
- Conversion to Python Imaging Library image object (compatibility)
- Random crop over [80%,100%] of the image area, before resizing to 256×256
- Random rotation up to ±15°
- Random brightness, contrast, saturation and hue shift
- Random left-to-right image flip, 50% chance
- Crop the centre 224×224 region from the now 256×256 image to match ImageNet standards. This is because VGG16 architecture by default assumes images are of this dimension
- Converts the now PIL image to a PyTorch tensor, giving it shape:[Colours,Height,Width] and normalised pixel values in [0,1], since any PyTorch model requires a tensor as input
- Normalisation of the image using the top row as means and the bottom row as standard deviations, in accordance with the ImageNet means and SDs. Note that this cannot be applied to PIL objects, only to tensors, and is done per channel, i.e. (pseudocode)

  NormalisationTensor = [[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]]
  meanNorm = NormalisationTensor[0]  # [0.485, 0.456, 0.406]
  stdNorm = NormalisationTensor[1]   # [0.229, 0.224, 0.225]

  For input tensor t and each colour channel n
      out[n] = (t[n] - meanNorm[n]) / stdNorm[n]

In [None]:
# 3. Instantiate VGG16 model
# ==========================

model = models.vgg16(pretrained=True, progress=True)
# Pretraining makes training faster as the model isn't completely naive
# Progress bar to show download progress
# Model download only has to be done once per runtime
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; kept as-is for compatibility with the installed version.

# We're going to follow NN architecture identical to that from the Fitzpatrick
# 17k paper, but modify it for binary classification
n_classes = len(y.unique())
print(f"Number of label types: {n_classes}") # Sanity check
for param in model.parameters():
  param.requires_grad = False # Freeze all gradients in the pretrained VGG16 model

# The replacement head is created after the freeze, so its parameters keep the
# default requires_grad=True and are the only ones that get trained.
# It ends in LogSoftmax because the loss below is NLLLoss, which expects
# log-probabilities; Sigmoid outputs are not log-probabilities and would make
# the loss value meaningless.
model.classifier[6] = nn.Sequential(nn.Linear(4096, 256),
                                    nn.ReLU(),
                                    nn.Dropout(0.4),
                                    nn.Linear(256, n_classes),
                                    nn.LogSoftmax(dim=1))

# Print number of parameters
total_params = sum(p.numel() for p in model.parameters())
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total number of parameters: {total_params}")
print(f"Number of trainable parameters: {total_trainable_params}")
print(f"Percentage of trainable parameters: {np.round(total_trainable_params*100/total_params, 2)}%")

# Instantiate loss function and optimizer
# Only the new head's parameters are handed to the optimizer
loss_fun = nn.NLLLoss()
optim = torch.optim.Adam(model.classifier[6].parameters())

# Check if CUDA is available, set device, empty cache, move model and loss function to device
CUDA = torch.cuda.is_available()
print(f"CUDA Status: {CUDA}")
device = torch.device("cuda:0" if CUDA else "cpu")
if CUDA:
  torch.cuda.empty_cache()
model.to(device)
loss_fun = loss_fun.to(device)

Number of label types: 2
Total number of parameters: 135309890
Number of trainable parameters: 1049346
Percentage of trainable parameters: 0.78%
CUDA Status: False


By modifying the final dense layer, we also "unfreeze" it - so during
training, only the final layer of the classifier network is trained. This is for a few reasons:
- Faster training - fewer parameters to update during backpropagation
- Transfer learning - the pretrained VGG16 learned good feature extractors from ImageNet already, so keeping these weights is preferred and helps keep the model from overfitting to the training data

In [None]:
# 4. Prepare for Model Training
# ===============================

# Data-augmentation pipeline, taken straight from the Fitzpatrick 17k paper

# Per-channel ImageNet statistics used by the final normalisation step
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

augmentation_steps = [
    transforms.ToPILImage(),                                   # array/tensor -> PIL image
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),  # random 80-100% crop, resized to 256x256
    transforms.RandomRotation(degrees=15),                     # random rotation within +/-15 degrees
    # NOTE(review): called without arguments, ColorJitter's brightness,
    # contrast, saturation and hue all default to 0, i.e. no jitter is
    # applied - confirm this matches the intended augmentation.
    transforms.ColorJitter(),
    transforms.RandomHorizontalFlip(),                         # left-right flip
    transforms.CenterCrop(size=224),                           # Image net standards
    transforms.ToTensor(),                                     # PIL image -> tensor
    transforms.Normalize(imagenet_mean, imagenet_std),
]
transform = transforms.Compose(augmentation_steps)

# Set number of epochs, batch size
n_epochs = 1
batch_size = 256
# There are 3372 training samples, so one epoch (a full pass over the training
# set) takes ceil(3372 / 256) = 14 batches with a batch size of 256