In [None]:
import numpy as np 
from tqdm.notebook import tqdm
import pandas as pd 
import os

In [None]:
os.listdir('data')

In [None]:
# load the metadata
data = pd.read_csv('data/HAM10000_metadata.csv')

In [None]:
data['dx'].value_counts()

In [None]:
# number of classes
data['dx'].nunique()

In [None]:
!pip install barbar pytorch-ignite

In [None]:
pip install -q torch-summary

In [None]:
!pip install tensorboard

In [None]:
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia

In [None]:
import os
import random
from shutil import copyfile, move

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from barbar import Bar
from ignite.metrics import Accuracy
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, roc_auc_score
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from torchvision import transforms, datasets

In [None]:
# Exploratory copy of the network, kept on the CPU only to inspect its layers.
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load.
model2 = torchvision.models.resnet50(
    weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1)

# Swap the 1000-class ImageNet head for a 7-class one (one output per diagnosis).
model2.fc = nn.Linear(model2.fc.in_features, 7)

In [None]:
model2

# split the data into train and validation

In [None]:
def split_data(source, training, validation, split_size):
    """Randomly split the non-empty files of ``source`` into two disjoint sets and copy them.

    Args:
        source: Directory holding the files to split.
        training: Directory that receives the training share.
        validation: Directory that receives all remaining files.
        split_size: Fraction (0..1) of the files that goes to ``training``.

    Zero-length files are reported and left out of both sets.
    """
    files = []
    for filename in os.listdir(source):
        if os.path.getsize(os.path.join(source, filename)) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * split_size)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    # Everything after the training slice; slicing from the start again would
    # reuse training files for validation and leak data between the two sets.
    validation_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(source, filename), os.path.join(training, filename))

    for filename in validation_set:
        copyfile(os.path.join(source, filename), os.path.join(validation, filename))

# create necessary folders to store the specific images

In [None]:
# Create every folder the pipeline writes to. makedirs(exist_ok=True) makes the
# cell safe to re-run: existing folders are left alone, missing ones are still
# created, and real failures (e.g. permissions) are raised instead of hidden.
class_folders = ['0.MEL', '1.NV', '2.BCC', '3.AKIEC', '4.BKL', '5.DF', '6.VASC']

folders = ['skin_models', 'HAM10000']
folders += [os.path.join('orig', name) for name in class_folders]
folders += [os.path.join('skin', subset, name)
            for subset in ('training', 'validation')
            for name in class_folders]

for folder in folders:
    os.makedirs(folder, exist_ok=True)

In [None]:
# Flat folder that receives every image from both downloaded parts.
orig_all = 'HAM10000/'
# One folder per diagnosis class; the numeric prefix fixes the alphabetical order of the folders.
source_MEL = 'orig/0.MEL/'
source_NV = 'orig/1.NV/'
source_BCC = 'orig/2.BCC/'
source_AKIEC = 'orig/3.AKIEC/'
source_BKL = 'orig/4.BKL/'
source_DF = 'orig/5.DF/'
source_VASC = 'orig/6.VASC/'

# Per-class destinations of the training share of the split.
training_MEL = 'skin/training/0.MEL/'
training_NV = 'skin/training/1.NV/'
training_BCC = 'skin/training/2.BCC/'
training_AKIEC = 'skin/training/3.AKIEC/'
training_BKL = 'skin/training/4.BKL/'
training_DF = 'skin/training/5.DF/'
training_VASC = 'skin/training/6.VASC/'
# Per-class destinations of the validation share of the split.
validation_MEL = 'skin/validation/0.MEL/'
validation_NV = 'skin/validation/1.NV/'
validation_BCC = 'skin/validation/2.BCC/'
validation_AKIEC = 'skin/validation/3.AKIEC/'
validation_BKL = 'skin/validation/4.BKL/'
validation_DF = 'skin/validation/5.DF/'
validation_VASC = 'skin/validation/6.VASC/'

In [None]:
# The images come in two folders; merge them into the single flat 'HAM10000' folder.
part1 = "data/HAM10000_images_part_1"
part2 = "data/HAM10000_images_part_2"

for part in (part1, part2):
    for img in tqdm(os.listdir(part)):
        src_path = os.path.join(part, img)
        dst_path = os.path.join('HAM10000', img)
        copyfile(src_path, dst_path)

In [None]:
data

In [None]:
data.columns

In [None]:
len(data)

In [None]:
# Keep a single photo per lesion_id (drop_duplicates keeps the first row of each group) so that
# the samples are closer to iid (independent and identically distributed) and we do not overfit
# to lesions that were photographed several times.

data = data.drop_duplicates(subset='lesion_id')

In [None]:
len(data)


In [None]:
# Parallel lists: image_names[i] is the file stem and diagnosis[i] its class code.
image_names = data['image_id'].tolist()
diagnosis = data['dx'].tolist()

In [None]:
print(len(image_names))
print(len(diagnosis))

In [None]:
print(image_names[0])
print(diagnosis[0])

In [None]:
pd.unique(data['dx'])

# Copy images to their class-specific folders

In [None]:
# Diagnosis code from the metadata -> folder that collects the images of that class.
class_folder = {
    'mel': source_MEL,
    'nv': source_NV,
    'bcc': source_BCC,
    'akiec': source_AKIEC,
    'bkl': source_BKL,
    'df': source_DF,
    'vasc': source_VASC,
}

for index, image in tqdm(enumerate(image_names)):
    image = image + '.jpg'
    target_dir = class_folder.get(diagnosis[index])
    if target_dir is None:
        # Codes outside the seven known classes are skipped.
        continue
    copyfile(os.path.join(orig_all, image), os.path.join(target_dir, image))

In [None]:
# Number of images per class, printed in class-index order (MEL ... VASC).
for class_dir in (source_MEL, source_NV, source_BCC, source_AKIEC,
                  source_BKL, source_DF, source_VASC):
    print(len(os.listdir(class_dir)))

In [None]:
# Split every class on its own (80% training / 20% validation) so that each
# class keeps the same ratio in both sets.

split_size = 0.8
class_splits = [
    (source_MEL, training_MEL, validation_MEL),
    (source_NV, training_NV, validation_NV),
    (source_BCC, training_BCC, validation_BCC),
    (source_AKIEC, training_AKIEC, validation_AKIEC),
    (source_BKL, training_BKL, validation_BKL),
    (source_DF, training_DF, validation_DF),
    (source_VASC, training_VASC, validation_VASC),
]
for class_source, class_training, class_validation in class_splits:
    split_data(class_source, class_training, class_validation, split_size)

In [None]:
# Per class: number of training images, then number of validation images.
split_folders = [
    (training_MEL, validation_MEL),
    (training_NV, validation_NV),
    (training_BCC, validation_BCC),
    (training_AKIEC, validation_AKIEC),
    (training_BKL, validation_BKL),
    (training_DF, validation_DF),
    (training_VASC, validation_VASC),
]
for training_dir, validation_dir in split_folders:
    print(len(os.listdir(training_dir)))
    print(len(os.listdir(validation_dir)))

The inherent regularization of ResNet50 (its bottleneck layers and batch normalization), together with weight decay and random data augmentations, is enough to prevent the model from overfitting even on this imbalanced dataset.

In [None]:
# Root folders of the two splits; each contains one sub-folder per class.
traindir = r'skin/training/'
valdir = r'skin/validation/'

# challenging aspects:
Dataset consists of 7470 unique skin neoplasm photos with height=450 and width=600 pixels.
- After analyzing the images, I observed that in most cases the skin neoplasm is positioned at the center and occupies the central 450x450 portion of the image, with only a few exceptions. Therefore, I conclude that there is no need to squeeze the photos along the width axis, which might result in the loss of important details, as our primary subject of interest is nearly always within the central crop. Instead, each image is downscaled and center-cropped to the 224x224 input size used for ResNet-50.

# Data augmentation: a technique to increase the diversity in training set by applying random (but realistic) transformations

ResNet-50 expects input images of size 224 × 224.
We will first resize to 224x280 (height x width) to avoid losing information along the width axis, and then center-crop a 224x224 piece.

Apply random rotation and horizontal flipping (a vertical flip is already covered, since it equals a horizontal flip followed by a 180° rotation). Since skin neoplasms come in many different shapes, rotating and flipping them still yields skin neoplasms that we would expect our model to classify correctly. These two transformations will help our model greatly: they inflate our dataset, increase model robustness (especially since some classes have very small representation), improve generalization and help prevent overfitting.

In [None]:
# Steps shared by both pipelines. Resize takes (height, width), so the image
# becomes 224 high and 280 wide before the central 224x224 square is cut out.
resize_and_crop = [
    transforms.Resize((224, 280)),
    transforms.CenterCrop((224, 224)),
]
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Random augmentations, used for training only.
random_augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply(transforms=[transforms.RandomRotation(degrees=(-180, 180))], p=0.99),
]

train_transforms = transforms.Compose(resize_and_crop + random_augmentations + to_normalized_tensor)
val_transforms = transforms.Compose(resize_and_crop + to_normalized_tensor)

# ImageFolder derives the class index from the alphabetical order of the sub-folders.
train_dataset = datasets.ImageFolder(traindir, transform=train_transforms)
val_dataset = datasets.ImageFolder(valdir, transform=val_transforms)

In [None]:
# Both loaders use the same settings, so they are stated once.
loader_options = dict(batch_size=64, shuffle=True, pin_memory=False, drop_last=False)

train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
val_loader = torch.utils.data.DataLoader(val_dataset, **loader_options)

In [None]:
print(val_dataset.class_to_idx)
print(train_dataset.class_to_idx)

# resnet50 Model

In [None]:
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook also runs (slowly) on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load.
model = torchvision.models.resnet50(
    weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1)

# Swap the 1000-class ImageNet head for a 7-class one, then move everything to the device.
model.fc = nn.Linear(model.fc.in_features, 7)
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Created after the head was replaced so the new layer's parameters are optimized too.
optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.0001)

epochs = 5 # 200 or 250 or 300

writer = SummaryWriter(log_dir='skin_logs/', filename_suffix="skin50")

In [None]:
# Shows info about model
summary(model, input_size=(3, 224, 224))

# Stop training early if the validation loss doesn't improve within a given patience

In [None]:
class EarlyStopping:
    """Track the validation loss and signal when training should stop.

    Call the instance once per epoch with the current validation loss and the
    model. Every improvement saves the model's ``state_dict``; ``early_stop``
    turns True once ``patience`` consecutive calls brought no improvement.
    """

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience: Number of consecutive non-improving calls tolerated before stopping.
            verbose: If True, report every checkpoint that is written.
            delta: Minimum increase of the score (the negated loss) that counts as an improvement.
            path: Stored on the instance; ``save_checkpoint`` writes to a fixed
                location and does not read it.
            trace_func: Callable used to emit messages.
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf instead of the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record ``val_loss``; checkpoint ``model`` on improvement, otherwise count towards stopping."""
        # Negate the loss so that "higher is better" holds for the comparison below.
        score = -val_loss

        if self.best_score is None:
            # First call: there is nothing to compare against yet.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when monitored metric decrease.'''
        if self.verbose:
            self.trace_func(f'Monitored metric has improved ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        # Fixed location: the training cell reloads the best weights from this exact file.
        torch.save(model.state_dict(), 'skin_models/skinmodel50.pt')
        self.val_loss_min = val_loss


# Model Training 

In [None]:
early_stopping = EarlyStopping(patience=30, verbose=True)

for epoch in range(epochs):
    train_loss = 0.00
    val_loss = 0.00
    # Fresh metric objects each epoch so accuracies are not accumulated across epochs.
    train_accuracy = Accuracy()
    val_accuracy = Accuracy()
    print(f'Epoch {epoch+1}')

    # Training loop
    model.train()  # set once per epoch; the mode does not change inside the loop
    for idx, (inputs, labels) in enumerate(Bar(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_accuracy.update((nn.functional.softmax(outputs, dim=1), labels))
    print(f"Train Accuracy: {train_accuracy.compute()}")
    # Mean of the per-batch losses.
    train_loss /= len(train_loader)
    train_loss_formated = "{:.4f}".format(train_loss)

    # Validation loop
    model.eval()  # set once per epoch, before the gradient-free pass
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_accuracy.update((nn.functional.softmax(outputs, dim=1), labels))
    print(f"Val Accuracy: {val_accuracy.compute()}")
    val_loss /= len(val_loader)
    val_loss_formated = "{:.4f}".format(val_loss)
    print(f'Training Loss: {train_loss_formated}')
    print(f"Validation Loss: {val_loss_formated}")

    # TensorBoard writer
    writer.add_scalar('Loss/train', train_loss, epoch+1)
    writer.add_scalar('Loss/val', val_loss, epoch+1)
    writer.add_scalar('Accuracy/train', train_accuracy.compute(), epoch+1)
    writer.add_scalar('Accuracy/val', val_accuracy.compute(), epoch+1)

    # Early Stopping
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

# Make sure every logged scalar reaches the event file on disk.
writer.flush()

# load the last checkpoint with the best model
# map_location lets a checkpoint written on one device be restored on the current one.
model.load_state_dict(torch.load('skin_models/skinmodel50.pt', map_location=device))

In [None]:
num_classes = 7

# Gather per-batch results in lists and concatenate once at the end; this avoids
# re-copying the growing tensors on every batch and does not depend on
# concatenating an empty integer tensor with float probabilities.
pred_batches, label_batches, prob_batches = [], [], []
model.eval()
with torch.no_grad():
    for inputs, classes in val_loader:
        outputs = model(inputs.to(device))
        _, preds = torch.max(outputs, 1)

        pred_batches.append(preds.view(-1).cpu())
        label_batches.append(classes.view(-1).cpu())
        prob_batches.append(nn.functional.softmax(outputs, dim=1).cpu())
predlist = torch.cat(pred_batches).numpy()
lbllist = torch.cat(label_batches).numpy()
predlistauc = torch.cat(prob_batches).numpy()

# Confusion matrix, classification report and AUC
# Passing `labels` keeps all seven classes in the outputs even when one of them
# does not occur, so the rows always line up with target_names.
class_ids = list(range(num_classes))
conf_mat = confusion_matrix(lbllist, predlist, labels=class_ids)
target_names = ['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']
ConfusionMatrixDisplay(conf_mat, display_labels=target_names).plot(values_format="d")
print(classification_report(lbllist, predlist, labels=class_ids, target_names=target_names))
# One-hot labels of shape (n_samples, num_classes): row i of the identity matrix encodes class i.
lbllist_one_hot = np.eye(num_classes, dtype=int)[lbllist]
every_auc = roc_auc_score(lbllist_one_hot, predlistauc, multi_class='ovr', average=None)
for i, every in enumerate(target_names):
    print(f'AUC of class {every} = {every_auc[i]}')

# For better validation robustness, let's augment the validation dataset with random rotations and horizontal flips and then check the metrics again

In [None]:
# Validation pipeline with the same random flip/rotation that is used for
# training. This deliberately replaces val_transforms, val_dataset and
# val_loader from the earlier cells.
augmented_steps = [
    transforms.Resize((224, 280)),
    transforms.CenterCrop((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply(transforms=[transforms.RandomRotation(degrees=(-180, 180))], p=0.99),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
val_transforms = transforms.Compose(augmented_steps)

val_dataset = datasets.ImageFolder(valdir, transform=val_transforms)

val_loader_options = dict(batch_size=64, shuffle=True, pin_memory=False, drop_last=False)
val_loader = torch.utils.data.DataLoader(val_dataset, **val_loader_options)

In [None]:
num_classes = 7

# Gather per-batch results in lists and concatenate once at the end; this avoids
# re-copying the growing tensors on every batch and does not depend on
# concatenating an empty integer tensor with float probabilities.
pred_batches, label_batches, prob_batches = [], [], []
model.eval()
with torch.no_grad():
    # Five passes over the validation set; the random transforms give every
    # image a different flip/rotation in each pass.
    for n in range(5):
        for inputs, classes in val_loader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)

            pred_batches.append(preds.view(-1).cpu())
            label_batches.append(classes.view(-1).cpu())
            prob_batches.append(nn.functional.softmax(outputs, dim=1).cpu())
predlist = torch.cat(pred_batches).numpy()
lbllist = torch.cat(label_batches).numpy()
predlistauc = torch.cat(prob_batches).numpy()

# Confusion matrix, classification report and AUC
# Passing `labels` keeps all seven classes in the outputs even when one of them
# does not occur, so the rows always line up with target_names.
class_ids = list(range(num_classes))
conf_mat = confusion_matrix(lbllist, predlist, labels=class_ids)
target_names = ['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']
ConfusionMatrixDisplay(conf_mat, display_labels=target_names).plot(values_format="d")
print(classification_report(lbllist, predlist, labels=class_ids, target_names=target_names))
# One-hot labels of shape (n_samples, num_classes): row i of the identity matrix encodes class i.
lbllist_one_hot = np.eye(num_classes, dtype=int)[lbllist]
every_auc = roc_auc_score(lbllist_one_hot, predlistauc, multi_class='ovr', average=None)
for i, every in enumerate(target_names):
    print(f'AUC of class {every} = {every_auc[i]}')

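In the end we were able to achieve a 99% average F1 score and AUCs close to 100%!
Interestingly, even on the 2 classes with the smallest representation (only 78 and 58 training images) we were still able to achieve a 100% F1 score in both cases.
I believe that the most important reason for this result is the well-chosen data augmentations, which helped not only to enrich our dataset but also to prevent overfitting.
Note: these figures are only meaningful if the validation images are strictly disjoint from the training images; any overlap between the two sets inflates them, so they should be re-checked on a clean split.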