
Evaluation Metrics
=============================

Adapted from: **Pytorch_CNN_template** <https://colab.research.google.com/drive/1iDfYtWPQ4ku51Rn4-FEy4JviiyRKDekr>


Note: the following code only calculates accuracy for now. We are hoping to add more metrics(precision, recall, f1 core) ASAP

## Import Packages and Set Values

In [24]:
from __future__ import print_function, division
#import pakages
%matplotlib inline

import os
import random
import time
import copy
import torch
import pandas as pd
from skimage import io, transform, color
from sklearn import metrics
import sklearn.utils.class_weight
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import utils
from torchvision.io import read_image
import torchvision.transforms as T
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

# Detect if we have a GPU available
global device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [25]:
#Folder holding all images, and the csv file with the labels of those images
#a helper file will be created during the data cleaning process and saved as encoded_fitz17k.csv. this allows it to be inspected if necessary
image_dir = "data/finalfitz17k/"
image_labels_csv = "fitzpatrick17k.csv"
helper_file_folder = ""

#data to classify with (removes columns not related to image and specific data)
#choose one of 'three cond', 'nine cond', 'all skin cond', or 'skin tone'
#if none selected or improperly entered, will default to 'three cond'
to_classify_on = 'three cond'

#'skewed', 'balanced', or 'small' dataset:
#NOTE: balanced dataset does not necessarily have a balanced number of each skin type across skin types
#NOTE: balanced dataset cuts dataset from about N=16,000 to about N=3,800 (lots of downsampling, which is not ideal)
#NOTE: 'small' dataset is a small sample of 4000 images intended for testing without the... bulk
#if none selected or improperly entered, will default to 'skewed'
dataset_bias = 'skewed'

#can be one of 'resnet', 'alexnet', 'vgg', 'squeezenet', 'densenet', or 'inception'
model_name = 'resnet'

#Number of classes in the dataset
#NOTE: make sure this matches the number of conditions selected for 'to_classify_on'
num_classes = 3

#Batch size for training (change depending on how much memory you have)
batch_size = 8

#Number of epochs to train for 
num_epochs = 15

#Flag for feature extracting. When False, we finetune the whole model, when True we only update the reshaped layer params
feature_extract = True

## Model Training and Validation Code


The ``train_eval_model`` function handles the training and validation of a given model. As input, it takes a PyTorch model, two
dataloaders for training and validation, a loss function, an optimizer, a specified number of epochs to train and validate for, and a boolean flag for when the model is an Inception model. The *is_inception* flag is used to accomodate the *Inception v3* model, as that architecture uses an auxiliary output and the overall model loss respects both the auxiliary output and the final output, as described `here <https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958>`__.
The function trains for the specified number of epochs and after each
epoch runs a full validation step. It also keeps track of the best
performing model (in terms of validation accuracy), and at the end of
training returns the best performing model. After each epoch, the
training and validation accuracies are printed.

In [26]:
def train_eval_model(model_save_name, model, train_dl, val_dl, criterion, optimizer, num_epochs=25, is_inception=False):
    """
      Args:
      model: which Pytorch CNN model we are using
      train_dl: dataloader for training 
      val_dl: dataloader for validation
      criterion: loss function
      optimizer: a optimizer for training and validation
      num_epochs: how many epochs for training and validation
      is_inception: whether or not using a Inception model
    """
    start_time = time.time()
    val_acc_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    #Iterate through every epoch
    for epoch in range(num_epochs):
        print('Epoch {}/{}:'.format(epoch, num_epochs - 1))

        #Model training
        train_loss, train_corrects = train_model(model, train_dl, criterion, optimizer, is_inception)
        epoch_loss = train_loss / len(train_dl.dataset)
        epoch_acc = train_corrects.double() / len(train_dl.dataset)
        print('Train Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

        #Model validation
        val_loss, val_corrects, metrics = eval_model(model, val_dl, criterion, optimizer)
        epoch_loss = val_loss / len(val_dl.dataset)
        epoch_acc = val_corrects.double() / len(val_dl.dataset)
        val_acc_history.append(epoch_acc)   #Track validation accuracy history
        if epoch_acc > best_acc:            #Track the best performing model in terms of validation accuracy
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
        print('Val Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        print(metrics)
        
        torch.save(model_ft.state_dict(), 'model_' + str(model_save_name) + '_epoch_' + str(epoch) + '.pth')

        print()

    end_time = time.time()
    running_time = end_time - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(running_time // 60, running_time % 60))

    #The best performing model (in terms of validation accuracy)
    print('Best Val Acc: {:4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)
    
    torch.save(model_ft.state_dict(), 'model_' + str(model_save_name) + '_final.pth')

    #Return the best model and validation accuracy history
    return model, val_acc_history

In [27]:
def train_model(model, dataloader, criterion, optimizer, is_inception=False):
    """ Helper function for train_eval_model() """

    model.train()             #Training mode
    running_loss = 0.0
    running_corrects = 0
    for inputs, labels in dataloader:
      inputs = inputs.to(device=device, dtype=torch.float)
      labels = labels.to(device=device, dtype=torch.float)
      optimizer.zero_grad()
      with torch.set_grad_enabled(True):
          if is_inception:    #If using Inception model
              outputs, aux_outputs = model(inputs)
              loss1 = criterion(outputs, labels)
              loss2 = criterion(aux_outputs, labels)
              loss = loss1 + 0.4*loss2
          else:               #If using other models
              _, labels = torch.max(labels, 1)
              outputs = model(inputs)
              loss = criterion(outputs, labels)
          _, preds = torch.max(outputs, 1)
          loss.backward()
          optimizer.step()
          running_loss += loss.item() * inputs.size(0)
          running_corrects += torch.sum(preds == labels.data)

    #Return statistics about accuracy
    return running_loss, running_corrects

In [28]:
def eval_model(model, dataloader, criterion, optimizer):
    """ Helper function for train_eval_model() """

    model.eval()               #Validation mode
    running_loss = 0.0         #Track statistics
    running_corrects = 0
    for inputs, labels in dataloader:
      inputs = inputs.to(device=device, dtype=torch.float)
      labels = labels.to(device=device, dtype=torch.float)
      optimizer.zero_grad()
      with torch.set_grad_enabled(False):
          _, labels = torch.max(labels, 1)
          outputs = model(inputs)
          loss = criterion(outputs, labels)
          _, preds = torch.max(outputs, 1)
          running_loss += loss.item() * inputs.size(0)
          running_corrects += torch.sum(preds == labels.data)

          # PRECISION, RECALL, F1
          
          #if on gpu, convert to be on cpu first (or sklearn metrics.classification_report cannot handle it)
          labels = labels.to(device = 'cpu')
          preds = preds.to(device = 'cpu')

          out_metrics = metrics.classification_report(labels, preds)
    
    #Return statistics about accuracy
    return running_loss, running_corrects, out_metrics

# Test (using the case in the template file)


## Initialize a model

Set Model Parameters’ .requires_grad attribute
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This helper function sets the ``.requires_grad`` attribute of the
parameters in the model to False when we are feature extracting. By
default, when we load a pretrained model all of the parameters have
``.requires_grad=True``, which is fine if we are training from scratch
or finetuning. However, if we are feature extracting and only want to
compute gradients for the newly initialized layer then we want all of
the other parameters to not require gradients. This will make more sense
later.




In [29]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [30]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes) 
        input_size = 224

    elif model_name == "inception":
        """ Inception v3 
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()
    
    return model_ft, input_size

## Load Data

In [31]:
#CUSTOM DATASET CLASS FOR FITZPATRICK DATASET - needed to load the data
#normalization is hard-coded in, augmentation is added as 'transforms = ' at instantiation
class FitzDataset(Dataset):
  """Fitzpatrick17k Dataset"""

  def __init__(self, csv_file, img_dir, transform=None, target_transform=None):
    """
    Args:
      csv_file (string): Path to the csv file with the metadata/labels for images.
      img_dir (string): Directory with all the images.
      transform (callable, optional): Optional transform to be applied
          on the image of a sample.
      target_transform (callable, optional): Optional transform to be
          applied on the features of the sample.
    """
    self.img_labels = pd.read_csv(csv_file, usecols=list(range(1, num_classes+3)))
    self.img_dir = img_dir
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    return len(self.img_labels)

  #outputs image, label tuple, both as tensors
  def __getitem__(self, index):
    #if torch.is_tensor(index):
    #  index = index.tolist()

    #get image
    img_name = os.path.join(self.img_dir, (self.img_labels['md5hash'][index] + '.jpg'))

    """
    #NORMALIZATION 1 - normalize by image to reduce possible patterns produced from a certain subset of images (which reduces generalizability)
    image = read_image(img_name) #outputs tensor
    mean, std = torch.mean(image), torch.std(image)
    normalize_transform = T.Normalize(mean, std)
    image = normalize_transform(image)
    """

    #NORMALIZATION 2 - convert from RGB colourspace to LAB colourspace (replacing normalization)
    image = io.imread(img_name)
    image = color.rgb2lab(image)
    to_tensor_transform = T.ToTensor()
    image = to_tensor_transform(image)

    #get labels
    label = self.img_labels.iloc[index, 2:]
    label = np.array([label], dtype=np.int8)
    label = np.reshape(label, (num_classes))
    label = torch.from_numpy(label)

    #if transforms/augmentations are to be performed on the sample, this will trigger them
    if self.transform:
      image = self.transform(image)
    if self.target_transform:
      label = self.target_transform(label)
    
    return image, label

In [32]:
#CLEAN/PROCESS DATA AND INSTANTIATE DATASET
img_labels = pd.read_csv(image_labels_csv)

#Clean data: remove unncessary data
img_labels.drop(labels=['qc', 'url', 'url_alphanum'], axis = 'columns', inplace = True) #columns we don't need
img_labels.drop(img_labels.index[img_labels['fitzpatrick'] == -1].tolist(), axis = 'index', inplace = True) #missing skin tone value

#**remove duplicates**
duplicates = ['22f1d783dd6821defafcc915a8146c41', '6de74d3051ceafe10cf3f3e8c342bad8', 'dbee4a80595e78f281e1a0938f9857be', 'a7ef35e99387ff1227baced72467dc1f','6cd57e29acb9071a6c5e5aa23aeaf0ee',
              '3554761709cc4906ab9db13e5e46aa25', 'ea7c258aade6d510197d02b8d5012ba5', 'dbee4a80595e78f281e1a0938f9857be','38682083d6f7539a88c17d57559dcbd6','9b82bbff48d88f3bea9d30cfd96606dc',
              '09d46db9589ff45436cda87c4abc946b', 'bf77bafaa320f244f2331ca466b96f50', '9829c11b6a2ea0a47031a865d761a670', 'ba8e7927c71912e42cde00184b691376', '34b5d983d901815b931e28ee357ddf74',
              'bb7eaaccb79a069d59db68a3cae983cb', 'a36d079aeee1bd073859a3af6041c4f4', '8e4fcec9d635f8e9c152a23aad631eec', '0455b31fb640b89ee7375711168f318b', '0e2a24d28767bea1a4a37d1c6a4d4f31',
              'bf0403884214daf1e41bdb522df8c8a1', '8e8674abd53e4d087da3798f478edb8c', '5eebe4328896867cce5c841d7d15d765']

for img_hash in duplicates:
  index = img_labels.index[img_labels['md5hash'] == img_hash]
  img_labels.drop(index, axis = 'index')

# (NOT FOR USE BUT JUST IN CASE)
#class weights to balance skin types - the *goal* of this - are defined where the loss function is defined
#get class weights and reduce bias of a greater number of a given skin condition (used for class upweighting in loss function, but easier to calculate with unencoded data)
#if to_classify_on == 'nine cond':
#  class_weights = sklearn.utils.class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(img_labels['nine_partition_label']), y=img_labels['nine_partition_label'].to_numpy())
#elif to_classify_on == 'all skin cond':
#  class_weights = sklearn.utils.class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(img_labels['label']), y=img_labels['label'].to_numpy())
#else:
#  class_weights = sklearn.utils.class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(img_labels['three_partition_label']), y=img_labels['three_partition_label'].to_numpy())

#Encode data with one-hot encoding (to remove possibe hierarchy/bias) and rename label new columns to be more self-explanatory
#ex. 'label' -> 'skin cond' for specific skin condition classification
#col_to_encode = ['fitzpatrick', 'label', 'nine_partition_label', 'three_partition_label']
#encoded_img_lab = pd.get_dummies(img_labels, prefix = ['fitz_skin', 'skin_cond', '9_part_cond', '3_part_cond'], columns = col_to_encode)

col_to_encode = ['label', 'nine_partition_label', 'three_partition_label']
encoded_img_lab = pd.get_dummies(img_labels, prefix = ['skin_cond', '9_part_cond', '3_part_cond'], columns = col_to_encode)

#data to choose from
#skin_tone_data = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^fitz_skin.*', axis='columns')), axis='columns') #6 skin tones (light - dark)
#skin_cond_data = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^skin_cond.*', axis='columns')), axis='columns') #over 100 skin conditions to classify
#nine_cond_data = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^9_part_cond.*', axis='columns')), axis='columns') #9 skin condition categories to classify
#three_cond_data = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^3_part_cond.*', axis='columns')), axis='columns') #3 skin condition categories to classify

#set to data of choice - defaults to three condition classification
if to_classify_on == 'nine cond':
  encoded_img_lab = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^9_part_cond.*', axis='columns')), axis='columns') #9 skin condition categories to classify
elif to_classify_on == 'all skin cond':
  encoded_img_lab = skin_cond_data = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^skin_cond.*', axis='columns')), axis='columns') #over 100 skin conditions to classify
#elif to_classify_on == 'skin tone':
#  encoded_img_lab = three_cond_data
else:
  encoded_img_lab = pd.concat((encoded_img_lab['md5hash'], encoded_img_lab['fitzpatrick'], encoded_img_lab.filter(regex='^3_part_cond.*', axis='columns')), axis='columns') #3 skin condition categories to classify


encoded_img_lab.to_csv(os.path.join(helper_file_folder, "encoded_fitz17k.csv"))
fitz_dataset = FitzDataset(csv_file = os.path.join(helper_file_folder, "encoded_fitz17k.csv"), 
                           img_dir = image_dir, 
                           transform = T.Compose([T.Resize(size = (400, 600)),
                                                  #data augmentations
                                                  T.RandomHorizontalFlip(p = 0.5),
                                                  T.RandomVerticalFlip(p = 0.5),
                                                  T.CenterCrop(size = (int(400*0.8) if np.random.uniform(0,1) < 0.114 else 400, int(600*0.8) if np.random.uniform(0,1) < 0.114 else 600))]))  # center crop with prob around 0.2 for both sides

In [33]:
data_to_classify = pd.read_csv(os.path.join(helper_file_folder, "encoded_fitz17k.csv"), usecols=list(range(1, num_classes+3)))

#GET SUBDATASETS FOR EACH SKIN CONDITION
#indices for each skin condition
skin_1 = data_to_classify.index[data_to_classify['fitzpatrick'] == 1].tolist()
skin_2 = data_to_classify.index[data_to_classify['fitzpatrick'] == 2].tolist()
skin_3 = data_to_classify.index[data_to_classify['fitzpatrick'] == 3].tolist()
skin_4 = data_to_classify.index[data_to_classify['fitzpatrick'] == 4].tolist()
skin_5 = data_to_classify.index[data_to_classify['fitzpatrick'] == 5].tolist()
skin_6 = data_to_classify.index[data_to_classify['fitzpatrick'] == 6].tolist()

#get train/test splits and create dataloaders for each
#roughly a 70/20/10 train/val/test split
if dataset_bias == 'balanced':
  #FOR A BALANCED DATASET - should have an equal number of each Fitzpatrick skin type
  #NOTE: not necessarily a balanced number of each skin type across skin types
  #NOTE: cuts dataset from about N=16,000 to about N=3,800 (lots of downsampling, which is not ideal)

  #skin type 6 has the fewest samples, so it will limit the size
  train_size = int(len(skin_6) * 0.7)
  val_size = int(len(skin_6) * 0.2)
  test_size = int(len(skin_6) * 0.1)

  #skin_6 has the fewest samples, so randomly downsample all other skin types to be the same size - should another skin tone have the fewest samples, this will need to be adapted
  #the lists returned will be the indexes used to get the subsets of the original dataset
  #NOTE: random produces pseudorandom results, but the lists returned should be different each time unless we control the seed
  bal_skin_1 = random.choices(skin_1, k=len(skin_6))
  bal_skin_2 = random.choices(skin_2, k=len(skin_6))
  bal_skin_3 = random.choices(skin_3, k=len(skin_6))
  bal_skin_4 = random.choices(skin_4, k=len(skin_6))
  bal_skin_5 = random.choices(skin_5, k=len(skin_6))
  bal_skin_6 = skin_6

  random.shuffle(bal_skin_1)
  random.shuffle(bal_skin_2)
  random.shuffle(bal_skin_3)
  random.shuffle(bal_skin_4)
  random.shuffle(bal_skin_5)
  random.shuffle(bal_skin_6)

  train_skin_1 = bal_skin_1[:train_size]
  train_skin_2 = bal_skin_2[:train_size]
  train_skin_3 = bal_skin_3[:train_size]
  train_skin_4 = bal_skin_4[:train_size]
  train_skin_5 = bal_skin_5[:train_size]
  train_skin_6 = bal_skin_6[:train_size]

  val_skin_1 = bal_skin_1[train_size:train_size+val_size]
  val_skin_2 = bal_skin_2[train_size:train_size+val_size]
  val_skin_3 = bal_skin_3[train_size:train_size+val_size]
  val_skin_4 = bal_skin_4[train_size:train_size+val_size]
  val_skin_5 = bal_skin_5[train_size:train_size+val_size]
  val_skin_6 = bal_skin_6[train_size:train_size+val_size]
    
  test_skin_1 = bal_skin_1[train_size+val_size:]
  test_skin_2 = bal_skin_2[train_size+val_size:]
  test_skin_3 = bal_skin_3[train_size+val_size:]
  test_skin_4 = bal_skin_4[train_size+val_size:]
  test_skin_5 = bal_skin_5[train_size+val_size:]
  test_skin_6 = bal_skin_6[train_size+val_size:]

  train_list = [*train_skin_1, *train_skin_2, *train_skin_3, *train_skin_4, *train_skin_5, *train_skin_6]
  val_list = [*val_skin_1, *val_skin_2, *val_skin_3, *val_skin_4, *val_skin_5, *val_skin_6]
  test_list = [*test_skin_1, *test_skin_2, *test_skin_3, *test_skin_4, *test_skin_5, *test_skin_6]

  bal_train_data = torch.utils.data.Subset(fitz_dataset, train_list)
  bal_val_data = torch.utils.data.Subset(fitz_dataset, val_list)
  bal_test_data = torch.utils.data.Subset(fitz_dataset, test_list)

  train_dl = DataLoader(bal_train_data, batch_size=64, shuffle=True)
  val_dl = DataLoader(bal_val_data, batch_size=64, shuffle=True)
  test_dl = DataLoader(bal_test_data, batch_size=64, shuffle=True)
elif dataset_bias == 'small':
  small_data = random.choices(fitz_dataset, k=4000)
  train_size = int(len(small_data)*0.7)
  val_size = int(len(small_data)*0.2)
  test_size = int(len(small_data)*0.1)

  small_train_data, small_val_data, small_test_data = torch.utils.data.random_split(small_data, [train_size, val_size, test_size])

  train_dl = DataLoader(small_train_data, batch_size=64, shuffle=True)
  val_dl = DataLoader(small_val_data, batch_size=64, shuffle=True)
  test_dl = DataLoader(small_test_data, batch_size=64, shuffle=True)
else:
  #dataset taken as a whole is biased across skin types (more light skin type samples), so skewed dataset is the whole thing
  train_size = int(len(fitz_dataset)*0.7)
  val_size = int(len(fitz_dataset)*0.2)
  test_size = int(len(fitz_dataset) - val_size - train_size)

  skew_train_data, skew_val_data, skew_test_data = torch.utils.data.random_split(fitz_dataset, [train_size, val_size, test_size])

  train_dl = DataLoader(skew_train_data, batch_size=64, shuffle=True)
  val_dl = DataLoader(skew_val_data, batch_size=64, shuffle=True)
  test_dl = DataLoader(skew_test_data, batch_size=64, shuffle=True)
    
    

Create the Optimizer
--------------------



In [34]:
# Initialize the model for this run
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are 
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Params to learn:
	 fc.weight
	 fc.bias


In [35]:
#class CustomCrossEntropyLoss(_WeightedLoss):
#    def __init__(self, weight: Optional[Tensor] = None) -> None:
#        super(CustomCrossEntropyLoss, self).__init__(weight)
#        
#    def custom_cross_entropy(input, target, weight=self.weight):
# 
#
#   def forward(self, input: Tensor, target: Tensor) -> Tensor:
#        return F.cross_entropy(input, target, weight=self.weight)

Run Training and Validation Step
--------------------------------




In [None]:
# Send the model to GPU
model_ft = model_ft.to(device)

# Setup the loss fxn
#class_weights = sklearn.utils.class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(data_to_classify['fitzpatrick']), y=data_to_classify['fitzpatrick'].to_numpy())
#class_weights = torch.tensor(class_weights,dtype=torch.float) #needs to be tensor to be passed to torch.nn.CrossEntropyLoss
#criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model_ft, hist = train_eval_model("skew_noupw", model_ft, train_dl, val_dl, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

num_correct = test_model(model_ft, test_dl)
print("Accuracy on the test set:", str(100 * num_correct / len(test_datasets[i])))



Epoch 0/14:
Train Loss: 0.7343 Acc: 0.7314
Val Loss: 0.7088 Acc: 0.7311
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       2.0
           2       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0


Epoch 1/14:
Train Loss: 0.6924 Acc: 0.7415
Val Loss: 0.6842 Acc: 0.7374
              precision    recall  f1-score   support

           2       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Epoch 2/14:
Train Loss: 0.6655 Acc: 0.7468
Val Loss: 0.6864 Acc: 0.7395
              precision    recall  f1-score   support

           2       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg      

NameError: name 'test_model' is not defined

In [None]:
#FOR A BALANCED DATASET - should have an equal number of each Fitzpatrick skin type
#NOTE: not necessarily a balanced number of each skin type across skin types
#NOTE: cuts dataset from about N=16,000 to about N=3,800 (lots of downsampling, which is not ideal)

#skin type 6 has the fewest samples, so it will limit the size
train_size = int(len(skin_6) * 0.7)
val_size = int(len(skin_6) * 0.2)
test_size = int(len(skin_6) - test_size - val_size)

#skin_6 has the fewest samples, so randomly downsample all other skin types to be the same size - should another skin tone have the fewest samples, this will need to be adapted
#the lists returned will be the indexes used to get the subsets of the original dataset
#NOTE: random produces pseudorandom results, but the lists returned should be different each time unless we control the seed
bal_skin_1 = random.choices(skin_1, k=len(skin_6))
bal_skin_2 = random.choices(skin_2, k=len(skin_6))
bal_skin_3 = random.choices(skin_3, k=len(skin_6))
bal_skin_4 = random.choices(skin_4, k=len(skin_6))
bal_skin_5 = random.choices(skin_5, k=len(skin_6))
bal_skin_6 = skin_6

random.shuffle(bal_skin_1)
random.shuffle(bal_skin_2)
random.shuffle(bal_skin_3)
random.shuffle(bal_skin_4)
random.shuffle(bal_skin_5)
random.shuffle(bal_skin_6)

train_skin_1 = bal_skin_1[:train_size]
train_skin_2 = bal_skin_2[:train_size]
train_skin_3 = bal_skin_3[:train_size]
train_skin_4 = bal_skin_4[:train_size]
train_skin_5 = bal_skin_5[:train_size]
train_skin_6 = bal_skin_6[:train_size]

val_skin_1 = bal_skin_1[train_size:train_size+val_size]
val_skin_2 = bal_skin_2[train_size:train_size+val_size]
val_skin_3 = bal_skin_3[train_size:train_size+val_size]
val_skin_4 = bal_skin_4[train_size:train_size+val_size]
val_skin_5 = bal_skin_5[train_size:train_size+val_size]
val_skin_6 = bal_skin_6[train_size:train_size+val_size]

test_skin_1 = bal_skin_1[train_size+val_size:]
test_skin_2 = bal_skin_2[train_size+val_size:]
test_skin_3 = bal_skin_3[train_size+val_size:]
test_skin_4 = bal_skin_4[train_size+val_size:]
test_skin_5 = bal_skin_5[train_size+val_size:]
test_skin_6 = bal_skin_6[train_size+val_size:]

train_list = [*train_skin_1, *train_skin_2, *train_skin_3, *train_skin_4, *train_skin_5, *train_skin_6]
val_list = [*val_skin_1, *val_skin_2, *val_skin_3, *val_skin_4, *val_skin_5, *val_skin_6]
test_list = [*test_skin_1, *test_skin_2, *test_skin_3, *test_skin_4, *test_skin_5, *test_skin_6]

bal_train_data = torch.utils.data.Subset(fitz_dataset, train_list)
bal_val_data = torch.utils.data.Subset(fitz_dataset, val_list)
bal_test_data = torch.utils.data.Subset(fitz_dataset, test_list)

train_dl2 = DataLoader(bal_train_data, batch_size=64, shuffle=True)
val_dl2 = DataLoader(bal_val_data, batch_size=64, shuffle=True)
test_dl2 = DataLoader(bal_test_data, batch_size=64, shuffle=True)

# Initialize the model for this run
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_ft2, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are 
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

# Observe that all parameters are being optimized
optimizer_ft2 = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Send the model to GPU
model_ft2 = model_ft2.to(device)

# Setup the loss fxn
#class_weights = sklearn.utils.class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(data_to_classify['fitzpatrick']), y=data_to_classify['fitzpatrick'].to_numpy())
class_weights = torch.tensor(class_weights,dtype=torch.float) #needs to be tensor to be passed to torch.nn.CrossEntropyLoss
criterion2 = nn.CrossEntropyLoss(weight=class_weights).to(device)

# Train and evaluate
model_ft2, hist = train_eval_model("bal_upw", model_ft2, train_dl2, val_dl2, criterion2, optimizer_ft2, num_epochs=num_epochs, is_inception=(model_name=="inception"))

num_correct = test_model(model_ft2, test_dl2)
print("Accuracy on the test set:", str(100 * num_correct / len(test_datasets[i])))


In [40]:
data_to_classify = pd.read_csv(os.path.join(helper_file_folder, "encoded_fitz17k.csv"), usecols=list(range(1, num_classes+3)))

#TEST SETS
#by skin tone
skin_1 = data_to_classify.index[data_to_classify['fitzpatrick'] == 1].tolist()
skin_2 = data_to_classify.index[data_to_classify['fitzpatrick'] == 2].tolist()
skin_3 = data_to_classify.index[data_to_classify['fitzpatrick'] == 3].tolist()
skin_4 = data_to_classify.index[data_to_classify['fitzpatrick'] == 4].tolist()
skin_5 = data_to_classify.index[data_to_classify['fitzpatrick'] == 5].tolist()
skin_6 = data_to_classify.index[data_to_classify['fitzpatrick'] == 6].tolist()

#random.shuffle(bal_skin_1)
#random.shuffle(bal_skin_2)
#random.shuffle(bal_skin_3)
#random.shuffle(bal_skin_4)
#random.shuffle(bal_skin_5)
#random.shuffle(bal_skin_6)

# take subsets of these to form test set
skin1_test_data = torch.utils.data.Subset(fitz_dataset, skin_1)
skin2_test_data = torch.utils.data.Subset(fitz_dataset, skin_2)
skin3_test_data = torch.utils.data.Subset(fitz_dataset, skin_3)
skin4_test_data = torch.utils.data.Subset(fitz_dataset, skin_4)
skin5_test_data = torch.utils.data.Subset(fitz_dataset, skin_5)
skin6_test_data = torch.utils.data.Subset(fitz_dataset, skin_6)

skin1_test_dl = DataLoader(skin1_test_data, batch_size=64, shuffle=True)
skin2_test_dl = DataLoader(skin2_test_data, batch_size=64, shuffle=True)
skin3_test_dl = DataLoader(skin3_test_data, batch_size=64, shuffle=True)
skin4_test_dl = DataLoader(skin4_test_data, batch_size=64, shuffle=True)
skin5_test_dl = DataLoader(skin5_test_data, batch_size=64, shuffle=True)
skin6_test_dl = DataLoader(skin6_test_data, batch_size=64, shuffle=True)

In [41]:
def test_model(model, dataloader):
    #model.eval()               #Validation mode
    running_corrects = 0
    for inputs, labels in dataloader:
      inputs = inputs.to(device=device, dtype=torch.float)
      labels = labels.to(device=device, dtype=torch.float)
      with torch.set_grad_enabled(False):
          _, labels = torch.max(labels, 1)
          outputs = model(inputs)
          _, preds = torch.max(outputs, 1)
          running_corrects += torch.sum(preds == labels.data)

    #Return statistics about accuracy
    return running_corrects

In [43]:
#num_correct = test_model(model_ft, test_dl)
#print("Accuracy on the test set:", str(100 * num_correct / len(skew_test_data)))

test_dataloaders = [skin1_test_dl, skin2_test_dl, skin3_test_dl, skin4_test_dl, skin5_test_dl, skin6_test_dl]
test_datasets = [skin1_test_data, skin2_test_data, skin3_test_data, skin4_test_data, skin5_test_data, skin6_test_data]

for i in range(0, 6):
    num_correct = test_model(model_ft, test_dataloaders[i])
    print("Accuracy on the skin type", str(i + 1), "test set:", str(100 * num_correct / len(test_datasets[i])))

Accuracy on the skin type 1 test set: tensor(73.8717, device='cuda:0')
Accuracy on the skin type 2 test set: tensor(73.5441, device='cuda:0')
Accuracy on the skin type 3 test set: tensor(74.6977, device='cuda:0')
Accuracy on the skin type 4 test set: tensor(77.1665, device='cuda:0')
Accuracy on the skin type 5 test set: tensor(80.8872, device='cuda:0')
Accuracy on the skin type 6 test set: tensor(83.7795, device='cuda:0')


In [None]:
#(RE)LOAD MODEL

test_dataloaders = [skin1_test_dl, skin2_test_dl, skin3_test_dl, skin4_test_dl, skin5_test_dl, skin6_test_dl]
test_datasets = [skin1_test_data, skin2_test_data, skin3_test_data, skin4_test_data, skin5_test_data, skin6_test_data]

print('Balanced data: model_bal_epoch_14.pth')
model1, input_size_ = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
model1.to(device)
model1.load_state_dict(torch.load('model_bal_epoch_14.pth'))
model1.eval()

for i in range(0, 6):
    num_correct = test_model(model1, test_dataloaders[i])
    print("Accuracy on the skin type", str(i + 1), "test set:", str(100 * num_correct / len(test_datasets[i])))

"""
print('Skewed data: model_epoch4.pth')
model1, input_size_ = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
model1.to(device)
model1.load_state_dict(torch.load('model_epoch4.pth'))
model1.eval()

for i in range(0, 6):
    num_correct = test_model(model1, test_dataloaders[i])
    print("Accuracy on the skin type", str(i + 1), "test set:", str(100 * num_correct / len(test_datasets[i])))
"""