In [1]:
from tensorflow.keras.optimizers import SGD
import pandas as pd
from google.colab import drive
from pydrive.drive import GoogleDrive
import os
import shutil
import random
from matplotlib import pyplot
from collections import Counter
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Flatten
from keras.models import  Model
from math import ceil
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from sklearn.metrics import roc_curve, auc, confusion_matrix
from keras.callbacks import EarlyStopping, LearningRateScheduler, Callback
from keras.applications.resnet import ResNet50, preprocess_input

#mount drive and unzip folder containing images
drive.mount('/content/gdrive')
!unzip gdrive/My\ Drive/archive.zip > /dev/null
print("Total Images: {}".format(len([name for name in os.listdir('ISIC_2019_Training_Input/ISIC_2019_Training_Input')])))

Mounted at /content/gdrive
Total Images: 25333


In [2]:
#create one dataframe per class: melanoma (MEL), basal cell carcinoma (BCC), seborrheic keratosis (BKL)
def OrganizeImages():
  """Load the ground-truth table and split it into one DataFrame per class.

  Reads ``ISIC_2019_Training_GroundTruth.csv`` from the current working
  directory and keeps, for each of the columns ``MEL``, ``BCC`` and ``BKL``,
  the rows where that column equals 1.

  Returns:
    tuple: ``(df_mel, df_bcc, df_bkl)`` -- the ground-truth rows flagged as
    MEL, BCC and BKL respectively.

  Raises:
    FileNotFoundError: if the ground-truth CSV is missing.
  """
  # Only the ground-truth table is needed. The metadata CSV used to be read
  # here as well but was never used, so that read has been dropped.
  df_gt = pd.read_csv('ISIC_2019_Training_GroundTruth.csv')

  df_mel, df_bcc, df_bkl = (
    df_gt.loc[df_gt[label] == 1] for label in ('MEL', 'BCC', 'BKL')
  )
  return df_mel, df_bcc, df_bkl

In [3]:
#only extract images corresponding to one class (melanoma, basal cell carcinoma or seborrheic keratosis) and move them into that class's folder
def ExtractImages(df, name):
  """Move the images listed in ``df`` into the folder ``<name>/<name>``.

  Each value of ``df['image']`` is treated as a file stem; the file
  ``ISIC_2019_Training_Input/ISIC_2019_Training_Input/<stem>.jpg`` is moved
  into ``<name>/<name>``. After moving, the number of entries in the target
  folder is printed.

  Args:
    df: DataFrame with an ``image`` column of file stems (no extension).
    name: class label, used for both the outer and the inner folder name.

  Notes:
    The function is safe to re-run: existing target folders are reused and
    source files that are no longer present (for example because an earlier
    run already moved them) are skipped and counted instead of raising.
  """
  source_dir = "ISIC_2019_Training_Input/ISIC_2019_Training_Input"
  target_dir = name + "/" + name
  # Creates both <name> and <name>/<name>; tolerates folders left by a prior run.
  os.makedirs(target_dir, exist_ok=True)

  skipped = 0
  for stem in df['image']:
    image = "{}/{}.jpg".format(source_dir, stem)
    if not os.path.isfile(image):
      skipped += 1
      continue
    shutil.move(image, target_dir)

  if skipped:
    print("Skipped {} missing source images for {}".format(skipped, name))
  print("Total " + name + " images: {}".format(len(os.listdir(target_dir))))

In [4]:
#split one class folder into train, validation and test subsets and move the files into the corresponding folders
def SplitData(name, training, validation, test, seed=42):
  """Distribute the files of ``<name>/<name>`` over train/validation/test.

  The files are moved into ``train/<name>``, ``validation/<name>`` and
  ``test/<name>``. Subset sizes are ``round(total * fraction)``. The three
  subsets are consecutive, non-overlapping slices of the shuffled file list,
  so every requested image is assigned (the earlier index arithmetic dropped
  one image from the validation subset and one from the test subset).

  Args:
    name: class label; the source folder is ``<name>/<name>``.
    training: fraction of the files to put in the training subset.
    validation: fraction of the files to put in the validation subset.
    test: fraction of the files to put in the test subset.
    seed: seed for the shuffle that decides which file goes where. The
      listing is sorted before shuffling, so a given seed always yields the
      same split regardless of the order ``os.listdir`` returns.

  Notes:
    If rounding makes the three sizes exceed the number of files, the last
    subset is simply shorter; no error is raised. After moving, the sizes of
    the three target folders are printed.
  """
  source_dir = name + "/" + name

  names = sorted(os.listdir(source_dir))
  # A private Random instance keeps the global random state untouched.
  random.Random(seed).shuffle(names)

  length = len(names)
  train_size = round(length * training)
  validation_size = round(length * validation)
  test_size = round(length * test)

  validation_start = train_size
  test_start = validation_start + validation_size
  subsets = (
    ('train/' + name, names[:validation_start]),
    ('validation/' + name, names[validation_start:test_start]),
    ('test/' + name, names[test_start:test_start + test_size]),
  )

  for target_dir, subset in subsets:
    # exist_ok: one pre-existing folder must not prevent creating the others.
    os.makedirs(target_dir, exist_ok=True)
    for file_name in subset:
      shutil.move(source_dir + "/" + file_name, target_dir)

  print_str = name + ":\n\tTrain Size: {}\n\tValidation Size: {}\n\tTest Size: {}\n"
  print(print_str.format(*(len(os.listdir(target_dir)) for target_dir, _ in subsets)))

In [5]:
#balance classes
def ClassBalance(train):
  """Compute per-class weights that offset class imbalance.

  Each class receives the weight ``largest_class_count / class_count``, so
  the most frequent class gets 1.0 and rarer classes get proportionally
  larger weights.

  Args:
    train: object exposing ``classes``, an iterable with one class id per
      sample (presumably a Keras directory iterator -- verify against caller).

  Returns:
    dict: ``{class_id: weight}``; empty when ``train.classes`` is empty.
  """
  counter = Counter(train.classes)
  if not counter:
    return {}

  max_val = float(max(counter.values()))
  # The mapping was previously built but never returned, so callers got None.
  return {class_id: max_val / num_images for class_id, num_images in counter.items()}

In [45]:
# Augmentation pipeline for training images. Only arguments that differ from
# the ImageDataGenerator defaults (or that deserve a remark) are spelled out.
train_arg_params = ImageDataGenerator(
    # featurewise_center needs dataset statistics computed via .fit(); this
    # generator is never fit, so enabling it only produced a warning and had
    # no effect. Input normalisation is done by preprocess_input below.
    featurewise_center = False,
    rotation_range = 90,
    # NOTE(review): a list here means "pick one of these values", i.e. a shift
    # of exactly -30 or +30 pixels; use the integer 30 if a range was intended.
    width_shift_range = [-30, 30],
    height_shift_range = [-30, 30],
    fill_mode = 'nearest',
    horizontal_flip = True,
    vertical_flip = True,
    preprocessing_function = preprocess_input,
)

# Validation/test images get the same network preprocessing but no augmentation.
test_arg_params = ImageDataGenerator(preprocessing_function=preprocess_input)

In [7]:
def ShowExampleArg(name, count):
  """Show augmented versions of ``count`` consecutive training images of a class.

  ``count`` consecutive files (random start) are copied from ``train/<name>``
  into a freshly created ``<name>/ExampleArg/<name>`` folder, streamed through
  ``train_arg_params`` and ``count * 4`` augmented samples are plotted.

  Args:
    name: class label (sub-folder of ``train``).
    count: number of source images to copy; ``count * 4`` panels are drawn.

  Raises:
    ValueError: if ``count`` exceeds the number of files in ``train/<name>``
      (the earlier rejection loop never terminated in that case).
  """
  example_root = name + "/ExampleArg"
  example_dir = example_root + "/" + name

  # If the folder exists (examples from an earlier call), delete it and start over.
  if os.path.isdir(example_root):
    print("Deleting dir {}".format(example_root))
    shutil.rmtree(example_root)
    print("Creating dir {}".format(example_dir))
  os.makedirs(example_dir)

  source_dir = "train/" + name
  names = os.listdir(source_dir)
  length = len(names)
  if count > length:
    raise ValueError("count ({}) exceeds the {} images available in {}".format(count, length, source_dir))

  # Pick the start so that start + count never runs past the end of the list.
  start = random.randint(0, length - count)

  # Copy (not move) the chosen images so the training set stays intact.
  for file_name in names[start:start + count]:
    shutil.copyfile(source_dir + "/" + file_name, example_dir + "/" + file_name)

  # Start augmentation.
  examples = train_arg_params.flow_from_directory(example_root, target_size = inputSize[0:2])

  # preprocess_input for ResNet converts RGB to BGR and subtracts these
  # per-channel ImageNet means; both steps are undone before display.
  # Assumes channels-last batches -- TODO confirm if data_format is changed.
  imagenet_bgr_mean = np.array([103.939, 116.779, 123.68])

  # Display: 8 columns, and at least 8 rows so small counts look as before.
  total = count * 4
  columns = 8
  rows = max(8, ceil(total / columns))
  fig = pyplot.figure(figsize=(50, 50))

  for position in range(1, total + 1):
    batch_images, _ = next(examples)
    rgb = np.clip(batch_images[0] + imagenet_bgr_mean, 0, 255)[..., ::-1].astype('uint8')
    sub = fig.add_subplot(rows, columns, position)
    sub.axis('off')
    sub.imshow(rgb)
  pyplot.show()

In [8]:
def CreateDataForModel(batch_size=6):
  """Create the directory iterators for training, validation and testing.

  Images are read from the ``train``, ``validation`` and ``test`` folders,
  resized to ``inputSize[0:2]`` and labelled one-hot (``categorical``).
  Training data goes through ``train_arg_params`` (augmented); validation and
  test data go through ``test_arg_params``.

  Args:
    batch_size: batch size for the training and validation iterators.
      The test iterator keeps the library default, as before.

  Returns:
    tuple: ``(train, validation, test)`` iterators.
  """
  common = dict(target_size = inputSize[0:2], class_mode = "categorical")

  train = train_arg_params.flow_from_directory("train",
                                batch_size = batch_size,
                                **common)

  # Evaluation data is served in a fixed order: shuffling brings nothing
  # there and makes repeated evaluations harder to compare.
  validation = test_arg_params.flow_from_directory("validation",
                                  batch_size = batch_size,
                                  shuffle = False,
                                  **common)

  test = test_arg_params.flow_from_directory("test",
                                  shuffle = False,
                                  **common)

  return train, validation, test

In [9]:
def BuildModel(train, lr, mom, factor, input_shape = (224, 224, 3)):
  """Build and compile an ImageNet-pretrained ResNet50 with a new softmax head.

  The ResNet50 backbone is created without its top layer and with global
  average pooling; a dense softmax layer named ``fc8`` with
  ``train.num_classes`` units is attached to its output. The model is
  compiled with SGD and categorical cross-entropy, tracking accuracy.

  Args:
    train: object exposing ``num_classes`` (size of the output layer).
    lr: SGD learning rate.
    mom: SGD momentum.
    factor: currently unused; kept so existing callers keep working.
    input_shape: shape of the network input, ``(height, width, channels)``.

  Returns:
    The compiled Keras model.
  """
  resnet = ResNet50(include_top = False, pooling = 'avg', weights = 'imagenet', input_shape = input_shape)

  # Replace the classification head with one sized for this dataset.
  fc8 = Dense(train.num_classes, activation='softmax', name = 'fc8')(resnet.output)
  model = Model(inputs=resnet.input, outputs=fc8)

  # `learning_rate` is the supported keyword; the `lr` alias is deprecated.
  optimizer = SGD(learning_rate = lr, momentum = mom, nesterov = False)
  model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

  return model

In [10]:
def VariableLR(epoch, lr):
  """Step-decay schedule: scale ``lr`` by 0.1 at every 5th epoch except epoch 0.

  Args:
    epoch: index of the epoch about to start.
    lr: learning rate currently in use.

  Returns:
    ``lr * 0.1`` when ``epoch`` is a non-zero multiple of 5, otherwise ``lr``.
  """
  decay, period = 0.1, 5
  on_boundary = epoch != 0 and epoch % period == 0
  return lr * decay if on_boundary else lr

In [29]:
#plot training vs. validation curves for loss and for accuracy
def PlotLossAcc(history):
  """Draw two panels: train/validation loss and train/validation accuracy.

  Args:
    history: object whose ``history`` attribute maps the keys ``loss``,
      ``val_loss``, ``accuracy`` and ``val_accuracy`` to per-epoch values.
  """
  fig = pyplot.figure(figsize=(50, 50))
  curves_by_key = history.history

  # (subplot position, panel title, ((history key, legend label), ...))
  panels = (
    (1, "Train Loss v Val Loss", (("loss", "Train loss"), ("val_loss", "Val Loss"))),
    (2, "Train Acc. v Val Acc.", (("accuracy", "Train Acc."), ("val_accuracy", "Val Acc."))),
  )

  for position, title, curves in panels:
    axes = fig.add_subplot(8, 8, position)
    for key, label in curves:
      axes.plot(curves_by_key[key], label = label)
    axes.title.set_text(title)
    axes.legend()

In [12]:
def TestModel(test, model):
  """Collect one full pass of the test iterator and run the model on it.

  Args:
    test: batch iterator exposing ``n`` (sample count), ``batch_size``,
      ``reset()`` and the iterator protocol; each step yields ``(x, y)``.
    model: object with ``predict(x)``.

  Returns:
    tuple: ``(x_test, y_test, y_pred, vector)`` where ``x_test``/``y_test``
    are the stacked inputs and labels, ``y_pred`` is ``model.predict(x_test)``
    and ``vector`` is the argmax of ``y_test`` along axis 1.
  """
  # Rewind first: if the iterator was already partly consumed, the batches
  # gathered below would otherwise not line up with a single clean pass.
  test.reset()

  x_batches, y_batches = [], []
  for _ in range(ceil(test.n / test.batch_size)):
    x_iter, y_iter = next(test)
    x_batches.append(x_iter)
    y_batches.append(y_iter)

  # Join whole batches at once instead of growing per-sample Python lists.
  x_test = np.concatenate(x_batches)
  y_test = np.concatenate(y_batches)

  y_pred = model.predict(x_test)
  vector = np.argmax(y_test, 1)

  return x_test, y_test, y_pred, vector

In [13]:
def ShowExampleTestPred(test, x_test, y_pred, vector, count):
  """Plot ``count`` randomly chosen test images with true and predicted labels.

  Args:
    test: object exposing ``class_indices`` (label name -> index mapping).
    x_test: array of test images as fed to the network.
    y_pred: per-class scores for each entry of ``x_test``.
    vector: true class index for each entry of ``x_test``.
    count: number of images to draw (sampled with replacement).
  """
  labels = list(test.class_indices.keys())

  # x_test holds network-ready pixels: preprocess_input for ResNet converts
  # RGB to BGR and subtracts these per-channel ImageNet means. Both steps are
  # undone so imshow gets ordinary 0-255 RGB values.
  # Assumes channels-last images -- TODO confirm if data_format is changed.
  imagenet_bgr_mean = np.array([103.939, 116.779, 123.68])

  # 8 columns, and at least 8 rows so small counts look as before while
  # counts above 64 no longer overflow the grid.
  columns = 8
  rows = max(8, ceil(count / columns))
  fig = pyplot.figure(figsize=(50, 50))

  for position in range(1, count + 1):
    random_image = np.random.randint(len(x_test))
    rgb = np.clip(x_test[random_image] + imagenet_bgr_mean, 0, 255)[..., ::-1].astype('uint8')
    sub = fig.add_subplot(rows, columns, position)
    sub.title.set_text('Ground truth = '+ labels[int(vector[random_image])] + ';  Prediction = ' + labels[np.argmax(y_pred[random_image])])
    sub.axis('off')
    sub.imshow(rgb)

  pyplot.show()

In [None]:
#organize and extract melanoma, basal cell carcinoma, seborrheic keratosis into separate folders
df_mel, df_bcc, df_bkl = OrganizeImages()
for class_frame, class_name in ((df_mel, 'MEL'), (df_bcc, 'BCC'), (df_bkl, 'BKL')):
  ExtractImages(class_frame, class_name)

#split data into training, validation, testing
training, validation, test = 0.7, 0.15, 0.15
inputSize = [224, 224, 3]

# Create the three top-level split folders, then fill them class by class.
for split_dir in ('train', 'validation', 'test'):
  os.mkdir(split_dir)
for class_name in ('MEL', 'BCC', 'BKL'):
  SplitData(class_name, training, validation, test)

In [None]:
# Preview the augmentation: one MEL training image, shown as 4 augmented samples.
ShowExampleArg('MEL', 1)

In [None]:
#create the data iterators for training, validation, and testing
# NOTE: this rebinds `validation` and `test`, which held the split fractions
# in the dataset-preparation cell, to the iterators returned here.
train, validation, test = CreateDataForModel()

In [None]:
# Optimizer hyperparameters: initial learning rate and SGD momentum.
lr = 0.0001
mom = 0.9
# NOTE(review): `factor` is passed to BuildModel but not used in its body, and
# VariableLR hard-codes its own decay factor (0.1) -- confirm whether this
# value is still meant to have an effect.
factor = 10

# Per-class weights from the training iterator, then the compiled network.
weights = ClassBalance(train)
model = BuildModel(train, lr, mom, factor)

In [None]:
#train model
# Callbacks: apply the VariableLR schedule each epoch, and stop once val_loss
# has gone 5 epochs without improving.
variable_lr = LearningRateScheduler(VariableLR, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# Step counts are derived from each iterator's own batch size rather than a
# repeated literal, so they stay correct if the batch size is changed.
history = model.fit(train, steps_per_epoch = ceil(train.n / train.batch_size), epochs = 50, shuffle = True,
                                    validation_data=validation, validation_steps = ceil(validation.n / validation.batch_size),
                                    callbacks = [early_stop, variable_lr], verbose = 1, class_weight = weights)

In [None]:
# Plot the loss and accuracy curves recorded during training.
PlotLossAcc(history)

In [None]:
# Run the model on the test set; `vector` is the argmax of y_test, i.e. the
# true class index per sample, so it is reused instead of being recomputed.
x_test, y_test, y_pred, vector = TestModel(test, model)
predicted = np.argmax(y_pred, axis = 1)

test_acc = accuracy_score(vector, predicted)
print('Accuracy: {}\n'.format(round(test_acc, 3)))

# Confusion matrix labelled with the class names; the dict view is turned into
# a list because display_labels is documented as array-like.
c_matrix = confusion_matrix(vector, predicted)
disp = ConfusionMatrixDisplay(confusion_matrix = c_matrix, display_labels = list(train.class_indices.keys()))
disp.plot(cmap ='YlGnBu')
disp.figure_.set_size_inches(5, 5)
pyplot.show()

In [None]:
# Show 4 randomly chosen test images with their true and predicted labels.
ShowExampleTestPred(test, x_test, y_pred, vector, 4)