# Melanoma detection in dermatoscopic images using contextual information and Convolutional Neural Networks.
By Brenda Farinha Fernandes

November 2022

## Setup

Required libraries

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import zipfile
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split 
from matplotlib import pyplot as plt

import tensorflow as tf
print('Version of Tensorflow used : ', tf.__version__)

Mounted at /content/drive
Version of Tensorflow used :  2.11.0


GPU Information

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)
  print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

strategy = tf.distribute.get_strategy()
REPLICAS = strategy.num_replicas_in_sync
print('Number of replicas:', REPLICAS)

Mon Feb 27 14:58:56 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P0    27W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

RAM Information

In [3]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


Global variables

In [4]:
PATH_dataset_images_zip = '/content/drive/MyDrive/PFG_MELANOMA/ISIC_2020_Training_JPEG.zip'
PATH_dataset_images = '/content/drive/MyDrive/PFG_MELANOMA/images'
PATH_dataset_csv = '/content/drive/MyDrive/PFG_MELANOMA/ISIC_2020_Training_GroundTruth.csv'

In [5]:
IMG_HEIGHT = 124
IMG_WIDTH = 124

In [6]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 64

## Data preproccesing

Read [SIIM-ISIC 2020 Melanoma Classification Challenge Dataset](https://doi.org/10.34970/2020-ds01) 

In [7]:
def read_dataset(dataset_unzip = False): 

  # Unzip images from "/content/drive/MyDrive/PFG_MELANOMA/ISIC_2020_Training_JPEG.zip"
  if dataset_unzip: 
    zipfile.ZipFile(PATH_dataset_images_zip).extractall(PATH_dataset_images)
  
  df = pd.DataFrame(pd.read_csv(f'{PATH_dataset_csv}'))

  df['sex'] = df['sex'].fillna('unknown')
  df['age_approx'] = df['age_approx'].fillna('unknown')
  df['anatom_site_general_challenge'] = df['anatom_site_general_challenge'].fillna('unknown')

  df = df.drop(columns=['diagnosis', 'benign_malignant'])

  train_df, val_df = train_test_split(df, random_state=42, shuffle=True, stratify=df['target'])

  return train_df, val_df

In [8]:
def decode_image(image_path, metadata, label):

  image = tf.io.read_file(image_path)
  image = tf.image.decode_jpeg(image, channels=3)
  image = tf.image.resize(image, [IMG_HEIGHT, IMG_WIDTH])
  image = tf.cast(image, tf.float32) / 255.0

  return image, metadata, label

In [9]:
def image_augmentation(image, metadata, label): 

  image = tf.image.random_flip_left_right(image)
  image = tf.image.random_flip_up_down(image)
  image = tf.image.random_hue(image, 0.025)
  image = tf.image.random_saturation(image, 0.6, 1.4)
  image = tf.image.random_contrast(image, 0.7, 1.4)
  image = tf.image.random_brightness(image, 0.1)

  return (image, metadata), label

Load dataset: 
* `map()`: This transformation applies map_func to each element of this dataset, and returns a new dataset containing the transformed elements

* `repeat()`: Repeats this dataset so each original value is seen count times.

* `shuffle()`: Randomly shuffles the elements of this dataset.

* `batch()`: Combines consecutive elements of this dataset into batches.

* `prefetch()` : Most dataset input pipelines should end with a call to prefetch. This allows later elements to be prepared while the current element is being processed. prefetch( ) doesn’t allow CPU stand idle. When model is training prefetch continue prepare data while GPU is busy.

* `cache()` : The first time the dataset is iterated over, its elements will be cached either in the specified file or in memory. Subsequent iterations will use the cached data. When caching to a file, the cached data will persist across runs. Even the first iteration through the data will read from the cache file.



In [10]:
def load_dataset(df, deterministic = False, augment = False, cache = False): 

  labels = df["target"].values
  images = f'{PATH_dataset_images}/' + df["image_name"].values +".jpg"

  metadata_df = df.drop(columns=['image_name', 'patient_id', 'target'])
  metadata_df = pd.get_dummies(metadata_df, columns = ["sex", "age_approx", "anatom_site_general_challenge"])
  metadata_df.shape

  metadata = metadata_df.to_numpy()

  ds = tf.data.Dataset.from_tensor_slices((images, metadata, labels))

  if not deterministic:
    options = tf.data.Options()
    options.experimental_deterministic = False
    ds = ds.with_options(options)

  ds = ds.map(decode_image, num_parallel_calls=AUTOTUNE)

  if augment: 
    ds = ds.map(image_augmentation, num_parallel_calls=AUTOTUNE)
    ds = ds.repeat()
    ds.shuffle(buffer_size = 2048)
  
  ds = ds.batch(BATCH_SIZE)

  if cache:
    ds = ds.cache()

  ds = ds.prefetch(AUTOTUNE)

  return ds

Show images

In [11]:
def show_images(ds):

  row, col = 3,5
  fig = plt.figure(figsize=(2*col, 2*row))
  for _row in range(row):
    for _col in range(col):
      plt.subplot(row, col, _row*col + _col +1)
      for images, labels in ds.take(1):
        img = images[0].numpy()
        plt.imshow(img)
        plt.title(labels[0].numpy())
        plt.axis(False)
  plt.show()

## Model

In [12]:
train_df, val_df = read_dataset(dataset_unzip = False)

In [13]:
metadata_df = train_df.drop(columns=['image_name', 'patient_id', 'target'])
metadata_df = pd.get_dummies(metadata_df, columns = ["sex", "age_approx", "anatom_site_general_challenge"])
metadata_df.shape

(24843, 29)

In [14]:
metadata_df.head()

Unnamed: 0,sex_female,sex_male,sex_unknown,age_approx_0.0,age_approx_10.0,age_approx_15.0,age_approx_20.0,age_approx_25.0,age_approx_30.0,age_approx_35.0,...,age_approx_85.0,age_approx_90.0,age_approx_unknown,anatom_site_general_challenge_head/neck,anatom_site_general_challenge_lower extremity,anatom_site_general_challenge_oral/genital,anatom_site_general_challenge_palms/soles,anatom_site_general_challenge_torso,anatom_site_general_challenge_unknown,anatom_site_general_challenge_upper extremity
7560,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
29505,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
11069,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
13032,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
9952,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


Distribution dataset

In [15]:
train_total = len(train_df)
train_malignant = len(train_df[train_df["target"] == 1])
train_benign = len(train_df[train_df["target"] == 0])

val_total = len(val_df)
val_malignant = len(val_df[val_df["target"] == 1])
val_benign = len(val_df[val_df["target"] == 0])

print("Number of training files =", train_total)
print("\tNumber of malignant training files =", train_malignant)
print("\tNumber of benign training files =", train_benign)

print("\nNumber of validation files =", val_total)
print("\tNumber of malignant validation files =", val_malignant)
print("\tNumber of benign validation files =", val_benign)

Number of training files = 24843
	Number of malignant training files = 437
	Number of benign training files = 24406

Number of validation files = 8282
	Number of malignant validation files = 146
	Number of benign validation files = 8136


In [16]:
STEPS_PER_EPOCH_TRAIN = train_total // BATCH_SIZE
STEPS_PER_EPOCH_VAL = val_total // BATCH_SIZE

EPOCHS = 15
print("Number of steps per epoch in training:", STEPS_PER_EPOCH_TRAIN)
print("Number of steps per epoch in validation:", STEPS_PER_EPOCH_VAL)

Number of steps per epoch in training: 388
Number of steps per epoch in validation: 129


In [17]:
train_ds = load_dataset(train_df, deterministic = False, augment = True, cache = False)
val_ds = load_dataset(val_df, deterministic = False, augment = False, cache = True)

Class weights in training dataset

In [18]:
weight_malignant = (train_total/train_malignant)/2.0
weight_benign = (train_total/train_benign)/2.0

class_weight = {0: weight_benign, 1: weight_malignant}

print("Weight for benign cases = ", class_weight[0])
print("Weight for malignant cases = ", class_weight[1])

Weight for benign cases =  0.5089527165451119
Weight for malignant cases =  28.424485125858123


Defining Callbacks

*   `EarlyStopping`: Stop training when a monitored metric has stopped improving.
*   `ModelCheckpoint`: Callback to save the Keras model or model weights at some frequency.



In [19]:
callback_early_stopping = tf.keras.callbacks.EarlyStopping(
    patience = 15, 
    verbose = 0, 
    restore_best_weights = True)

callbacks_lr_reduce = tf.keras.callbacks.ReduceLROnPlateau(
    monitor = "val_auc", 
    factor = 0.1, 
    patience = 10,
    verbose = 0, 
    min_lr = 1e-6)

callback_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "melanoma_detection_weights.h5",
    save_weights_only=True,
    monitor='val_auc',
    mode='max',
    save_best_only=True)

In [26]:
with strategy.scope():

  images = tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="image")
  dummy = tf.keras.layers.Lambda(lambda x:x)(images)

  encoder = tf.keras.applications.EfficientNetB6(
      input_shape = (IMG_HEIGHT, IMG_WIDTH, 3),
      include_top = False,
      weights = 'imagenet'
  )(dummy)
  encoder.trainable = False

  x = tf.keras.layers.GlobalAveragePooling2D()(encoder)
  #outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)

  metadata = tf.keras.layers.Input(shape=(29,), name="metadata")
  y = tf.keras.layers.Dense(100, activation="relu")(metadata)
  y = tf.keras.layers.Dense(3, activation="relu")(y)

  z = tf.keras.layers.concatenate([x, y])
  outputs = tf.keras.layers.Dense(1, activation="sigmoid")(z)

  model = tf.keras.Model(inputs=[images, metadata], outputs=outputs, name='aNetwork')
  model.summary()

  model.compile(
      optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3),
      loss = tf.keras.losses.BinaryCrossentropy(label_smoothing = 0.05),
      metrics = [tf.keras.metrics.AUC(name='auc')]
  )

Model: "aNetwork"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image (InputLayer)             [(None, 124, 124, 3  0           []                               
                                )]                                                                
                                                                                                  
 lambda_2 (Lambda)              (None, 124, 124, 3)  0           ['image[0][0]']                  
                                                                                                  
 metadata (InputLayer)          [(None, 29)]         0           []                               
                                                                                                  
 efficientnetb6 (Functional)    (None, 4, 4, 2304)   40960143    ['lambda_2[0][0]']        

In [None]:
history_B6 = model.fit(
    train_ds, 
    verbose=1, 
    steps_per_epoch=STEPS_PER_EPOCH_TRAIN, 
    validation_data = val_ds, validation_steps = STEPS_PER_EPOCH_VAL,
    epochs = EPOCHS, 
    callbacks=[callback_early_stopping, callbacks_lr_reduce, callback_checkpoint],
    class_weight = class_weight) 

Epoch 1/15

In [None]:
model.evaluate(val_ds)