<a href="https://colab.research.google.com/github/davidokel/MelanomaClassification/blob/main/MelanomaClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Classification of Melanoma images

# Preamble and data processing

In [43]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import os

# tf.data's autotune sentinel, kept under a short alias.
AUTO = tf.data.AUTOTUNE


# Directory that is globbed for the train*/test* .tfrec shards further down.
# NOTE(review): this name shadows the builtin `dir()` for the rest of the notebook.
dir="archive"


In [44]:
# Environment check: print what TensorFlow can see, so it is obvious whether
# training will run on a GPU or fall back to the CPU.
gpu_devices = tf.config.list_physical_devices('GPU')
print('GPU name: ', gpu_devices)

from tensorflow.python.client import device_lib
local_devices = device_lib.list_local_devices()
print(local_devices)

default_gpu_name = tf.test.gpu_device_name()
print("GPU: ",default_gpu_name)

GPU name:  []
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7065770617953565842
xla_global_id: -1
]
GPU:  


In [45]:
# Input geometry and batching used throughout the notebook.
image_size=(512,512)
image_shape = [*image_size, 3]
batch_size = 16

# The glob result is sorted before it is split by position: no ordering is
# documented for the glob call, and without a fixed order the 90/10
# train/validation split could pick different shards on different runs.
FILENAMES = sorted(tf.io.gfile.glob(dir + "/train*.tfrec"))
split_ind = int(0.9 * len(FILENAMES))
TRAINING_FILENAMES, VALID_FILENAMES = FILENAMES[:split_ind], FILENAMES[split_ind:]

TEST_FILENAMES = sorted(tf.io.gfile.glob(dir + "/test*.tfrec"))
print("Train TFRecord Files:", len(TRAINING_FILENAMES))
print("Validation TFRecord Files:", len(VALID_FILENAMES))
print("Test TFRecord Files:", len(TEST_FILENAMES))

# Every field is a scalar; only the dtype differs, so two specs are shared.
_scalar_bytes = tf.io.FixedLenFeature([], tf.string)
_scalar_int64 = tf.io.FixedLenFeature([], tf.int64)

# Schema used to parse labelled records (image bytes, name, metadata, target).
tfrec_format = {
    'image': _scalar_bytes,
    'image_name': _scalar_bytes,
    **{
        key: _scalar_int64
        for key in (
            'patient_id',
            'sex',
            'age_approx',
            'anatom_site_general_challenge',
            'diagnosis',
            'target',
        )
    },
}

# Schema used to parse unlabelled records: only the image and its name are read.
unlabelled_tfrec_format = {
    key: tfrec_format[key] for key in ('image', 'image_name')
}

Train TFRecord Files: 13
Validation TFRecord Files: 2
Test TFRecord Files: 16


In [46]:
def read_tfrecord(example, labelled = True):
  """Parse one serialized TFRecord example into an (image, tag) pair.

  Args:
    example: a serialized example, as yielded by a TFRecordDataset.
    labelled: when True the record is parsed with `tfrec_format` and the tag
      is its `target` field; otherwise `unlabelled_tfrec_format` is used and
      the tag is its `image_name` field.

  Returns:
    A tuple `(image, tag)` where `image` is the still-encoded image string.
  """
  if labelled:
    parsed = tf.io.parse_single_example(example, tfrec_format)
    return parsed["image"], parsed["target"]

  # Both branches return, so no fall-through return is needed.
  parsed = tf.io.parse_single_example(example, unlabelled_tfrec_format)
  return parsed["image"], parsed["image_name"]


def prepare_image(img, img_name, augment=True):
    """Decode a JPEG string and scale it for the MobileNet backbone.

    Args:
        img: encoded JPEG bytes.
        img_name: value passed through untouched (the label or the image
            name, depending on how the record was parsed).
        augment: when True, apply a random horizontal flip and random
            saturation / contrast / brightness jitter before scaling.

    Returns:
        `(image, img_name)` where `image` is a float32 tensor of shape
        `[*image_size, 3]` after `mobilenet_v2.preprocess_input`.
    """
    img = tf.image.decode_jpeg(img, channels=3)
    # Pin the static shape; this fails if the record is not image_size.
    img = tf.reshape(img, [*image_size, 3])
    img = tf.cast(img, tf.float32)

    if augment:
        # The colour ops expect float images in [0, 1] (saturation goes
        # through an RGB<->HSV round trip), so they must run BEFORE the
        # [-1, 1] MobileNet scaling rather than after it.
        img = img / 255.0
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_saturation(img, 0.7, 1.3)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        img = tf.image.random_brightness(img, 0.1)
        # Jitter can leave [0, 1]; clip, then go back to the 0-255 range
        # that preprocess_input expects.
        img = tf.clip_by_value(img, 0.0, 1.0) * 255.0

    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
    return img, img_name


def get_dataset(filenames, labelled = True, batch = True, filter=None, is_jpeg=False):
  """Build a shuffled tf.data pipeline of preprocessed (image, tag) pairs.

  Args:
    filenames: TFRecord file paths (or, with `is_jpeg`, the argument handed
      to `tfds.ImageFolder`).
    labelled: forwarded to `read_tfrecord`; selects the parsing schema and
      whether the tag is the target or the image name.
    batch: when True the result is batched with the global `batch_size`.
    filter: accepted for interface compatibility; currently unused.
    is_jpeg: read through `tfds.ImageFolder` instead of TFRecords.

  Returns:
    A `tf.data.Dataset` of `(image, tag)` elements (batched if requested).
    Augmentation inside `prepare_image` is always disabled here.
  """
  if is_jpeg:
    # NOTE(review): `split=[]` and the TFRecord parsing applied below do not
    # look compatible with an ImageFolder dataset - this branch appears
    # unfinished; confirm before relying on it.
    builder = tfds.ImageFolder(filenames)
    raw_dataset = builder.as_dataset(split=[], shuffle_files=True)
  else:
    raw_dataset = tf.data.TFRecordDataset(filenames)

  # Cache the raw records (before decoding), then shuffle.
  raw_dataset = raw_dataset.cache()
  raw_dataset = raw_dataset.shuffle(1024)

  # Element order does not matter after shuffling, so allow out-of-order
  # results. `deterministic` replaces the deprecated `experimental_deterministic`.
  opt = tf.data.Options()
  opt.deterministic = False
  raw_dataset = raw_dataset.with_options(opt)

  # Parse and decode in parallel; with sequential maps the non-deterministic
  # option above has nothing to act on.
  raw_dataset = raw_dataset.map(
      lambda element: read_tfrecord(element, labelled),
      num_parallel_calls=AUTO)
  raw_dataset = raw_dataset.map(
      lambda img, img_name: prepare_image(img, img_name, False),
      num_parallel_calls=AUTO)

  if batch:
    raw_dataset = raw_dataset.batch(batch_size)

  return raw_dataset

def show_batch(dataset):
  """Plot the first nine (image, label) elements of `dataset` in a 3x3 grid.

  Args:
    dataset: an unbatched dataset of `(image, label)` pairs. Images are
      assumed to be scaled to [-1, 1], as `prepare_image` produces them.

  Each panel is titled "MALIGNANT" when the label is truthy, else "BENIGN".
  """
  plt.figure(figsize=(10,10))
  for n, (x, y) in enumerate(dataset.take(9)):
    plt.subplot(3,3, n+1)
    # imshow expects floats in [0, 1]; map [-1, 1] back and clip whatever
    # augmentation pushed outside that range.
    plt.imshow(np.clip((np.asarray(x) + 1.0) / 2.0, 0.0, 1.0))
    plt.title("MALIGNANT" if y else "BENIGN")
    plt.axis("off")

In [47]:
def filter_by_target(ds, target):
  """Return the elements of `ds` whose label equals `target`.

  `ds` must yield `(image, label)` pairs and expose a `filter` method.
  """
  def _label_matches(img, label):
    return label == target

  return ds.filter(_label_matches)

In [48]:
# Unbatched so the cells below can filter / take individual examples.
full_training_set = get_dataset(TRAINING_FILENAMES, batch=False)
full_valid_set = get_dataset(VALID_FILENAMES, batch=False)
# The test shards are parsed with the unlabelled schema (image + image_name):
# the labelled schema requires a `target` field, which test records
# presumably do not carry - verify against the shards.
full_test_set = get_dataset(TEST_FILENAMES, labelled=False, batch=False)

In [18]:
# Small experiment configuration (the next cell defines the same names again).
size = 300
pos_ds = filter_by_target(full_training_set, 1)
neg_ds = filter_by_target(full_training_set, 0)

# `size` positives followed by 2*size negatives, mixed with a shuffle buffer
# that covers all 3*size elements.
pos_subset = pos_ds.take(size)
neg_subset = neg_ds.take(size*2)
small_training_ds = pos_subset.concatenate(neg_subset).shuffle(size*3)

# Every split is capped at `size` examples.
training_set, valid_set, test_set = (
    split.take(size)
    for split in (small_training_ds, full_valid_set, full_test_set)
)

In [49]:
size = 2000
pos_ds, neg_ds = (
    filter_by_target(full_training_set, label) for label in (1, 0)
)

# take(-1) keeps every positive example; negatives are capped at `size`.
all_positives = pos_ds.take(-1)
capped_negatives = neg_ds.take(size)
# NOTE(review): the shuffle buffer (`size`) can be smaller than the
# concatenated dataset, so the positives at the front are only partially
# mixed - confirm this is intended.
small_training_ds = all_positives.concatenate(capped_negatives).shuffle(size)

training_set = small_training_ds.take(size)
valid_set = full_valid_set.take(size)
test_set = full_test_set.take(size)

NameError: name 'full_test_set' is not defined

Augmenting Data

In [50]:
def count_bins(ds):
  """Count the negative and positive examples in `ds`.

  Iterates over every `(x, y)` element once. A label equal to 0 counts as
  negative; any other label counts as positive.

  Returns:
    A `(negatives, positives)` tuple.
  """
  tally = [0, 0]  # index 0 -> negatives, index 1 -> positives
  for _, label in ds:
    tally[0 if label == 0 else 1] += 1

  return tally[0], tally[1]

In [9]:
# Class balance of the un-subsampled training set. This walks the whole
# dataset, decoding every image along the way.
neg, pos = count_bins(full_training_set)
for caption, count in (("Negative: ", neg), ("Positive: ", pos)):
    print(caption, count)

KeyboardInterrupt: 

In [9]:
# Class balance of the subsampled training set.
neg, pos = count_bins(training_set)
for caption, count in (("Negative: ", neg), ("Positive: ", pos)):
    print(caption, count)

Negative:  497
Positive:  503


In [51]:
# Split the training subset by class so that only positives get augmented.
# NOTE(review): the upstream datasets are shuffled, so each of these filters
# may see a differently drawn `training_set` when iterated - confirm the two
# views are meant to be independent.
neg_train_ds, pos_train_ds = (
    filter_by_target(training_set, label) for label in (0, 1)
)

In [None]:
# Visual sanity check: plot the first nine positive training examples.
show_batch(pos_train_ds)

In [52]:
# Random augmentation applied to single (unbatched) images.
# NOTE(review): images reaching this model are already scaled to [-1, 1];
# confirm the installed Keras RandomContrast does not clip to [0, 255].
augmentation_layers = [
    tf.keras.layers.RandomFlip(),
    tf.keras.layers.RandomRotation(0.5),
    tf.keras.layers.RandomContrast(0.5),
    tf.keras.layers.RandomZoom(0.2, 0.2),
]
trainAug = tf.keras.Sequential(augmentation_layers)

# Build for one image of shape `image_shape` so summary() can report shapes.
trainAug.build(input_shape=image_shape)

trainAug.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_flip (RandomFlip)    (512, 512, 3)             0         
                                                                 
 random_rotation (RandomRota  (512, 512, 3)            0         
 tion)                                                           
                                                                 
 random_contrast (RandomCont  (512, 512, 3)            0         
 rast)                                                           
                                                                 
 random_zoom (RandomZoom)    (512, 512, 3)             0         
                                                                 
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


In [53]:
# Augment the positives and iterate over them three times.
def _augment_image(x, y):
    return trainAug(x), y

aug_pos_ds = pos_train_ds.map(_augment_image).repeat(3)

In [None]:
# The augmented positive stream should contain positives only.
neg, pos = count_bins(aug_pos_ds)
for caption, count in (("Negative: ", neg), ("Positive: ", pos)):
    print(caption, count)
# show_batch(aug_pos_ds)

In [54]:
# Augmented positives followed by the untouched negatives, then mixed with a
# shuffle buffer of `size` elements.
aug_train_ds = aug_pos_ds.concatenate(neg_train_ds).shuffle(size)

In [26]:
# Alternative strategy: augment every training example (both classes), iterate
# over the result twice and shuffle. This rebinds `aug_train_ds`.
def _augment_example(x, y):
    return trainAug(x), y

augmented_once = training_set.map(_augment_example)
aug_train_ds = augmented_once.repeat(2).shuffle(2000)

In [61]:
# Class balance after augmentation.
neg, pos = count_bins(aug_train_ds)
for caption, count in (("Negative: ", neg), ("Positive: ", pos)):
    print(caption, count)

Negative:  1497
Positive:  1509


In [55]:
# From here on `training_set` is the augmented dataset.
# NOTE(review): this rebinds a name that earlier cells read, so re-running
# those cells afterwards would operate on the augmented data.
training_set = aug_train_ds

Model Building

In [56]:
from keras.models import Model
from keras.applications.mobilenet import MobileNet
from keras.layers import Dense, GlobalAveragePooling2D
from keras.layers import Input

# ImageNet-pretrained MobileNet used as a feature extractor (no classification
# head). Only the non-default arguments are spelled out; every other keyword
# the call used to pass was equal to its Keras default.
base_model = MobileNet(
    input_shape=image_shape,
    include_top=False,
    weights='imagenet',
)

# New head: global average pooling -> small dense layer -> one sigmoid unit.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(8, activation='relu')(x)
# Binary problem: a single sigmoid output scores the positive (target == 1) class.
predictions = Dense(1, activation='sigmoid')(x)

# This is the model we will train.
transfer_model = Model(inputs=base_model.input, outputs=predictions)

# Train only the randomly initialised head: freeze the whole pretrained
# backbone (this must happen before compile()).
base_model.trainable = False



In [37]:
# Print the layer-by-layer structure of the backbone plus the new head.
transfer_model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 512, 512, 3)]     0         
                                                                 
 conv1 (Conv2D)              (None, 256, 256, 32)      864       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 256, 256, 32)     128       
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 256, 256, 32)      0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 256, 256, 32)     288       
                                                                 
 conv_dw_1_bn (BatchNormaliz  (None, 256, 256, 32)     128       
 ation)                                                    

In [57]:
LR = 0.001

# Tracked alongside the loss: accuracy plus raw true-positive / true-negative counts.
tracked_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.TruePositives(),
    tf.keras.metrics.TrueNegatives(),
]

# Compile AFTER the backbone layers have been marked non-trainable.
# NOTE(review): run_eagerly=True disables graph compilation of the train
# step - confirm it is still needed outside of debugging.
transfer_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
    loss=tf.keras.losses.binary_crossentropy,
    metrics=tracked_metrics,
    run_eagerly=True
)

In [58]:
# Neutral class weights: both classes contribute equally to the loss.
class_weight={0:1.0, 1:1.0}

In [17]:
# Inverse-frequency class weights computed from the most recent neg/pos counts.
total = neg + pos
half_total = total / 2.0
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

# Manual override: the positive class is weighted 3x regardless of the
# computed value above.
weight_for_1 = 3

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(class_weight[0]))
print('Weight for class 1: {:.2f}'.format(class_weight[1]))

Weight for class 0: 1.01
Weight for class 1: 3.00


In [59]:
# Batch all three splits with the global batch size.
# NOTE: this rebinds the names in place, so running the cell twice produces
# batches of batches.
training_set, valid_set, test_set = (
    split.batch(batch_size) for split in (training_set, valid_set, test_set)
)

In [None]:
from keras.callbacks import EarlyStopping

EPOCHS=50

# Stop once the validation loss has failed to improve for 2 consecutive epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)

fit_options = dict(
    epochs=EPOCHS,
    class_weight=class_weight,
    validation_data=valid_set,
    callbacks=[es],
)
history = transfer_model.fit(training_set, **fit_options)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


In [None]:
# Persist the trained model under Models/normalised.
transfer_model.save("Models/normalised")

# Evaluation

In [42]:
# Reload the model from the path it was saved to and print its structure.
fit_model = tf.keras.models.load_model("Models/normalised")
fit_model.summary()

None


In [33]:
# Inspect the element spec of the validation dataset (shows whether it is batched).
print(valid_set)

<TakeDataset element_spec=(TensorSpec(shape=(512, 512, 3), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>


In [34]:
# Sigmoid scores for the validation set.
# NOTE(review): predictions come from the in-memory `transfer_model`, not the
# `fit_model` reloaded from disk - confirm which one is intended.
y_pred = transfer_model.predict(valid_set)

AttributeError: 'NoneType' object has no attribute 'predict'

In [None]:
# Collect labels and scores in ONE pass over the (batched) validation set.
# The dataset is reshuffled on every iteration, so labels gathered in a pass
# separate from the one used for prediction would not line up with the
# scores. `y_pred` is therefore (re)computed here, batch by batch.
# (The previous version also iterated over an undefined name, `ds`.)
y_true_batches, y_pred_batches = [], []
for x, y in valid_set:
    y_true_batches.append(np.asarray(y))
    y_pred_batches.append(transfer_model.predict_on_batch(x))

y_true = np.concatenate(y_true_batches, axis=0)
y_pred = np.concatenate(y_pred_batches, axis=0)

In [None]:
from sklearn.metrics import confusion_matrix

# confusion_matrix needs hard class labels, not sigmoid scores: threshold at
# 0.5, matching the default threshold of the BinaryAccuracy training metric.
y_pred_labels = (np.asarray(y_pred).ravel() > 0.5).astype(int)

# labels=[0, 1] forces a 2x2 matrix even if a class is missing from the
# predictions, so the four-way unpacking below always works.
cm = confusion_matrix(np.asarray(y_true).ravel(), y_pred_labels, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

In [None]:
import seaborn as sns

# Show each cell as a share of all evaluated examples.
cm_fractions = cm / cm.sum()
sns.heatmap(cm_fractions, annot=True, fmt='.2%', cmap='Blues')
plt.savefig('confusionmatrix.png')

In [None]:
from sklearn.metrics import RocCurveDisplay

# ROC curve of the validation scores.
RocCurveDisplay.from_predictions(y_true, y_pred)
# Save BEFORE show(): once show() has rendered the figure it is no longer the
# current figure, so saving afterwards writes an empty image.
plt.savefig('roccurve.png')
plt.show()

In [None]:
from sklearn.metrics import roc_auc_score
# Area under the ROC curve for the scores in y_pred against y_true; shown as the cell output.
roc_auc_score(y_true, y_pred)

In [None]:
# Unpack the 2x2 confusion matrix (row = true class, column = predicted class).
tn, fp, fn, tp = cm.ravel()

# Marginal totals shared by the rates below.
actual_negatives = fp + tn
actual_positives = tp + fn
predicted_negatives = tn + fn
predicted_positives = tp + fp

# Rates conditioned on the true class.
false_positive_rate = fp / actual_negatives
true_negative_rate = tn / actual_negatives
false_negative_rate = fn / actual_positives
true_positive_rate = tp / actual_positives

# Rates conditioned on the predicted class.
negative_predictive_value = tn / predicted_negatives
false_discovery_rate = fp / predicted_positives
positive_predictive_value = tp / predicted_positives