<a href="https://colab.research.google.com/github/richmondvan/melanoma-detection/blob/master/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training

Import all modules and mount Google Drive

In [None]:
# Must be run every time!

from pathlib import Path # Manage file paths
import pickle # Storing epoch number
from google.colab import drive # For mounting GDrive
import matplotlib.pyplot as plt

from tensorflow.keras import metrics, regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import CSVLogger

# Mount Google Drive at /content/gdrive; model checkpoints, the epoch counter
# and the training history CSV are read from / written to a folder under it.
drive.mount('/content/gdrive')

# Clone the image dataset repository (IPython shell escape). Later cells read
# it from /content/isic-image-database/ -- assumes the notebook's working
# directory is /content; TODO confirm outside Colab.
!git clone https://github.com/richmondvan/isic-image-database.git

Prepare datasets

In [None]:
# Dataset locations inside the cloned image repository
PATH = "/content/isic-image-database/"

dataset_root = Path(PATH)
TRAINING_PATH = dataset_root / "training"
VALIDATION_PATH = dataset_root / "validation"
TEST_PATH = dataset_root / "test"

# Training images are rescaled to [0, 1] and randomly augmented
# (brightness, zoom-in, horizontal and vertical flips)
augmentation_options = dict(
    brightness_range=(0.90, 1.10),
    zoom_range=[0.9, 1],
    horizontal_flip=True,
    vertical_flip=True,
)
train_image_generator = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation images are only rescaled, never augmented
validation_image_generator = ImageDataGenerator(rescale=1./255)

# Batch / image geometry and dataset sizes (only *.jpg files are counted)
batch_size = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224
TRAIN_LEN = sum(1 for _ in TRAINING_PATH.glob("*/*.jpg"))
VALID_LEN = sum(1 for _ in VALIDATION_PATH.glob("*/*.jpg"))
CLASS_NAMES = ['benign', 'malignant']

# Options shared by both directory iterators; the order of CLASS_NAMES fixes
# the label encoding (index 0 = benign, index 1 = malignant)
flow_options = dict(
    batch_size=batch_size,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    classes=CLASS_NAMES,
)

train_data_gen = train_image_generator.flow_from_directory(
    directory=TRAINING_PATH, **flow_options)

val_data_gen = validation_image_generator.flow_from_directory(
    directory=VALIDATION_PATH, **flow_options)

Show images

In [None]:
image_batch, label_batch = next(train_data_gen)

def show_batch(image_batch, label_batch):
  """Plot up to 25 images of a batch in a 5x5 grid, each titled with its class.

  Args:
    image_batch: Indexable batch of images that `plt.imshow` can display.
    label_batch: Matching batch of numeric labels; each one is converted to
      an int and used as an index into CLASS_NAMES.
  """
  plt.figure(figsize=(10,10))
  # Do not index past the end of a batch that is shorter than the 25 grid cells
  for n in range(min(25, len(image_batch))):
    plt.subplot(5,5,n+1)
    plt.imshow(image_batch[n])
    # CLASS_NAMES is a plain list, so look the name up by integer label;
    # this yields the full class name ("Benign" / "Malignant") as the title
    plt.title(CLASS_NAMES[int(label_batch[n])].title())
    plt.axis('off')

show_batch(image_batch, label_batch)

Prepare metrics and weights

In [None]:
# Count the training images of each class (only *.jpg files are counted)
numBenign = sum(1 for _ in TRAINING_PATH.glob("benign/*.jpg"))
numMalignant = sum(1 for _ in TRAINING_PATH.glob("malignant/*.jpg"))
total = numBenign + numMalignant

# Extra factor applied to the malignant weight only (1.0 leaves it unchanged)
additionalWeightMultiplier = 1.0

# Weight each class inversely to its frequency to offset the class imbalance;
# dividing by 2 keeps the weights centred around 1 for the two classes
weight_for_0 = (1 / numBenign) * (total) / 2.0
weight_for_1 = (additionalWeightMultiplier / numMalignant) * (total) / 2.0
class_weight = {
    0: weight_for_0,
    1: weight_for_1,
}

print(class_weight)

# Metrics reported for every training and validation epoch
METRICS = [
    metrics.BinaryAccuracy(name='acc'),
    # Raw confusion-matrix counts
    metrics.TruePositives(name='tp'),
    metrics.FalsePositives(name='fp'),
    metrics.TrueNegatives(name='tn'),
    metrics.FalseNegatives(name='fn'),
    # Summary scores
    metrics.Precision(name='pre'),
    metrics.Recall(name='rec'),
    metrics.AUC(name='auc'),
]

Prepare model

In [None]:
# Hyperparameters of the dense classification head
NEURONS_PER_LAYER = 512
REG_LAMBDA = 0.001
DROPOUT = 0.1
ACTIVATION = "relu"
NUM_DENSE_LAYERS = 8

IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

# ImageNet-pretrained MobileNetV2 without its classifier top, with average
# pooling requested on its output; frozen so only the head is trained at first
base_model = MobileNetV2(
    input_shape=IMG_SHAPE,
    include_top=False,
    weights='imagenet',
    alpha=1.4,
    pooling='avg',
)
base_model.trainable = False

# Head: dropout, then NUM_DENSE_LAYERS x (L2-regularised dense + dropout),
# then a single sigmoid unit
model = Sequential()
model.add(base_model)
model.add(Dropout(DROPOUT))
for layer_number in range(NUM_DENSE_LAYERS):
    hidden_layer = Dense(
        NEURONS_PER_LAYER,
        kernel_regularizer=regularizers.l2(REG_LAMBDA),
        activation=ACTIVATION,
    )
    model.add(hidden_layer)
    model.add(Dropout(DROPOUT))
model.add(Dense(1, activation="sigmoid"))

LEARNING_RATE = 0.0005

def compileModel(learningRate):
    """Compile the global `model` with Adam, binary cross-entropy and METRICS.

    Args:
        learningRate: Learning rate passed to the Adam optimizer.
    """
    # The model's last layer already applies a sigmoid, so it outputs
    # probabilities rather than logits; the loss must therefore be built with
    # from_logits=False. Loss values are consequently not comparable with
    # history rows logged while the loss was configured with from_logits=True.
    model.compile(
        optimizer=Adam(learning_rate=learningRate),
        loss=BinaryCrossentropy(from_logits=False),
        metrics=METRICS)

compileModel(LEARNING_RATE)
model.summary()

Load model weights and last epoch

In [None]:
# Checkpoint locations on Google Drive: one folder per head configuration,
# holding the per-epoch weight files and the pickled last-epoch number

MODEL_FILEPATH = f"/content/gdrive/My Drive/MelanomaDetectionModels/{NEURONS_PER_LAYER}_{NUM_DENSE_LAYERS}/"
EPOCH_FILEPATH = MODEL_FILEPATH + "epochnum.pkl"

# Create the folder up front so the first checkpoint / history write for a
# new configuration does not fail on a missing directory
Path(MODEL_FILEPATH).mkdir(parents=True, exist_ok=True)

def loadWeights():
    """Restore the last recorded epoch number and the weights saved for it.

    Sets the global `epoch` to the value pickled in EPOCH_FILEPATH, or to 0
    when that file is missing or cannot be read. When an epoch number was
    read, tries to load `epoch{epoch}.h5` from MODEL_FILEPATH into the global
    `model`; a failure there is reported but not raised, and `epoch` keeps
    the recorded value. Finally prints `epoch`.
    """
    global epoch
    try:
        # NOTE: unpickling can execute arbitrary code -- this file must only
        # ever be one written by this notebook's own training loop
        with open(EPOCH_FILEPATH, 'rb') as infile:
            epoch = pickle.load(infile)
    except FileNotFoundError:
        # No epoch number recorded yet: start from scratch
        epoch = 0
    except Exception as err:
        # Unreadable epoch file: also start from scratch, but say why
        print(f"Warning: could not read {EPOCH_FILEPATH} ({err!r}); starting from epoch 0")
        epoch = 0
    else:
        weightsPath = MODEL_FILEPATH + f"epoch{epoch}.h5"
        try:
            model.load_weights(weightsPath)
        except Exception as err:
            # Best effort, as before: keep going with the current weights,
            # but make the failure visible instead of swallowing it silently
            print(f"Warning: could not load weights from {weightsPath} ({err!r})")
    print(epoch)

loadWeights()

Prepare CSV logger

In [None]:
# Per-epoch metrics are appended to a CSV file stored next to the checkpoints

HISTORY_FILEPATH = f"{MODEL_FILEPATH}history.csv"

# append=True keeps rows from earlier sessions instead of overwriting them
csv_logger = CSVLogger(filename=HISTORY_FILEPATH, append=True)

Train model

In [None]:
# Train up to epoch 150, resuming from the last checkpointed epoch

epochsToTrain = 150

# range() is empty when `epoch` has already reached the target, so no extra
# guard is needed
for i in range(epoch, epochsToTrain):
    # One fit() call per epoch so a checkpoint can be written after each one
    history = model.fit(x=train_data_gen,
                        epochs=i + 1,
                        initial_epoch=i,
                        verbose=1,
                        validation_data=val_data_gen,
                        validation_steps=VALID_LEN // batch_size,
                        steps_per_epoch=TRAIN_LEN // batch_size,
                        class_weight=class_weight,
                        callbacks=[csv_logger])
    model.save_weights(MODEL_FILEPATH + f"epoch{i + 1}.h5")
    # `with` guarantees the epoch file is flushed and closed even on error
    with open(EPOCH_FILEPATH, 'wb') as outfile:
        pickle.dump(i + 1, outfile)
    # Keep the in-memory counter in step with the checkpoint just written;
    # later cells start from `epoch`, and without this they would restart
    # from the value loaded at the beginning of the session
    epoch = i + 1

Set up fine-tuning by unfreezing layers in base model

In [None]:
# Unfreeze the pretrained base so its weights are updated during fine-tuning
base_model.trainable = True

# Recompile after changing `trainable`, with a 10x smaller learning rate
fineTuneLearningRate = LEARNING_RATE / 10
compileModel(fineTuneLearningRate)
model.summary()

Load the fine-tuning epoch number

Fine-tune

In [None]:
# Fine-tuning continues the same epoch counter for 300 further epochs
fineTuneEpochsToTrain = 300 + epochsToTrain

# When resuming inside the fine-tuning phase, load the checkpoint again now
# that the base model is unfrozen.
# NOTE(review): presumably needed because a checkpoint written with the base
# unfrozen does not load cleanly into the frozen model -- confirm.
resumingFineTune = epoch > epochsToTrain
if resumingFineTune:
    loadWeights()

In [None]:
# Fine-tune up to `fineTuneEpochsToTrain`, resuming from the last completed
# epoch; range() is empty when the target has already been reached
for i in range(epoch, fineTuneEpochsToTrain):
    # One fit() call per epoch so a checkpoint can be written after each one
    history = model.fit(x=train_data_gen,
                        epochs=i + 1,
                        initial_epoch=i,
                        verbose=1,
                        validation_data=val_data_gen,
                        validation_steps=VALID_LEN // batch_size,
                        steps_per_epoch=TRAIN_LEN // batch_size,
                        class_weight=class_weight,
                        callbacks=[csv_logger])
    model.save_weights(MODEL_FILEPATH + f"epoch{i + 1}.h5")
    # `with` guarantees the epoch file is flushed and closed even on error
    with open(EPOCH_FILEPATH, 'wb') as outfile:
        pickle.dump(i + 1, outfile)
    # Keep the in-memory counter in step with the checkpoint just written so
    # re-running this cell after an interruption resumes instead of repeating
    epoch = i + 1
