<a href="https://colab.research.google.com/github/richmondvan/melanoma-detection/blob/master/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training

Import all modules and mount Google Drive

In [1]:
# Must be run every time!

import pathlib # Manage file paths
import math # Manage basic math
import pickle # Storing epoch number
import csv # Storing data in .csv files
from google.colab import drive # For mounting GDrive
import tensorflow_hub as hub # For importing EfficientNet

import tensorflow as tf #nightly
from tensorflow.keras import models, layers, losses, metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import CSVLogger

# Attach Google Drive; every dataset and checkpoint path below lives under this mount point
drive.mount(mountpoint='/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Prepare datasets

In [2]:
# Dataset root on the mounted Drive; each split lives in its own sub-directory
PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/"

dataset_root = pathlib.Path(PATH)
TRAINING_PATH = dataset_root / "training"
VALIDATION_PATH = dataset_root / "validation"
TEST_PATH = dataset_root / "test"

# Constants shared by the generators and the training loop
batch_size = 32
IMG_HEIGHT = 380
IMG_WIDTH = 380
CLASS_NAMES = ['benign', 'malignant']

# Number of .jpg files found one directory level below each split root
TRAIN_LEN = sum(1 for _ in TRAINING_PATH.glob("*/*.jpg"))
VALID_LEN = sum(1 for _ in VALIDATION_PATH.glob("*/*.jpg"))

# Image generators: both rescale pixel values by 1/255 and apply random flips;
# only the training generator additionally jitters brightness.
flip_options = dict(horizontal_flip=True, vertical_flip=True)

train_image_generator = ImageDataGenerator(
    rescale=1.0 / 255,
    brightness_range=(0.95, 1.05),
    **flip_options)
validation_image_generator = ImageDataGenerator(
    rescale=1.0 / 255,
    **flip_options)

# Options common to both directory iterators. Passing `classes` explicitly
# pins the label order to that of CLASS_NAMES.
shared_flow_options = dict(
    batch_size=batch_size,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    classes=CLASS_NAMES,
)

# Batched, labelled image streams read from the split directories
train_data_gen = train_image_generator.flow_from_directory(
    directory=TRAINING_PATH, **shared_flow_options)

val_data_gen = validation_image_generator.flow_from_directory(
    directory=VALIDATION_PATH, **shared_flow_options)

Found 8423 images belonging to 2 classes.
Found 1054 images belonging to 2 classes.


Prepare metrics and weights

In [0]:
# Per-class sample counts in the training split, used to offset class imbalance
numBenign = sum(1 for _ in TRAINING_PATH.glob("benign/*.jpg"))
numMalignant = sum(1 for _ in TRAINING_PATH.glob("malignant/*.jpg"))
total = numBenign + numMalignant

# Extra factor applied to the malignant weight on top of the balancing term
additionalWeightMultiplier = 1.5

# Each weight is inversely proportional to its class count, scaled by total / 2
weight_for_0 = (1 / numBenign) * total / 2.0
weight_for_1 = (additionalWeightMultiplier / numMalignant) * total / 2.0

# Keys are label indices: 0 is weighted by the benign count, 1 by the malignant count
class_weight = {0: weight_for_0, 1: weight_for_1}

# Metrics reported during training and validation, in this order
METRICS = [
    metric_cls(name=metric_name)
    for metric_cls, metric_name in (
        (metrics.BinaryAccuracy, 'accuracy'),
        (metrics.TruePositives, 'tp'),
        (metrics.FalsePositives, 'fp'),
        (metrics.TrueNegatives, 'tn'),
        (metrics.FalseNegatives, 'fn'),
        (metrics.Precision, 'precision'),
        (metrics.Recall, 'recall'),
        (metrics.AUC, 'auc'),
    )
]

Prepare model

In [4]:
# Hyperparameters
NEURONS_PER_LAYER = 256  # width of every hidden Dense layer
REG_LAMBDA = 0.001       # L2 penalty applied to each hidden Dense kernel
DROPOUT = 0.1            # dropout rate used before every Dense layer
ACTIVATION = "relu"      # hidden-layer activation

# Build model: a frozen EfficientNet-Lite4 feature-vector module from TF Hub
# feeding four regularised Dense layers and a single sigmoid output unit.
model = models.Sequential([
    hub.KerasLayer("https://tfhub.dev/tensorflow/efficientnet/lite4/feature-vector/2", trainable=False),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    # The output layer already applies a sigmoid, so the model emits
    # probabilities rather than logits. from_logits must be False here;
    # with True the loss would apply a second sigmoid to the predictions.
    # The architecture is unchanged, so existing weight files still load.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=METRICS)

# Fix the input shape (batch, height, width, RGB) so summary() can report parameters
model.build([None, IMG_HEIGHT, IMG_WIDTH, 3])
model.summary()



Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     multiple                  11837936  
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  327936    
_________________________________________________________________
dropout_1 (Dropout)          multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  65792     
_________________________________________________________________
dropout_2 (Dropout)          multiple                  0         
_________________________________________________________________
dense_2 (Dense)              multiple                  6

Load model weights and last epoch

In [5]:
# Resume from the last completed epoch, if a checkpoint marker exists.
#
# EPOCH_FILEPATH holds a pickled integer: the number of the last epoch whose
# weights were saved. The matching weights are loaded into `model`.

transferLearningCode = "enetlite"

EPOCH_FILEPATH = f"/content/gdrive/My Drive/Dataset/{transferLearningCode}{NEURONS_PER_LAYER}_4_epochnum"

try:
    # NOTE: pickle.load can execute arbitrary code. Only use an epoch file
    # that this notebook itself wrote.
    with open(EPOCH_FILEPATH, 'rb') as infile:
        epoch = pickle.load(infile)
except FileNotFoundError:
    # No epoch marker yet: nothing has been trained, so start from scratch.
    epoch = 0
else:
    # Any other failure (corrupt marker, missing or mismatched weight file) is
    # deliberately NOT swallowed: silently restarting at epoch 0 would retrain
    # from scratch and overwrite the existing checkpoints.
    model.load_weights(f"/content/gdrive/My Drive/Dataset/{transferLearningCode}{NEURONS_PER_LAYER}_4/epoch{epoch}.h5")


print(epoch)


5


Prepare CSV logger

In [0]:
# CSV file on Drive that accumulates the per-epoch training history
HISTORY_FILE = "/content/gdrive/My Drive/Dataset/{}{}_stats.csv".format(
    transferLearningCode, NEURONS_PER_LAYER)

# append=True: rows are added to an existing file instead of replacing it
csv_logger = CSVLogger(filename=HISTORY_FILE, append=True)

Train model

In [8]:
# Train until `epochsToTrain` total epochs are done, checkpointing after each one.
# Training resumes from `epoch`, the last completed epoch.

epochsToTrain = 100

# range() is empty when epoch >= epochsToTrain, so no separate guard is needed.
for i in range(epoch, epochsToTrain):
    # Each fit() call runs exactly one epoch (initial_epoch=i, epochs=i+1),
    # which keeps the epoch numbering continuous across resumed sessions.
    history = model.fit(x=train_data_gen,
                        epochs=i+1,
                        initial_epoch=i,
                        verbose=1,
                        validation_data=val_data_gen,
                        # Floor division: a trailing partial batch is not counted.
                        validation_steps=VALID_LEN // batch_size,
                        steps_per_epoch=TRAIN_LEN // batch_size,
                        class_weight=class_weight,
                        callbacks = [csv_logger])

    # Save the weights first, then the epoch marker, so the marker never
    # points at a weight file that has not been written.
    model.save_weights(f"/content/gdrive/My Drive/Dataset/{transferLearningCode}{NEURONS_PER_LAYER}_4/epoch{i + 1}.h5")
    with open(EPOCH_FILEPATH, 'wb') as outfile:
        pickle.dump(i+1, outfile)

    # Keep the in-memory counter in step with the checkpoint. Re-running this
    # cell after an interruption then continues from here instead of
    # repeating (and overwriting) epochs that are already done.
    epoch = i + 1

Epoch 6/6
Epoch 7/7
Epoch 8/8
Epoch 9/9
Epoch 10/10
Epoch 11/11
Epoch 12/12
Epoch 13/13
Epoch 14/14
Epoch 15/15
Epoch 16/16
Epoch 17/17
Epoch 18/18
Epoch 19/19
Epoch 20/20
Epoch 21/21
Epoch 22/22
Epoch 23/23
Epoch 24/24
Epoch 25/25
Epoch 26/26
Epoch 27/27
Epoch 28/28
Epoch 29/29
Epoch 30/30
Epoch 31/31
Epoch 32/32
Epoch 33/33
Epoch 34/34
Epoch 35/35
Epoch 36/36
Epoch 37/37
Epoch 38/38
Epoch 39/39
Epoch 40/40
Epoch 41/41
Epoch 42/42
Epoch 43/43
Epoch 44/44
Epoch 45/45
Epoch 46/46
Epoch 47/47
Epoch 48/48
Epoch 49/49
Epoch 50/50


Fine-tuning with trainable EfficientNet

In [0]:
# Build model again
#
# NOTE(review): this cell is disabled - every statement below is commented out.
# It rebuilds the same layer stack with the TF Hub module set to trainable=True,
# compiles with a smaller learning rate (0.0001), and reloads the weights saved
# for `epoch`.
# Before re-enabling:
#   * the last layer uses activation="sigmoid" while the loss is created with
#     from_logits=True; these two settings disagree and one of them should
#     change (normally from_logits=False).
#   * `epoch` must hold the number of the checkpoint to load - confirm it is
#     up to date at the point this cell runs.

# model = models.Sequential([
#     hub.KerasLayer("https://tfhub.dev/tensorflow/efficientnet/lite4/feature-vector/2", trainable=True), # Trainable this time
#     layers.Dropout(DROPOUT),
#     layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
#     layers.Dropout(DROPOUT),
#     layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
#     layers.Dropout(DROPOUT),
#     layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
#     layers.Dropout(DROPOUT),
#     layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
#     layers.Dropout(DROPOUT),
#     layers.Dense(1, activation="sigmoid")
# ])

# model.compile(
#     optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), # smaller learning rate
#     loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
#     metrics=METRICS)

# model.build([None, IMG_HEIGHT, IMG_WIDTH, 3])
# model.summary()

# model.load_weights(f"/content/gdrive/My Drive/Dataset/{transferLearningCode}{NEURONS_PER_LAYER}_4/epoch{epoch}.h5")

Train again

In [0]:
# Train for 50 more epochs
#
# NOTE(review): this cell is disabled - every statement below is commented out.
# It repeats the one-epoch-per-fit() checkpointing loop, running from `epoch`
# up to epochsToFineTune (150), and writes to the same weight directory and
# epoch marker file as the first training phase.
# Before re-enabling:
#   * it trains whatever `model` currently is, so the fine-tuning model cell
#     must be enabled and run first.
#   * `epoch` must hold the last completed epoch when this cell starts -
#     confirm it is up to date, otherwise earlier checkpoints are overwritten.

# epochsToFineTune = 150

# if epoch < epochsToFineTune:
#     for i in range(epoch, epochsToFineTune):
#         history = model.fit(x=train_data_gen, 
#                             epochs=i+1, 
#                             initial_epoch=i, 
#                             verbose=1, 
#                             validation_data=val_data_gen, 
#                             validation_steps=VALID_LEN // batch_size, 
#                             steps_per_epoch=TRAIN_LEN // batch_size, 
#                             class_weight=class_weight,
#                             callbacks = [csv_logger])
#         model.save_weights(f"/content/gdrive/My Drive/Dataset/{transferLearningCode}{NEURONS_PER_LAYER}_4/epoch{i + 1}.h5")
#         outfile = open(EPOCH_FILEPATH, 'wb')
#         pickle.dump(i+1, outfile)
#         outfile.close()