<a href="https://colab.research.google.com/github/richmondvan/melanoma-detection/blob/master/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training

Import all modules and mount Google Drive

In [1]:
# Must be run every time!
import pathlib
import math
import os
import pickle
import csv
from google.colab import drive 
import tensorflow_hub as hub

import tensorflow as tf #nightly
from tensorflow.keras import models, layers, losses, metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import CSVLogger

# Mount Google Drive
drive.mount('/content/gdrive') 

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


Prepare datasets

In [2]:
# Setting up file paths
PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/"

TRAINING_PATH = pathlib.Path(PATH + "training/")
VALIDATION_PATH = pathlib.Path(PATH + "validation/")
TEST_PATH = pathlib.Path(PATH + "test/")

# Create image generators
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    brightness_range=(0.95, 1.05),
    horizontal_flip=True,
    vertical_flip=True) # Generator for our training data
validation_image_generator = ImageDataGenerator(rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True) # Generator for our validation data

# Some constants
batch_size = 32
epochs = 15
IMG_HEIGHT = 600
IMG_WIDTH = 600
TRAIN_LEN = len(list(TRAINING_PATH.glob("*/*.jpg")))
VALID_LEN = len(list(VALIDATION_PATH.glob("*/*.jpg")))
CLASS_NAMES = ['benign', 'malignant']

# Get generated datasets
train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=TRAINING_PATH,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='binary',
                                                           classes=CLASS_NAMES)

val_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=VALIDATION_PATH,
                                                              shuffle=True,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='binary',
                                                              classes=CLASS_NAMES)

8423
1054
Found 8423 images belonging to 2 classes.
Found 1054 images belonging to 2 classes.


Prepare metrics and weights

In [0]:
# Get some training weights to offset class imbalance
numBenign = len(list(TRAINING_PATH.glob("benign/*.jpg")))
numMalignant = len(list(TRAINING_PATH.glob("malignant/*.jpg")))
total = numBenign + numMalignant
weight_for_0 = (1 / numBenign)*(total)/2.0 
weight_for_1 = (1.5 / numMalignant)*(total)/2.0

class_weight = {0: weight_for_0, 1: weight_for_1}

# Metrics we will be using to assess accuracy
METRICS = [
      metrics.TruePositives(name='tp'),
      metrics.FalsePositives(name='fp'),
      metrics.TrueNegatives(name='tn'),
      metrics.FalseNegatives(name='fn'), 
      metrics.BinaryAccuracy(name='accuracy'),
      metrics.Precision(name='precision'),
      metrics.Recall(name='recall'),
      metrics.AUC(name='auc'),
]

Prepare model

In [4]:
# Hyperparameter
NEURONS_PER_LAYER = 1024
REG_LAMBDA = 0.001
DROPOUT = 0.1
ACTIVATION = "relu"


model = models.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/efficientnet/b7/feature-vector/1", trainable=False),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(NEURONS_PER_LAYER, kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA), activation=ACTIVATION),
    layers.Dropout(DROPOUT),
    layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=METRICS)

model.build([None, IMG_HEIGHT, IMG_WIDTH, 3])
model.summary()



Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     multiple                  64097680  
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  2622464   
_________________________________________________________________
dropout_1 (Dropout)          multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  1049600   
_________________________________________________________________
dropout_2 (Dropout)          multiple                  0         
_________________________________________________________________
dense_2 (Dense)              multiple                  1

Load model weights and last epoch

In [0]:
# Get last epoch number from pickled file

EPOCH_FILEPATH = f"/content/gdrive/My Drive/Dataset/enet{NEURONS_PER_LAYER}_4_epochnum"
infile = open(EPOCH_FILEPATH, 'rb')
try: 
    epoch = pickle.load(infile)
    model.load_weights(f"/content/gdrive/My Drive/Dataset/enet{NEURONS_PER_LAYER}_4_epoch{epoch}.h5")
except: 
    # Otherwise start again (only happens if no epoch number found)
    epoch = 0

infile.close()

Prepare CSV logger

In [0]:
# File where we store our CSV history

HISTORY_FILE = f'/content/gdrive/My Drive/Dataset/enet{NEURONS_PER_LAYER}_stats.csv'

csv_logger = CSVLogger(HISTORY_FILE, append=True)

Train model

In [0]:
# Train for 150 epochs

for i in range(epoch, 150):
    outfile = open(EPOCH_FILEPATH, 'wb')
    history = model.fit(x=train_data_gen, 
                        epochs=i+1, 
                        initial_epoch=i, 
                        verbose=1, 
                        validation_data=val_data_gen, 
                        validation_steps=VALID_LEN // batch_size, 
                        steps_per_epoch=TRAIN_LEN // batch_size, 
                        class_weight=class_weight,
                        callbacks = [csv_logger])
    model.save_weights(f"/content/gdrive/My Drive/Dataset/enet{NEURONS_PER_LAYER}_4/epoch{i + 1}.h5")
    pickle.dump(i+1, outfile)
    outfile.close()

 30/263 [==>...........................] - ETA: 2:43:19 - loss: 3.2684 - tp: 8.0000 - fp: 52.0000 - tn: 768.0000 - fn: 132.0000 - accuracy: 0.8083 - precision: 0.1333 - recall: 0.0571 - auc: 0.5186