<a href="https://colab.research.google.com/github/richmondvan/melanoma-detection/blob/master/process.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SETUP

**This cell is mandatory!**

- Imports modules

- Mounts Google Drive

- Sets up constants

In [0]:
# Must be run every time!
import numpy as np
import tensorflow as tf #nightly
import pathlib
from tensorflow.keras import models, layers, losses
import math
from google.colab import drive 
import os

# Mount Google Drive inside the Colab VM; every path below lives under this mount point.
drive.mount('/content/gdrive') 

# Full path of the dataset archive (download target and extraction source in later cells).
DATASET_FILEPATH = "/content/gdrive/My Drive/Dataset/dataset.zip"
# Directory the archive is extracted into; later cells build their paths from it.
DIRECTORY_PATH = "/content/gdrive/My Drive/Dataset/"

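One-time:
Download the dataset archive from the ISIC Archive (benign/malignant images tagged "Challenge 2019: Training") to Google Drive.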

In [0]:
# `import urllib` alone does not load the `request` submodule; import it
# explicitly so this cell does not depend on another library having done so.
import urllib.request

# ISIC Archive query: images from the listed dataset ids that are labelled
# benign or malignant and tagged "Challenge 2019: Training".
ISIC_DOWNLOAD_URL = "https://isic-archive.com/api/v1/image/download?include=all&filter={%22operator%22:%22and%22,%22operands%22:[{%22operator%22:%22in%22,%22operands%22:[{%22identifier%22:%22meta.datasetId%22,%22type%22:%22objectid%22},[%225a2ecc5e1165975c945942a4%22,%225a2ecc5e1165975c945942a2%22,%225a2ecc5d1165975c94594292%22,%225a2ecc5d1165975c9459428e%22,%225a2ecc5d1165975c94594284%22,%225aaf6f2a116597691367292e%22,%225a2ecc5d1165975c9459427e%22,%225a2ecc5d1165975c9459428a%22]]},{%22operator%22:%22and%22,%22operands%22:[{%22operator%22:%22in%22,%22operands%22:[{%22identifier%22:%22meta.clinical.benign_malignant%22,%22type%22:%22string%22},[%22benign%22,%22malignant%22]]},{%22operator%22:%22in%22,%22operands%22:[{%22identifier%22:%22meta.tags%22,%22type%22:%22string%22},[%22Challenge%202019:%20Training%22]]}]}]}"

# Stream the archive straight to its location on the mounted Drive.
urllib.request.urlretrieve(ISIC_DOWNLOAD_URL, DATASET_FILEPATH)

One-time: Extract data from archive

In [0]:
from zipfile import ZipFile

# Unpack every member of the downloaded archive into the dataset directory.
with ZipFile(file=DATASET_FILEPATH, mode='r') as dataset_archive:
    dataset_archive.extractall(path=DIRECTORY_PATH)

# More Setup

One-time: Converts CSV file into a dictionary for classification lookups

In [0]:
import csv

# Metadata file shipped inside the extracted archive.
METADATA_PATH = DIRECTORY_PATH + "ISIC-images/metadata.csv"

# Build a lookup from image name to its benign/malignant label.
# newline='' is what the csv module expects for file objects, so that newlines
# embedded in quoted fields are parsed correctly.
with open(METADATA_PATH, mode='r', newline='') as infile:
    reader = csv.DictReader(infile)
    GROUND_TRUTH_DICT = {row['name']: row['meta.clinical.benign_malignant'] for row in reader}

# Show a size and a small sample instead of dumping the whole dictionary.
print(len(GROUND_TRUTH_DICT), "labels loaded")
print(list(GROUND_TRUTH_DICT.items())[:5])

One-time: Sort images into proper directories

In [0]:
import shutil
import os

IMAGE_PATH = pathlib.Path(DIRECTORY_PATH + "ISIC-images/")
BENIGN_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/benign/"
MALIGNANT_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/malignant/"

# Destination directory for each ground-truth label.
LABEL_DIRECTORIES = {"benign": BENIGN_PATH, "malignant": MALIGNANT_PATH}
for label_directory in LABEL_DIRECTORIES.values():
    # shutil.move needs the target directories to exist already.
    os.makedirs(label_directory, exist_ok=True)

# Materialise the listing first: files are moved out of the tree while looping.
pathlist = sorted(pathlib.Path(IMAGE_PATH).glob("*/*.jpg"))
skipped = 0
for path in pathlist:
    # path.stem drops only the extension. str.strip(".jpg") would instead strip
    # any leading/trailing '.', 'j', 'p' or 'g' characters from the name.
    key = GROUND_TRUTH_DICT.get(path.stem)
    destination_directory = LABEL_DIRECTORIES.get(key)
    if destination_directory is None:
        # Unknown or missing label: leave the file where it is rather than
        # moving it to whatever destination the previous iteration used.
        print("error: no benign/malignant label for", path.name)
        skipped += 1
        continue

    shutil.move(str(path), destination_directory + path.name)

print("moved", len(pathlist) - skipped, "images;", skipped, "skipped")

One-time: Split the sorted images of each class into training, validation, and test sets (3 : 1 : 1).


In [0]:
import shutil
import os
from random import shuffle

BENIGN_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/benign/"
MALIGNANT_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/malignant/"

TRAINING_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/training/"
VALIDATION_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/validation/"
TEST_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/test/"


def split_class_images(source_dir, class_name,
                       training_root=TRAINING_PATH,
                       validation_root=VALIDATION_PATH,
                       test_root=TEST_PATH):
    """Move the ``*.jpg`` files of one class into training/validation/test folders.

    Files are numbered from 1 in sorted name order. Out of every five, the
    third goes to validation, the fourth to test and the other three to
    training, i.e. a 3 : 1 : 1 split.

    Args:
        source_dir: Directory holding the images of a single class.
        class_name: Sub-directory name used under each split root
            (e.g. ``"benign"``).
        training_root: Root directory of the training split.
        validation_root: Root directory of the validation split.
        test_root: Root directory of the test split.

    Returns:
        dict mapping ``"training"``, ``"validation"`` and ``"test"`` to the
        number of files moved into that split.
    """
    split_roots = {
        "training": training_root,
        "validation": validation_root,
        "test": test_root,
    }
    moved = {split: 0 for split in split_roots}

    # Snapshot and sort the listing: the directory is modified while looping,
    # and a fixed order makes the split reproducible.
    image_paths = sorted(pathlib.Path(source_dir).glob("*.jpg"))
    for counter, path in enumerate(image_paths, start=1):
        key = counter % 5
        if key == 3:
            split = "validation"
        elif key == 4:
            split = "test"
        else:
            split = "training"

        destination_dir = os.path.join(str(split_roots[split]), class_name)
        # Create the target folder on demand; shutil.move does not create it.
        os.makedirs(destination_dir, exist_ok=True)
        shutil.move(str(path), os.path.join(destination_dir, path.name))
        moved[split] += 1

    return moved


for class_name, source_dir in (("benign", BENIGN_PATH), ("malignant", MALIGNANT_PATH)):
    print(class_name, split_class_images(source_dir, class_name))


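One-time: Define a helper that pads a non-square image to a square on a white background, overwriting the file in place.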

In [0]:
def Reformat_Image(ImageFilePath):
    """Pad a non-square image to a square on a white background, in place.

    The image is centred on a white RGB canvas whose side equals the longer
    of the image's two sides, and the result overwrites ``ImageFilePath``.
    Images that are already square are left untouched.

    Args:
        ImageFilePath: Path of the image file to check and, if needed, rewrite.

    Returns:
        None. A status message is printed for every image.
    """
    from PIL import Image

    # Use a context manager so the source file handle is released for every
    # image, including square ones whose pixel data is never read.
    with Image.open(ImageFilePath, 'r') as image:
        width, height = image.size

        if width == height:
            print("Image is already a square, it has not been resized !")
            return

        bigside = max(width, height)
        background = Image.new('RGB', (bigside, bigside), (255, 255, 255))
        # Top-left corner that centres the original on the square canvas.
        offset = (int(round((bigside - width) / 2, 0)),
                  int(round((bigside - height) / 2, 0)))
        background.paste(image, offset)

    # Write only after the source image is closed, since the same path is reused.
    background.save(ImageFilePath)
    print("Image has been resized !")

In [0]:
# Root of the sorted dataset; images sit two directory levels below it.
IMAGE_PATH = pathlib.Path("/content/gdrive/My Drive/Dataset/DatasetSorted/")

pathList = IMAGE_PATH.glob("*/*/*.jpg")

print(pathList)
# Pad each image in turn and print a running count as progress.
counter = 0
for counter, path in enumerate(pathList, start=1):
    Reformat_Image(path)
    print(counter)

**Mandatory!**

Sets up the training and validation data generators.


In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Per-class source folders (kept for reference by other cells).
BENIGN_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/benign/"
MALIGNANT_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/malignant/"

# Roots of the three dataset splits.
TRAINING_PATH = pathlib.Path("/content/gdrive/My Drive/Dataset/DatasetSorted/training/")
VALIDATION_PATH = pathlib.Path("/content/gdrive/My Drive/Dataset/DatasetSorted/validation/")
TEST_PATH = pathlib.Path("/content/gdrive/My Drive/Dataset/DatasetSorted/test/")

# Training images are rescaled to [0, 1] and randomly flipped on both axes;
# validation images are only rescaled.
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
)
validation_image_generator = ImageDataGenerator(rescale=1./255)

# Hyper-parameters and input geometry.
batch_size = 128
epochs = 15
IMG_HEIGHT = 224
IMG_WIDTH = 224

# Image counts per split, used to derive the number of steps per epoch.
TRAIN_LEN = sum(1 for _ in TRAINING_PATH.glob("*/*.jpg"))
VALID_LEN = sum(1 for _ in VALIDATION_PATH.glob("*/*.jpg"))

CLASS_NAMES = ['benign', 'malignant']

# Options shared by both directory iterators.
flow_options = dict(
    batch_size=batch_size,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    classes=CLASS_NAMES,
)

train_data_gen = train_image_generator.flow_from_directory(
    directory=TRAINING_PATH, **flow_options)

val_data_gen = validation_image_generator.flow_from_directory(
    directory=VALIDATION_PATH, **flow_options)

Set up the model: a frozen MobileNetV2 feature extractor followed by a small dense classifier head.


In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
import tensorflow_hub as hub

# Frozen MobileNetV2 feature extractor from TF Hub, followed by four 64-unit
# ReLU layers with light dropout and a single sigmoid output unit.
model = models.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4", output_shape=[1280], trainable=False),
    layers.Dropout(0.1),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.1),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.1),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.1),
    layers.Dense(64, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # The last layer already applies a sigmoid, so the loss receives
    # probabilities. from_logits=True would apply a second sigmoid inside the loss.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'])

# Fix the input shape (batch, height, width, channels) so summary() can report layer shapes.
model.build([None, 224, 224, 3])
model.summary()


In [0]:
# Number of whole batches available in each split.
train_steps = TRAIN_LEN // batch_size
valid_steps = VALID_LEN // batch_size

# NOTE(review): trains for a single epoch; the `epochs` constant defined with
# the data generators is not used here. Confirm this is intended.
history = model.fit(
    x=train_data_gen,
    epochs=1,
    verbose=1,
    steps_per_epoch=train_steps,
    validation_data=val_data_gen,
    validation_steps=valid_steps,
)

In [0]:
# Save the model's weights to Google Drive.
weights_path = "/content/gdrive/My Drive/Dataset/mnet64weights.h5"
model.save_weights(weights_path)