<a href="https://colab.research.google.com/github/richmondvan/melanoma-detection/blob/master/process.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SETUP

**This cell is mandatory!**

- Imports modules

- Mounts Google Drive

- Sets up constants

In [0]:
# Must be run every time!
import numpy as np
import tensorflow as tf
from google.colab import drive

# Locations used by the rest of the notebook: the dataset folder on Drive and
# the downloaded archive stored inside it.
DIRECTORY_PATH = "/content/gdrive/My Drive/Dataset/"
DATASET_FILEPATH = "/content/gdrive/My Drive/Dataset/dataset.zip"

# Mount Google Drive at the mount point that the paths above are rooted in.
drive.mount("/content/gdrive")

One-time: Download the dataset from the ISIC Archive

In [0]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded, so `urllib.request.urlretrieve` could raise
# AttributeError unless some other module happened to import it first.
import urllib.request

# One-time download of the ISIC archive selected by the filter encoded in the
# query string; the response is written to DATASET_FILEPATH.
urllib.request.urlretrieve("https://isic-archive.com/api/v1/image/download?include=all&filter={%22operator%22:%22and%22,%22operands%22:[{%22operator%22:%22in%22,%22operands%22:[{%22identifier%22:%22meta.datasetId%22,%22type%22:%22objectid%22},[%225a2ecc5e1165975c945942a4%22,%225a2ecc5e1165975c945942a2%22,%225a2ecc5d1165975c94594292%22,%225a2ecc5d1165975c9459428e%22,%225a2ecc5d1165975c94594284%22,%225aaf6f2a116597691367292e%22,%225a2ecc5d1165975c9459427e%22,%225a2ecc5d1165975c9459428a%22]]},{%22operator%22:%22and%22,%22operands%22:[{%22operator%22:%22in%22,%22operands%22:[{%22identifier%22:%22meta.clinical.benign_malignant%22,%22type%22:%22string%22},[%22benign%22,%22malignant%22]]},{%22operator%22:%22in%22,%22operands%22:[{%22identifier%22:%22meta.tags%22,%22type%22:%22string%22},[%22Challenge%202019:%20Training%22]]}]}]}", DATASET_FILEPATH)

One-time: Extract data from archive

In [0]:
from zipfile import ZipFile

# Unpack every member of the downloaded archive into DIRECTORY_PATH.
with ZipFile(DATASET_FILEPATH, mode="r") as archive:
    archive.extractall(path=DIRECTORY_PATH)

# More Setup

Sets up the path to the ground-truth metadata file

In [0]:
# Must be run every time!

import numpy as np

# Path of the metadata CSV, located in the "ISIC-images" folder under DIRECTORY_PATH.
METADATA_PATH = f"{DIRECTORY_PATH}ISIC-images/metadata.csv"


Converts CSV file into a dictionary for classification lookups

In [0]:
import csv

# Build a lookup from image name (the CSV "name" column) to the value of its
# "meta.clinical.benign_malignant" column.
# newline="" is what the csv module expects for files handed to a reader, so
# that newlines embedded in quoted fields are parsed correctly.
with open(METADATA_PATH, mode="r", newline="") as infile:
    reader = csv.DictReader(infile)
    GROUND_TRUTH_DICT = {
        row["name"]: row["meta.clinical.benign_malignant"] for row in reader
    }

# Show a count and a small sample instead of dumping every entry into the
# cell output.
print(len(GROUND_TRUTH_DICT), "labelled images")
print(list(GROUND_TRUTH_DICT.items())[:5])

Sort images into their proper class directories

In [0]:
import shutil
import os
import pathlib

IMAGE_PATH = pathlib.Path(DIRECTORY_PATH + "ISIC-images/")
BENIGN_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/benign/"
MALIGNANT_PATH = "/content/gdrive/My Drive/Dataset/DatasetSorted/malignant/"

# Ground-truth label -> directory the image is moved into.
destination_by_label = {"benign": BENIGN_PATH, "malignant": MALIGNANT_PATH}

# shutil.move does not create missing parent directories, so make sure both
# class folders exist before moving anything into them.
for destination in destination_by_label.values():
    os.makedirs(destination, exist_ok=True)

# Take a snapshot of the listing first, since files are moved out of the
# directories being scanned.
for path in list(IMAGE_PATH.glob("*/*.jpg")):
    # path.stem drops only the final ".jpg" suffix. The previous
    # str.strip(".jpg") removed any of the characters '.', 'j', 'p', 'g' from
    # BOTH ends of the name, which corrupts names starting or ending with them.
    label = GROUND_TRUTH_DICT.get(path.stem)
    destination = destination_by_label.get(label)
    if destination is None:
        # Unknown or missing label: report it and leave the file where it is,
        # instead of falling through and moving it to a stale destination.
        print("error: no benign/malignant label for", path.name, "- got", repr(label))
        continue

    shutil.move(str(path), destination + path.name)

**Mandatory!**

Set up some constants before we load the dataset

In [0]:
# Imported here as well so that this mandatory cell runs on a fresh kernel
# even when the one-time sorting cell (the only other place pathlib is
# imported) is skipped.
import pathlib

# Generator that multiplies every pixel value by 1/255 when loading images.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)


# NOTE: this rebinds IMAGE_PATH (the sorting cell pointed it at the unsorted
# "ISIC-images" folder) to the root of the sorted, per-class directories.
IMAGE_PATH = pathlib.Path("/content/gdrive/My Drive/Dataset/DatasetSorted/")
# Number of .jpg files found one level below the sorted root.
IMAGE_COUNT = len(list(IMAGE_PATH.glob("*/*.jpg")))
BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Batches needed to cover every image once; np.ceil returns a float.
STEPS_PER_EPOCH = np.ceil(IMAGE_COUNT / BATCH_SIZE)
CLASS_NAMES = np.array(["benign", "malignant"])

# Stream shuffled batches of resized images from the class sub-directories.
train_data_gen = image_generator.flow_from_directory(
    directory=str(IMAGE_PATH),
    batch_size=BATCH_SIZE,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode="categorical",
)
# Print the class-name -> index mapping the generator assigned, so it can be
# checked against CLASS_NAMES.
print(train_data_gen.class_indices)