# TensorFlow Melanoma Classification
Classifying tumors as benign or malignant using TensorFlow.

In [8]:
import os
import shutil
import random

In [9]:
# Seed Python's RNG so the random train/validation/test assignment is repeatable.
seed = 1
random.seed(seed)

# Location of the raw images.
# NOTE(review): the split loop below reads from "images/", not from this
# variable -- confirm which path is the intended source folder.
directory = "ISIC/images/"

# Output folders, one per split.
train, test, validation = "data/train/", "data/test/", "data/validation/"

### Data Processing
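Split the dataset at random into training (about 80%), validation (about 10%), and test (about 10%) sets, with one sub-folder per class (`benign/`, `malignant/`) inside each split.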

In [None]:
# Create the class sub-folders for the training split.
# exist_ok=True lets this cell be re-run without raising FileExistsError
# when the folders are already present.
os.makedirs(train + "benign/", exist_ok=True)
os.makedirs(train + "malignant/", exist_ok=True)

In [5]:
# Create the class sub-folders for the test split.
# exist_ok=True lets this cell be re-run without raising FileExistsError
# when the folders are already present.
os.makedirs(test + "benign/", exist_ok=True)
os.makedirs(test + "malignant/", exist_ok=True)

In [6]:
# Create the class sub-folders for the validation split.
# exist_ok=True lets this cell be re-run without raising FileExistsError
# when the folders are already present.
os.makedirs(validation + "benign/", exist_ok=True)
os.makedirs(validation + "malignant/", exist_ok=True)

In [13]:
# Per-split image counters, all starting from zero.
train_examples = 0
validation_examples = 0
test_examples = 0

In [14]:
# Assign every labelled image to a split (about 80% train / 10% validation /
# 10% test) and copy it into that split's class sub-folder.
#
# NOTE(review): images are read from the hard-coded "images/" folder, while
# the config cell defines directory = "ISIC/images/" and never uses it --
# confirm which source path is correct.

# Re-seed and reset the counters so that re-running this cell reproduces the
# same assignment and the same totals. Without this, a second run continues
# from the advanced RNG state, copies images into different splits on top of
# the earlier copies, and keeps adding to the previous counts.
random.seed(seed)
test_examples = train_examples = validation_examples = 0

# The context manager closes the file; a bare open() would leave it open.
with open("labels.csv") as labels_file:
    next(labels_file, None)  # skip the header row
    for line in labels_file:
        if not line.strip():
            # A blank (e.g. trailing) line has no label column; skip it
            # instead of failing with an IndexError below.
            continue

        split_line = line.split(",")
        img_file = split_line[0]
        benign_malign = split_line[1]

        # Exactly one random draw per data row decides the split.
        random_num = random.random()
        if random_num < 0.8:
            location = train
            train_examples += 1
        elif random_num < 0.9:
            location = validation
            validation_examples += 1
        else:
            location = test
            test_examples += 1

        # A label whose integer part is 0 goes to benign/; every other value
        # goes to malignant/.
        class_dir = "benign/" if int(float(benign_malign)) == 0 else "malignant/"
        file_name = img_file + ".jpg"
        shutil.copy("images/" + file_name, location + class_dir + file_name)

In [15]:
# Report how many images were counted for each split.
for template, count in (
    ("Training examples: %i", train_examples),
    ("Test examples: %i", test_examples),
    ("Validation examples: %i", validation_examples),
):
    print(template % count)

Training examples: 20227
Test examples: 2554
Validation examples: 2550


### Model Setup
Install TensorFlow Hub, then import the libraries used for modelling.

In [18]:
import sys
!{sys.executable} -m pip install tensorflow_hub --user

Collecting tensorflow_hub
  Downloading tensorflow_hub-0.12.0-py2.py3-none-any.whl (108 kB)
Installing collected packages: tensorflow-hub
Successfully installed tensorflow-hub-0.12.0




In [24]:
import os

# Set TensorFlow's C++ minimum log level through the environment. This is done
# before `import tensorflow` so the value is already in place when the library
# loads.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
import math
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import roc_curve
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [25]:
# Split sizes, hard-coded to the totals printed by the split step so this part
# of the notebook can run without repeating the split.
train_examples, validation_examples, test_examples = 20227, 2550, 2554

# Square input resolution fed to the network, and the mini-batch size.
img_height = 224
img_width = img_height
batch_size = 32

In [27]:
# Binary classifier: a TF Hub feature-vector module followed by a single
# sigmoid unit.
# NOTE(review): confirm that this hub module accepts the 224x224 inputs
# configured via img_height / img_width.
model = keras.Sequential()

# Feature extractor loaded from TF Hub; trainable=True lets its weights be
# updated during training.
model.add(
    hub.KerasLayer(
        "https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/5",
        trainable=True,
    )
)

# One sigmoid output for the two-class problem.
model.add(layers.Dense(1, activation="sigmoid"))

In [28]:
# Training pipeline: rescale pixel values by 1/255 and apply augmentation
# (rotation, zoom, horizontal and vertical flips).
# NOTE(review): zoom_range has identical lower and upper bounds (0.95, 0.95),
# so the zoom factor does not vary between samples -- confirm this is intended.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=15,
    zoom_range=(0.95, 0.95),
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
    dtype=tf.float32,
)

# Validation and test pipelines only rescale; no augmentation is configured.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255, dtype=tf.float32)
test_datagen = ImageDataGenerator(rescale=1.0 / 255, dtype=tf.float32)

In [30]:
# Options shared by all three directory iterators: resize to the model input
# size, batch, load as RGB, emit binary labels, and shuffle with a fixed seed.
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    color_mode="rgb",
    class_mode="binary",
    shuffle=True,
    seed=123,
)

# One iterator per split, each reading from that split's folder.
train_gen = train_datagen.flow_from_directory("data/train/", **flow_options)
test_gen = test_datagen.flow_from_directory("data/test/", **flow_options)
validation_gen = validation_datagen.flow_from_directory(
    "data/validation/", **flow_options
)

Found 20228 images belonging to 2 classes.
Found 2554 images belonging to 2 classes.
Found 2552 images belonging to 2 classes.


In [34]:
# Adam optimizer with a fixed learning rate of 3e-4.
adam = keras.optimizers.Adam(learning_rate=3e-4)

# from_logits=False because the model's last layer already applies a sigmoid.
bce = keras.losses.BinaryCrossentropy(from_logits=False)

model.compile(optimizer=adam, loss=[bce], metrics=["accuracy"])

In [None]:
# Number of full batches per pass; integer division drops any final partial batch.
train_steps = train_examples // batch_size
val_steps = validation_examples // batch_size

# Train for a single epoch, evaluating on the validation iterator.
model.fit(
    train_gen,
    epochs=1,
    steps_per_epoch=train_steps,
    validation_data=validation_gen,
    validation_steps=val_steps,
)

 32/632 [>.............................] - ETA: 9:06:53 - loss: 1.7477 - accuracy: 0.8183