# Usage Instructions: 
* Initialize Kaggle Notebook
* Run the Kaggle Notebook by clicking "Run All"
* Wait for the preprocessing to finish
* Log into your Weights & Biases account (might need to create a new account for free if you don't have one)
* Wait for the training to finish for each model (Custom_CNN, Inception, ResNet)
* Select any model and perform prediction on the test set (saved in variable named "test")
* Perform error analysis 

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
#Import some necessary Modules
import os
import cv2
import keras
import numpy as np
import pandas as pd
import random as rn
from PIL import Image
import tensorflow as tf
import shutil
from sklearn.metrics import classification_report
import numpy as np

# Show one sample file per directory under the Kaggle input mount
# (only the first file of each directory is printed to keep the output short).
for folder, _, names in os.walk('/kaggle/input'):
    if names:
        print(os.path.join(folder, names[0]))

# Folder holding the raw ISIC 2019 training images
data_dir = "../input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input"

## Citations
* https://www.kaggle.com/andrewmvd/isic-2019 (dataset)
* https://becominghuman.ai/melanoma-skin-cancer-detection-with-cnn-90dc3749f3d7
* https://www.kaggle.com/minnieliang/multimodal-melanoma-model-with-conf-matrix

In [3]:
# Load the ground-truth label table and the per-image metadata table
df_labels = pd.read_csv("../input/isic-2019/ISIC_2019_Training_GroundTruth.csv")
df_meta = pd.read_csv("../input/isic-2019/ISIC_2019_Training_Metadata.csv")

# Preview the first rows of each table, then report the number of labelled rows
for frame in (df_labels, df_meta):
    print(frame.head())
print(len(df_labels))

In [4]:
# Paths of the per-class ("benign" / "malign") folders under the image directory.
# Only the paths are built here; nothing is created or moved by this cell.
benign_dir, malign_dir = (
    os.path.join(data_dir, class_name) for class_name in ("benign", "malign")
)

### Categorize image files into benign and malignant folders

In [5]:
# image_names = list(df_labels["image"])
# file_names = sorted(os.listdir(data_dir))
# data_dir2 = "./data"
# # UNCOMMENT THE TWO LINES BELOW IF THERE IS AN ERROR, AND RE-RUN (THIS WILL CREATE DATA FOLDERS)
# os.makedirs(os.path.join(data_dir2, "malignant"))   
# os.makedirs(os.path.join(data_dir2, "benign"))
# for index, row in df_labels.iterrows():
#     image_name = row["image"] + ".jpg"
#     if image_name in file_names:
#         label = "unspecified"
#         for k, v in row.items():
#             if k == "image":
#                 continue
#             if v > 0 and k == "MEL":
#                 label = "malignant"
#                 break
#             else:
#                 label = "benign"
#         shutil.copy(os.path.join(data_dir,image_name), os.path.join(data_dir2,label))
#     else:
#         continue


In [6]:
## Split the dataset into Train+Val / Test folders in a 90% / 10% ratio.
## The "train-val" folder is split again (validation_split=0.1) when it is
## loaded, so the overall ratios are roughly 81% / 9% / 10% Train/Val/Test.
import random
random.seed(0) # set seed to ensure consistency among different runs
image_names = list(df_labels["image"])
file_names = sorted(os.listdir(data_dir))
# Set copy of the file list so the per-row membership test below is O(1)
available_files = set(file_names)
data_dir2 = "./data"

# Create the output folders; exist_ok=True makes the cell safe to re-run
for subset_name in ("train-val", "test"):
    for class_name in ("malignant", "benign"):
        os.makedirs(os.path.join(data_dir2, subset_name, class_name), exist_ok=True)

for image_id, mel_score in zip(df_labels["image"], df_labels["MEL"]):
    image_name = image_id + ".jpg"
    # Draw the random number for every row (even rows whose image is missing)
    # so the train/test assignment stays identical to earlier runs with seed 0.
    rand_num = random.randint(0,99)
    if image_name not in available_files:
        continue
    # Binary target: melanoma (MEL column set) vs. every other diagnosis
    label = "malignant" if mel_score > 0 else "benign"
    if rand_num < 90: # 0 - 89, 90% chance
        subset = "train-val"
    else: # 90 - 99, 10% chance
        subset = "test"
    target_path = os.path.join(data_dir2, subset, label)
    source_path = os.path.join(data_dir, image_name)
    shutil.copy(source_path, target_path)


### Read images into TensorFlow Dataset

In [7]:
# NOTE: data_dir is rebound here. From this cell on it points at the local
# "./data" folder (train-val/ and test/ sub-folders created by the split cell),
# no longer at the raw Kaggle input folder.
data_dir = "./data"
def load_subset(cur_subset, shape):
    """Build a batched image dataset for one of the data subsets.

    Args:
        cur_subset: "training" or "validation" to read the corresponding
            part of the "train-val" folder (10% of it is held out as
            validation); any other value reads the "test" folder.
        shape: side length in pixels; images are resized to (shape, shape).

    Returns:
        The dataset produced by `image_dataset_from_directory`, with integer
        labels inferred from the class sub-folders and a batch size of 32.
    """
    # Options shared by every subset
    options = dict(
        labels="inferred",
        label_mode="int",
        class_names=None,
        color_mode="rgb",
        batch_size=32,
        image_size=(shape, shape),
        shuffle=True,
        seed=0,
        interpolation="bilinear",
        follow_links=False,
        crop_to_aspect_ratio=False,
    )
    if cur_subset in ("training", "validation"):
        folder = "train-val"
        # Same seed for both calls keeps the training/validation parts disjoint
        options.update(validation_split=0.1, subset=cur_subset)
    else:
        folder = "test" # Test dataset
    return tf.keras.preprocessing.image_dataset_from_directory(
        os.path.join(data_dir, folder),
        **options
    )

# train_229, val_299 = load_subset("training", 299), load_subset("validation", 299) # Datasets for default Inception; Sized to 299

# All three datasets are batched (32 images per batch) and resized to 224x224.
# NOTE(review): load_subset also builds the test dataset with shuffle=True, so
# its batch order is presumably not the same on every pass - keep this in mind
# when pairing predictions with labels.
train, val = load_subset("training", 224), load_subset("validation", 224) # Datasets for ResNet & Custom CNN & Inception; Sized to 224
test = load_subset("test", 224)

In [8]:
# TODO: add visualizations to show some sample images belonging to each class
# Example: https://becominghuman.ai/melanoma-skin-cancer-detection-with-cnn-90dc3749f3d7
import wandb
from wandb.keras import WandbCallback
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Flatten
from keras.utils import np_utils
from keras.callbacks import TensorBoard

# Hyper-parameter dictionary passed to wandb.init() as the run config (see init_wandb).
# NOTE(review): the training cells below hard-code their own values (e.g.
# epochs=20, Adam learning rate 0.001), so these numbers are only what gets
# logged to Weights & Biases, not necessarily what the models are trained with.
defaults=dict(
    dropout = 0.2,
    hidden_layer_size = 32,
    layer_1_size = 32,
    learn_rate = 0.01,
    decay = 1e-6,
    momentum = 0.9,
    epochs = 5,
)


In [19]:
def init_wandb(name):
    """Start a Weights & Biases run.

    Args:
        name: display name of the run inside the
            "visualize-melanoma-models" project.
    """
    wandb.init(
        project="visualize-melanoma-models",
        config=defaults,
        name=name,
    )


init_wandb("neural_network_nov30_2021_run4")

# Evaluation Function
We will print out the per-class metrics during training

In [10]:
def get_performance_CNN(model):
    """Print a per-class classification report for `model` on the test set.

    The global `test` dataset is walked exactly once: every batch is scored
    and its labels are collected in the same step. The dataset is built with
    shuffling enabled, so scoring it in one pass and reading the labels in a
    second pass could pair predictions with the wrong labels.

    Args:
        model: a Keras model that outputs one score per image; scores of
            0.50 or more are counted as class 1, everything else as class 0.

    Raises:
        ValueError: if the test dataset yields no batches, or if the model
            does not return exactly one score per image.
    """
    true_batches = []
    pred_batches = []
    for images, labels in test:
        labels = labels.numpy().reshape(-1)
        scores = np.asarray(model.predict_on_batch(images))
        if scores.size != labels.size:
            raise ValueError(
                "expected one score per image, got output of shape {}".format(scores.shape)
            )
        true_batches.append(labels)
        # Vectorised thresholding; reshape flattens (batch, 1) outputs to (batch,)
        pred_batches.append((scores.reshape(-1) >= 0.50).astype(int))
    if not true_batches:
        raise ValueError("test dataset is empty; nothing to evaluate")
    Y_test = np.concatenate(true_batches)
    y_pred = np.concatenate(pred_batches)
    print(classification_report(Y_test, y_pred))


# Custom Callback
Use this to auto-evaluate the model's per-class metrics

In [11]:
class CustomEvaluateCallback(keras.callbacks.Callback):
    """Keras callback that prints per-class metrics after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Announce the finished epoch and run get_performance_CNN on the model.

        Args:
            epoch: index of the epoch that just ended, as passed in by Keras.
            logs: metric dictionary supplied by Keras; not used here.
        """
        banner = "End epoch {} of training; performing per-class evaluation...".format(epoch)
        print(banner)
        get_performance_CNN(self.model)


# Model training

## Define and Train Custom CNN

In [None]:
# Ask TensorFlow to log the device each operation is placed on (useful for
# checking that the GPU is used; the output can be verbose).
tf.debugging.set_log_device_placement(True)

In [22]:
# A BASIC CUSTOMIZED CNN
# Inspired by: https://www.analyticsvidhya.com/blog/2020/10/create-image-classification-model-python-keras/
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# Four conv + max-pool stages, one more conv layer with dropout, then a small
# dense head with a single sigmoid unit for the binary benign/malignant target.
model_CNN = Sequential()
model_CNN.add(Conv2D(32,3,padding="same", activation="relu", input_shape=(224,224,3)))
model_CNN.add(MaxPool2D())

model_CNN.add(Conv2D(32, 3, padding="same", activation="relu"))
model_CNN.add(MaxPool2D())

model_CNN.add(Conv2D(32, 3, padding="same", activation="relu"))
model_CNN.add(MaxPool2D())

model_CNN.add(Conv2D(64, 3, padding="same", activation="relu"))
model_CNN.add(MaxPool2D())

model_CNN.add(Conv2D(64, 3, padding="same", activation="relu"))
model_CNN.add(Dropout(0.4))

model_CNN.add(Flatten())
model_CNN.add(Dense(128,activation="relu"))
model_CNN.add(Dense(1, activation="sigmoid"))

# summary() prints the table itself and returns None, so it is not wrapped in print()
model_CNN.summary()
# Weight the positive class (label 1) more heavily to counter class imbalance
class_weight = {0: 1,
                1: 7}
opt = Adam(learning_rate=0.001)
# The last layer already applies a sigmoid, so the loss must treat the model
# output as probabilities (from_logits=False), not as raw logits.
model_CNN.compile(optimizer = opt , loss = tf.keras.losses.BinaryCrossentropy(from_logits=False) , metrics = ['accuracy'])
model_CNN.fit(
train, 
epochs=20, # lower this (e.g., 5) for a quick smoke test
validation_data=val,
class_weight=class_weight,
callbacks=[CustomEvaluateCallback()]
) 
# get_performance_CNN(model_CNN) # Change the argument to select the model you want to evaluate

## Train Inception V3

In [23]:
# Reference: https://www.pyimagesearch.com/2020/04/27/fine-tuning-resnet-with-keras-tensorflow-and-deep-learning/
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

In [None]:
# Pre-trained InceptionV3 convolutional base (ImageNet weights, no classifier head)
base_inceptionV3 = tf.keras.applications.InceptionV3(
    include_top=False,
    weights="imagenet"
)

# NOTE(review): images are fed with their raw pixel values; consider applying
# tf.keras.applications.inception_v3.preprocess_input before the base model.
inputs = tf.keras.Input(shape=(224,224,3))
x = base_inceptionV3(inputs)
x = AveragePooling2D(pool_size=(5, 5))(x)
x = Flatten(name="flatten")(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.1)(x)
# Single sigmoid unit: the labels are integers 0/1 and get_performance_CNN
# thresholds the model output at 0.5, so the output must be a probability.
outputs = Dense(1, activation="sigmoid")(x)
model_inceptionV3 = tf.keras.Model(inputs, outputs)

model_inceptionV3.compile(
    optimizer=tf.keras.optimizers.Adam(0.001), # TODO use learning rate decay, TODO use smaller learning rate! 0.0003, 0.0001
    loss=tf.keras.losses.BinaryCrossentropy(), # Binary target with one sigmoid output
    metrics=[tf.keras.metrics.BinaryAccuracy()] #, tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()], # TODO
)

# Weight the positive class (label 1) more heavily to counter class imbalance
class_weight = {
    0: 1,
    1: 5
}
model_inceptionV3.fit(
    train, 
    epochs=20,
    validation_data=val,
    class_weight=class_weight,
    callbacks=[CustomEvaluateCallback()]
)

In [None]:
# Print the per-class report for the trained Inception model
get_performance_CNN(model_inceptionV3)

## Train ResNet50


In [None]:
# Pre-trained ResNet50 convolutional base (ImageNet weights, no classifier head)
base_resnet50 = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet'
)

# NOTE(review): images are fed with their raw pixel values; consider applying
# tf.keras.applications.resnet50.preprocess_input before the base model.
inputs = tf.keras.Input(shape=(224,224,3))
x = base_resnet50(inputs)
x = AveragePooling2D(pool_size=(5, 5))(x)
x = Flatten(name="flatten")(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.2)(x)
# Single sigmoid unit: BinaryCrossentropy() below expects probabilities and
# get_performance_CNN thresholds the model output at 0.5.
outputs = Dense(1, activation="sigmoid")(x)
model_resnet50 = tf.keras.Model(inputs, outputs)

model_resnet50.compile(
    optimizer=tf.keras.optimizers.Adam(0.001), # TODO use learning rate decay, TODO use smaller learning rate! 0.0003, 0.0001
    loss=tf.keras.losses.BinaryCrossentropy(), # Binary target with one sigmoid output
    metrics=[tf.keras.metrics.BinaryAccuracy()] #, tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()], # TODO
)

# Defined here so this cell does not silently depend on whichever training
# cell ran last; the values match the Inception cell (what "Run All" used).
class_weight = {
    0: 1,
    1: 5
}
model_resnet50.fit(
train, 
epochs=20,
validation_data=val,
class_weight=class_weight,
callbacks=[CustomEvaluateCallback()]
)

### Model evaluation

In [None]:
# TODO perform some error analysis and visualization