In [None]:
import os
import sys
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
import random
import os

# Directory holding the training images; load_img joins file names onto it.
img_dir = "data/ISIC2018_Task3_Training_Input"
# Ground-truth label CSV. Despite the "_dir" suffix this is a file path: load_target opens it directly.
trg_dir = "data/ISIC2018_Task3_Training_GroundTruth.csv"

def load_img(img_name):
    """Read one image from ``img_dir`` and resize it to 200x150 (width x height).

    Parameters
    ----------
    img_name : str
        File name of the image, relative to ``img_dir``.

    Returns
    -------
    numpy.ndarray
        The image loaded with ``cv2.imread(..., 1)`` (3-channel colour) and
        resized with ``dsize=(200, 150)``.

    Raises
    ------
    OSError
        If OpenCV could not read the file (missing, unreadable or not an image).
    """
    img_path = os.path.join(img_dir, img_name)
    img = cv2.imread(img_path, 1)
    # cv2.imread reports failure by returning None instead of raising; without
    # this check the error would only surface as an opaque failure in cv2.resize.
    if img is None:
        raise OSError("Could not read image file: " + img_path)
    return cv2.resize(img, dsize=(200, 150))

def load_target(path=None):
    """Parse the ground-truth CSV into an integer label matrix.

    The first line (header) and the first column of every row are discarded;
    each remaining field is converted with ``int(float(...))``.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to the module-level ``trg_dir``.

    Returns
    -------
    numpy.ndarray
        One row per data line of the CSV, one column per field after the first.
    """
    if path is None:
        path = trg_dir
    # `with` closes the file even if reading raises.
    with open(path) as f:
        lines = f.read().splitlines()
    # Skip the header and ignore blank lines, so the last data row is kept
    # whether or not the file ends with a newline.
    rows = [line.split(',') for line in lines[1:] if line.strip()]
    return np.array([[int(float(tok)) for tok in row[1:]] for row in rows])

def class_indices(Y, j):
    """Return the positions of the rows of Y whose entry in column j equals 1."""
    hits = []
    for pos, row in enumerate(Y):
        if row[j] == 1:
            hits.append(pos)
    return hits

def loading_text(text):
    """Write `text` to stdout followed by a carriage return, then flush.

    The trailing '\\r' (rather than a newline) means the next write starts at
    the beginning of the same line, so repeated calls overwrite each other.
    """
    stream = sys.stdout
    stream.write("{0}\r".format(str(text)))
    stream.flush()

In [None]:
# Seed NumPy's global RNG; the np.random.permutation shuffles used for the
# train/validation split and in the batch generators draw from it.
np.random.seed(1000)

In [None]:
# Z: sorted image file names, Y: label matrix parsed from the ground-truth CSV.
# Row k of Y is paired with Z[k] purely by position, so any non-image file in
# the directory (e.g. a licence or readme text file) would shift every pairing.
# Keep image files only and fail loudly if the two sides do not line up.
Z = np.array(sorted(name for name in os.listdir(img_dir)
                    if name.lower().endswith(('.jpg', '.jpeg', '.png'))))
Y = load_target()
if len(Z) != len(Y):
    raise ValueError("Found %d image files in %s but %d label rows in %s"
                     % (len(Z), img_dir, len(Y), trg_dir))

In [None]:
# Pretrained convolutional base (ImageNet weights, classifier top removed).
# Alternative bases are left commented out next to the active one.
# from keras.applications.vgg16 import VGG16
# conv_base = VGG16(include_top=False, weights='imagenet', input_shape=(150, 200, 3))
from keras.applications.mobilenet_v2 import MobileNetV2
# input_shape is (height, width, channels); load_img resizes to width 200, height 150
conv_base = MobileNetV2(include_top=False, weights='imagenet', input_shape=(150, 200, 3))
# from keras.applications import ResNet50
# conv_base = ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3))

# Length of one flattened conv_base output: product of the last layer's
# output dimensions, excluding the leading batch dimension.
output_len = np.prod(conv_base.layers[-1].output_shape[1:])

In [None]:
# Bucket the dataset by class: for each of the 7 label columns collect the
# sample indices, then the matching label rows and image file names.
I_class = [np.array(class_indices(Y, cls)) for cls in range(7)]
Y_class = [Y[members] for members in I_class]
Z_class = [Z[members] for members in I_class]

# number of instances taken from each class for the validation set
v_n = 20
def split_YZ_class(Y_class, Z_class, n):
    """Shuffle every class and split it into a validation head and a training tail.

    Parameters
    ----------
    Y_class, Z_class : sequence of numpy.ndarray
        Per-class label rows and per-class image names; entry k of both
        sequences describes the same class, element-for-element.
    n : int
        Number of shuffled samples per class that go to the validation part.

    Returns
    -------
    tuple of four lists
        ``(Y_class_train, Z_class_train, Y_class_valid, Z_class_valid)``, each a
        list with one array per class. The validation arrays hold the first
        ``n`` shuffled samples, the training arrays hold the rest.

    Notes
    -----
    The inputs are not modified; shuffled copies are produced instead of
    overwriting the caller's lists. Randomness comes from NumPy's global RNG,
    one ``np.random.permutation`` call per class, in class order.
    """
    y_train_parts, z_train_parts = [], []
    y_valid_parts, z_valid_parts = [], []
    for y_cls, z_cls in zip(Y_class, Z_class):
        # The same permutation is applied to labels and names so pairs stay aligned.
        order = np.random.permutation(len(y_cls))
        y_shuffled, z_shuffled = y_cls[order], z_cls[order]
        y_valid_parts.append(y_shuffled[:n])
        z_valid_parts.append(z_shuffled[:n])
        y_train_parts.append(y_shuffled[n:])
        z_train_parts.append(z_shuffled[n:])
    return y_train_parts, z_train_parts, y_valid_parts, z_valid_parts

# Each of the four results is a list holding one numpy array per class.
Y_class_train, Z_class_train, Y_class_valid, Z_class_valid = split_YZ_class(Y_class, Z_class, v_n)

# Class-balanced validation set: the v_n held-out samples of every class, stacked.
Y_valid_f = np.concatenate(Y_class_valid)
Z_valid = np.concatenate(Z_class_valid)
# Load the validation images and divide the pixel values by 255.
X_valid = np.array([load_img(name) for name in Z_valid]) / 255
# Run the convolutional base once and flatten every feature map to a vector.
X_valid_f = conv_base.predict(X_valid).reshape((len(X_valid), output_len))
X_valid.shape, Y_valid_f.shape

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Shared augmentation pipeline used by the batch generators: random geometric
# and brightness perturbations, with pixel values rescaled by 1/255.
aug_gen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=(0.8, 1.2),
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

def img_gen(Y_class, Z_class, batch_size):
    """Endlessly yield class-balanced batches of augmented images.

    Every batch contains ``batch_size // len(Y_class)`` samples from each
    class. Images are loaded with ``load_img`` and passed once through
    ``aug_gen`` (random augmentation plus the 1/255 rescale).

    Parameters
    ----------
    Y_class, Z_class : sequence of numpy.ndarray
        Per-class label rows and per-class image file names, aligned
        element-for-element.
    batch_size : int
        Requested batch size; must be at least the number of classes.

    Yields
    ------
    (X_batch, Y_batch)
        Augmented image batch and the matching label rows.

    Raises
    ------
    ValueError
        If there are no classes or ``batch_size`` is smaller than the number
        of classes (raised when the first batch is requested).

    Notes
    -----
    The inputs are not modified: each class is shuffled once into a private
    copy. When the read position reaches the end of a class it wraps around to
    the start, so every class always contributes exactly the same number of
    samples (slicing alone would return a shortened, unbalanced batch there).
    """
    n_classes = len(Y_class)
    if n_classes == 0:
        raise ValueError("img_gen needs at least one class")
    n = batch_size // n_classes
    if n < 1:
        # n == 0 would give empty batches and a read position that never advances
        raise ValueError("batch_size (%d) must be at least the number of classes (%d)"
                         % (batch_size, n_classes))

    # shuffle each class once, with one permutation shared by labels and names
    shuffled = []
    for y_cls, z_cls in zip(Y_class, Z_class):
        order = np.random.permutation(len(y_cls))
        shuffled.append((y_cls[order], z_cls[order]))

    i = 0
    while True:
        y_parts, z_parts = [], []
        for y_cls, z_cls in shuffled:
            # modular indices wrap around the end of the class
            picks = (i + np.arange(n)) % len(y_cls)
            y_parts.append(y_cls[picks])
            z_parts.append(z_cls[picks])
        Y_batch = np.concatenate(y_parts)
        Z_batch = np.concatenate(z_parts)
        X_batch = np.array([load_img(img_name) for img_name in Z_batch])
        # one augmented version of the whole batch, in the same order as Y_batch
        X_batch = next(aug_gen.flow(X_batch, shuffle=False, batch_size=len(X_batch)))
        yield X_batch, Y_batch
        i += n
    
# generates features through conv_base
def feat_gen(Y_class, Z_class, batch_size):
    """Endlessly yield batches of flattened conv_base features with their labels.

    Batch composition (per-class shuffling, sampling, image loading and
    augmentation) is delegated to ``img_gen`` so that logic exists in one place
    only; this generator adds the convolutional feature extraction.

    Parameters
    ----------
    Y_class, Z_class : sequence of numpy.ndarray
        Per-class label rows and per-class image file names.
    batch_size : int
        Requested batch size, forwarded to ``img_gen``.

    Yields
    ------
    (X_batch, Y_batch)
        ``conv_base`` predictions reshaped to ``(len(batch), output_len)`` and
        the matching label rows.
    """
    for X_imgs, Y_batch in img_gen(Y_class, Z_class, batch_size):
        # conv_base and output_len are looked up in the notebook globals per batch
        X_batch = np.reshape(conv_base.predict(X_imgs), (len(X_imgs), output_len))
        yield X_batch, Y_batch
        
# Pre-generate convolutional features
# N = 700
# i = 0
# b = 7
# X_train_f = np.zeros(shape=(N, output_len))
# Y_train_f = np.zeros(shape=(N, 7))
# for X_batch, Y_batch in feat_gen(Y_class_train, Z_class_train, b):
#     X_train_f[i:i+b] = X_batch
#     Y_train_f[i:i+b] = Y_batch
#     i += b
#     loading_text(str(i) + "/" + str(N))
#     if i >= N:
#         break

In [None]:
# discard all features with zero variance, indices of remaining features are in nz
# nz = []
# i = 0  # counter
# d = 50 # delta
# while i < X_train_f.shape[1]:
#     inds = i + np.nonzero(np.var(X_train_f[:, i:i+d], axis=0))[0]
#     nz.append(inds)
#     i += d
#     loading_text(str(i) + "/" + str(X_train_f.shape[1]))
# nz = np.concatenate(nz, axis=0)
# len(nz)

In [None]:
# actually discard from X,Y
# X_train_f2 = X_train_f[:,nz]
# X_valid_f2 = X_valid_f[:,nz]

In [None]:
from keras import models
from keras import layers
from keras import optimizers
from keras.regularizers import l1, l2
from keras.initializers import he_normal
from keras.callbacks import EarlyStopping

In [None]:
# Samples per training batch; the generators take batch_size // 7 per class (alternative value: 14).
batch_size = 28
# Trained models and the history object returned by each fit, in matching order.
ms, hs = [], []

In [None]:
# try feature standardization
# from sklearn import preprocessing
# standardized_X = preprocessing.scale(X)

In [None]:
import keras.backend as K
def recall(y_true, y_pred):
    """
    Computes the true positive rate.

    Both counts are sums over the whole tensors passed in: the rounded,
    [0, 1]-clipped product y_true * y_pred for the hits, and the rounded,
    clipped y_true for the positives.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    positive_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    # K.epsilon() keeps the denominator non-zero when there are no positives
    return hit_count / (positive_count + K.epsilon())

# Stop training once the monitored 'recall' metric has not improved for 50 epochs.
# mode='max' must be explicit: in the default 'auto' mode Keras only treats a
# metric as "higher is better" when its name contains 'acc', so 'recall' would
# be minimised and training would stop when recall stops *decreasing*.
overfitCallback = EarlyStopping(monitor='recall', mode='max', min_delta=0, patience=50) #loss

# containers for the trained models and their fit histories
ms, hs = [], []

In [None]:
# TODO: edit this to use img_gen instead of feat_gen
# Train one dense classification head on conv_base features for every hidden
# layer width listed in N; models and histories are collected in ms / hs.
# (A stray bare name `c` that raised NameError was removed from this cell.)
N = [512]
for n in N:
    model = models.Sequential()
    model.add(layers.Dense(n, activation='relu', input_shape=(output_len,)))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer=optimizers.Adam(lr=0.00002), # even slower learning rate?
                  loss='categorical_crossentropy',
                  metrics=['acc', recall])
    history = model.fit_generator(feat_gen(Y_class_train, Z_class_train, batch_size),
                        steps_per_epoch=200,
                        epochs=2000, 
                        validation_data=(X_valid_f, Y_valid_f), 
                        callbacks=[overfitCallback],
                        verbose=1)
    ms.append(model)
    hs.append(history)

In [None]:
# ms[1].save("models/vgg1536.h5")
# The heads trained in this notebook are compiled with the custom `recall`
# metric. Keras cannot resolve a custom metric by name when deserialising a
# compiled model, so it is supplied here; the mapping is simply unused if the
# saved model does not reference it.
predictor = models.load_model("models/vgg1536.h5", custom_objects={'recall': recall})

In [None]:
# fine-tuning vgg
# VGG16 only appears in a commented-out import elsewhere in the notebook, so
# it has to be imported here for this cell to run on a fresh kernel.
from keras.applications.vgg16 import VGG16

# NOTE: this rebinds the global conv_base that feat_gen reads, while output_len
# and X_valid_f were computed from the previous base.
conv_base = VGG16(include_top=False, weights='imagenet', input_shape=(150, 200, 3))
conv_base.trainable = True
# Freeze every layer before 'block5_conv1'; that layer and all later ones stay trainable.
set_trainable = False
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
    layer.trainable = set_trainable

In [None]:

# End-to-end network for fine-tuning: convolutional base -> flatten -> loaded dense head.
tuned_model = models.Sequential([conv_base, layers.Flatten(), predictor])
tuned_model.compile(
    optimizer=optimizers.Adam(lr=0.00001),
    loss='categorical_crossentropy',
    metrics=['acc'],
)
tuned_model.summary()

In [None]:
# Stop fine-tuning when the training loss has not improved for 5 epochs.
overfitCallback_ft = EarlyStopping(monitor='loss', min_delta=0, patience=5)
# Fine-tune on augmented image batches; validate on the held-out images.
history = tuned_model.fit_generator(
    img_gen(Y_class_train, Z_class_train, batch_size),
    steps_per_epoch=10,
    epochs=20,
    validation_data=(X_valid, Y_valid_f),
    callbacks=[overfitCallback_ft],
    verbose=1,
)

In [None]:
# Create the output directory first so the save does not fail after training
# just because "models/" is missing.
os.makedirs("models", exist_ok=True)
tuned_model.save("models/vgg1536_2.h5")

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
def pred_max(model, X_test):
    """Return one-hot predictions: a 1 at each row's highest-scoring column, 0 elsewhere.

    The result has the same shape and dtype as ``model.predict(X_test)``.
    """
    scores = model.predict(X_test)
    one_hot = np.zeros_like(scores)
    row_ids = np.arange(len(scores))
    winners = scores.argmax(1)
    # fancy indexing sets exactly one entry per row
    one_hot[row_ids, winners] = 1
    return one_hot

def report(model, X_test, Y_test):
    """Print the ROC AUC and a per-class classification report for `model`.

    Parameters
    ----------
    model
        Object with a ``predict`` method returning per-class scores.
    X_test
        Inputs passed to ``model.predict``.
    Y_test
        One-hot ground-truth labels.
    """
    # ROC AUC measures how well the scores rank positives above negatives, so it
    # needs the raw predicted scores; argmax-ed one-hot labels collapse every
    # ROC curve to a single operating point and distort the value.
    scores = model.predict(X_test)
    print("Area Under ROC:", roc_auc_score(Y_test, scores))
    # The classification report, in contrast, needs hard label decisions.
    print(classification_report(Y_test, pred_max(model, X_test)))
    
    
def plot_history(history):
    """Plot training/validation accuracy and loss per epoch, side by side.

    `history.history` must provide the keys 'acc', 'val_acc', 'loss' and
    'val_loss'. Accuracy goes in the left panel and loss in the right one;
    training values are drawn as blue dots, validation values as red dots.
    """
    logs = history.history
    # read all four series up front, before any figure is created
    curves = {key: logs[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')}
    epochs = range(1, len(curves['acc']) + 1)

    panels = (
        ('acc', 'val_acc', 'acc', 'Training and validation accuracy'),
        ('loss', 'val_loss', 'loss', 'Training and validation loss'),
    )

    plt.figure(figsize=(13,5))
    for position, (train_key, valid_key, short_name, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, curves[train_key], 'bo', label='Training ' + short_name)
        plt.plot(epochs, curves[valid_key], 'ro', label='Validation ' + short_name)
        plt.title(title)
        plt.legend()
    plt.show()

In [None]:
# Reload the saved dense head. The custom `recall` metric used when compiling
# heads in this notebook is passed along so Keras can deserialise a model that
# references it; the mapping is ignored otherwise.
model = models.load_model("models/vgg1536.h5", custom_objects={'recall': recall})

In [None]:
# Evaluate the reloaded dense head on the precomputed validation features.
# NOTE(review): X_valid_f was extracted with the MobileNetV2 conv_base, while the
# model file is named "vgg1536" — confirm the model's input size matches these features.
report(model, X_valid_f, Y_valid_f)