In [1]:
import os
import sys
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
import random

def import_images(path, img_names):
    """Load the named image files from ``path`` and return them resized.

    Parameters
    ----------
    path : str
        Directory that contains the image files.
    img_names : iterable of str
        File names, relative to ``path``; the output keeps this order.

    Returns
    -------
    numpy.ndarray
        The array produced by ``resize_all`` for the loaded images.

    Raises
    ------
    IOError
        If any file is missing or cannot be decoded as an image.
    """
    imgs = []
    for img_name in img_names:
        img_file = os.path.join(path, img_name)
        # Flag 1 asks OpenCV for a 3-channel colour decode.
        img = cv2.imread(img_file, 1)
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface later as an opaque resize error.
        if img is None:
            raise IOError("Could not read image: " + img_file)
        imgs.append(img)
    # resize_all already returns an array; asarray avoids a second full copy.
    return np.asarray(resize_all(imgs))

def import_target(path):
    """Read an integer label matrix from a comma-separated text file.

    The first line is treated as a header and skipped, and the first column
    of every remaining row is discarded; all other fields are parsed with
    ``int``. Blank lines are ignored, so the file may or may not end with a
    newline without losing its last data row.

    Parameters
    ----------
    path : str
        Location of the CSV file.

    Returns
    -------
    numpy.ndarray
        One row per data line, one column per field after the first.

    Raises
    ------
    ValueError
        If a label field cannot be parsed as an integer.
    """
    # The context manager closes the file even when parsing fails.
    with open(path) as f:
        rows = [line.rstrip('\n') for line in f if line.strip()]
    # rows[0] is the header; field 0 of each row is not a label.
    return np.array([[int(tk) for tk in row.split(',')[1:]] for row in rows[1:]])

def resize_all(X, dsize=(200, 150)):
    """Resize every image in ``X`` to a common size and stack the results.

    Parameters
    ----------
    X : iterable of numpy.ndarray
        Images to resize.
    dsize : tuple of int, optional
        Target size passed to ``cv2.resize``. OpenCV expects
        ``(width, height)``, so the default ``(200, 150)`` yields images
        with 150 rows and 200 columns.

    Returns
    -------
    numpy.ndarray
        The resized images stacked along a new first axis.
    """
    return np.array([cv2.resize(x, dsize=dsize) for x in X])

def class_indices(Y, j):
    """Return the positions of the rows of ``Y`` whose ``j``-th entry is 1."""
    matches = []
    for row_idx, row in enumerate(Y):
        if row[j] == 1:
            matches.append(row_idx)
    return matches

In [2]:
# Seed both global generators before snapshotting them; without a seed the
# captured states differ on every kernel start, so anything that draws from
# these generators is not repeatable under "Restart & Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Snapshots of the generator states, kept so they can be restored later with
# random.setstate / np.random.set_state.
py_rand_state = random.getstate()
np_rand_state = np.random.get_state()

In [3]:
# Build paths with os.path.join so the notebook is not tied to Windows
# path separators.
img_path = os.path.join("data", "ISIC2018_Task3_Training_Input")
target_path = os.path.join("data", "ISIC_2018_Training_GroundTruth.csv")
Y_all = import_target(target_path)

# Labels are matched to images purely by position, but os.listdir returns
# entries in arbitrary order, so sort the listing to make the pairing
# deterministic. Non-image files are dropped because they would shift it.
# NOTE(review): this assumes the CSV rows are ordered by file name - confirm.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
img_names = np.array(sorted(
    name for name in os.listdir(img_path)
    if name.lower().endswith(IMAGE_EXTENSIONS)))

# A count mismatch means images and label rows cannot be aligned by position.
if len(img_names) != len(Y_all):
    raise ValueError(
        "Found %d images but %d label rows" % (len(img_names), len(Y_all)))

In [4]:
# Column sums of the label matrix, i.e. how many samples carry each class.
counts = Y_all.sum(axis=0)
counts

array([1113, 6705,  514,  327, 1099,  115,  142])

In [5]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentation settings used to synthesise extra samples for the
# under-represented classes.
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'brightness_range': [0.8, 1.2],
    'horizontal_flip': True,
    'vertical_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

Using TensorFlow backend.


Idea: make the batch size a multiple of 7 (the number of classes)
and feed in an equal number of samples from each class.

In [6]:
# Target number of samples per class once augmentation has topped them up.
c_n = 1400

# Pre-allocated buffers for the 7 classes: images are 150 rows x 200 columns
# x 3 channels. float32 holds 8-bit pixel values exactly and halves the
# footprint of the image buffer compared with NumPy's default float64
# (about 3.5 GB instead of about 7 GB).
X = np.zeros(shape=(7*c_n, 150, 200, 3), dtype=np.float32)
Y = np.zeros(shape=(7*c_n, 7))

In [7]:
# Fill X/Y one class at a time: each class owns a contiguous block of c_n
# rows, holding its real images first and augmented copies after them.
for j in range(7):
    start = c_n * j
    inds = class_indices(Y_all, j)[:c_n]
    X_c = import_images(img_path, img_names[inds])
    Y_c = Y_all[inds]
    n_real = len(inds)
    X[start:start + n_real] = X_c
    Y[start:start + n_real] = Y_c
    if n_real >= c_n:
        continue
    # flow() keeps yielding batches, so pairing it with the remaining slot
    # numbers is what ends the loop. The range must come first in zip so that
    # no extra batch is drawn once the slots run out.
    augmented = datagen.flow(X_c, Y_c, batch_size=1)
    for slot, (x_new, y_new) in zip(range(start + n_real, start + c_n), augmented):
        X[slot] = x_new
        Y[slot] = y_new

In [8]:
# Sanity check: per-class totals after balancing, then the buffer shapes.
print(Y.sum(axis=0))
print(X.shape, Y.shape)

[1400. 1400. 1400. 1400. 1400. 1400. 1400.]
(9800, 150, 200, 3) (9800, 7)


In [9]:
from keras.applications import VGG16
# Pretrained convolutional base used purely as a fixed feature extractor:
# ImageNet weights, no classifier head (include_top=False), and an input
# shape matching the image buffer (150 x 200 x 3).
conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(150, 200, 3))

# Alternative backbone, kept for reference:
# from keras.applications.resnet50 import ResNet50
# conv_base = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 200, 3))

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
# conv_base.summary()

In [11]:
def loading_text(text):
    """Write ``text`` followed by a carriage return so the next call overwrites it."""
    # end='\r' puts the cursor back at the start of the line; flush=True makes
    # the update visible immediately.
    print(text, end='\r', flush=True)

def extract_features(X, batch_size, conv_base):
    """Run ``X`` through ``conv_base`` in batches and return flattened features.

    Parameters
    ----------
    X : numpy.ndarray
        Input samples, indexed along the first axis. Values are divided by
        255 before prediction.
    batch_size : int
        Number of samples per ``predict`` call; must be at least 1.
    conv_base : model
        Object exposing ``layers[-1].output_shape`` and ``predict``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), prod(per-sample output shape))``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 (the loop could never advance).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    total = len(X)
    # Per-sample output shape of the last layer (leading batch axis dropped).
    output_shape = conv_base.layers[-1].output_shape[1:]
    features = np.zeros(shape=(total,) + output_shape)
    for start in range(0, total, batch_size):
        loading_text(str(start) + "/" + str(total))
        stop = start + batch_size  # slicing clamps this on the final batch
        inputs_batch = X[start:stop] / 255  # SCALING TO 0-1 HERE
        features[start:stop] = conv_base.predict(inputs_batch)
    return features.reshape(total, np.prod(output_shape))

In [12]:
# Push every image through the convolutional base, 32 at a time, and keep the
# flattened feature vectors for training the dense classifier.
X_f = extract_features(X, 32, conv_base)

9792/9800

In [13]:
import pickle
# Cache the extracted features and labels so the expensive extraction step
# does not have to be repeated. The context manager closes the file even if
# pickling fails part-way.
with open('VGG_XY_' + str(c_n) + '.pkl', 'wb') as file:
    pickle.dump((X_f, Y), file)
# To reload a previously cached set instead (only unpickle files you trust -
# pickle.load can execute arbitrary code):
# with open('VGG_8750_XY_2.pkl', 'rb') as file:
#     X_f, Y = pickle.load(file)

In [14]:
from sklearn.model_selection import train_test_split
# Fix the split seed so the same samples end up in each set on every run.
SPLIT_SEED = 42
# NOTE(review): X_f includes augmented copies generated from the same source
# images, so variants of one image can land on both sides of this split and
# inflate the held-out scores - consider splitting before augmenting.
X_train, X_test, Y_train, Y_test = train_test_split(X_f, Y,
                                                stratify=Y,
                                                test_size=0.1,
                                                random_state=SPLIT_SEED)

In [15]:
# Shapes of the four split arrays, to confirm the 90/10 partition.
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((8820, 12288), (980, 12288), (8820, 7), (980, 7))

In [None]:
# np.sum(Y_train, axis=0), np.sum(Y_test, axis=0)

In [16]:
from keras import backend as K
def precision_threshold(threshold=0.5):
    """Build a Keras metric that computes batch precision at ``threshold``."""
    def precision(y_true, y_pred):
        """Precision over the whole batch.

        Predictions are clipped to [0, 1] and counted as positive when they
        exceed the enclosing ``threshold``.
        """
        # Binarise the predictions: 1.0 where the clipped score beats the threshold.
        hard_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold), K.floatx())
        # True positives, rounded so the count is integral.
        hits = K.round(K.sum(K.clip(y_true * hard_pred, 0, 1)))
        # Everything the model flagged as positive.
        flagged = K.sum(hard_pred)
        # Epsilon keeps the ratio finite when nothing was flagged.
        return hits / (flagged + K.epsilon())
    return precision

def recall_threshold(threshold = 0.5):
    """Build a Keras metric that computes batch recall at ``threshold``."""
    def recall(y_true, y_pred):
        """Recall over the whole batch.

        Predictions are clipped to [0, 1] and counted as positive when they
        exceed the enclosing ``threshold``.
        """
        # Binarise the predictions: 1.0 where the clipped score beats the threshold.
        hard_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold), K.floatx())
        # True positives, rounded so the count is integral.
        hits = K.round(K.sum(K.clip(y_true * hard_pred, 0, 1)))
        # Number of positive targets in the batch.
        actual = K.sum(K.clip(y_true, 0, 1))
        # Epsilon keeps the ratio finite when there are no positive targets.
        return hits / (actual + K.epsilon())
    return recall

In [18]:
from keras import models
from keras import layers
from keras import optimizers
from keras.callbacks import ModelCheckpoint

# Checkpoints live in one folder; build the path portably and make sure the
# folder exists before the first save.
checkpoint_dir = os.path.join("models", "vgg16_train1260")
os.makedirs(checkpoint_dir, exist_ok=True)

ms = []  # trained models, one per run
hs = []  # matching training histories
for i in range(3):

    # Small dense classifier on top of the pre-extracted features.
    model = models.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=X_train.shape[1]))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(7, activation='softmax'))
    # ModelCheckpoint fills the pattern from the epoch number and the logged
    # metrics only, so a "{i}" placeholder would raise KeyError at the first
    # save. Bake the run index in now and leave {epoch}/{val_acc} for Keras.
    filepath = os.path.join(
        checkpoint_dir,
        "m{:1d}_".format(i) + "e{epoch:02d}_acc{val_acc:.2f}.h5")
    # monitor = 'val_acc'
    checkpoint = ModelCheckpoint(filepath, monitor='val_recall', verbose=1, save_best_only=True, mode='max')
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', recall_threshold(0.5)])
    # NOTE(review): the held-out split doubles as validation data here, so
    # checkpoint selection is tuned on the same samples later used for reporting.
    history = model.fit(X_train, Y_train, epochs=40, batch_size=20, validation_data=(X_test, Y_test), callbacks=[checkpoint])

    ms.append(model)
    hs.append(history)

Train on 8820 samples, validate on 980 samples
Epoch 1/40

Epoch 00001: val_recall improved from -inf to 0.34592, saving model to models\vgg16_train1260\epoch01_acc0.58.hdf5
Epoch 2/40

Epoch 00002: val_recall improved from 0.34592 to 0.41020, saving model to models\vgg16_train1260\epoch02_acc0.58.hdf5
Epoch 3/40

Epoch 00003: val_recall improved from 0.41020 to 0.45102, saving model to models\vgg16_train1260\epoch03_acc0.61.hdf5
Epoch 4/40

Epoch 00004: val_recall improved from 0.45102 to 0.50000, saving model to models\vgg16_train1260\epoch04_acc0.63.hdf5
Epoch 5/40

Epoch 00005: val_recall improved from 0.50000 to 0.54490, saving model to models\vgg16_train1260\epoch05_acc0.61.hdf5
Epoch 6/40

Epoch 00006: val_recall did not improve from 0.54490
Epoch 7/40

Epoch 00007: val_recall did not improve from 0.54490
Epoch 8/40

Epoch 00008: val_recall did not improve from 0.54490
Epoch 9/40

Epoch 00009: val_recall improved from 0.54490 to 0.59796, saving model to models\vgg16_train1260\ep


Epoch 00034: val_recall did not improve from 0.65408
Epoch 35/40

Epoch 00035: val_recall did not improve from 0.65408
Epoch 36/40

Epoch 00036: val_recall did not improve from 0.65408
Epoch 37/40

Epoch 00037: val_recall improved from 0.65408 to 0.65510, saving model to models\vgg16_train1260\epoch37_acc0.68.hdf5
Epoch 38/40

Epoch 00038: val_recall improved from 0.65510 to 0.66429, saving model to models\vgg16_train1260\epoch38_acc0.68.hdf5
Epoch 39/40

Epoch 00039: val_recall did not improve from 0.66429
Epoch 40/40

Epoch 00040: val_recall did not improve from 0.66429
Train on 8820 samples, validate on 980 samples
Epoch 1/40
1080/8820 [==>...........................] - ETA: 1:24 - loss: 6.3796 - acc: 0.2250 - recall: 0.1741

KeyboardInterrupt: 

Alternative layer orderings to try:
- relu -> dropout
- batch norm -> relu
- batch norm -> relu -> dropout


In [23]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
def report(model, X_test, Y_test):
    """Print the ROC AUC and a per-class classification report for ``model``.

    The ROC AUC is computed on the raw predicted scores; the classification
    report is computed on one-hot predictions of each row's arg-max class.
    """
    probs = model.predict(X_test)
    print("ROC AUC:", roc_auc_score(Y_test, probs))
    # Rows of the identity matrix picked by arg-max give the one-hot predictions.
    one_hot = np.eye(probs.shape[1], dtype=probs.dtype)[probs.argmax(axis=1)]
    print(classification_report(Y_test, one_hot))
    
def plot_history(history):
    """Plot training vs. validation accuracy and loss, one figure each.

    ``history.history`` must provide the keys 'acc', 'val_acc', 'loss' and
    'val_loss'.
    """
    logs = history.history
    # Look up all four series first so a missing key fails before any drawing.
    curves = [
        ('acc', logs['acc'], logs['val_acc'], 'accuracy'),
        ('loss', logs['loss'], logs['val_loss'], 'loss'),
    ]
    epochs = range(1, len(curves[0][1]) + 1)
    for panel, (short, train, val, long_name) in enumerate(curves):
        # The first panel draws on the current figure; later ones open a new one.
        if panel > 0:
            plt.figure()
        plt.plot(epochs, train, 'bo', label='Training ' + short)
        plt.plot(epochs, val, 'b', label='Validation ' + short)
        plt.title('Training and validation ' + long_name)
        plt.legend()
    plt.show()

In [None]:
# Evaluate every model trained above on the held-out split.
# Note that this loop rebinds the module-level name `model`.
for model in ms:
    report(model, X_test, Y_test)

In [24]:
from keras.models import load_model
# Build the checkpoint location portably; the empty last component keeps the
# trailing separator the original string had.
path = os.path.join("models", "vgg16_train1260", "")
# The model was compiled with the custom `recall` metric, so the loader must
# be told how to rebuild it under that name.
# NOTE(review): this file name follows the older "epochNN_accX.XX.hdf5"
# pattern seen in the training log, not the pattern the training cell
# currently writes - confirm which checkpoint is intended.
model = load_model(os.path.join(path, "epoch38_acc0.68.hdf5"), custom_objects={'recall':recall_threshold()})
report(model, X_test, Y_test)

ROC AUC: 0.921036807580175
              precision    recall  f1-score   support

           0       0.57      0.64      0.60       140
           1       0.75      0.72      0.73       140
           2       0.55      0.66      0.60       140
           3       0.66      0.59      0.63       140
           4       0.62      0.47      0.53       140
           5       0.71      0.79      0.75       140
           6       0.95      0.89      0.92       140

   micro avg       0.68      0.68      0.68       980
   macro avg       0.69      0.68      0.68       980
weighted avg       0.69      0.68      0.68       980
 samples avg       0.68      0.68      0.68       980

