In [1]:
import os 
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.utils import class_weight
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import ResNet50, VGG16
from keras.applications.resnet50 import preprocess_input
from keras import Model, layers
from keras.models import load_model, model_from_json
from keras.callbacks import EarlyStopping, ModelCheckpoint
from math import ceil
from keras.regularizers import l2

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Environment sanity check: record the Keras version in the cell output and
# list the GPUs that the TensorFlow backend can see.
from keras import backend as K

# A bare expression is only echoed when it is the last statement of a cell,
# so the version has to be printed explicitly to show up in the output.
print(keras.__version__)

# NOTE(review): `_get_available_gpus` is a private backend helper and may not
# exist in other Keras/TensorFlow versions — confirm before upgrading.
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [None]:
# Settings shared by the training and validation directory iterators.
_flow_options = dict(
    batch_size=32,
    class_mode='categorical',
    target_size=(224, 224),
)

# Training images are augmented (shear, zoom, horizontal flip) and then passed
# through `preprocess_input`.
# NOTE(review): `preprocess_input` is imported from keras.applications.resnet50
# while the backbone built later is VGG16 — confirm the two are interchangeable.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=10,
)
train_generator = train_datagen.flow_from_directory(
    './data/cropped_multiclass/train',
    shuffle=True,
    **_flow_options
)

# Validation images only get the preprocessing step. Shuffling is off so the
# order of predictions can be compared with `validation_generator.classes`.
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
validation_generator = validation_datagen.flow_from_directory(
    './data/cropped_multiclass/val',
    shuffle=False,
    **_flow_options
)

# Long names for the eight diagnosis codes.
classes = {
    'AK': 'Actinic keratosis',
    'BCC': 'Basal cell carcinoma',
    'BKL': 'Benign keratosis',
    'DF': 'Dermatofibroma',
    'MEL': 'Melanoma',
    'NV': 'Melanocytic nevus',
    'SCC': 'Squamous cell carcinoma',
    'VASC': 'Vascular lesion',
}

# Diagnosis codes, used as axis labels for per-class results.
# NOTE(review): presumably this matches the index order assigned by
# flow_from_directory — verify against train_generator.class_indices.
mapping = ['AK', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'SCC', 'VASC']

In [None]:
# Ground-truth table of the ISIC 2019 training set.
# NOTE(review): `df` is not referenced by any other cell visible here — confirm
# whether this read is still needed.
df = pd.read_csv('data/ISIC_2019_Training_GroundTruth.csv')

# "Balanced" weights give rare classes a proportionally larger weight.
# Keyword arguments are used because newer scikit-learn releases no longer
# accept these parameters positionally.
class_labels = np.unique(train_generator.classes)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=train_generator.classes)

# Keras expects `class_weight` as a dict {class_index: weight}. The bare
# ndarray returned by scikit-learn is not the documented form, so convert it
# here, where the weights are defined.
class_weights = dict(zip(class_labels.tolist(), balanced_weights.tolist()))
print(class_weights)


In [3]:
# Directory that receives the checkpoints and figures written by later cells.
# The trailing slash matters: file names are appended by string concatenation.
model_dir = 'models/keras/patience10_vgg16_multiclass_128/'

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(model_dir, exist_ok=True)

# Number of epochs without val_loss improvement tolerated by EarlyStopping.
# NOTE(review): the directory name says "patience10" but the value is 8 —
# confirm which one is intended.
patience = 8

In [None]:
# VGG16 with ImageNet weights and without its fully connected top layers.
conv_base = VGG16(weights='imagenet', include_top=False)

# Freeze every layer of the base so only layers added on top of it are trained.
for base_layer in conv_base.layers:
    base_layer.trainable = False

In [None]:
# Classification head stacked on the frozen convolutional base:
# global average pooling -> Dense(128) -> Dense(32) -> softmax over 8 classes.
x = conv_base.output
x = layers.GlobalAveragePooling2D()(x)

# Dense applies no activation unless one is given. Without a non-linearity the
# two hidden layers collapse into a single linear map, so relu is set
# explicitly. Both layers keep their L2 penalty on kernel and bias.
x = layers.Dense(128, activation='relu',
                 kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(x)
x = layers.Dense(32, activation='relu',
                 kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(x)

# One softmax unit per diagnosis code (`mapping` lists eight).
predictions = layers.Dense(8, activation='softmax')(x)

model = Model(conv_base.input, predictions)
model.summary()

In [None]:
import functools

# Top-3 categorical accuracy as an extra metric. A functools.partial has no
# __name__ of its own; other cells look this metric up as 'top3_acc' (history
# keys, custom_objects), so the name is assigned explicitly.
top3_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=3)
top3_acc.__name__ = 'top3_acc'

# Main run: stop once val_loss has not improved for `patience` epochs and keep
# only the checkpoint with the lowest val_loss.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=patience),
    ModelCheckpoint(
        filepath=model_dir + 'best_model.h5',
        monitor='val_loss',
        save_best_only=True,
    ),
]

# Second callback set with a shorter patience and its own checkpoint file.
callbacks_ft = [
    EarlyStopping(monitor='val_loss', patience=3),
    ModelCheckpoint(
        filepath=model_dir + 'best_model_ft.h5',
        monitor='val_loss',
        save_best_only=True,
    ),
]

optimizer = keras.optimizers.Adam()

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy', top3_acc],
)

In [None]:
# Train the head for up to 100 epochs; the callbacks stop early and checkpoint
# the best model.
#
# Step counts come from the generators themselves. The earlier
# `ceil(20242//32)` floored before `ceil` ran, so the last partial batch was
# dropped every epoch (632 instead of 633 steps, 159 instead of 160), and the
# hard-coded sample counts would go stale if the dataset changed.
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=len(train_generator),
    epochs=100,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=callbacks,
    class_weight=class_weights,
    workers=4)

In [None]:
def _plot_history_curve(fig_num, train_values, val_values, train_style,
                        val_style, legend_labels, ylabel, out_path,
                        marked_epoch):
    """Plot one training/validation curve pair and save it to disk.

    Args:
        fig_num: matplotlib figure number to draw on.
        train_values: per-epoch values measured on the training data.
        val_values: per-epoch values measured on the validation data.
        train_style: matplotlib format string for the training curve.
        val_style: matplotlib format string for the validation curve.
        legend_labels: two legend entries (training, validation).
        ylabel: label of the y axis.
        out_path: file the figure is written to.
        marked_epoch: epoch at which the vertical "Early Stopping" line is drawn.
    """
    epochs = range(1, len(train_values) + 1)
    plt.figure(fig_num)
    plt.plot(epochs, train_values, train_style)
    plt.plot(epochs, val_values, val_style)
    plt.axvline(x=marked_epoch, color='k')
    plt.legend(list(legend_labels) + ['Early Stopping'])
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.savefig(out_path)


# Get training and test loss histories
training_loss = history.history['loss']
test_loss = history.history['val_loss']

# Create count of the number of epochs
epoch_count = range(1, len(training_loss) + 1)

# Epoch (1-based) with the lowest validation loss, i.e. the one kept by the
# val_loss-monitored checkpoint. It replaces a hard-coded x=16, which was only
# right for one particular run.
best_epoch = test_loss.index(min(test_loss)) + 1

# Visualize loss history
_plot_history_curve(1, training_loss, test_loss, 'r--', 'b-',
                    ['Training Loss', 'Test Loss'],
                    'Loss', model_dir + 'loss.png', best_epoch)

# Get training and test accuracy histories
training_accuracy = history.history['accuracy']
test_accuracy = history.history['val_accuracy']

# Visualize accuracy history
# NOTE(review): values are plotted exactly as stored in the history while the
# axis labels say "(%)" — confirm whether a x100 scaling is intended.
_plot_history_curve(2, training_accuracy, test_accuracy, 'r--', 'b-',
                    ['Training Accuracy', 'Test Accuracy'],
                    'Accuracy (%)', model_dir + 'acc.png', best_epoch)

# Get training and test top-3 accuracy histories
training_top3_acc = history.history['top3_acc']
test_top3_acc = history.history['val_top3_acc']

# Visualize top-3 accuracy history
_plot_history_curve(3, training_top3_acc, test_top3_acc, 'b', 'r--',
                    ['Training top3_acc', 'Test top3_acc'],
                    'Top 3 Accuracy (%)', model_dir + 'top3_acc.png', best_epoch)

In [4]:
import functools
from sklearn.metrics import roc_curve, roc_auc_score

# The checkpoint was compiled with a custom metric, so load_model needs the
# same callable registered under the same name.
top3_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=3)
top3_acc.__name__ = 'top3_acc'
dependencies = {'top3_acc': top3_acc}

best_model = load_model(model_dir + 'best_model.h5',
                        custom_objects=dependencies)

# Despite the name, `logits` holds the model's final-layer outputs for each
# validation image, one column per class.
logits = best_model.predict_generator(validation_generator)

# Predicted class = column with the highest score; ground truth comes from the
# generator's integer labels.
y_pred = logits.argmax(axis=1)
y_actual = validation_generator.classes

Instructions for updating:
Colocations handled automatically by placer.


InternalError: Dst tensor is not initialized.
	 [[{{node _arg_Placeholder_24_0_16}}]]

In [None]:
from sklearn.metrics import balanced_accuracy_score, confusion_matrix

# Balanced accuracy of the predictions against the ground truth.
bal_acc = balanced_accuracy_score(y_actual, y_pred)
print(bal_acc)

# Print (class index, count) rows, first for the ground-truth labels and then
# for the predicted labels; classes with a zero count are left out.
for label_array in (y_actual, y_pred):
    y = np.bincount(label_array)
    ii = y.nonzero()[0]
    print(np.column_stack((ii, y[ii])))

# Raw confusion matrix (counts).
matrix = confusion_matrix(y_actual, y_pred)
print(matrix)

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# Convert counts to percentages of each column total.
# NOTE(review): normalising over axis=0 makes every column sum to 100; confirm
# that per-column (rather than per-row) normalisation is intended.
column_totals = matrix.sum(axis=0)

# A column with total 0 contains only zeros. Dividing by 1 instead of 0 keeps
# those cells at 0 % rather than NaN with a runtime warning.
safe_totals = column_totals.copy()
safe_totals[safe_totals == 0] = 1
matrix = matrix / safe_totals * 100

# Label rows and columns with the diagnosis codes.
df_cm = pd.DataFrame(matrix, mapping, mapping)
sn.set(font_scale=1.1) # for label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 12}, cmap="BuGn") # font size
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix (%)')
plt.savefig(model_dir + 'conf_matrix.png')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support as score


precision, recall, fscore, support = score(y_actual, y_pred)

print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# One-vs-rest ROC curve per class. roc_curve only accepts binary labels, so
# calling it once on the 8-class labels (as a binary classifier would) cannot
# work; each class is scored against all others instead.

# The checkpoint was compiled with the custom top-3 metric, so it must be
# supplied again when loading.
best_model = load_model(model_dir + 'best_model.h5',
                        custom_objects={'top3_acc': top3_acc})

# Per-class scores are kept in their own variable so the predicted labels in
# `y_pred` are not overwritten.
class_probs = best_model.predict_generator(validation_generator)
y_actual = validation_generator.classes

plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
for class_index, class_name in enumerate(mapping):
    # 1 where the image belongs to this class, 0 otherwise.
    is_class = (y_actual == class_index).astype(int)
    # A ROC curve is undefined when only one of the two outcomes is present.
    if not is_class.any() or is_class.all():
        continue
    class_scores = class_probs[:, class_index]
    fpr, tpr, thresholds = roc_curve(is_class, class_scores)
    auc = roc_auc_score(is_class, class_scores)
    plt.plot(fpr, tpr, label='{} (AUC = {:.3f})'.format(class_name, auc))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.savefig(model_dir + 'roc_auc.png')