In [1]:
import numpy as np
import os
import cv2
import math
import matplotlib.pyplot as plt
import sys

def import_images(path, img_names):
    """Read the named image files from ``path`` and return them resized.

    Args:
        path: Directory that contains the image files.
        img_names: Iterable of file names relative to ``path``.

    Returns:
        ``np.ndarray`` built from ``resize_all`` applied to the loaded
        images, in the same order as ``img_names``.

    Raises:
        OSError: If ``cv2.imread`` could not read one of the files.
    """
    imgs = []
    for img_name in img_names:
        img_file = os.path.join(path, img_name)
        img = cv2.imread(img_file, 1)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of a cryptic resize error.
        if img is None:
            raise OSError("could not read image: " + str(img_file))
        imgs.append(img)
    return np.array(resize_all(imgs))

def import_target(path):
    """Parse the ground-truth CSV at ``path`` into an integer label matrix.

    The first line (header) and the first column of every row are skipped;
    every remaining field is converted with ``int``. Blank lines are ignored,
    so the result is the same whether or not the file ends with a newline.

    Args:
        path: Path of the CSV file.

    Returns:
        ``np.ndarray`` with one row per data line.

    Raises:
        ValueError: If a label field cannot be parsed by ``int``.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as f:
        lines = f.read().split('\n')
    # lines[1:] drops the header; filtering blanks replaces the fixed "[1:-1]"
    # slice, which lost the last data row when there was no trailing newline.
    rows = [line.split(',') for line in lines[1:] if line.strip()]
    return np.array([[int(tk) for tk in row[1:]] for row in rows])

def resize_all(X):
    """Resize every image in ``X`` with ``cv2.resize`` and stack the results.

    Each image is resized with ``dsize=(200, 150)``; the resized images are
    returned as a single ``np.ndarray`` in the original order.
    """
    resized = []
    for image in X:
        resized.append(cv2.resize(image, dsize=(200, 150)))
    return np.array(resized)

def data_generator(path, total, img_names, targets, batch_size):
    """Yield ``(X, Y)`` mini-batches forever, visiting samples in random order.

    Every pass covers each of the ``total`` samples exactly once; the final
    batch of a pass may be shorter than ``batch_size``. A fresh permutation
    is drawn at the start of every pass.

    Args:
        path: Directory that contains the image files.
        total: Number of samples to draw indices from.
        img_names: Array of file names, indexable by an integer index array.
        targets: Array of labels aligned with ``img_names``.
        batch_size: Maximum number of samples per yielded batch.

    Yields:
        Tuple ``(X, Y)``: images loaded through ``import_images`` and the
        matching rows of ``targets``.

    Raises:
        ValueError: If ``total`` or ``batch_size`` is not positive (raised on
            the first ``next()`` call, since this is a generator).
    """
    if total <= 0 or batch_size <= 0:
        raise ValueError("total and batch_size must be positive")
    order = np.random.permutation(total)
    start = 0
    while True:
        batch = order[start:start + batch_size]
        # import_images is the image loader defined in this notebook.
        X = import_images(path, img_names[batch])
        Y = targets[batch]
        yield X, Y
        start += batch_size
        if start >= total:
            # Start the next pass at index 0; a modulo wrap would begin later
            # passes at a shifted offset whenever total % batch_size != 0.
            order = np.random.permutation(total)
            start = 0

def class_indices(Y, j):
    """Return the row positions of ``Y`` whose ``j``-th entry equals 1."""
    matches = []
    for row in range(len(Y)):
        if Y[row][j] == 1:
            matches.append(row)
    return matches

def loading_text(text):
    """Write ``text`` followed by a carriage return to stdout and flush it.

    The trailing ``\\r`` (instead of a newline) returns the cursor to the
    start of the line, so the next call overwrites the previous message.
    """
    out = sys.stdout
    out.write('%s\r' % (text,))
    out.flush()

In [2]:
# Dataset locations, joined with os.path.join so the notebook is not tied to
# the Windows path separator.
img_path = os.path.join("data", "ISIC2018_Task3_Training_Input")
target_path = os.path.join("data", "ISIC_2018_Training_GroundTruth.csv")

# os.listdir returns entries in arbitrary order; sort so that image i lines up
# with label row i.
# NOTE(review): this assumes the CSV rows are ordered by image file name and
# that the folder contains only the image files — confirm.
img_names, Y_all = np.array(sorted(os.listdir(img_path))), import_target(target_path)
assert len(img_names) == len(Y_all), "number of images does not match number of label rows"

In [10]:
# Column-wise sum of the label matrix (one total per label column).
# NOTE(review): the saved output of this cell holds floats that match
# len(Y_all) / counts rather than these sums, so it appears to be stale.
counts = Y_all.sum(axis=0)
counts

array([ 8.99820305,  1.49366145, 19.4844358 , 30.62691131,  9.11282985,
       87.08695652, 70.52816901])

In [6]:
from keras.preprocessing.image import ImageDataGenerator

# Augmentation settings. `datagen` is only referenced by the commented-out
# class-balancing cell further down.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

Using TensorFlow backend.


TODO: Consider augmenting the under-represented classes, but first check how well training works on the unbalanced data.

In [None]:
# X, Y = np.zeros(shape=(7000, 150, 200, 3)), np.zeros(shape=(7000, 7))

In [16]:
# Disabled experiment: build a class-balanced set with up to 1000 samples per
# class, topping up classes that have fewer by drawing augmented samples from
# `datagen` until the 1000-sample slot of that class is full.
# NOTE(review): `path` is not defined in the visible cells (likely `img_path`),
# and X / Y are only allocated in a commented-out cell — fix both before
# re-enabling this code.
# for j in range(0, 7):
#     inds = class_indices(Y_all, j)[:1000]
#     X_c = import_images(path, img_names[inds])
#     Y_c = Y_all[inds]
#     i = len(inds)
#     X[1000*j:1000*j+i] = X_c
#     Y[1000*j:1000*j+i] = Y_c   
#     if i < 1000:  
#         for x_new, y_new in datagen.flow(X_c, Y_c, batch_size=1):
#             X[1000*j+i] = x_new
#             Y[1000*j+i] = y_new
#             i += 1
#             if i == 1000:
#                 break

In [None]:
# print(np.sum(Y, axis=0))
# print(X.shape, Y.shape)

In [7]:
from keras.applications import VGG16

# VGG16 with ImageNet weights and without its top layers
# (include_top=False); used below as a fixed feature extractor.
# input_shape corresponds to the dsize=(200, 150) used in resize_all.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 200, 3),
)
#conv_base.summary()

Instructions for updating:
Colocations handled automatically by placer.


In [11]:
def load_features(img_names, batch_size, conv_base, path=None):
    """Run ``conv_base`` over the named images and return flattened features.

    Images are loaded in chunks of ``batch_size`` through ``import_images`` so
    the whole image set never has to be held in memory at once.

    Args:
        img_names: Sliceable sequence of image file names.
        batch_size: Number of images loaded and predicted per step.
        conv_base: Model exposing ``layers[-1].output_shape`` and ``predict``.
        path: Directory containing the images. Defaults to the notebook-level
            ``img_path`` when omitted.

    Returns:
        ``np.ndarray`` of shape ``(len(img_names), prod(output_shape))``.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # A non-positive batch size would never advance through the images.
        raise ValueError("batch_size must be a positive integer")
    source_dir = img_path if path is None else path

    total = len(img_names)
    # Per-sample output shape of the last layer (leading batch axis dropped).
    output_shape = conv_base.layers[-1].output_shape[1:]
    features = np.zeros(shape=(total,) + output_shape)
    for start in range(0, total, batch_size):
        stop = start + batch_size
        loading_text(str(start) + "/" + str(total))
        # NOTE(review): images are fed to conv_base exactly as import_images
        # returns them; no model-specific preprocessing is applied — confirm
        # this is intended.
        inputs_batch = import_images(source_dir, img_names[start:stop])
        features[start:stop] = conv_base.predict(inputs_batch)
    return features.reshape(total, int(np.prod(output_shape)))

In [12]:
# Extract conv_base features for every image, 350 images per step.
X_f = load_features(img_names, batch_size=350, conv_base=conv_base)

In [4]:
import pickle

# To (re)create the cache from a freshly computed X_f:
# with open('10015_VGG.pkl', 'wb') as file:
#     pickle.dump(X_f, file)

# Load the cached features. The context manager closes the file even if
# unpickling fails.
# NOTE: pickle.load can execute arbitrary code — only load cache files that
# were produced by this notebook.
with open('10015_VGG.pkl', 'rb') as file:
    X_f = pickle.load(file)

In [5]:
from sklearn.model_selection import train_test_split

# 80/20 split stratified on the label matrix. random_state pins the shuffle
# so the split, and every metric computed from it, is reproducible across
# kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(X_f, Y_all,
                                                stratify=Y_all,
                                                test_size=0.2,
                                                random_state=2)

In [6]:
# Shapes of the four splits, in the order train/test features, train/test labels.
tuple(split.shape for split in (X_train, X_test, Y_train, Y_test))

((8012, 12288), (2003, 12288), (8012, 7), (2003, 7))

In [7]:
# Per-column label totals of the train and test splits.
Y_train.sum(axis=0), Y_test.sum(axis=0)

(array([ 890, 5364,  411,  262,  879,   92,  114]),
 array([ 223, 1341,  103,   65,  220,   23,   28]))

In [21]:
from keras import models
from keras import layers
from keras import optimizers
from tensorflow import set_random_seed

# Inverse-frequency weight per label column (number of samples / column total).
# Only referenced by the commented-out class_weight fit call below.
weights = {i:len(Y_all)/(counts[i]) for i in range(len(counts))}
# Training histories and fitted models, one entry per loop iteration;
# `ms` is read again by the evaluation and save cells.
histories = []
ms = []

# The loop currently runs a single iteration.
# NOTE(review): both seeds are reset to the same value at the top of every
# iteration, so raising the iteration count would presumably retrain the same
# model — confirm before using this as an ensemble.
for i in range(1):
    # Seed NumPy and TensorFlow before the model is built.
    np.random.seed(2)
    set_random_seed(2)
    # Classifier on the flattened features: one 512-unit sigmoid layer,
    # dropout of 0.2, then a 7-way softmax output.
    model = models.Sequential()
    model.add(layers.Dense(512, activation='sigmoid', input_dim=X_train.shape[1]))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy']) # adam optimizer?

    # X_test / Y_test serve as validation data here and are also the data the
    # classification report cell scores on.
    history = model.fit(X_train, Y_train, epochs=40, batch_size=20, validation_data=(X_test,Y_test))
    # history = model.fit(X_f, Y_all, epochs=40, batch_size=20, class_weight=weights)
    histories.append(history)
    ms.append(model)
# history = model.fit(X_train, Y_train, epochs=30, batch_size=64, validation_data=(X_test, Y_test))

Train on 8012 samples, validate on 2003 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
# acc = history.history['acc']
# val_acc = history.history['val_acc']
# loss = history.history['loss']
# val_loss = history.history['val_loss']
# epochs = range(1, len(acc) + 1)
# plt.plot(epochs, acc, 'bo', label='Training acc')
# plt.plot(epochs, val_acc, 'b', label='Validation acc')
# plt.title('Training and validation accuracy')
# plt.legend()
# plt.figure()
# plt.plot(epochs, loss, 'bo', label='Training loss')
# plt.plot(epochs, val_loss, 'b', label='Validation loss')
# plt.title('Training and validation loss')
# plt.legend()
# plt.show()

In [22]:
from sklearn.metrics import classification_report

# Print a classification report on the test split for every trained model.
for i, model in enumerate(ms):
    print("Model #%d" % i)
    a = model.predict(X_test)
    # Turn each row of predicted scores into a one-hot row marking its arg-max
    # class, so the predictions have the same layout as Y_test.
    b = np.eye(a.shape[1], dtype=a.dtype)[a.argmax(1)]
    print(classification_report(Y_test, b))

Model #0
              precision    recall  f1-score   support

           0       0.61      0.49      0.54       223
           1       0.86      0.94      0.90      1341
           2       0.66      0.60      0.63       103
           3       0.45      0.49      0.47        65
           4       0.60      0.45      0.51       220
           5       0.36      0.17      0.24        23
           6       0.86      0.68      0.76        28

   micro avg       0.79      0.79      0.79      2003
   macro avg       0.63      0.55      0.58      2003
weighted avg       0.78      0.79      0.78      2003
 samples avg       0.79      0.79      0.79      2003



In [24]:
# Join the path with os.path.join so it is not tied to the Windows separator,
# and create the output folder first so saving does not depend on it already
# existing.
os.makedirs("models", exist_ok=True)
ms[0].save(os.path.join("models", "vgg16_sigmoid_10015.h5"))