In [1]:
import os
import sys
import numpy as np
import keras.callbacks as cb
import keras.utils.np_utils as np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.layers.core import Activation
from keras import applications # For easy loading the VGG_16 Model
from skimage import color
# Image loading and other helper functions
import dwdii_bc_model_helper as bc

Using Theano backend.


In [9]:
def VGG_Prep(img_data):
    """
    Convert a stack of grayscale images into the 3-channel layout fed to VGG16.

    :param img_data: training or test images of shape [#images, height, width].
                     Values are multiplied by 255, so they are presumably already
                     scaled to [0, 1] by the loader -- TODO confirm against bc.load_data.
    :return: a new float64 array of shape [#images, height, width, 3] in which the
             rescaled grayscale plane is replicated across the three channels.
             The input array is left unmodified.
    """
    # Scale a float64 copy rather than using `*=` on the input: in-place scaling
    # would silently rescale the caller's array (a second call would then scale
    # by 255 twice) and would overflow for integer dtypes.
    # The original ImageNet images were not rescaled, hence the factor of 255.
    scaled = np.asarray(img_data, dtype=np.float64) * 255

    # Gray -> RGB is a plain replication of the single plane along a new last axis.
    return np.repeat(scaled[..., np.newaxis], 3, axis=-1)

In [17]:
def _save_bottleneck_features(model, images, labels, modelPath, splitName):
    """Run `model` over `images` and save the features and `labels` for one data split."""
    features = model.predict(images)

    # Hand np.save the path instead of an open file object so the file is
    # closed as soon as the array is written (the names already end in .npy,
    # so np.save does not alter them).
    np.save(os.path.join(modelPath, 'bottleneck_features_{0}.npy'.format(splitName)), features)
    np.save(os.path.join(modelPath, 'labels_{0}.npy'.format(splitName)), labels)


def vgg16_bottleneck(trainPath, testPath, imagePath, modelPath, size, balance = True, verbose = True, verboseFreq = 50, valPath = 'None'):
    """
    Compute VGG16 (ImageNet weights, no top) bottleneck features and save them to disk.

    For each data split the images are loaded through the `bc` helper module,
    converted with VGG_Prep, pushed through the convolutional part of VGG16 and
    written to `modelPath` as `bottleneck_features_<split>.npy` together with
    `labels_<split>.npy`, where <split> is `train`, `test` or `validation`.

    :param trainPath: path handed to bc.load_training_metadata / bc.load_data for the training split
    :param testPath: same, for the test split
    :param imagePath: image directory handed to bc.load_data
    :param modelPath: directory in which the .npy files are written
    :param size: forwarded to bc.load_data as `imgResize`
    :param balance: forwarded as the second positional argument of
                    bc.load_training_metadata for the train and test splits
    :param verbose: forwarded to bc.load_training_metadata
    :param verboseFreq: forwarded to bc.load_data
    :param valPath: optional validation split; skipped when None or the legacy
                    sentinel string 'None'. Its metadata is always loaded with
                    balanceViaRemoval = False.
    :return: None
    """
    # Loading data (only the length of each metadata object is needed here)
    metaTr, _, _ = bc.load_training_metadata(trainPath, balance, verbose)
    lenTrain = len(metaTr)
    X_train, Y_train = bc.load_data(trainPath, imagePath, maxData = lenTrain, verboseFreq = verboseFreq, imgResize=size)

    metaTest, _, _ = bc.load_training_metadata(testPath, balance, verbose)
    lenTest = len(metaTest)
    # Size the test load by the test metadata; the training length was used here before.
    X_test, Y_test = bc.load_data(testPath, imagePath, maxData = lenTest, verboseFreq = verboseFreq, imgResize=size)

    X_train = VGG_Prep(X_train)
    X_test = VGG_Prep(X_test)

    print('Loading the VGG_16 Model')
    model = applications.VGG16(include_top=False, weights='imagenet')

    # Generating and saving the bottleneck features for the training data
    print('Evaluating the VGG_16 Model on the Training Data')
    _save_bottleneck_features(model, X_train, Y_train, modelPath, 'train')

    # Generating and saving the bottleneck features for the test data
    print('Evaluating the VGG_16 Model on the Test Data')
    _save_bottleneck_features(model, X_test, Y_test, modelPath, 'test')

    # Accept a real None as well as the historical 'None' string default.
    if valPath is not None and valPath != 'None':
        metaVal, _, _ = bc.load_training_metadata(valPath, verbose = verbose, balanceViaRemoval = False)
        lenVal = len(metaVal)
        X_val, Y_val = bc.load_data(valPath, imagePath, maxData = lenVal, verboseFreq = verboseFreq, imgResize=size)
        X_val = VGG_Prep(X_val)

        # Generating and saving the bottleneck features for the validation data
        print('Evaluating the VGG_16 Model on the Validataion Data')
        _save_bottleneck_features(model, X_val, Y_val, modelPath, 'validation')

In [None]:
# Notebook-wide settings for locating the DDSM data and the model directory.

# CSV files describing the train / test / validation splits
trainDataPath = '../images/ddsm/ddsm_train.csv'
valDataPath = '../images/ddsm/ddsm_val.csv'
testDataPath = '../images/ddsm/ddsm_test.csv'

# Directory holding the PNG images referenced by the CSV files
imagePath = '../images/ddsm/png/'

# Directory used for the saved bottleneck features and labels
modelPath = '../model/'

# Size every image is resized to on load; can go up to (224, 224)
imgResize = (150, 150)

In [18]:
# Extract the VGG16 bottleneck features for the train, test and validation
# splits and write them (with their labels) into modelPath.
vgg16_bottleneck(
    trainPath=trainDataPath,
    testPath=testDataPath,
    imagePath=imagePath,
    modelPath=modelPath,
    size=imgResize,
    balance=True,
    verbose=True,
    verboseFreq=50,
    valPath=valDataPath,
)

Raw Balance
----------------
benign 531
malignant 739
normal 2685
balanaceViaRemoval.avgE: 1318
balanaceViaRemoval.theshold: 1318.0

After Balancing
----------------
benign 531
malignant 739
normal 862
Raw Balance
----------------
benign 531
malignant 739
normal 2685
balanaceViaRemoval.avgE: 1318
balanaceViaRemoval.theshold: 1318.0

After Balancing
----------------
benign 531
malignant 739
normal 862
0.0000: A_0152_1.RIGHT_MLO.LJPEG.png
0.0235: C_0112_1.LEFT_CC.LJPEG.png
0.0469: C_0218_1.LEFT_MLO.LJPEG.png
0.0704: B_3478_1.RIGHT_CC.LJPEG.png
0.0938: A_0437_1.RIGHT_MLO.LJPEG.png
0.1173: C_0301_1.RIGHT_MLO.LJPEG.png
0.1407: A_1078_1.LEFT_MLO.LJPEG.png
0.1642: B_3096_1.RIGHT_MLO.LJPEG.png
0.1876: A_0114_1.RIGHT_CC.LJPEG.png
0.2111: A_1060_1.RIGHT_CC.LJPEG.png
0.2345: A_1020_1.LEFT_CC.LJPEG.png
0.2580: C_0397_1.LEFT_MLO.LJPEG.png
0.2814: A_1017_1.LEFT_MLO.LJPEG.png
0.3049: C_0238_1.LEFT_MLO.LJPEG.png
0.3283: A_0472_1.RIGHT_CC.LJPEG.png
0.3518: B_3107_1.LEFT_CC.LJPEG.png
0.3752: C_0169_1.RI

In [19]:
class LossHistory(cb.Callback):
    """Keras callback that records the loss reported at the end of every batch.

    After training, `self.losses` holds one entry per batch, in the order the
    batches were processed. The list is reset each time training begins.
    """

    def on_train_begin(self, logs=None):
        # Start from an empty list so a reused callback does not mix runs.
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # `logs` defaults to None instead of a shared mutable dict; fall back
        # to an empty mapping so `.get` is always safe.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

In [43]:
def train_top_model(train_feats, train_lab, test_feats, test_lab, model_path, model_save, epoch = 50, batch = 64, num_classes = 3):
    """
    Train a small fully connected classifier on saved bottleneck features.

    The network is Flatten -> Dense(256) -> ReLU -> Dropout(0.3) ->
    Dense(num_classes) -> softmax, compiled with the 'adadelta' optimizer and
    categorical cross-entropy. The test split is used both as validation data
    during fitting and for the final score that is printed.

    :param train_feats: file name (inside model_path) of the training features .npy
    :param train_lab: file name (inside model_path) of the training labels .npy
    :param test_feats: file name (inside model_path) of the test features .npy
    :param test_lab: file name (inside model_path) of the test labels .npy
    :param model_path: directory containing the four .npy files
    :param model_save: path the trained weights are written to; it is used
                       exactly as given and is NOT joined with model_path
    :param epoch: number of training epochs
    :param batch: training batch size
    :param num_classes: number of target classes (one-hot width and output units)
    :return: None
    """
    history = LossHistory()

    # The class count is passed positionally because the keyword was renamed
    # between Keras releases (nb_classes -> num_classes).
    X_train = np.load(os.path.join(model_path, train_feats))
    Y_train = np_utils.to_categorical(np.load(os.path.join(model_path, train_lab)), num_classes)

    X_test = np.load(os.path.join(model_path, test_feats))
    Y_test = np_utils.to_categorical(np.load(os.path.join(model_path, test_lab)), num_classes)

    model = Sequential()
    model.add(Flatten(input_shape=X_train.shape[1:]))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    # try Adadelta and Adam
    model.compile(optimizer='adadelta',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train, Y_train,
              nb_epoch=epoch,
              batch_size=batch,
              callbacks=[history],
              validation_data=(X_test, Y_test),
              verbose=2)

    score = model.evaluate(X_test, Y_test, batch_size=16, verbose=0)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Network's test score [loss, accuracy]: {0}".format(score))

    model.save_weights(model_save)

In [23]:
# Locations for the bottleneck and labels files that we need.
# The file names must match the ones written by vgg16_bottleneck into modelPath.
modelPath = '../model/'
train_bottleneck = 'bottleneck_features_train.npy'
train_labels = 'labels_train.npy'
test_bottleneck = 'bottleneck_features_test.npy'
test_labels = 'labels_test.npy'
# Spelled 'validation' to match the file vgg16_bottleneck actually saves.
validation_bottleneck = 'bottleneck_features_validation.npy'
validation_label = 'labels_validation.npy'
# File the trained top-model weights are saved to
top_model_weights_path = 'top_weights01.h5'

In [44]:
# Fit the dense top model on the saved train features, validating on the test split.
train_top_model(
    train_bottleneck,
    train_labels,
    test_bottleneck,
    test_labels,
    model_path=modelPath,
    model_save=top_model_weights_path,
)

Train on 2132 samples, validate on 536 samples
Epoch 1/50
0s - loss: 4.5813 - acc: 0.4592 - val_loss: 2.1936 - val_acc: 0.5093
Epoch 2/50
0s - loss: 1.1091 - acc: 0.6102 - val_loss: 1.1547 - val_acc: 0.5392
Epoch 3/50
1s - loss: 0.6859 - acc: 0.7205 - val_loss: 1.1288 - val_acc: 0.5373
Epoch 4/50
1s - loss: 0.5272 - acc: 0.7880 - val_loss: 1.3420 - val_acc: 0.5504
Epoch 5/50
0s - loss: 0.4266 - acc: 0.8288 - val_loss: 1.2831 - val_acc: 0.5429
Epoch 6/50
0s - loss: 0.3469 - acc: 0.8565 - val_loss: 1.3129 - val_acc: 0.5504
Epoch 7/50
0s - loss: 0.2816 - acc: 0.8921 - val_loss: 1.3801 - val_acc: 0.5653
Epoch 8/50
0s - loss: 0.2236 - acc: 0.9264 - val_loss: 1.4412 - val_acc: 0.5653
Epoch 9/50
0s - loss: 0.1882 - acc: 0.9390 - val_loss: 1.4813 - val_acc: 0.5765
Epoch 10/50
0s - loss: 0.1466 - acc: 0.9536 - val_loss: 1.5087 - val_acc: 0.5485
Epoch 11/50
0s - loss: 0.1233 - acc: 0.9629 - val_loss: 1.6294 - val_acc: 0.5597
Epoch 12/50
0s - loss: 0.1039 - acc: 0.9676 - val_loss: 1.7408 - val_ac