In [1]:
import numpy as np
from keras.models import Sequential
from keras import applications
from keras import optimizers
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Lambda
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle
import csv
import cv2
import scipy
import os

Using TensorFlow backend.


In [11]:
num_classes = 4
epochs = 20
# BASE_PATH = '/home/ec2-user/cell_classifier/'
BASE_DIR = '../'
batch_size = 32

In [4]:
def get_model():
    model = Sequential()
    model.add(Lambda(lambda x: x/127.5 - 1., input_shape=(120, 160, 3), output_shape=(120, 160, 3)))
    model.add(Conv2D(32, (3, 3), input_shape=(120, 160, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.7))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])

    return model

def top_model(input_shape):
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [9]:
def get_data(folder):
    X = []
    y = []

    for wbc_type in os.listdir(folder):
        if not wbc_type.startswith('.'):
#             if wbc_type in ['NEUTROPHIL', 'EOSINOPHIL']:
#                 label = 'MONONUCLEAR'
#             else:
#                 label = 'POLYNUCLEAR'
            for image_filename in os.listdir(folder + wbc_type):
                img_file = cv2.imread(folder + wbc_type + '/' + image_filename)
                img_file = scipy.misc.imresize(arr=img_file, size=(120, 160, 3))
                if img_file is not None:
                    img_arr = np.asarray(img_file)
                    X.append(img_arr)
                    y.append(wbc_type)
    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [10]:
X_train, y_train = get_data(BASE_DIR + 'images/TRAIN/')
X_test, y_test = get_data(BASE_DIR + 'images/TEST/')
X_test_simple, y_test_simple = get_data(BASE_DIR + 'images/TEST_SIMPLE/')


X_train = X_train * 1./255.
X_test = X_test * 1./255.
X_test_simple = X_test_simple * 1./255.

In [12]:
encoder = LabelEncoder()
encoder.fit(y_test_simple)
y_train = np_utils.to_categorical(encoder.transform(y_train))
y_test = np_utils.to_categorical(encoder.transform(y_test))
y_test_simple = np_utils.to_categorical(encoder.transform(y_test_simple))

In [19]:
from keras.models import Model
from keras.layers import Input
from keras import optimizers


base_model = applications.VGG16(include_top=False, weights='imagenet')
input = Input(shape=(120, 160, 3),name = 'image_input')
vgg_output = base_model(input)

top_model = Flatten()(vgg_output)
top_model = Dense(64, activation='relu')(top_model)
predictions = Dense(num_classes, activation='softmax', name='prediction_layer')(top_model)

model = Model(input=input, output=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
layers = base_model.layers[:-2]
for layer in layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])



In [14]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     (None, 120, 160, 3)       0         
_________________________________________________________________
vgg16 (Model)                multiple                  14714688  
_________________________________________________________________
flatten_2 (Flatten)          (None, 7680)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                491584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
prediction_layer (Dense)     (None, 4)                 260       
Total params: 15,206,532.0
Trainable params: 2,851,652.0
Non-trainable params: 12,354,880.0
__________________________________________________

array([[[[ 0.79215686,  0.77254902,  0.77254902],
         [ 0.77647059,  0.77647059,  0.77647059],
         [ 0.75686275,  0.78431373,  0.78039216],
         ..., 
         [ 0.        ,  0.        ,  0.00392157],
         [ 0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ]],

        [[ 0.78039216,  0.78039216,  0.77254902],
         [ 0.77254902,  0.78431373,  0.77647059],
         [ 0.76078431,  0.78823529,  0.78431373],
         ..., 
         [ 0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ]],

        [[ 0.75686275,  0.79215686,  0.76862745],
         [ 0.76470588,  0.8       ,  0.78039216],
         [ 0.76862745,  0.8       ,  0.79215686],
         ..., 
         [ 0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.00392157],
         [ 0.        ,  0.        ,  0.00392157]],

        ..., 
        [[ 0.        ,  0.        ,

In [20]:
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, shuffle=True, verbose=1)

Train on 7965 samples, validate on 1992 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
1344/7965 [====>.........................] - ETA: 54s - loss: 11.1412 - acc: 0.3088

KeyboardInterrupt: 

In [24]:
model.save_weights('vgg_top.h5')

# batch_size = 16


# fine-tune the model
model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        epochs=epochs
)

In [7]:
model.load_weights('vgg_top.h5')

In [8]:
from sklearn.metrics import accuracy_score

print('Predicting on test data')
y_pred = np.rint(model.predict(X_test_simple))

print(accuracy_score(y_test_simple, y_pred))

Predicting on test data
0.943661971831


In [26]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     (None, 120, 160, 3)       0         
_________________________________________________________________
vgg16 (Model)                multiple                  14714688  
_________________________________________________________________
flatten_4 (Flatten)          (None, 7680)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                491584    
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
_________________________________________________________________
prediction_layer (Dense)     (None, 1)                 65        
Total params: 15,206,337.0
Trainable params: 2,851,457.0
Non-trainable params: 12,354,880.0
__________________________________________________

In [37]:
print(base_model.layers[-2].name)

block5_conv3
