In [1]:
## Data Preprocessing

import glob
from PIL import Image
import numpy as np

img_rows = 96
img_cols = 96
nb_classes = 22 #23

# Generate X and y for Convolutional Neural Network
X = []
y = []

paths = glob.glob('./cropped_cells/*.jpg')
np.random.shuffle(paths) #randomly shuffle data set

counter = 0

# Get all the images and convert them into greyscale
for path in paths:
    im = Image.open(path)
    grey_im = im.convert('L')
    # Convert them into numpy
    label = [path.split('_')[1].split('/')[1]] # extract the label from path
    # balance the dataset
    if label == ['interphase'] and counter < 800:
        #counter += 1
        pass
    else:
    y += [path.split('_')[1].split('/')[1]] # extract the label from path
    X += [np.array(grey_im)]
    
X = np.array(X)
print X.shape
y = np.array(y)

X_train = X # stupid, taking full dataset

# filtered interphase numbers
f = filter(lambda x: x == 'interphase', y)
len(f)

(2283, 96, 96)


977

In [2]:
# Normalize 

X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
#X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
X_train /= 255 #Normalize
#X_test /= 255
print(X_train.shape[0], 'train samples')
#print(X_test.shape[0], 'test samples')

(2283, 'train samples')


In [3]:
# Convert y into Y

from keras.utils import np_utils, generic_utils

# Convert labels to numeric
y_unique = np.unique(y)
dic = {}

for i, label in enumerate(y_unique):
    dic[label] = i
print dic

y_numeric = []
for el in y:
    y_numeric += [dic[el]]
    
y_numeric # now a 2000 label vector
Y = np_utils.to_categorical(y_numeric, nb_classes)

print Y.shape

Y_train = Y

{'halfcircle': 7, 'interphase': 10, 'metaphase': 15, 'earlyprophase': 4, 'debris': 3, 'nucleolirim': 19, 'kidney': 11, 'telophase': 22, 'blurry': 2, 'fragmented': 6, 'apoptotic': 1, 'latepro': 12, 'multinucleate': 18, 'prophase': 20, 'anaphase': 0, 'indented': 9, 'monopole': 17, 'holey': 8, 'latetelophase': 13, 'lines': 14, 'micronucleus': 16, 'round': 21, 'elongated': 5}
(2283, 23)


In [None]:
from sklearn.cross_validation import StratifiedKFold



In [4]:
## Convolutional Neural Network with 2 convolutions

from keras.models import Sequential
from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD, Adadelta, Adagrad, Adam



batch_size = 128
nb_epoch = 12
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3
# the data images are greyscale
img_channels = 1

Using Theano backend.
Couldn't import dot_parser, loading of dot files will not be possible.

Using gpu device 0: GRID K520 (CNMeM is disabled)





In [None]:
model = Sequential()

model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu',border_mode='valid', input_shape=(img_channels, img_rows, img_cols)))
model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))
model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Convolution2D(nb_filters, nb_conv, nb_conv, activation='relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

In [None]:
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer='sgd')

In [None]:
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,validation_split=0.3, show_accuracy=True)

In [5]:
## VGG Net inspired
model = Sequential()

model.add(Convolution2D(32, 3, 3, border_mode='same',
                        input_shape=(img_channels, img_rows, img_cols)))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# let's train the model using SGD + momentum (how original).
# sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# model.compile(loss='categorical_crossentropy', optimizer=sgd)

In [6]:
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

datagen.fit(X_train)

In [None]:
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=32),
                    samples_per_epoch=len(X_train) / 10, nb_epoch=1000, show_accuracy=True)

In [None]:
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=1000,validation_split=0.3, show_accuracy=True)

In [7]:
# let's train the model using SGD + momentum (how original).
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(loss='categorical_crossentropy', optimizer=adam)

In [9]:
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=1000,validation_split=0.5, show_accuracy=True)

Train on 1141 samples, validate on 1142 samples
Epoch 1/1000
Epoch 2/1000

KeyboardInterrupt: 