In [1]:
from __future__ import print_function

import lasagne
import theano
import theano.tensor as T
import os
import time
import plac
import numpy as np
import pandas as pd
import pickle
import hickle
import sklearn.cross_validation
from sklearn.preprocessing import OneHotEncoder
import skimage.transform
import matplotlib.pyplot as plt

  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [2]:
# --- Filesystem layout -------------------------------------------------------
# Everything is resolved against the current working directory of the kernel.
CW_DIR = os.getcwd()
DATA_DIR = '{}/data/'.format(CW_DIR)      # keeps the trailing slash
OUTPUT_DIR = '{}/output/'.format(CW_DIR)  # keeps the trailing slash

# --- Reproducibility ---------------------------------------------------------
RANDOM_SEED = 42

# --- Data description --------------------------------------------------------
NUM_CLASSES = 10    # number of distinct class ids
IMAGE_W = 70        # image width in pixels
NUM_CHANNELS = 3    # colour channels per image

# --- Training hyperparameters ------------------------------------------------
NUM_EPOCHS = 250
BATCH_SIZE = 32
LEARNING_RATE = 1e-4
MOMENTUM = 0.9

# Seed NumPy's global random number generator.
np.random.seed(RANDOM_SEED)

In [3]:
def get_data_sampler(dataset, train):
    """Load a labelled image dataset and return one side of a stratified split.

    Reads the hickle file ``<DATA_DIR>/<dataset>.hkl``, maps the string labels
    stored under ``'y'`` to integer class ids, and draws a single stratified
    shuffle split (``test_size=0.2``, seeded with ``RANDOM_SEED``). Because the
    seed is fixed, calling this twice with ``train=True`` and ``train=False``
    yields complementary subsets of the same file.

    Parameters
    ----------
    dataset : str
        Base name (without the ``.hkl`` extension) of the file in ``DATA_DIR``.
    train : bool
        Truthy -> return the training side of the split; falsy -> return the
        held-out side.

    Returns
    -------
    images : numpy.ndarray
        The entries of ``d['X']`` selected by the split, stacked with
        ``np.array``. (Presumably one image per entry -- TODO confirm layout.)
    labels : numpy.ndarray
        Integer class ids, aligned with ``images``.

    Raises
    ------
    KeyError
        If the file contains a label that is not a key of ``GROUPS``.
    """
    # Class name -> integer id.
    GROUPS = {
        'NORMAL': 0,
        'Echinocyte': 1,
        'Dacrocyte': 2,
        'Schistocyte': 3,
        'Elliptocyte': 4,
        'Acanthocyte': 5,
        'Target cell': 6,
        'Stomatocyte': 7,
        'Spherocyte': 8,
        'Overlap' : 9
    }

    # DATA_DIR already ends with a separator; os.path.join avoids the doubled
    # '/' that plain string formatting produced.
    path = os.path.join(DATA_DIR, '{}.hkl'.format(dataset))
    d = hickle.load(path)

    # zip() pairs labels with images and stops at the shorter of the two,
    # exactly like the original element-by-element loop did.
    pairs = list(zip(d['y'], d['X']))
    labels = np.array([GROUPS[y] for y, _ in pairs])
    images = np.array([x for _, x in pairs])

    # NOTE(review): sklearn.cross_validation is the legacy scikit-learn API
    # (deprecated in 0.18, removed in 0.20). Newer versions need
    # sklearn.model_selection.StratifiedShuffleSplit, whose signature differs.
    splitter = sklearn.cross_validation.StratifiedShuffleSplit(
        labels,
        n_iter=1,
        test_size=0.2,
        random_state=RANDOM_SEED,
    )
    # n_iter=1, so the first (and only) pair of index arrays is the split.
    train_ix, test_ix = next(iter(splitter))
    ix = train_ix if train else test_ix

    return images[ix], labels[ix]

In [4]:
# Fetch the training side (train=True) of the stratified split for this dataset.
train_images, train_labels = get_data_sampler(
    dataset="September_1_total_non_overlap",
    train=True,
)

In [5]:
# Move the channel axis from last to second position, i.e. (N, H, W, C) ->
# (N, C, H, W) (input layout assumed from the transpose order), to match the
# network's input_shape=(3, 70, 70).
# The guard makes this cell safe to re-run: without it a second execution
# would transpose the already-reordered array again and scramble the axes.
if train_images.shape[-1] == NUM_CHANNELS:
    train_images = train_images.transpose((0, 3, 1, 2))
# Fail early (instead of deep inside model.fit) if the layout is not what the
# network expects. Replaces a bare `train_images.shape` expression that was
# never displayed because it was not the last line of the cell.
assert train_images.shape[1:] == (NUM_CHANNELS, IMAGE_W, IMAGE_W), train_images.shape

# One-hot encode the integer class ids; OneHotEncoder wants a 2-D column, hence
# the [:, None]. The result is sparse, so densify it for Keras.
enc = OneHotEncoder()
train_labels2 = enc.fit_transform(train_labels[:, None]).toarray()

# Show the encoding of the first sample as a quick sanity check.
train_labels2[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])

In [6]:
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras import backend as K

Using Theano backend.


In [10]:
# Use Theano-style channels-first tensors: each input image is a
# (NUM_CHANNELS, IMAGE_W, IMAGE_W) == (3, 70, 70) array.
K.set_image_dim_ordering('th')


def _add_conv_stage(net, nb_filter, **first_conv_kwargs):
    """Append one convolutional stage to ``net`` in place.

    A stage is three 3x3 'valid' convolutions, each followed by ReLU, then a
    3x3 max-pool with stride 2 and 25% dropout.

    Parameters
    ----------
    net : keras.models.Sequential
        Model being built; layers are appended to it.
    nb_filter : int
        Number of filters used by every convolution in the stage.
    **first_conv_kwargs
        Extra keyword arguments for the first convolution only; used to pass
        ``input_shape`` for the very first layer of the network.
    """
    net.add(Convolution2D(nb_filter, 3, 3, border_mode='valid', **first_conv_kwargs))
    net.add(Activation('relu'))
    for _ in range(2):
        net.add(Convolution2D(nb_filter, 3, 3))
        net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    net.add(Dropout(0.25))


model = Sequential()
# Only the first layer of a Sequential model needs input_shape; the original
# cell repeated it on the 64- and 128-filter stages, where it had no effect.
_add_conv_stage(model, 32, input_shape=(NUM_CHANNELS, IMAGE_W, IMAGE_W))
_add_conv_stage(model, 64)
_add_conv_stage(model, 128)

# Classifier head: flatten the feature maps and map straight to class scores.
model.add(Flatten())
model.add(Dense(NUM_CLASSES))
model.add(Activation('softmax'))

sgd = SGD(lr=LEARNING_RATE, decay=1e-6, momentum=MOMENTUM, nesterov=True)

# Early stopping is configured but intentionally NOT passed to fit(), so
# training always runs the full number of epochs. To enable it, pass
# callbacks=[earlyStopping] to model.fit.
earlyStopping = EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')

model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy', 'categorical_accuracy', 'mean_squared_error'],
)

# validation_split=0.1 holds out 10% of the samples passed in for validation.
# NOTE: this run uses 30 epochs, not the NUM_EPOCHS value from the config cell.
model.fit(
    train_images,
    train_labels2,
    batch_size=BATCH_SIZE,
    nb_epoch=30,
    validation_split=0.1,
)

Train on 2690 samples, validate on 299 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fc000d92e10>

In [18]:
# Predicted class id for every training image. Note that these are the same
# images the model was fitted on (including the 10% used for validation), so
# this table measures fit to the training data, not generalisation.
y_hat = model.predict_classes(train_images)

# Cross-tabulation: rows = predicted class (y_hat), columns = true class.
pd.crosstab(index=y_hat, columns=train_labels)



col_0,0,1,2,3,4,5,6,7,8,9
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,759,203,41,28,4,6,4,45,4,19
1,2,15,1,0,0,0,0,1,0,2
2,0,0,1,0,0,0,0,0,0,0
3,26,11,15,515,24,59,8,9,40,6
4,1,1,1,6,5,1,0,3,0,0
5,0,0,0,5,0,16,1,0,0,2
6,18,14,7,36,40,6,568,22,0,18
7,0,0,0,2,0,2,0,1,0,2
8,2,6,5,14,0,39,0,2,143,17
9,1,1,0,1,0,4,0,5,5,118
