In [2]:
from __future__ import print_function

import lasagne
import theano
import theano.tensor as T
import os
import time
import plac
import numpy as np
import pandas as pd
import pickle
import hickle
import sklearn.cross_validation
from sklearn.preprocessing import OneHotEncoder
import skimage.transform
import matplotlib.pyplot as plt

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5103)
  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [30]:
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD, Adam
from keras.models import load_model
from keras import backend as K
from keras.utils.visualize_util import plot

In [13]:
# --- Paths: everything is resolved against the directory the kernel runs in.
CW_DIR = os.getcwd()
DATA_DIR = '{}/data/'.format(CW_DIR)
OUTPUT_DIR = '{}/output/'.format(CW_DIR)

# --- Reproducibility: seed NumPy's global RNG once, up front.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# --- Dataset constants.
NUM_CLASSES = 10
IMAGE_W = 70
NUM_CHANNELS = 3

# --- Training hyper-parameters.
NUM_EPOCHS = 250
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
MOMENTUM = 0.9

In [14]:
def get_data_sampler(dataset, train):
    """Load a labelled image dataset and return one side of a fixed 80/20 split.

    Parameters
    ----------
    dataset : str
        Base name (without the ``.hkl`` extension) of a hickle file located
        in ``DATA_DIR``. The loaded object is indexed with ``'y'`` (class
        names) and ``'X'`` (images).
    train : bool
        If truthy, return the training portion of the split; otherwise
        return the held-out portion.

    Returns
    -------
    (images, labels) : tuple of numpy.ndarray
        The selected images and their integer class ids (see ``GROUPS``).

    Raises
    ------
    KeyError
        If a class name in ``d['y']`` is not a key of ``GROUPS``.

    Notes
    -----
    The split is stratified on the labels and seeded with ``RANDOM_SEED``,
    so calling this twice with the same dataset yields complementary
    train / test index sets.
    """
    # DATA_DIR already ends with a separator; os.path.join avoids producing
    # a doubled '/' in the resulting path.
    d = hickle.load(os.path.join(DATA_DIR, '{}.hkl'.format(dataset)))

    # Class name -> integer class id.
    GROUPS = {
        'NORMAL': 0,
        'Echinocyte': 1,
        'Dacrocyte': 2,
        'Schistocyte': 3,
        'Elliptocyte': 4,
        'Acanthocyte': 5,
        'Target cell': 6,
        'Stomatocyte': 7,
        'Spherocyte': 8,
        'Overlap': 9,
    }

    # zip() keeps labels and images paired (and equal in length).
    pairs = list(zip(d['y'], d['X']))
    labels = np.array([GROUPS[name] for name, _ in pairs])
    images = np.array([image for _, image in pairs])

    sss = sklearn.cross_validation.StratifiedShuffleSplit(
        labels,
        n_iter=1,
        test_size=0.2,
        random_state=RANDOM_SEED,
    )
    # n_iter=1, so the splitter yields exactly one (train, test) index pair.
    train_ix, test_ix = next(iter(sss))
    ix = train_ix if train else test_ix

    return images[ix], labels[ix]

In [15]:
# Training portion of the stratified split (test_size=0.2 in get_data_sampler,
# so this is the remaining 80%).
train_images, train_labels = get_data_sampler("September_1_total_non_overlap", train=True)

In [16]:
# Move the channel axis to the front, (N, H, W, C) -> (N, C, H, W), which is
# the layout the network's input_shape expects.  The guard makes this cell
# idempotent: re-running it must not transpose the array a second time.
if train_images.shape[1] != NUM_CHANNELS:
    train_images = train_images.transpose((0, 3, 1, 2))
assert train_images.shape[1:] == (NUM_CHANNELS, IMAGE_W, IMAGE_W), train_images.shape

# One-hot encode the integer class ids; `enc` stays fitted on the TRAINING
# labels so the same column layout can be applied to other splits.
enc = OneHotEncoder()
train_labels2 = enc.fit_transform(train_labels[:, None]).toarray()
train_labels2[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])

In [17]:
# Theano-style ('th') dimension ordering: image tensors are
# (channels, rows, cols).  Set before any layer is created.
K.set_image_dim_ordering('th')

# Architecture: three blocks of [3 x (3x3 conv + ReLU)] -> 3x3/2 max-pool ->
# dropout, with 32, 64 and 128 filters respectively, followed by a softmax
# classifier over NUM_CLASSES.
CONV_BLOCK_FILTERS = (32, 64, 128)
CONVS_PER_BLOCK = 3

model = Sequential()
for block_idx, nb_filter in enumerate(CONV_BLOCK_FILTERS):
    for conv_idx in range(CONVS_PER_BLOCK):
        if block_idx == 0 and conv_idx == 0:
            # Only the very first layer declares the input shape; every later
            # layer's input shape is determined by the layer before it.
            model.add(Convolution2D(nb_filter, 3, 3, border_mode='valid',
                                    input_shape=(NUM_CHANNELS, IMAGE_W, IMAGE_W)))
        else:
            model.add(Convolution2D(nb_filter, 3, 3, border_mode='valid'))
        model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(NUM_CLASSES))
model.add(Activation('softmax'))

# NOTE(review): `adam` and `earlyStopping` are constructed but NOT used by the
# run below -- the model is compiled with `sgd` and fit() gets no callbacks.
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
sgd = SGD(lr=LEARNING_RATE, decay=1e-6, momentum=MOMENTUM, nesterov=True)
earlyStopping = EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')

model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy', 'categorical_accuracy', 'mean_squared_error'],
)

# 10% of the training arrays are held back for validation.
# NOTE(review): this run uses 50 epochs, not NUM_EPOCHS -- confirm intended.
model.fit(train_images, train_labels2,
          batch_size=BATCH_SIZE, nb_epoch=50, validation_split=0.1)

INFO (theano.gof.compilelock): Refreshing lock /home/Devansh/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.12-64/lock_dir/lock


Train on 2690 samples, validate on 299 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f373e954910>

In [18]:
# Save the trained model to an HDF5 file (relative path, so it lands in the
# current working directory).
# NOTE(review): the file name says "adam", but the model was compiled with the
# SGD optimizer -- confirm the intended name.
model.save('mod_adam.h5')

In [19]:
# Confusion matrix on the training portion: rows (first argument) are the
# predicted class ids, columns are the true class ids.
y_hat = model.predict_classes(train_images)
pd.crosstab(y_hat, train_labels)



col_0,0,1,2,3,4,5,6,7,8,9
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,805,1,14,0,0,0,1,1,2,0
1,0,248,0,0,0,0,0,0,0,0
2,1,0,54,1,1,0,0,1,0,0
3,0,0,1,602,1,2,0,0,0,0
4,0,0,0,0,70,0,0,0,0,0
5,0,2,0,1,0,130,0,0,0,0
6,1,0,0,2,0,0,580,0,0,0
7,0,0,0,0,0,0,0,86,2,0
8,2,0,2,1,0,0,0,0,188,0
9,0,0,0,0,1,1,0,0,0,184


In [20]:
# Held-out portion of the same split: same dataset name and the same
# RANDOM_SEED inside get_data_sampler as the training call.
test_images, test_labels = get_data_sampler("September_1_total_non_overlap", train=False)
# Reload the model from the file written by the earlier model.save() call.
model = load_model('mod_adam.h5')

In [21]:
# Move the channel axis to the front, (N, H, W, C) -> (N, C, H, W).  The guard
# makes this cell idempotent: re-running it must not transpose twice.
if test_images.shape[1] != NUM_CHANNELS:
    test_images = test_images.transpose((0, 3, 1, 2))
assert test_images.shape[1:] == (NUM_CHANNELS, IMAGE_W, IMAGE_W), test_images.shape

# Reuse the encoder that was fitted on the TRAINING labels instead of fitting
# a new one on the test labels: a freshly fitted encoder derives its columns
# from whatever classes happen to be present in the test split, which could
# silently misalign the one-hot columns with the model's output units.
test_labels2 = enc.transform(test_labels[:, None]).toarray()
test_labels2[0]

array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [22]:
# Confusion matrix on the held-out portion: rows (first argument) are the
# predicted class ids, columns are the true class ids.
y_hat_t = model.predict_classes(test_images)
pd.crosstab(y_hat_t, test_labels)



col_0,0,1,2,3,4,5,6,7,8,9
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,195,1,7,0,0,0,3,2,0,1
1,0,61,0,1,0,2,0,0,0,0
2,4,0,5,5,1,0,0,0,1,1
3,0,0,4,138,7,3,0,0,0,0
4,0,0,1,0,9,0,0,0,0,0
5,0,1,0,2,0,28,0,0,0,0
6,3,0,0,0,0,0,141,0,0,0
7,0,0,0,0,1,0,0,20,1,1
8,1,0,1,4,0,0,0,0,46,0
9,0,0,0,2,0,0,1,0,0,43


In [23]:
# Score the held-out portion; the returned list holds the loss followed by
# the compiled metrics, in the order reported by model.metrics_names.
model.evaluate(test_images, test_labels2, batch_size=32)



[0.35676533302521324,
 0.91711229978398201,
 0.91711229978398201,
 0.013077612099720833]

In [24]:
# Labels for the values returned by model.evaluate() above, in the same order.
model.metrics_names

['loss', 'acc', 'categorical_accuracy', 'mean_squared_error']

In [31]:
# Write a diagram of the model to a PNG file in the current working directory.
# NOTE(review): file name says "adam" although the model was compiled with SGD.
plot(model, to_file='model_adam_pic.png')