In [None]:
import keras
import h5py
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras import backend as K
from keras.utils import to_categorical

import numpy as np

import os
from os.path import isfile, join
from os import listdir
from datetime import datetime

import logging
import gzip
import pickle
import gc
from random import shuffle

In [None]:
# Directory that receives the timestamped debug log file of this notebook.
log_location = "../logs/"
# Directory scanned for the gzip-pickled train/validation/test batch files.
PATH_TO_INTERMEDIATE = "../data/intermediate"
# Directory the trained model files are written to.
PATH_TO_MODELS = "../data/models"

In [None]:
# logging.basicConfig(filename=...) opens the log file immediately, so a
# missing log directory would raise FileNotFoundError on a fresh checkout.
os.makedirs(log_location, exist_ok=True)

# Root logger; every message at DEBUG level or above goes to a log file
# whose name carries the start-up timestamp (one file per kernel start).
logger = logging.getLogger()
logging.basicConfig(format="%(asctime)-15s %(message)s",
                    level=logging.DEBUG,
                    filename=os.path.join(log_location,
                                          'csv.' + datetime.now().strftime("%Y%m%d%H%M%S.%f") + '.log'))

In [None]:
def IsDevelopmentEnvironment():
    """Tell whether the notebook is running on a development machine.

    The answer is hard-coded to ``True``; edit this function to switch the
    notebook to a full-size run.
    """
    return True


# Evaluated once so later cells can test a plain boolean.
DevelopmentEnvironment = IsDevelopmentEnvironment()

In [None]:
def log(msg):
    """Record ``msg`` at DEBUG level through the ``logging`` module."""
    logging.debug(msg)


def print_log(msg):
    """Send ``msg`` to the debug log and then echo it on standard output."""
    log(msg)
    print(msg)

In [None]:
def _list_batch_files(directory, prefix, suffix="x.pickle"):
    """Return the sorted paths of the batch files found in ``directory``.

    A directory entry is kept when its name starts with ``prefix``, ends with
    ``suffix`` and it is a regular file.
    """
    return sorted(
        join(directory, name)
        for name in listdir(directory)
        # Cheap string checks first, file-system check last.
        if name.startswith(prefix)
        and name.endswith(suffix)
        and isfile(join(directory, name))
    )


# Paths of the "x" (input) batch files for each data split.
train_batches_x = _list_batch_files(PATH_TO_INTERMEDIATE, 'train')
validation_batches_x = _list_batch_files(PATH_TO_INTERMEDIATE, 'validation')
test_batches_x = _list_batch_files(PATH_TO_INTERMEDIATE, 'test')

print_log("Original number of train batches:{}".format(len(train_batches_x)))
print_log("Original number of validation batches:{}".format(len(validation_batches_x)))
print_log("Original number of test batches:{}".format(len(test_batches_x)))

In [None]:
# Clear the arrays that hold the currently loaded batch of each split.
train_x = train_y = None
validation_x = validation_y = None
test_x = test_y = None

# Batch counters, one per split; all start at the first batch.
current_train_batch = current_validation_batch = current_test_batch = 0

print_log("Finished Resetting the global arrays...")

In [None]:
# File names of the x/y pair for the train batch selected by current_train_batch.
current_train_batch_file_name_x = join(
    PATH_TO_INTERMEDIATE,
    "train.batch.{}.x.pickle".format(current_train_batch),
)
current_train_batch_file_name_y = join(
    PATH_TO_INTERMEDIATE,
    "train.batch.{}.y.pickle".format(current_train_batch),
)

# NOTE(review): pickle.load can execute arbitrary code; only read trusted files.
with gzip.open(current_train_batch_file_name_x, 'rb') as f:
    train_x = pickle.load(f)

with gzip.open(current_train_batch_file_name_y, 'rb') as f:
    train_y = pickle.load(f)

print_log("Finished Reading the sample train batches...")

In [None]:
# File names of the x/y pair for the test batch selected by current_test_batch.
current_test_batch_file_name_x = join(
    PATH_TO_INTERMEDIATE,
    "test.batch.{}.x.pickle".format(current_test_batch),
)
current_test_batch_file_name_y = join(
    PATH_TO_INTERMEDIATE,
    "test.batch.{}.y.pickle".format(current_test_batch),
)

# NOTE(review): pickle.load can execute arbitrary code; only read trusted files.
with gzip.open(current_test_batch_file_name_x, 'rb') as f:
    test_x = pickle.load(f)

with gzip.open(current_test_batch_file_name_y, 'rb') as f:
    test_y = pickle.load(f)

print_log("Finished Reading the sample test batches...")

In [None]:
# Mini-batch size handed to model.fit.
batch_size = 128
# Number of target classes: width of the one-hot labels and of the softmax layer.
num_classes = 2
# Epochs per model.fit call, i.e. per training batch file.
epochs = 10
# Number of passes over the whole list of training batch files.
global_epochs = 100
# Maximum number of samples kept per array when DevelopmentEnvironment is set.
development_machine_samples = 10000
# Per-sample dimensions, read from axes 1 and 2 of the sample train batch.
img_rows, img_cols = train_x.shape[1], train_x.shape[2]

In [None]:
def _load_xy_batch(x_path):
    """Load one gzip-pickled x batch together with its matching y batch.

    The y file name is obtained from ``x_path`` by replacing ``x.pickle``
    with ``y.pickle``. Returns the tuple ``(x, y)``.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only read trusted files.
    with gzip.open(x_path, 'rb') as x_file:
        batch_x = pickle.load(x_file)
    with gzip.open(x_path.replace("x.pickle", "y.pickle"), 'rb') as y_file:
        batch_y = pickle.load(y_file)
    return batch_x, batch_y


# Collect every batch first and concatenate once at the end; concatenating
# inside the loop re-copies the whole accumulated array for every batch.
merged_x_parts = []
merged_y_parts = []

# NOTE(review): the test batches are merged into the validation set as well,
# so the score reported during training is not computed on held-out data.
# This mirrors the original behaviour -- confirm it is intended.
for split_label, split_batches in (("Validation", validation_batches_x),
                                   ("Test", test_batches_x)):
    for current_validation in split_batches:
        print_log("Merging {} Batch:{}".format(split_label, current_validation))
        validation_x, validation_y = _load_xy_batch(current_validation)
        merged_x_parts.append(validation_x)
        merged_y_parts.append(validation_y)

if not merged_x_parts:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError(
        "No validation or test batches found in {}".format(PATH_TO_INTERMEDIATE))

total_validation_x = np.concatenate(merged_x_parts)
total_validation_y = np.concatenate(merged_y_parts)

# Drop the per-batch references so only the merged arrays stay alive.
del merged_x_parts, merged_y_parts

# Add the trailing channel axis to the inputs and one-hot encode the labels.
validation_x = total_validation_x.reshape(total_validation_x.shape[0], img_rows, img_cols, 1)
validation_y = total_validation_y.reshape(total_validation_y.shape[0])
validation_y = keras.utils.to_categorical(validation_y, num_classes)

print_log("Finished merging validation batches...")

In [None]:
# validation_y is one-hot encoded, so column 0 is 1 for samples of class 0
# and 0 for every other sample.
# NOTE(review): this assumes class 0 is the "Nuclei" class -- confirm.
first_class_column = validation_y[:, 0]
print_log("Number of Nuclei sample slices:{}".format(np.count_nonzero(first_class_column == 1)))
print_log("Number of Non=Nuclei sample slices:{}".format(np.count_nonzero(first_class_column == 0)))

In [None]:
# Shape of a single sample as fed to the network: rows, cols, one channel.
input_shape = (img_rows, img_cols, 1)

# Give the sample batches the trailing channel axis.
train_x = train_x.reshape((train_x.shape[0],) + input_shape)
test_x = test_x.reshape((test_x.shape[0],) + input_shape)

In [None]:
# Two stacked 3x3 convolutions, max-pooling and dropout, followed by a dense
# classifier head that ends in a softmax over num_classes outputs.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# One-hot labels -> categorical cross-entropy; accuracy is tracked as a metric.
model.compile(optimizer=keras.optimizers.Adadelta(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# The model files are written below; make sure their directory exists.
os.makedirs(PATH_TO_MODELS, exist_ok=True)

# On a development machine cap the validation set once, up front: the slice
# does not depend on the training batch, so it need not be redone per batch.
if DevelopmentEnvironment:
    validation_x = validation_x[:development_machine_samples]
    validation_y = validation_y[:development_machine_samples]

# current_batch counts the passes over the full list of training batch files.
current_batch = 0

for current_batch in range(global_epochs):
    for current_train in train_batches_x:
        # Timestamp taken before training; it becomes part of the file name.
        current_date_time = datetime.now().strftime('%Y%m%d.%H%M%S')
        print("Training Batch:{}".format(current_train))
        batch_report = {'batch_id': current_batch,
                        'batch_file': current_train}

        # NOTE(review): pickle.load can execute arbitrary code; only read trusted files.
        with gzip.open(current_train, 'rb') as f:
            train_x = pickle.load(f)
        with gzip.open(current_train.replace("x.pickle", "y.pickle"), 'rb') as f:
            train_y = pickle.load(f)

        # Add the channel axis and one-hot encode the labels.
        train_x = train_x.reshape(train_x.shape[0], img_rows, img_cols, 1)
        train_y = train_y.reshape(train_y.shape[0])
        train_y = keras.utils.to_categorical(train_y, num_classes)

        if DevelopmentEnvironment:
            train_x = train_x[:development_machine_samples]
            train_y = train_y[:development_machine_samples]

        model.fit(train_x, train_y,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1, validation_data=(validation_x, validation_y))

        score = model.evaluate(validation_x, validation_y, verbose=1)
        # The format strings used to lack the '{}' placeholder, so the
        # numbers were silently dropped from both the log and the output.
        print_log('Test loss:{}'.format(score[0]))
        print_log('Test accuracy:{}'.format(score[1]))
        batch_report['score'] = score

        model_file_name = join(PATH_TO_MODELS,
                               '{}.model.meta.b{}.{}.h5'.format(current_date_time, current_batch, score[1]))
        batch_report['model_file_name'] = model_file_name

        model.save(model_file_name)
        print_log('Model saved to:{}'.format(model_file_name))

        # The report used to be pickled into model_file_name itself and was
        # then overwritten by model.save(); it now gets a file of its own.
        report_file_name = model_file_name + '.report.pickle.gz'
        with gzip.open(report_file_name, 'wb') as f:
            pickle.dump(batch_report, f, protocol=pickle.HIGHEST_PROTOCOL)
