In [None]:
import keras
import h5py
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

from os.path import isfile, join
from os import listdir

import gzip
import pickle
import gc
from random import shuffle

In [None]:
# Data directories (relative paths): PATH_TO_INTERMEDIATE holds the gzip-pickled
# batch files that are read below; PATH_TO_MODELS receives the saved models and
# their per-batch report files.
PATH_TO_INTERMEDIATE, PATH_TO_MODELS = "../data/intermediate", "../data/models"

In [None]:
def _list_batch_files(prefix, directory=PATH_TO_INTERMEDIATE):
    """Return the sorted paths of one split's feature batch files.

    A file qualifies when it is a regular file in ``directory`` whose name
    starts with ``prefix`` and ends with ``"x.pickle"``.

    The result is sorted because ``os.listdir`` returns entries in arbitrary
    order; without sorting, the order in which batches are trained on (and
    therefore the numbering of the saved models) could differ between runs.
    Sorting is lexicographic, so e.g. ``batch.10`` precedes ``batch.2``.
    """
    return sorted(
        join(directory, name)
        for name in listdir(directory)
        # Cheap string checks first; isfile() touches the filesystem.
        if name.startswith(prefix)
        and name.endswith("x.pickle")
        and isfile(join(directory, name))
    )


train_batches_x = _list_batch_files('train')
validation_batches_x = _list_batch_files('validation')
test_batches_x = _list_batch_files('test')

print("Original number of train batches:", len(train_batches_x))
print("Original number of validation batches:", len(validation_batches_x))
print("Original number of test batches:", len(test_batches_x))

In [None]:
# Placeholders for the feature (x) / label (y) arrays of each split; they are
# filled in when a batch file is loaded.
train_x = train_y = None
validation_x = validation_y = None
test_x = test_y = None

# Index of the batch file to load for each split (used to build file names).
current_train_batch = current_validation_batch = current_test_batch = 0

In [None]:
# Paths of the feature (x) and label (y) files for the training batch selected
# by current_train_batch.
current_train_batch_file_name_x = join(
    PATH_TO_INTERMEDIATE,
    "train.batch.{}.x.pickle".format(current_train_batch),
)
current_train_batch_file_name_y = join(
    PATH_TO_INTERMEDIATE,
    "train.batch.{}.y.pickle".format(current_train_batch),
)

# Both files are gzip-compressed pickles.
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
with gzip.open(current_train_batch_file_name_x, 'rb') as batch_file:
    train_x = pickle.load(batch_file)

with gzip.open(current_train_batch_file_name_y, 'rb') as batch_file:
    train_y = pickle.load(batch_file)

In [None]:
# Paths of the feature (x) and label (y) files for the validation batch
# selected by current_validation_batch.
current_validation_batch_file_name_x = join(
    PATH_TO_INTERMEDIATE,
    "validation.batch.{}.x.pickle".format(current_validation_batch),
)
current_validation_batch_file_name_y = join(
    PATH_TO_INTERMEDIATE,
    "validation.batch.{}.y.pickle".format(current_validation_batch),
)

# Both files are gzip-compressed pickles.
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
with gzip.open(current_validation_batch_file_name_x, 'rb') as batch_file:
    validation_x = pickle.load(batch_file)

with gzip.open(current_validation_batch_file_name_y, 'rb') as batch_file:
    validation_y = pickle.load(batch_file)

In [None]:
# Paths of the feature (x) and label (y) files for the test batch selected by
# current_test_batch.
current_test_batch_file_name_x = join(
    PATH_TO_INTERMEDIATE,
    "test.batch.{}.x.pickle".format(current_test_batch),
)
current_test_batch_file_name_y = join(
    PATH_TO_INTERMEDIATE,
    "test.batch.{}.y.pickle".format(current_test_batch),
)

# Both files are gzip-compressed pickles.
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
with gzip.open(current_test_batch_file_name_x, 'rb') as batch_file:
    test_x = pickle.load(batch_file)

with gzip.open(current_test_batch_file_name_y, 'rb') as batch_file:
    test_y = pickle.load(batch_file)

In [None]:
# Training hyperparameters:
#   batch_size  - samples per gradient update passed to model.fit
#   num_classes - number of units in the model's final Dense layer
#   epochs      - epochs run by each model.fit call
batch_size, num_classes, epochs = 1024, 1, 50

# Each sample is reshaped to an img_rows x img_cols grid before training.
img_rows = img_cols = 16

In [None]:
# Shape of a single sample as fed to the first Conv2D layer: rows, cols and a
# trailing axis of size 1.
input_shape = (img_rows, img_cols, 1)

# Reshape every split to (n_samples, img_rows, img_cols, 1), keeping the
# sample count of each array unchanged.
train_x = train_x.reshape((train_x.shape[0],) + input_shape)
test_x = test_x.reshape((test_x.shape[0],) + input_shape)
validation_x = validation_x.reshape((validation_x.shape[0],) + input_shape)

In [None]:
# Turn each label array into an integer column vector of shape (n_samples, 1).
train_y = train_y.reshape(train_y.shape[0], 1).astype('int')
test_y = test_y.reshape(test_y.shape[0], 1).astype('int')
validation_y = validation_y.reshape(validation_y.shape[0], 1).astype('int')

In [None]:
# Softmax normalises across the units of the layer, so with a single output
# unit it always yields exactly 1.0 and the network cannot learn anything.
# A one-unit binary head trained with binary cross-entropy needs a sigmoid;
# softmax is kept for the multi-unit case so other configurations behave as
# before.
output_activation = 'sigmoid' if num_classes == 1 else 'softmax'

# Small CNN: two 3x3 convolutions, 2x2 max-pooling, then a dense classifier
# head with dropout after the pooling stage and after the hidden dense layer.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation=output_activation))

model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

In [None]:
def _load_batch(x_path):
    """Load one gzip-pickled (x, y) batch pair and shape it for the model.

    ``x_path`` is the path of the feature file; the matching label file is
    found by replacing ``"x.pickle"`` with ``"y.pickle"`` in that path.

    Returns a tuple ``(x, y)`` where ``x`` has been reshaped to
    ``(n, img_rows, img_cols, 1)`` and ``y`` to an integer array of shape
    ``(n, 1)``.
    """
    # NOTE: pickle.load can execute arbitrary code - only load trusted files.
    with gzip.open(x_path, 'rb') as f:
        x = pickle.load(f)
    with gzip.open(x_path.replace("x.pickle", "y.pickle"), 'rb') as f:
        y = pickle.load(f)

    x = x.reshape(x.shape[0], img_rows, img_cols, 1)
    y = y.reshape(y.shape[0], 1).astype('int')
    return x, y


# Train incrementally, one training batch file at a time. After each training
# batch the model is scored on every test batch, and both the scores and the
# model itself are written to PATH_TO_MODELS.
current_batch = 0
for current_train in train_batches_x:
    batch_report = {'batch_id': current_batch,
                    'batch_file': current_train}

    train_x, train_y = _load_batch(current_train)

    # NOTE(review): model.fit runs once per validation file, so the same
    # training batch is fitted for `epochs` epochs len(validation_batches_x)
    # times. Kept as-is; confirm this repetition is intended.
    for current_validation in validation_batches_x:
        validation_x, validation_y = _load_batch(current_validation)

        model.fit(train_x, train_y,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(validation_x, validation_y))

    batch_report['score'] = []
    for current_test in test_batches_x:
        test_x, test_y = _load_batch(current_test)

        # Score on the test batch that was just loaded. Previously the last
        # validation batch was evaluated here, so the reported "test" numbers
        # were really validation numbers repeated once per test file.
        score = model.evaluate(test_x, test_y, verbose=1)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
        batch_report['score'].append(score)

    # The report is a gzip-compressed pickle despite the ".h5" extension; the
    # file name is kept unchanged so existing readers still find it.
    with gzip.open(join(PATH_TO_MODELS, 'model.meta.{}.h5'.format(current_batch)), 'wb') as f:
        pickle.dump(batch_report, f, protocol=pickle.HIGHEST_PROTOCOL)
    model.save(join(PATH_TO_MODELS, 'model.{}.h5'.format(current_batch)))
    current_batch += 1
        