In [None]:
import keras
import h5py
from keras.datasets import mnist
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras import backend as K
from keras.utils import to_categorical

from random import shuffle

import numpy as np

import os
from os.path import isfile, join
from os import listdir
from datetime import datetime

import logging
import math
import pickle
import gc
from random import shuffle

In [None]:
# Directory that receives the log file created by the logging setup cell.
log_location = "../../logs/"
# Directory scanned for the pickled train*/validation* batches
# (files ending in "x.pickle", each paired with a matching "y.pickle").
PATH_TO_INTERMEDIATE = "../../data/segmentation.pickle/0"
# Directory where trained models (.h5) and their metadata (.pickle) are written.
PATH_TO_MODELS = "../../data/segmentation.models/0"

In [None]:
# logging.basicConfig(filename=...) opens the log file right away, so the
# target directory has to exist or the cell fails with FileNotFoundError.
os.makedirs(log_location, exist_ok=True)

# Root logger handle; every message is written to a fresh, timestamped file.
# NOTE: basicConfig does nothing when the root logger already has handlers,
# which happens if this cell is re-run without restarting the kernel.
logger = logging.getLogger()
logging.basicConfig(format="%(asctime)-15s %(message)s",
                    level=logging.DEBUG,
                    filename=os.path.join(log_location,'keras.' + datetime.now().strftime("%Y%m%d%H%M%S.%f") + '.log'))

In [None]:
def IsDevelopmentEnvironment():
    """Tell whether the notebook is running on a development machine.

    The answer is hard-coded: this always returns False.
    """
    is_development = False
    return is_development


# Evaluated once so later cells can read the flag instead of calling the function.
DevelopmentEnvironment = IsDevelopmentEnvironment()

In [None]:
def log(msg):
    """Write `msg` to the root logger at DEBUG level."""
    logging.debug(msg)


def print_log(msg):
    """Write `msg` to the log and then echo it to standard output."""
    logging.debug(msg)
    print(msg)

In [None]:
def _list_batch_files(prefix):
    """Return the sorted paths of regular files in PATH_TO_INTERMEDIATE whose
    name ends with "x.pickle" and starts with `prefix`."""
    matches = []
    for entry in listdir(PATH_TO_INTERMEDIATE):
        candidate = join(PATH_TO_INTERMEDIATE, entry)
        if isfile(candidate) and entry.endswith("x.pickle") and entry.startswith(prefix):
            matches.append(candidate)
    matches.sort()
    return matches


# Only the "x" files are listed; the matching "y" file name is derived later
# by swapping the suffix.
train_batches_x = _list_batch_files('train')
validation_batches_x = _list_batch_files('validation')

print_log("Original number of train batches:{}".format(len(train_batches_x)))
print_log("Original number of validation batches:{}".format(len(validation_batches_x)))

In [None]:
# Start from a clean slate: no sample / label arrays loaded for any split.
train_x = train_y = None
validation_x = validation_y = None
test_x = test_y = None

# Index of the batch currently selected for each split.
current_train_batch = current_validation_batch = current_test_batch = 0

print_log("Finished Resetting the global arrays...")

In [None]:
# Build the file names of the sample (x) and label (y) pickles for the batch
# selected by current_train_batch.
current_train_batch_file_name_x, current_train_batch_file_name_y = (
    join(PATH_TO_INTERMEDIATE, name_template.format(current_train_batch))
    for name_template in ("train.batch.{}.x.pickle", "train.batch.{}.y.pickle")
)

with open(current_train_batch_file_name_x, 'rb') as samples_file:
    train_x = pickle.load(samples_file)

with open(current_train_batch_file_name_y, 'rb') as labels_file:
    train_y = pickle.load(labels_file)

# Number of samples in this batch; the training cell uses it to size its slices.
batch_total_number_samples = train_x.shape[0]

print_log("Finished Reading the sample train batches...")



In [None]:
# Training constants. Among the visible cells only num_classes (size of the
# softmax output layer) and the image dimensions are consumed; batch_size,
# epochs and development_machine_samples are not referenced there.
batch_size = 64
num_classes = 2
epochs = 1
development_machine_samples = 10000

# Image height / width, taken from axes 1 and 2 of the loaded training batch.
img_rows = train_x.shape[1]
img_cols = train_x.shape[2]

In [None]:
# Hyper-parameter grid: one dict per (interaction, filters, dropout) combination.
# The running id is taken from the list length instead of a counter variable
# named `id`, which would shadow the builtin id() for every later cell.
training_parameters = []
for current_num_models in range(1):
    for current_filters_number in [32, 24]:
        for current_dropout in [0.50]:
            training_parameters.append(
                {'id': len(training_parameters),
                 'interaction': current_num_models,
                 'filters': current_filters_number,
                 'dropout': current_dropout})

In [None]:
total_validation_x = None
total_validation_y = None

validation_x = None
validation_y = None

# Loaded pieces are collected first and concatenated once at the end, instead
# of re-copying the growing array on every iteration.
validation_parts_x = []
validation_parts_y = []

# NOTE: the [:1] slice means only the first validation batch is actually used.
for current_validation in validation_batches_x[:1]:
    print_log("Merging Validation Batch:{}".format(current_validation))
    # pickle.load can execute arbitrary code: only read batches produced by
    # this project's own pipeline.
    with open(current_validation, 'rb') as f:
        validation_parts_x.append(pickle.load(f))
    with open(current_validation.replace("x.pickle", "y.pickle"), 'rb') as f:
        validation_parts_y.append(pickle.load(f))

if len(validation_parts_x) == 1:
    # Single batch: keep the loaded objects as they are, no copy needed.
    total_validation_x = validation_parts_x[0]
    total_validation_y = validation_parts_y[0]
elif validation_parts_x:
    total_validation_x = np.concatenate(validation_parts_x)
    total_validation_y = np.concatenate(validation_parts_y)
else:
    # Nothing to merge: the arrays stay None and evaluation will not work.
    print_log("No validation batches found in {}".format(PATH_TO_INTERMEDIATE))

validation_x = total_validation_x
validation_y = total_validation_y

print_log("Finished merging validation batches...")

In [None]:
# Input shape handed to the first Conv2D layer of every model: (rows, cols, 1).
# NOTE(review): the trailing 1 looks like a single channel in channels-last
# order — confirm the pickled batches really have this layout.
general_input_shape = (img_rows, img_cols, 1)

In [None]:
# A notebook cell only renders its last expression, so both values are shown
# together as one tuple instead of silently dropping the first.
general_input_shape, batch_total_number_samples

In [None]:
def create_model(parameters):
    """Build and compile the CNN classifier for one hyper-parameter set.

    Architecture: two Conv2D(2x2, relu) -> MaxPooling2D(2x2) -> Dropout
    stages, then Flatten -> Dense(relu) -> Dropout -> Dense(softmax).

    Parameters
    ----------
    parameters : dict
        Must provide 'filters' (used as the filter count of both Conv2D
        layers and as the width of the hidden Dense layer) and 'dropout'
        (rate applied by every Dropout layer).

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy loss, Adadelta
    optimizer, accuracy metric).

    Reads the notebook globals `general_input_shape` and `num_classes`.
    """
    filters = parameters['filters']
    dropout = parameters['dropout']

    model = Sequential()

    # Only the first layer of a Sequential model needs the input shape.
    model.add(Conv2D(filters, kernel_size=(2, 2),
                     activation='relu',
                     input_shape=general_input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))

    # The second convolution infers its input from the previous layer, so the
    # redundant input_shape argument is no longer passed here.
    model.add(Conv2D(filters, kernel_size=(2, 2),
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))

    model.add(Flatten())
    model.add(Dense(filters, activation='relu'))
    model.add(Dropout(dropout))

    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model


In [None]:
# Train one model per hyper-parameter set, evaluate it on the validation data
# and save both the model (.h5) and a small metadata report (.pickle).
DevelopmentEnvironment = False
print_log("Global epochs:{}".format(len(training_parameters)))
first = True

# Make sure the output directory exists before spending time on training.
os.makedirs(PATH_TO_MODELS, exist_ok=True)

for current_parameter in training_parameters:
    print(str(current_parameter))
    # Drop the references to the previously loaded batch so it can be freed
    # before the next model is built.
    train_x = None
    train_y = None
    gc.collect()
    model = create_model(current_parameter)
    histories = []

    num_steps_per_batch = 100
    original_sample_size = batch_total_number_samples
    # The slice fed to each fit() call shrinks as the number of filters grows.
    # Clamp it to at least one sample so the division below cannot be by zero.
    current_sample_size_for_fit = max(
        1, original_sample_size // (current_parameter['filters'] * 12))
    number_of_interactions = original_sample_size // current_sample_size_for_fit

    for current_interaction in range(number_of_interactions):
        # The same window is used for every batch file of this interaction.
        # The loop variable must not be modified inside the inner loop,
        # otherwise each batch file would be sliced at a different offset.
        start_sample = current_sample_size_for_fit * current_interaction
        end_sample = start_sample + current_sample_size_for_fit

        for current_train in train_batches_x:
            print_log("Loading for Training Batch:{} on GlobalEpoch:{}".format(current_train, current_parameter['id']))
            # pickle.load can execute arbitrary code: only read batches
            # produced by this project's own pipeline.
            with open(current_train, 'rb') as f:
                train_x = pickle.load(f)
            with open(current_train.replace("x.pickle", "y.pickle"), 'rb') as f:
                train_y = pickle.load(f)

            print_log("Sampling...")

            print_log("Iniciando o fit ({} in {}, now running on {} to {})...".format(original_sample_size, number_of_interactions, start_sample,end_sample))

            train_x = train_x[start_sample:end_sample]
            train_y = train_y[start_sample:end_sample]

            # The window is sized from the first training batch; a shorter
            # batch file can yield an empty slice, which fit() cannot use.
            if train_x.shape[0] == 0:
                print_log("Skipping empty slice {} to {} of batch:{}".format(start_sample, end_sample, current_train))
                continue

            # Run at least one epoch: small slices would otherwise compute
            # epochs == 0 and fit() would return without training anything.
            # NOTE(review): steps_per_epoch with in-memory arrays behaves
            # differently across Keras versions — confirm it is intended.
            current_history = model.fit(train_x, train_y,
                      epochs=max(1, train_x.shape[0] // num_steps_per_batch),
                      verbose=1,
                      steps_per_epoch=num_steps_per_batch,
                      callbacks=[
                          EarlyStopping(
                          monitor='loss',
                          min_delta=0.001,
                          patience=2,
                          verbose=1,
                          mode='auto')])
            histories.append(current_history)

    # The model is compiled with metrics=['accuracy'], so evaluate() returns
    # [loss, accuracy].
    score = model.evaluate(validation_x, validation_y, verbose=1)
    batch_report = { 'batch_id' :  current_parameter['id'], 'parameters' : current_parameter }
    print_log('Test loss:{}'.format(score[0]))
    print_log('Test accuracy:{}'.format(score[1]))
    batch_report['score'] = score
    # The fit histories are collected above but not stored in the report.
    batch_report['input_shape'] = general_input_shape

    # Take the timestamp at save time so it is always defined, even when no
    # training batch was processed.
    current_date_time = datetime.now().strftime('%Y%m%d.%H%M%S')
    model_file_name = join(PATH_TO_MODELS,'{}.model.meta.b{}.{}.h5'.format(current_date_time, current_parameter['id'],score[1]))
    metadata_file_name = os.path.splitext(model_file_name)[0] + ".pickle"
    batch_report['model_file_name'] = model_file_name
    with open(metadata_file_name, 'wb') as f:
        pickle.dump(batch_report, f, protocol=pickle.HIGHEST_PROTOCOL)
    model.save(model_file_name)
    print_log('Model saved to:{}'.format(model_file_name))
    print_log('Model metadata saved to:{}'.format(metadata_file_name))
