In [1]:
from __future__ import print_function

import sys
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

from model import get_model
from utils import crps, real_to_cdf, preprocess, rotation_augmentation, shift_augmentation
DATA_DIR = '/media/haidar/Storage/Data/SADSB/'




Using gpu device 0: GeForce GTX 980 Ti (CNMeM is enabled with initial size: 0.75% of memory, CuDNN 3007)

Using Theano backend.
Couldn't import dot_parser, loading of dot files will not be possible.





In [2]:
"""
Training systole and diastole models.
"""
print('Loading and compiling models...')
model_systole = get_model()
model_diastole = get_model()

Loading and compiling models...


In [12]:
import h5py
import numpy as np

DATA_DIR = '/media/haidar/Storage/Data/SADSB/'
print('Loading training data...')
"""
Load training data from .npy files.
"""
trainFile = h5py.File(DATA_DIR+'trainData.h5', 'r')
X = trainFile['X']
Y = trainFile['Y']
print(X.shape)
print(Y.shape)
# split to training and test
split_ratio = 0.8
split = int(X.shape[0] * split_ratio)


Loading training data...
(159466, 30, 128, 128)
(2,)


In [13]:
with X.astype(np.float32):
    X[1,:,:,:] /= 255

seed = np.random.randint(1, 10e6)
np.random.seed(seed)
np.random.shuffle(X)
np.random.seed(seed)
np.random.shuffle(y)

print('Pre-processing images...')
X = preprocess(X)



Loading training data...
uint8


IOError: Can't prepare for writing data (No write intent on file)

In [6]:
nb_iter = 200
epochs_per_iter = 1
batch_size = 32
calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

datagen = ImageDataGenerator(
    featurewise_center=False,
    featurewise_std_normalization=False,
    samplewise_center=True,
    samplewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(X[1:100, ...])

# remember min val. losses (best iterations), used as sigmas for submission
min_val_loss_systole = sys.float_info.max
min_val_loss_diastole = sys.float_info.max

print('-'*50)
print('Training...')
print('-'*50)

for i in range(nb_iter):
    print('-'*50)
    print('Iteration {0}/{1}'.format(i + 1, nb_iter))
    print('-'*50)

#     print('Augmenting images - rotations')
#     X_train_aug = rotation_augmentation(X_train, 15)
#     print('Augmenting images - shifts')
#     X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

#     print('Fitting systole model...')
#     hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter,
#                                      batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))

#     print('Fitting diastole model...')
#     hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter,
#                                        batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))
    print('Fitting systole model...')
    hist_systole = model_systole.fit_generator(datagen.flow(X, Y[:,0], batch_size=batch_size), nb_epoch=epochs_per_iter, samples_per_epoch=len(X))

    print('Fitting diastole model...')
    hist_diastole = model_diastole.fit_generator(datagen.flow(X, Y[:,1], batch_size=batch_size), nb_epoch=epochs_per_iter, samples_per_epoch=len(X))

    # sigmas for predicted data, actually loss function values (RMSE)
    loss_systole = hist_systole.history['loss'][-1]
    loss_diastole = hist_diastole.history['loss'][-1]
    val_loss_systole = hist_systole.history['val_loss'][-1]
    val_loss_diastole = hist_diastole.history['val_loss'][-1]

    if calc_crps > 0 and i % calc_crps == 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    print('Saving weights...')
    # save weights so they can be loaded later
    model_systole.save_weights('weights_systole.hdf5', overwrite=True)
    model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

    # for best (lowest) val losses, save weights
    if val_loss_systole < min_val_loss_systole:
        min_val_loss_systole = val_loss_systole
        model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

    if val_loss_diastole < min_val_loss_diastole:
        min_val_loss_diastole = val_loss_diastole
        model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

    # save best (lowest) val losses in file (to be later used for generating submission)
    with open('val_loss.txt', mode='w+') as f:
        f.write(str(min_val_loss_systole))
        f.write('\n')
        f.write(str(min_val_loss_diastole))

--------------------------------------------------
Training...
--------------------------------------------------
--------------------------------------------------
Iteration 1/200
--------------------------------------------------
Fitting systole model...


TypeError: Argument sequence too long