In [1]:
from __future__ import print_function

import os
import sys

import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

from model import get_model
from utils import crps, real_to_cdf, preprocess, rotation_augmentation, shift_augmentation

# Root directory of the prepared data set.
# Set the SADSB_DATA_DIR environment variable to point at the data on another
# machine; when it is unset or empty the original location is used.
# os.path.join(..., '') guarantees a trailing separator, so plain string
# concatenation such as DATA_DIR + 'file.h5' keeps producing a valid path.
DATA_DIR = os.path.join(
    os.environ.get('SADSB_DATA_DIR') or '/media/haidar/Storage/Data/SADSB/', '')

"""
Training systole and diastole models.
"""
# Two separate model instances are built, one per regression target; they are
# created in this order (systole first, then diastole).
print('Loading and compiling models...')
model_systole = get_model()
model_diastole = get_model()
print('Done loading models...')

Using Theano backend.


Couldn't import dot_parser, loading of dot files will not be possible.
Loading and compiling models...
Done loading models...


Using gpu device 0: GeForce GTX 980 Ti (CNMeM is enabled with initial size: 0.85% of memory, CuDNN 3007)


In [None]:
print('Loading training data...')
# Load training and validation data from the HDF5 container 'trainData.h5'.
# os.path.join is used so the path is valid whether or not DATA_DIR ends with
# a path separator.
# The file handle is intentionally left open: indexing the file returns h5py
# dataset objects that read from disk on access, and they stop working once
# the file is closed. The training cell still needs them.
trainFile = h5py.File(os.path.join(DATA_DIR, 'trainData.h5'), 'r')
XTrain = trainFile['XTrain']
YTrain = trainFile['YTrain']
XVal = trainFile['XVal']
YVal = trainFile['YVal']

# Sanity check: print the shape of every dataset (inputs and targets).
for dataset in (XTrain, YTrain, XVal, YVal):
    print(dataset.shape)

Loading training data...
(4831, 30, 256, 256)
(4831, 2)
(516, 30, 256, 256)
(516, 2)


In [None]:
# Training configuration.
nb_iter = 200
epochs_per_iter = 1
batch_size = 2
calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

# Generator that augments training batches on the fly (random rotation and
# shifts) and standardises every sample.
# NOTE(review): this standardisation is applied only to batches drawn from
# datagen.flow(). The evaluate()/predict() calls below receive XVal/XTrain
# unmodified. Unless get_model() normalises its input internally, training and
# validation inputs are scaled differently -- confirm against model.py.
datagen = ImageDataGenerator(
    featurewise_center=False,
    featurewise_std_normalization=False,
    samplewise_center=True,
    samplewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=False)

# remember min val. losses (best iterations), used as sigmas for submission
min_val_loss_systole = sys.float_info.max
min_val_loss_diastole = sys.float_info.max

# The label columns never change during training, so read them from the HDF5
# datasets once instead of on every iteration. Column 0 is the target fed to
# the systole model, column 1 the target fed to the diastole model.
y_train_systole = YTrain[:, 0]
y_train_diastole = YTrain[:, 1]
y_val_systole = YVal[:, 0]
y_val_diastole = YVal[:, 1]

print('-'*50)
print('Training...')
print('-'*50)

for i in range(nb_iter):
    print('-'*50)
    print('Iteration {0}/{1}'.format(i + 1, nb_iter))
    print('-'*50)

    # Train on augmented batches, then score on the validation set.
    print('Fitting systole model...')
    hist_systole = model_systole.fit_generator(
        datagen.flow(XTrain, y_train_systole, batch_size=batch_size),
        nb_epoch=epochs_per_iter, samples_per_epoch=len(XTrain))

    hist_systole_val = model_systole.evaluate(XVal, y_val_systole, batch_size=batch_size,
                                              show_accuracy=False, verbose=1)

    print('Fitting diastole model...')
    hist_diastole = model_diastole.fit_generator(
        datagen.flow(XTrain, y_train_diastole, batch_size=batch_size),
        nb_epoch=epochs_per_iter, samples_per_epoch=len(XTrain))

    hist_diastole_val = model_diastole.evaluate(XVal, y_val_diastole, batch_size=batch_size,
                                                show_accuracy=False, verbose=1)

    # Loss values double as the sigmas passed to real_to_cdf for the predictions.
    # The training loss is the last epoch's entry in the fit history; the
    # validation loss is the value returned by evaluate() above.
    # NOTE(review): the original comment called these RMSE values; that depends
    # on the loss configured in get_model() -- confirm.
    loss_systole = hist_systole.history['loss'][-1]
    loss_diastole = hist_diastole.history['loss'][-1]
    val_loss_systole = hist_systole_val
    val_loss_diastole = hist_diastole_val

    if calc_crps > 0 and i % calc_crps == 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(XTrain, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(XTrain, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(XVal, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(XVal, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function);
        # systole labels first, then diastole, matching the prediction order below
        cdf_train = real_to_cdf(np.concatenate((y_train_systole, y_train_diastole)))
        cdf_test = real_to_cdf(np.concatenate((y_val_systole, y_val_diastole)))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on validation data
        crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(val) = {0}'.format(crps_test))

    print('Saving weights...')
    # save the latest weights so they can be loaded later
    model_systole.save_weights('weights_systole.hdf5', overwrite=True)
    model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

    # for best (lowest) val losses, save weights
    if val_loss_systole < min_val_loss_systole:
        min_val_loss_systole = val_loss_systole
        model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

    if val_loss_diastole < min_val_loss_diastole:
        min_val_loss_diastole = val_loss_diastole
        model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

    # save best (lowest) val losses in file (to be later used for generating submission);
    # rewritten every iteration, systole on the first line and diastole on the second.
    # The file is only written, never read back here, so plain 'w' is sufficient.
    with open('val_loss.txt', mode='w') as f:
        f.write(str(min_val_loss_systole))
        f.write('\n')
        f.write(str(min_val_loss_diastole))

--------------------------------------------------
Training...
--------------------------------------------------
--------------------------------------------------
Iteration 1/200
--------------------------------------------------
Fitting systole model...
Epoch 1/1
 128/4831 [..............................] - ETA: 1836s - loss: 199678.6074