In [None]:
# Updates
# clean up custom function definitions
# make model loadable from other file

In [1]:
## LIBRARIES
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Model, load_model, Sequential
from keras.layers import Conv1D,MaxPooling1D,LSTM,BatchNormalization,Dropout,Input,Dense,Bidirectional,Activation,Flatten
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.backend import squeeze
from keras.callbacks import ModelCheckpoint, TensorBoard, CSVLogger
from scipy import stats

Using TensorFlow backend.


In [2]:
# define custom metrics

def coeff_determination(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)`` where ``SS_res`` is the sum
    of squared residuals and ``SS_tot`` is the sum of squared deviations of
    ``y_true`` from its mean. The backend epsilon keeps the division defined
    when ``y_true`` is constant.

    NOTE: a function with this same name and formula is defined again later
    in this file; whichever definition ran last is the one in effect.
    """
    # Imported locally: this cell does not rely on a module-level ``K``.
    from keras import backend as K

    residual_ss = K.sum(K.square(y_true - y_pred))
    total_ss = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - residual_ss / (total_ss + K.epsilon())

def correlation_coefficient(y_true, y_pred):
    """Keras metric returning the signed square of a streaming Pearson r.

    Builds ``tf.contrib.metrics.streaming_pearson_correlation`` between
    ``y_pred`` and ``y_true`` and returns ``r * |r|``, i.e. r squared with
    the sign of r kept.

    NOTE: a function with this same name and body is defined again later in
    this file; whichever definition ran last is the one in effect.
    """
    streaming_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(
        y_pred, y_true, name='pearson_r')

    # The metric's state lives in local variables. Register every local
    # variable whose name has a 'pearson_r' path component in the
    # GLOBAL_VARIABLES collection so it is initialized for a new session.
    for local_var in tf.local_variables():
        if 'pearson_r' in local_var.name.split('/'):
            tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, local_var)

    # Make the returned tensor depend on the update op so the streaming
    # statistics are refreshed whenever the metric value is computed.
    with tf.control_dependencies([update_op]):
        r = tf.identity(streaming_r)
        return r * abs(r)
    

In [3]:
# evaluations on test data 

def eval_on_test(X_test, Y_test, model, fname):
    """Evaluate ``model`` on test data and save a predicted-vs-true plot.

    Parameters
    ----------
    X_test : inputs passed unchanged to ``model.evaluate`` / ``model.predict``.
    Y_test : true targets; indexed as ``Y_test[:, 0]`` for the regression.
    model : object providing ``evaluate(X, Y)`` and ``predict(X)``.
    fname : output path stem; the figure is written to ``fname + '.pdf'``.

    Returns
    -------
    (loss, signed_r2)
        ``loss`` is whatever ``model.evaluate`` returned. ``signed_r2`` is
        ``rvalue * abs(rvalue)`` from a linear regression of the first
        prediction column on the first target column.
    """
    loss = model.evaluate(X_test, Y_test)
    Y_pred = model.predict(X_test)

    # Least-squares line through (true, predicted) pairs of the first column.
    fit = stats.linregress(Y_test[:, 0], Y_pred[:, 0])
    signed_r2 = fit.rvalue * abs(fit.rvalue)

    plt.figure(0)
    plt.plot(Y_test, Y_pred, 'o', label='original data')
    plt.plot(Y_test, fit.intercept + fit.slope * Y_test, 'r', label='fitted line')
    plt.legend()
    plt.title('R2 = {}'.format(signed_r2))
    plt.xlabel('Y_true')
    plt.ylabel('Y_pred')
    plt.savefig(fname + '.pdf', bbox_inches='tight')
    plt.close()

    return loss, signed_r2


In [4]:
#checks if running notebook or standalone python script
def is_interactive():
    """Return True when the ``__main__`` module has no ``__file__`` attribute.

    The file uses this to tell a notebook session apart from a run as a
    standalone script.
    """
    import __main__
    return not hasattr(__main__, '__file__')

import configparser
import sys

# Raw command line. In script mode this file reads:
#   args[1] -> path of the config file (this cell)
#   args[2] -> model name used for checkpoint file names
#   args[3] -> name used for the TensorBoard run, CSV log and test-result file
args = sys.argv

# Defaults, used as-is when running interactively.
model_name = "THIS_model"
DROPOUT = 0   # dropout
ALPHA = 0.01 # learnrate
BETA = 0.01  # passed to the Adam optimizer as `decay`
EPOCHS = 2 # epochs
MBATCH = 100 # batch size

#loading from config
if not is_interactive():
    # Fail fast: later cells index args[2] and args[3] unconditionally in
    # script mode, which would otherwise only crash after data loading and
    # model construction.
    if len(args) < 4:
        raise SystemExit(
            "usage: {} CONFIG_FILE MODEL_NAME LOG_NAME".format(args[0]))

    config_file = args[1]
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse; an empty list means nothing was loaded.
    if not config.read(config_file):
        raise OSError("Could not read config file: {}".format(config_file))

    ALPHA = config.getfloat('main', 'alpha')
    DROPOUT = config.getfloat('main', 'dropout')
    EPOCHS = config.getint('main', 'epochs')
    MBATCH = config.getint('main', 'mbatch')
    BETA = config.getfloat('main', 'beta')


In [5]:
# Bring the public names of the "<model_name>_input" module into this
# namespace. Same effect as `from <model_name>_input import *`, but done via
# importlib instead of exec() on a concatenated string.
import importlib

_input_module = importlib.import_module(model_name + '_input')
# Mirror `import *`: honour __all__ when the module defines it, otherwise
# export every name that does not start with an underscore.
_exported = getattr(_input_module, '__all__', None)
if _exported is None:
    _exported = [name for name in vars(_input_module) if not name.startswith('_')]
globals().update({name: getattr(_input_module, name) for name in _exported})

In [6]:
# Load data
# The archive path differs between a notebook session and a script run.
data_path = "../data/THIS_data.npz" if is_interactive() else "./data/THIS_data.npz"

Xh_train, Xh_test, Xv_train, Xv_test, Y_train, Y_test = load_data(data_path)

# The model inputs are wrapped in lists. Only the Xh arrays are fed to the
# model; the Xv arrays are loaded but currently left out (append Xv_train /
# Xv_test to the respective list to include them).
X_train = [Xh_train]
X_test = [Xh_test]



In [7]:
# should go to model definition
# Only defined in a notebook session; in script mode no POC_model is defined
# here, so it has to already be in scope from elsewhere.
if is_interactive():
    def POC_model(input_shape_hot, DR):
        """Build the single-input Conv1D regression model (uncompiled).

        Parameters
        ----------
        input_shape_hot : shape passed to the ``Input`` layer.
        DR : dropout rate used by both ``Dropout`` layers.

        Returns
        -------
        A Keras ``Model`` mapping the one input to a single ``Dense(1)``
        output with no activation.
        """
        hot_input = Input(shape=input_shape_hot)

        # Layer 1: convolution -> batch norm -> dropout.
        # With no padding and stride 1 the length shrinks by kernel_size - 1
        # (e.g. 650 -> 621, as in the model summary printed below).
        conv = Conv1D(filters=128, kernel_size=30, strides=1, activation='relu')(hot_input)
        conv = BatchNormalization()(conv)
        conv = Dropout(DR)(conv)

        flat = Flatten()(conv)

        # Fully connected block: dense -> batch norm -> dropout.
        hidden = Dense(64, activation='relu')(flat)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(DR)(hidden)

        prediction = Dense(1)(hidden)

        return Model(inputs=[hot_input], outputs=prediction)



In [8]:
# Per-sample input shape: axes 1 and 2 of the training array (the leading
# axis is dropped).
input_shape = Xh_train.shape[1:3]
# Build the network with the configured dropout rate and print its layers.
model = POC_model(input_shape,DROPOUT)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 650, 4)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 621, 128)          15488     
_________________________________________________________________
batch_normalization_1 (Batch (None, 621, 128)          512       
_________________________________________________________________
dropout_1 (Dropout)          (None, 621, 128)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 79488)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                5087296   
_________________________________________________________________
batch_normalization_2 (Batch (None, 64)                256       
__________

In [9]:
from keras import backend as K
def coeff_determination(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)`` where ``SS_res`` is the sum
    of squared residuals and ``SS_tot`` is the sum of squared deviations of
    ``y_true`` from its mean. Uses the module-level backend alias ``K``.

    NOTE: this name is also defined earlier in this file with the same
    formula; this later definition replaces the earlier one once it runs.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    total_ss = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - residual_ss / (total_ss + K.epsilon())

import tensorflow as tf

def correlation_coefficient(y_true, y_pred):
    """Keras metric returning the signed square of a streaming Pearson r.

    Builds ``tf.contrib.metrics.streaming_pearson_correlation`` between
    ``y_pred`` and ``y_true`` and returns ``r * |r|``, i.e. r squared with
    the sign of r kept.

    NOTE: this name is also defined earlier in this file with the same body;
    this later definition replaces the earlier one once it runs.
    """
    streaming_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(
        y_pred, y_true, name='pearson_r')

    # The metric's state lives in local variables. Register every local
    # variable whose name has a 'pearson_r' path component in the
    # GLOBAL_VARIABLES collection so it is initialized for a new session.
    for local_var in tf.local_variables():
        if 'pearson_r' in local_var.name.split('/'):
            tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, local_var)

    # Make the returned tensor depend on the update op so the streaming
    # statistics are refreshed whenever the metric value is computed.
    with tf.control_dependencies([update_op]):
        r = tf.identity(streaming_r)
        return r * abs(r)



In [10]:
# Adam optimizer: ALPHA is the learning rate, BETA is passed as `decay`.
opt = Adam(lr=ALPHA, beta_1=0.9, beta_2=0.999, decay=BETA)
# Optimize mean squared error; additionally report the two custom metrics
# and 'mse' itself as metrics.
model.compile(loss='mse', optimizer=opt, metrics=[coeff_determination, correlation_coefficient, 'mse'])

In [11]:
# Set callbacks
# checkpoint
# https://machinelearningmastery.com/check-point-deep-learning-models-keras/
# https://keras.io/callbacks/ - for now save every epoch
suffix = "{epoch:03d}-{val_loss:.3f}.hdf5"

# In script mode the model name and the log name come from the command line;
# in a notebook both fall back to the default model_name. args[3] is only
# read in script mode: in a notebook sys.argv belongs to the kernel launcher
# and must not be used as a log name.
if is_interactive():
    tensorboard_run = model_name
    csv_logger_filename = model_name + "val_results.csv"
else:
    model_name = args[2]
    tensorboard_run = args[3]
    csv_logger_filename = args[3]

# Checkpoint file name template: "<model_name>.<epoch>-<val_loss>.hdf5".
filepath = "{}.{}".format(model_name, suffix)

checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0,
                             save_best_only=False, save_weights_only=False, mode='auto', period=1)

# tensorboard
# http://fizzylogic.nl/2017/05/08/monitor-progress-of-your-keras-based-neural-network-using-tensorboard/

tensorboard = TensorBoard(log_dir='./tensorboard_logs/' + tensorboard_run)

# Per-epoch metrics are appended to the CSV file, so repeated runs accumulate.
csv = CSVLogger(csv_logger_filename, separator = ",", append = True)

callbacks_list = [checkpoint, tensorboard, csv]

# in terminal run: tensorboard --logdir=logs/
# val_loss error is in callbacks, probably modelcheckpoint

In [12]:
#checking if model exist then load best
import glob
import re

def find_best_model(all_models, name=None):
    """Return the checkpoint path whose file name encodes the lowest loss.

    Checkpoint names are expected to end in ``<name>.<epoch>-<loss>.hdf5``
    (the template used for the ModelCheckpoint callback in this file).
    Entries that do not follow this pattern are ignored.

    Parameters
    ----------
    all_models : iterable of str
        Candidate checkpoint paths.
    name : str, optional
        Model-name prefix to match. Defaults to the module-level
        ``model_name``.

    Returns
    -------
    str
        The matching path with the smallest loss; the first one wins on ties.

    Raises
    ------
    ValueError
        If no entry matches the checkpoint naming pattern.
    """
    if name is None:
        name = model_name

    # Escape the prefix and the literal dots so they are not treated as
    # wildcards; anchor at the end so only real checkpoint names match.
    pattern = re.compile(re.escape(name) + r'\.(\d+)-(-?\d+(?:\.\d+)?)\.hdf5$')

    best_file, best_loss = None, None
    for candidate in all_models:
        match = pattern.search(candidate)
        if match is None:
            continue
        loss = float(match.group(2))
        # Track the path together with its loss, so files that did not match
        # cannot shift the selection onto the wrong entry.
        if best_loss is None or loss < best_loss:
            best_file, best_loss = candidate, loss

    if best_file is None:
        raise ValueError(
            "No checkpoint matching '{}.<epoch>-<loss>.hdf5' found".format(name))
    return best_file

# Look for .hdf5 files in the working directory whose name starts with the
# current model name.
all_models = glob.glob(model_name+'*.hdf5')

if all_models:
    # Resume from the file that find_best_model selects among them.
    best_model = find_best_model(all_models)
    print("Loading weights from {}".format(best_model))
    model.load_weights(best_model)


# Train, holding out 10% of the training data for validation. The checkpoint
# callback monitors val_loss and uses it in its file name, so a validation
# split is required here.
model.fit(X_train, Y_train, batch_size=MBATCH, epochs=EPOCHS, validation_split=0.1, shuffle=False, callbacks=callbacks_list)
# Without validation data, ModelCheckpoint fails with KeyError: 'val_loss'.
# See https://github.com/keras-team/keras/issues/6104
# Fix: always pass validation_split=<fraction> (or validation data) to fit().

Loading weights from THIS_model.001-0.699.hdf5
Train on 3605 samples, validate on 401 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1c1eac9eb8>

In [46]:
# Evaluate the trained model on the test set.
# model_name is passed as the output file stem: eval_on_test saves its plot
# to "<model_name>.pdf".
test_loss, test_r2 = eval_on_test(X_test, Y_test, model, model_name)




In [48]:
# Save test results
# test_loss is the list returned by model.evaluate: the loss comes first,
# followed by the metrics in the order they were passed to model.compile
# (coeff_determination, correlation_coefficient, 'mse').

d = {'coeff_determination' : [test_loss[1]],
     'correlation_coefficient' : [test_loss[2]],
     'loss' : [test_loss[0]],
     'mean_squared_error' : [test_loss[3]],
     'R2_scipy_stats' : [test_r2]}

test_df = pd.DataFrame(data=d)

# In script mode the output name is derived from the third command-line
# argument instead of the model name.
test_results_filename = model_name + "_test_results.csv"
if not is_interactive():
    test_results_filename = args[3] + "_testset"

test_df.to_csv(test_results_filename)