In [1]:
# V4 Updates
# fixed functions for metrics

In [2]:
def is_interactive():
    """Report whether the code is running in an interactive session.

    The decision is based on the ``__main__`` module: the function returns
    True when that module has no ``__file__`` attribute and False otherwise.
    """
    import __main__ as entry_module
    launched_from_file = hasattr(entry_module, '__file__')
    return not launched_from_file

## LIBRARIES
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import stats

# Select the matplotlib backend before pylab is imported: non-interactive
# (script) runs switch to the 'agg' backend, interactive sessions keep
# matplotlib's default backend.
import matplotlib
if not is_interactive():
    matplotlib.use('agg')
import pylab as plt
    
import keras.backend as K
from keras.models import Model, load_model
from keras.layers import Conv1D,MaxPooling1D,LSTM,BatchNormalization,Dropout,Input,Dense,Bidirectional,Activation,Flatten
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.backend import squeeze
from keras.callbacks import ModelCheckpoint, TensorBoard, CSVLogger
from keras.metrics import mean_squared_error

# Seed Python's built-in `random` module so its draws are repeatable.
# NOTE(review): only `random` is seeded here; no numpy or TensorFlow seed is
# set in this cell, so their random streams are not fixed by this call.
import random 
random.seed(123)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
#to display train and validation metrics on same tensorboard plot
class TrainValTensorBoard(TensorBoard):
    """TensorBoard callback that logs training and validation metrics to
    sibling directories so both curves can be shown on the same plot.

    Training metrics are written by the parent ``TensorBoard`` callback to
    ``<log_dir>/training``. Every log entry whose key starts with ``val_`` is
    written by a second writer to ``<log_dir>/validation`` under the key
    with that leading prefix removed, i.e. under the same tag as the
    corresponding training metric.
    """

    # Prefix Keras puts in front of validation entries in the `logs` dict.
    _VAL_PREFIX = 'val_'

    def __init__(self, log_dir='./tensorboard_logs', **kwargs):
        """Set up the two log directories.

        log_dir -- root directory; 'training' and 'validation'
                   sub-directories are derived from it.
        kwargs  -- forwarded unchanged to ``TensorBoard.__init__``.
        """
        # Make the original `TensorBoard` log to a subdirectory 'training'
        training_log_dir = os.path.join(log_dir, 'training')
        super(TrainValTensorBoard, self).__init__(training_log_dir, **kwargs)

        # Log the validation metrics to a separate subdirectory
        self.val_log_dir = os.path.join(log_dir, 'validation')

    def set_model(self, model):
        """Create the validation writer, then defer to the parent callback."""
        # Setup writer for validation metrics
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)
        super(TrainValTensorBoard, self).set_model(model)

    def on_epoch_end(self, epoch, logs=None):
        """Write validation metrics with the validation writer and pass the
        remaining entries of `logs` to ``TensorBoard.on_epoch_end``."""
        logs = logs or {}
        prefix = self._VAL_PREFIX
        for key, value in logs.items():
            if not key.startswith(prefix):
                continue
            summary = tf.Summary()
            summary_value = summary.value.add()
            # numpy scalars expose .item(); builtin numbers do not, so they
            # are converted with float() instead of raising AttributeError.
            if hasattr(value, 'item'):
                summary_value.simple_value = value.item()
            else:
                summary_value.simple_value = float(value)
            # Strip only the leading prefix. str.replace would also remove
            # 'val_' from the middle of a metric name (e.g. 'interval_loss').
            summary_value.tag = key[len(prefix):]
            self.val_writer.add_summary(summary, epoch)
        self.val_writer.flush()

        # Pass the remaining logs to `TensorBoard.on_epoch_end`
        train_logs = {k: v for k, v in logs.items() if not k.startswith(prefix)}
        super(TrainValTensorBoard, self).on_epoch_end(epoch, train_logs)

    def on_train_end(self, logs=None):
        """Finish the parent callback, then close the validation writer."""
        super(TrainValTensorBoard, self).on_train_end(logs)
        self.val_writer.close()

In [4]:
# define metrics and tests

# Keras backend implementations
def coef_det_k(y_true, y_pred):
    """Coefficient of determination (R^2) built from Keras backend ops.

    The argument order (y_true, y_pred) follows https://keras.io/backend/.
    Returns ``1 - SS_res / (SS_tot + K.epsilon())``; the epsilon keeps the
    division defined when SS_tot is zero.
    """
    residual = y_true - y_pred
    centered_truth = y_true - K.mean(y_true)
    residual_ss = K.sum(K.square(residual))
    total_ss = K.sum(K.square(centered_truth))
    return 1 - residual_ss / (total_ss + K.epsilon())

def corr_coef_k(y_true, y_pred):
    """Correlation coefficient between y_true and y_pred (Keras backend ops).

    Both tensors are centred on their own mean; the sum of products of the
    centred values is divided by the square root of the product of their
    sums of squares (plus ``K.epsilon()``), and the result is limited to the
    range [-1, 1].
    """
    true_centered = y_true - K.mean(y_true)
    pred_centered = y_pred - K.mean(y_pred)

    numerator = K.sum(tf.multiply(true_centered, pred_centered))

    true_ss = K.sum(K.square(true_centered))
    pred_ss = K.sum(K.square(pred_centered))
    denominator = K.sqrt(tf.multiply(true_ss, pred_ss))

    ratio = numerator / (denominator + K.epsilon())
    # Cap at 1.0 first, then floor at -1.0.
    capped = K.minimum(ratio, 1.0)
    return K.maximum(capped, -1.0)

# numpy implementations
def mse_np(y_true, y_pred):
    """Mean squared error between y_true and y_pred (numpy)."""
    residual = y_true - y_pred
    squared_error = np.square(residual)
    return np.mean(squared_error)

def coef_det_np(y_true, y_pred):
    """Coefficient of determination (R^2) computed with numpy.

    Returns ``1 - SS_res / (SS_tot + 1e-7)``; the small constant keeps the
    division defined when y_true has zero variance.
    """
    residual = y_true - y_pred
    centered_truth = y_true - np.mean(y_true)
    residual_ss = np.sum(np.square(residual))
    total_ss = np.sum(np.square(centered_truth))
    return 1 - residual_ss / (total_ss + 1e-7)

def corr_coef_np(y_true, y_pred):
    """Pearson correlation between the first columns of y_pred and y_true.

    Both arguments are indexed as ``[:, 0]``, so they must be 2-D.
    """
    predicted_column = y_pred[:, 0]
    true_column = y_true[:, 0]
    correlation_matrix = np.corrcoef(predicted_column, true_column)
    # Off-diagonal entry is the correlation between the two series.
    return correlation_matrix[0, 1]

# evaluations on test data 
def eval_on_test(X_test, Y_test, model, fname='', return_np=False):
    """Evaluate `model` on test data, plot predictions against targets with a
    least-squares line, and return the collected metrics.

    Parameters
    ----------
    X_test : model inputs; ``X_test[0].shape[0]`` is used as the evaluation
        batch size (assumes X_test is a list of input arrays - TODO confirm).
    Y_test : targets, indexed as ``Y_test[:, 0]``.
    model : object providing ``evaluate`` and ``predict``.
    fname : str, optional
        When non-empty the figure is saved to ``fname + '.pdf'``.
    return_np : bool, optional
        When True the numpy metrics are appended to the result.

    Returns
    -------
    list
        The values returned by ``model.evaluate`` followed by the r-value of
        the linear regression of Y_test on the predictions and, if
        `return_np` is set, ``mse_np``, ``coef_det_np`` and ``corr_coef_np``.
        ``model.evaluate`` must return a list with at least two entries,
        because entry 1 is shown in the plot title.
    """
    loss = model.evaluate(X_test, Y_test, X_test[0].shape[0])
    Y_pred = model.predict(X_test)

    x = Y_pred[:,0]
    y = Y_test[:,0]
    slope, intercept, rvalue, pvalue, stderr = stats.linregress(x,y)

    # Start a fresh figure so the scatter is not drawn on top of whatever
    # figure happens to be current (e.g. a previous call in a script run).
    plt.figure()
    plt.plot(x, y, 'o', label='original data')
    plt.plot(x, intercept + slope*x, 'r', label='fitted line')
    plt.legend()
    plt.title('R2 = {}'.format(loss[1]))
    plt.xlabel('Y_pred')
    plt.ylabel('Y_true')
    # Truthiness check also accepts fname=None (len(None) would raise).
    if fname:
        plt.savefig(fname+'.pdf', bbox_inches='tight')

    loss.append(rvalue)

    if return_np:
        loss.append(mse_np(Y_test, Y_pred))
        loss.append(coef_det_np(Y_test, Y_pred))
        loss.append(corr_coef_np(Y_test, Y_pred))

    return loss


In [5]:
import os
# In interactive mode, make the parent directory the working directory.
# The guard flag makes this cell safe to re-run: without it every additional
# run would move one more level up the directory tree.
if is_interactive() and not globals().get('_CWD_MOVED_TO_PARENT', False):
    os.chdir("../")
    _CWD_MOVED_TO_PARENT = True
    

In [6]:
import configparser
import sys
from os.path import basename

#change only THESE
model_path = "./models/THIS_CNN_NN_vals_input.py"
data_path = "./data/THIS_data.npz"


# Default output locations, all derived from the model file name.
weights_dir = "./weights"
results_dir = "./results"
model_name = os.path.splitext(basename(model_path))[0]
weight_path = os.path.join(weights_dir, model_name)
csv_logger_path = os.path.join(results_dir, model_name + "_val_results.csv")
test_results_path = os.path.join(results_dir, model_name + "_test_results.csv")

def _ensure_parent_dirs(paths):
    """Create the parent directory of every path in `paths` if it is missing."""
    for filename in paths:
        parent = os.path.dirname(filename)
        # A bare file name has an empty dirname and os.makedirs('') raises.
        if parent:
            os.makedirs(parent, exist_ok=True)

_ensure_parent_dirs([weight_path, csv_logger_path, test_results_path])

# Script mode: every path comes from the command line instead.
#   argv[1] model file, argv[2] config file, argv[3] weight path prefix,
#   argv[4] CSV log path (also the stem of the test results), argv[5] data file
args = (sys.argv)
config_path = ""
if not is_interactive():
    if len(args) < 6:
        sys.exit("usage: {} MODEL_PATH CONFIG_PATH WEIGHT_PATH CSV_LOGGER_PATH DATA_PATH"
                 .format(args[0] if args else "script"))
    model_path = args[1]
    config_path = args[2]
    weight_path = args[3]
    csv_logger_path = args[4]
    test_results_path = args[4] + "_testset"
    data_path = args[5]
    # The directories created above belong to the defaults; the paths given
    # on the command line need their own parent directories as well.
    _ensure_parent_dirs([weight_path, csv_logger_path, test_results_path])

# Checkpoint file name template; the placeholders are filled in per epoch.
suffix = "{epoch:03d}-{val_loss:.3f}.hdf5"
weight_model_path = "{}.{}".format(weight_path, suffix)


#hyperparameters (interactive defaults; overridden from the config file in script mode)
DROPOUT = 0   # dropout
ALPHA = 0.01 # learnrate
BETA = 0.01  # passed to Adam as `decay`
EPOCHS = 2 # epochs
MBATCH = 100 # batch size
SHUFFLE = True  # passed to model.fit as `shuffle`

#loading from config
if not is_interactive():
    config_file = args[2]
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it parsed; an empty list means nothing was loaded, which
    # would otherwise only surface later as a NoSectionError.
    if not config.read(config_file):
        raise OSError("Could not read config file: {}".format(config_file))
    ALPHA = config.getfloat('main', 'alpha')
    DROPOUT = config.getfloat('main', 'dropout')
    EPOCHS = config.getint('main', 'epochs')
    MBATCH = config.getint('main', 'mbatch')
    BETA = config.getfloat('main', 'beta')
    SHUFFLE = config.getboolean('main', 'SHUFFLE')

#loading model definitions
# NOTE(review): exec() runs the entire file at `model_path` in this
# namespace - only point it at trusted model files.
# The context manager closes the file handle, which the bare
# open(...).read() never did.
with open(model_path, 'r') as model_fh:
    model_file = model_fh.read()
exec(model_file)

In [7]:
#loading data
# `load_data` is not defined in the visible cells - presumably it is provided
# by the model file exec'd in the configuration cell (TODO confirm).
X_train, X_test, Y_train, Y_test = load_data(data_path)


In [8]:
# Build the network. The input shape is taken from dimensions 1 and 2 of the
# first element of X_train (assumes X_train is a list of input arrays whose
# axis 0 is the sample axis - TODO confirm).
# `POC_model` is not defined in the visible cells - presumably it comes from
# the exec'd model file (TODO confirm).
input_shape = X_train[0].shape[1:3]
model = POC_model(input_shape, DROPOUT)
model.summary()


Loading model from disk..
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 650, 4)       0                                            
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 641, 128)     5248        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 641, 128)     512         conv1d_6[0][0]                   
__________________________________________________________________________________________________
dropout_7 (Dropout)             (None, 641, 128)     0           batch_normalization_7[0][0]      
___________________________________________________________________________________

In [9]:
# Adam optimizer: ALPHA is the learning rate and BETA is passed as `decay`
# (it is not one of Adam's beta_1/beta_2 coefficients, which are fixed here).
opt = Adam(lr=ALPHA, beta_1=0.9, beta_2=0.999, decay=BETA)
# Train on mean squared error; additionally report the backend R^2 and
# correlation metrics defined earlier in this notebook.
model.compile(loss='mse', optimizer=opt, metrics=[coef_det_k, corr_coef_k])


In [10]:
# Callbacks used during training.

# Checkpointing - for now a checkpoint is written every epoch.
# https://machinelearningmastery.com/check-point-deep-learning-models-keras/
# https://keras.io/callbacks/
checkpoint_options = dict(monitor='val_loss', verbose=0, save_best_only=False,
                          save_weights_only=False, mode='auto', period=1)
checkpoint = ModelCheckpoint(weight_model_path, **checkpoint_options)

# TensorBoard - one log directory per (data file, CSV log file) pair.
# http://fizzylogic.nl/2017/05/08/monitor-progress-of-your-keras-based-neural-network-using-tensorboard/
tensorboard_run_name = '_'.join([basename(data_path), basename(csv_logger_path)])
tensorboard = TrainValTensorBoard(log_dir='./tensorboard_logs/' + tensorboard_run_name,
                                  write_graph=False)

# Per-epoch metrics appended to a CSV file.
csv = CSVLogger(csv_logger_path, separator=",", append=True)

callbacks_list = [checkpoint, tensorboard, csv]

# in terminal run: tensorboard --logdir=./tensorboard_logs
# val_loss error is in callbacks, probably modelcheckpoint

In [11]:
#checking if model exist then load best
import glob
import re

def find_best_model(all_models, prefix=None):
    """Return the checkpoint path with the lowest validation loss.

    Checkpoint names are expected to look like
    ``<prefix>.<epoch>-<val_loss>.hdf5``.

    Parameters
    ----------
    all_models : iterable of str
        Candidate checkpoint paths. Entries that do not follow the naming
        scheme, or whose loss cannot be parsed, are ignored.
    prefix : str, optional
        Path prefix of the checkpoints. Defaults to the module-level
        `weight_path`.

    Returns
    -------
    str
        The element of `all_models` with the smallest loss (the first one on
        ties). A NaN loss is ranked last.

    Raises
    ------
    ValueError
        If no entry of `all_models` matches the naming scheme.
    """
    if prefix is None:
        prefix = weight_path
    # Escape the prefix so characters such as '.' or '+' in the path are
    # matched literally, and normalise both sides so './weights/x' and
    # 'weights/x' compare equal.
    pattern = re.compile(re.escape(os.path.normpath(prefix)) + r'\.(\d+)-(.*)\.hdf5')

    best_path = None
    best_loss = None
    for candidate in all_models:
        match = pattern.search(os.path.normpath(candidate))
        if match is None:
            continue
        try:
            loss = float(match.group(2))
        except ValueError:
            continue
        if np.isnan(loss):
            loss = float('inf')
        # The path is tracked together with its loss; indexing `all_models`
        # with a position in a separately filtered loss list would pick the
        # wrong file whenever a non-matching entry had been skipped.
        if best_loss is None or loss < best_loss:
            best_path, best_loss = candidate, loss

    if best_path is None:
        raise ValueError("no checkpoint matching '{}.<epoch>-<loss>.hdf5' found".format(prefix))
    return best_path

# Collect existing checkpoints that belong to this model:
# files named <basename(weight_path)>.<epoch>-<loss>.hdf5 next to weight_path.
weights_parent_dir = os.path.dirname(weight_path)
# Raw string plus re.escape: the file name is matched literally and the
# pattern no longer relies on the invalid '\.' escape of a normal string.
checkpoint_name_re = re.compile(re.escape(os.path.basename(weight_path)) + r'\.(.*)-(.*)\.hdf5')

# An empty dirname means "current directory"; a directory that does not exist
# yet simply means there are no checkpoints to resume from.
listing_dir = weights_parent_dir or '.'
if os.path.isdir(listing_dir):
    all_models = [os.path.join(weights_parent_dir, f)
                  for f in os.listdir(listing_dir)
                  if checkpoint_name_re.match(f)]
else:
    all_models = []


# Resume from the checkpoint with the lowest recorded validation loss, if any.
if all_models:
    best_model = find_best_model(all_models)
    print("Loading weights from {}".format(best_model))
    model.load_weights(best_model)


Loading weights from ./weights/THIS_CNN_NN_vals_input.002-0.456.hdf5


In [12]:
# Train the model.
# A validation split is required here: the ModelCheckpoint callback monitors
# and formats 'val_loss', and without validation data Keras raises
# KeyError: 'val_loss' (see https://github.com/keras-team/keras/issues/6104).
# 10% of the training data is held out for validation.

model.fit(X_train, Y_train, batch_size=MBATCH, epochs=EPOCHS, validation_split=0.1, shuffle=SHUFFLE, callbacks=callbacks_list)

Train on 3605 samples, validate on 401 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1a18e961d0>

In [13]:
# Evaluate the trained model on the test set.
# `test_results_path` is passed as `fname`, so the scatter plot is saved to
# test_results_path + '.pdf'; `test_loss` holds the returned metric list.
test_loss = eval_on_test(X_test, Y_test, model, fname=test_results_path)




In [14]:
# Save test results
# One-row table: column i holds test_loss[i], in the order listed below.
metric_columns = ('MSE', 'coef_determination', 'corr_coef', 'corr_coef_plot')
d = {column: [test_loss[position]] for position, column in enumerate(metric_columns)}

test_df = pd.DataFrame(data=d)
test_df.to_csv(test_results_path, index=False)


In [15]:
# NOTE(review): `fname` is not read again in the visible cells (the plot in
# the next cell is saved under the hard-coded name 'test') - possibly a leftover.
fname = test_results_path

In [16]:
# Stand-alone repeat of the evaluation plot from `eval_on_test`; the figure is
# saved as 'test.pdf'.
loss = model.evaluate(X_test, Y_test, X_test[0].shape[0])
Y_pred = model.predict(X_test)

x = Y_pred[:,0]
y = Y_test[:,0]
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x,y)

# Start a new figure: in a script run the figure created by the earlier
# eval_on_test call is still current, and plotting onto it would duplicate
# the points, the fitted line and the legend entries in 'test.pdf'.
plt.figure()
plt.plot(x, y, 'o', label='original data')
plt.plot(x, intercept + slope*x, 'r', label='fitted line')
plt.legend()
plt.title('R2 = {}'.format(loss[1]))
plt.xlabel('Y_pred')
plt.ylabel('Y_true')
plt.savefig('test'+'.pdf', bbox_inches='tight')

