In [5]:
from __future__ import print_function

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, Adadelta, Adagrad, Adam, Nadam, SGD
from keras.callbacks import EarlyStopping, TerminateOnNaN
from keras import backend as K
from keras.losses import mean_squared_error
from keras.models import load_model, Model
import tensorflow as tf

# Normalize the data.
from sklearn import preprocessing
from keras.regularizers import l1_l2

import random

def pass_arg(nsim, tr_size, dropoutrate):
    print("Tr_size:", tr_size)
    def fix_seeds(seed):
        random.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)
        session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
        sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
    #     K.set_session(sess)
        tf.compat.v1.keras.backend.set_session(sess)

    ss = 1
    fix_seeds(ss)

    # MC dropout
    class MCDropout(Dropout):
        def call(self, inputs, training=None):
            return super(MCDropout, self).call(inputs, training=True)

    # import pickle

    # def save_obj(obj, name):
    #     with open(name, 'wb') as f:
    #         pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

    
    # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred)
    def root_mean_squared_error(y_true, y_pred):
            return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) 

    # Making sure final porosity is less than initial
    def poros(poroi, porof):
        return K.relu(tf.negative(porof)) + K.relu(porof-poroi)

    def phy_loss_mean(params):
        # useful for cross-checking training
        loss1, lam1 = params
        def loss(y_true,y_pred):
            return lam1*K.mean(K.relu(loss1))
        return loss

    #function to calculate the combined loss = sum of rmse and phy based loss
    def combined_loss(params):
        loss1, lam1 = params
        def loss(y_true,y_pred):
            return mean_squared_error(y_true, y_pred) + lam1 * K.mean(K.relu(loss1))
        return loss

    def PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train, tr_size, lamda, iteration, n_nodes, n_layers, drop_frac, reg, samp):

    #     fix_seeds(ss)

        # Hyper-parameters of the training process
    #     batch_size = tr_size
        batch_size = 10
        num_epochs = 300
        val_frac = 0.25
        patience_val = 80

        # Initializing results filename
        exp_name = "DNN_pre_loss_" + pre_train + optimizer_name + '_trsize' + str(tr_size) + '_lamda' + str(lamda) + '_iter' + str(iteration)
        exp_name = exp_name.replace('.','pt')
        results_dir = '../results/'
        model_name = results_dir + exp_name + '_model.h5' # storing the trained model

        if reg==True and samp==25:
            results_name = results_dir + exp_name + '_results_25_regularizer.dat' # storing the results of the model
        elif reg==False and samp==25:
            results_name = results_dir + exp_name + '_results_25.dat' # storing the results of the model
        elif reg==True and samp==1519:
            results_name = results_dir + exp_name + '_results_1519_regularizer.dat' # storing the results of the model
        elif reg==False and samp==1519:
            results_name = results_dir + exp_name + '_results_1519.dat' # storing the results of the model

        # Load labeled data
        data = np.loadtxt('../data/labeled_data.dat')
        x_labeled = data[:, :2] # -2 because we do not need porosity predictions
        y_labeled = data[:, -2:-1] # dimensionless bond length and porosity measurements
        if samp==25:
            data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_25.dat')
            x_unlabeled = data[:, :]
        elif samp==1519:
            data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
            x_unlabeled = data[:, :]

        x_unlabeled1 = x_unlabeled[:1303, :]
        x_unlabeled2 = x_unlabeled[-6:, :]
        x_unlabeled = np.vstack((x_unlabeled1,x_unlabeled2))

        # initial porosity
        init_poro = x_unlabeled[:, -1]
        x_unlabeled = x_unlabeled[:, :2]

        # normalize dataset with MinMaxScaler
        scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
        x_labeled = scaler.fit_transform(x_labeled)
        x_unlabeled = scaler.fit_transform(x_unlabeled)
        # y_labeled = scaler.fit_transform(y_labeled)

    #     # initial porosity & physics outputs are removed
    #     x_unlabeled = x_unlabeled[:, :-3]

        # train and test data
        trainX, trainY = x_labeled[:tr_size,:], y_labeled[:tr_size]
    #     testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:]   
        testX, testY = x_labeled[30:,:], y_labeled[30:]

        if use_YPhy == 0:
            # Removing the last column from x_unlabeled (corresponding to Y_PHY)
            x_unlabeled = x_unlabeled[:,:-1]

        dependencies = {
         'root_mean_squared_error': root_mean_squared_error
            }

        # load the pre-trained model using non-calibrated physics-based model predictions (./data/unlabeled.dat)
        loaded_model = load_model(results_dir + pre_train, custom_objects=dependencies)

        # Creating the model
        model = Sequential()
        for layer in np.arange(n_layers):
            if layer == 0:
                model.add(Dense(n_nodes, activation='relu', input_shape=(np.shape(trainX)[1],)))
            else:
                if reg:
                    model.add(Dense(n_nodes, activation='relu', kernel_regularizer=l1_l2(l1=.001, l2=.001)))
                else:
                    model.add(Dense(n_nodes, activation='relu'))
            # model.add(Dropout(rate=drop_frac))
            model.add(MCDropout(rate=drop_frac))
        model.add(Dense(1, activation='linear'))

        # pass the weights to all layers but 1st input layer, whose dimensions are updated
        for new_layer, layer in zip(model.layers[1:], loaded_model.layers[1:]):
            new_layer.set_weights(layer.get_weights())

        # physics-based regularization
        uinp_sc = K.constant(value=x_unlabeled) # unlabeled input data
        lam1 = K.constant(value=lamda[0]) # regularization hyper-parameter
    
        predictions = model(uinp_sc) # model output at depth i

        phyloss2 = poros(init_poro, predictions) # physics loss 1
        totloss = combined_loss([phyloss2, lam1])
        phyloss = phy_loss_mean([phyloss2, lam1])


        model.compile(loss=totloss,
                      optimizer=optimizer_val,
                      metrics=[phyloss, root_mean_squared_error])

        early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1)

    #     print('Running...' + optimizer_name)
        history = model.fit(trainX, trainY,
                            batch_size=batch_size,
                            epochs=num_epochs,
                            verbose=0,
                            validation_split=val_frac, callbacks=[early_stopping, TerminateOnNaN()])
    
    
        test_score = model.evaluate(testX, testY, verbose=0)
        print(test_score)

        test_scores = []
        for i in range(int(nsim)):
#             print("simulation num:",i)
#             predictions = model.predict(testX)
#             samples.append(predictions)
            test_score = model.evaluate(testX, testY, verbose=0)
            test_scores.append(test_score[2])
        return np.array(test_scores)
    
    



    # Main Function
    if __name__ == '__main__':

        fix_seeds(1)

        # List of optimizers to choose from    
        optimizer_names = ['Adagrad', 'Adadelta', 'Adam', 'Nadam', 'RMSprop', 'SGD', 'NSGD']
        optimizer_vals = [Adagrad(clipnorm=1), Adadelta(clipnorm=1), Adam(clipnorm=1), Nadam(clipnorm=1), RMSprop(clipnorm=1), SGD(clipnorm=1.), SGD(clipnorm=1, nesterov=True)]

        # selecting the optimizer
        optimizer_num = 1
        optimizer_name = optimizer_names[optimizer_num]
        optimizer_val = optimizer_vals[optimizer_num]

        # Selecting Other Hyper-parameters
        drop_frac = dropoutrate # Fraction of nodes to be dropped out
        use_YPhy = 1 # Whether YPhy is used as another feature in the NN model or not
        n_layers = 2 # Number of hidden layers
        n_nodes = 5 # Number of nodes per hidden layer

        # pre-trained model
        pre_train = 'Poro_Pre-trainAdadelta_drop0_nL2_nN5_trsize1308_iter0.h5'

        #set lamda
        lamda = [0.01] # Physics-based regularization constant  

#         # Iterating over different training fractions and splitting indices for train-test splits
#         trsize_range = [4,6,8,10,20]

#         #default training size = 5000
#         tr_size = trsize_range[4]
        
        tr_size = int(tr_size)

        # use regularizer
        reg = True

        # sample size used
        samp = 1519
    #     samp = 25

        # total number of runs
        iter_range = np.arange(1)
        testrmse=[]
        # iterating through all possible params
        for iteration in iter_range:
#             results, result_file, pred, obs, rmse, obs_train = PGNN_train_test(optimizer_name, optimizer_val, drop_frac, use_YPhy, 
#                             iteration, n_layers, n_nodes, tr_size, lamda, reg, samp)
#             testrmse.append(rmse)
            pred = PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, 
                                               pre_train, tr_size, lamda, iteration, n_nodes, n_layers, drop_frac, reg, samp)
            

    return np.squeeze(pred)

In [6]:
mean_rmses=[]
std_rmses=[]
# for ii in ([.005,.01,.02,.05,.1,.15,.2,.25,.3,0.5]):
for ii in ([5,10,15,20,30]):
    test_rmse = pass_arg(50, ii, 0.05)
    mean_rmse = np.mean(test_rmse)
    std_rmse = np.std(test_rmse)
    mean_rmses.append(mean_rmse)
    std_rmses.append(std_rmse)

Tr_size: 5
[0.00465495977550745, 0.0, 0.018452517688274384]
Tr_size: 10
[0.0038051134906709194, 0.0, 0.012717374600470066]
Tr_size: 15
[0.0004660136764869094, 0.0, 0.015454704873263836]
Tr_size: 20
[0.00044848566176369786, 0.0, 0.015597406774759293]
Tr_size: 30
[0.0002660432073753327, 0.0, 0.012856529094278812]


In [7]:
mean_rmses

[0.014654044937342405,
 0.010849869307130576,
 0.014632775839418173,
 0.01477941695600748,
 0.013223492782562971]

In [8]:
std_rmses

[0.002223141053603035,
 0.001305800732591835,
 0.0006557760316852748,
 0.0006305779694075574,
 0.0009157213424093879]

In [10]:
import pickle

def save_obj(obj, name):
    with open(name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
save_obj(mean_rmses, "../mean_rmse_dnn_updloss_MC.dat")
save_obj(std_rmses, "../std_rmse_dnn_updloss_MC.dat")


In [2]:
pred = pass_arg(50, 20)

Tr_size: 20
[0.015291403979063034, 0.0011981830466538668, 0.08771183341741562]


In [3]:
np.std(pred,axis=0)**2

array([3.37335584e-03, 1.94856222e-03, 4.34233528e-03, 1.79976993e-03,
       1.79456628e-03, 2.80441530e-03, 1.06975134e-03, 1.08144968e-03,
       2.36601476e-03, 2.80755199e-03, 9.52134375e-04, 7.28175917e-04,
       8.28316202e-04, 1.34716392e-03, 7.11925852e-04, 1.69787777e-03,
       1.15746888e-03, 1.34285321e-04, 1.15999870e-03, 3.23958276e-03,
       8.84820300e-04, 9.76184907e-04, 2.75417347e-03, 1.36949599e-03,
       3.28332465e-03, 1.53482391e-03, 1.64989592e-03, 7.30333093e-04,
       5.57333988e-04, 7.09685683e-03, 1.49872701e-03, 1.56181736e-03,
       2.13612244e-03, 8.86846101e-04, 9.92977992e-04, 2.32536369e-03,
       1.08897465e-03, 1.44896924e-03, 7.44325982e-04, 2.09604646e-03,
       2.44449568e-03, 1.82411820e-03, 3.52882384e-03, 2.68475548e-03,
       2.96447333e-03, 1.25102268e-03, 1.98329519e-03, 3.01357242e-03,
       1.52132928e-03, 1.97036774e-03, 3.22001870e-04, 1.20475620e-03,
       1.21259701e-03, 1.74869748e-03, 2.93545797e-03, 5.46205556e-03,
      

In [4]:
mc_pred=np.mean(pred,axis=0)
mc_pred

array([0.0431564 , 0.04969775, 0.03855895, 0.0473083 , 0.04825364,
       0.04153984, 0.03876032, 0.02742871, 0.05394102, 0.03834876,
       0.05311058, 0.04025944, 0.04402751, 0.0490651 , 0.04529456,
       0.05169499, 0.05430209, 0.03668012, 0.05224204, 0.03989543,
       0.05179229, 0.05054558, 0.04952174, 0.05267722, 0.03513261,
       0.05475615, 0.0515241 , 0.03853419, 0.02725641, 0.05868765,
       0.05132379, 0.06161826, 0.044791  , 0.05282198, 0.05968352,
       0.036227  , 0.04257078, 0.04440696, 0.050751  , 0.05052431,
       0.03119921, 0.05248038, 0.05077071, 0.05365586, 0.04209185,
       0.04676313, 0.04421702, 0.04579834, 0.04491324, 0.05062904,
       0.01135702, 0.05096268, 0.0548835 , 0.04697408, 0.04883337,
       0.02582511, 0.03095761, 0.03505554, 0.05255293, 0.04615692,
       0.04236671, 0.04940849, 0.05350411, 0.05224761, 0.04388189,
       0.04533507, 0.0102862 , 0.04446835, 0.04741205, 0.05951528,
       0.05657636, 0.02142219, 0.03366445, 0.04343371, 0.05729

In [5]:
import pickle

def save_obj(obj, name):
    with open(name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
save_obj(mc_pred, "../pred_upd_loss_MC_Xx.dat")