In [1]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial.distance as spdist

# Normalize the data.
from sklearn import preprocessing
from numpy.linalg import cholesky, det, lstsq
from scipy.optimize import minimize
from numpy.linalg import inv

def pass_arg(Xx, nsim, tr_size):
    
    # Compute the RMSE
    def root_mean_squared_error(y_true, y_pred):
        return np.sqrt(np.mean((y_pred-y_true)**2))

    print("tr_Size:",tr_size)
    # Making sure final porosity is less than initial
    def poros(poroi, porof):
        porofn = -porof*(porof<0)
        porofp = porof*(porof>=poroi) - poroi*(porof>=poroi)
        return porofp+porofn


    # Load labeled data
    data = np.loadtxt('../data/labeled_data.dat')
    x_labeled = data[:, :2].astype(np.float64) # -2 because we do not need porosity predictions
    y_labeled = data[:, -2:-1].astype(np.float64) # dimensionless bond length and porosity measurements

    # normalize dataset with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
    x_labeled = scaler.fit_transform(x_labeled)
    # y_labeled = scaler.fit_transform(y_labeled)

    tr_size = int(tr_size)

    # train and test data
    trainX, trainY = x_labeled[:tr_size,:], y_labeled[:tr_size]
    testX, testY = x_labeled[30:,:], y_labeled[30:]


    # trainY = np.transpose(trainY)
    # testY = np.transpose(testY)

    data_phyloss = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
    x_unlabeled = data_phyloss[:, :]

    # initial porosity
    initporo = x_unlabeled[:, -1]

    x_unlabeled1 = x_unlabeled[:1303, :2]
    x_unlabeled2 = x_unlabeled[-6:, :2]
    x_unlabeled = np.vstack((x_unlabeled1,x_unlabeled2))

    x_unlabeled = scaler.fit_transform(x_unlabeled)
    init_poro1 = initporo[:1303]
    init_poro2 = initporo[-6:]
    init_poro = np.hstack((init_poro1,init_poro2))
    
    
    def covSEard(hyp=None, x=None, z=None):
        ''' Squared Exponential covariance function with Automatic Relevance Detemination
         (ARD) distance measure. The covariance function is parameterized as:

         k(x^p,x^q) = sf2 * exp(-(x^p - x^q)' * inv(P) * (x^p - x^q)/2)

         where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where
         D is the dimension of the input space and sf2 is the signal variance.

         The hyperparameters are:

         hyp = [ log(ell_1)
                 log(ell_2)
                 ...
                 log(ell_D)
                 log(sqrt(sf2)) ]
        '''

        [n, D] = x.shape
        ell = 1/np.array(hyp[0:D])        # characteristic length scale
        
        
        sf2 = np.array(hyp[D])**2         # signal variance
        tmp = np.dot(np.diag(ell),x.T).T
        A = spdist.cdist(np.dot(np.diag(ell),x.T).T, np.dot(np.diag(ell),z.T).T, 'sqeuclidean') # cross covariances
        A = sf2*np.exp(-0.5*A)  

        return A


    def posterior_predictive(X_s, X_train, Y_train, l1=.1, l2=.1, sigma_f=.1, sigma_y=1e-5):
        '''  
        Computes the suffifient statistics of the GP posterior predictive distribution 
        from m training data X_train and Y_train and n new inputs X_s.

        Args:
            X_s: New input locations (n x d).
            X_train: Training locations (m x d).
            Y_train: Training targets (m x 1).
            l: Kernel length parameter.
            sigma_f: Kernel vertical variation parameter.
            sigma_y: Noise parameter.

        Returns:
            Posterior mean vector (n x d) and covariance matrix (n x n).
        '''


        K = covSEard(hyp=[l1,l2,sigma_f], x=X_train, z=X_train) + sigma_y**2 * np.eye(len(X_train))
        K_s = covSEard(hyp=[l1,l2,sigma_f], x=X_train, z=X_s)
        K_ss = covSEard(hyp=[l1,l2,sigma_f], x=X_s, z=X_s)  + 1e-8 * np.eye(len(X_s))
#         K_inv = inv(K)
        K_inv = np.linalg.pinv(K)
    
        # Equation (4)
        mu_s = K_s.T.dot(K_inv).dot(Y_train)

        # Equation (5)
        cov_s = K_ss - K_s.T.dot(K_inv).dot(K_s)
        
        return mu_s, cov_s


    def nll_fn(X_train, Y_train, x_unlabeled, init_poro, naive=False):
        '''
        Returns a function that computes the negative log marginal
        likelihood for training data X_train and Y_train and given 
        noise level.

        Args:
            X_train: training locations (m x d).
            Y_train: training targets (m x 1).
            noise: known noise level of Y_train.
            naive: if True use a naive implementation of Eq. (7), if 
                   False use a numerically more stable implementation. 

        Returns:
            Minimization objective.
        '''

        def nll_stable(theta):
            # Numerically more stable implementation of Eq. (7) as described
            # in http://www.gaussianprocess.org/gpml/chapters/RW2.pdf, Section
            # 2.2, Algorithm 2.1.
            K = covSEard(hyp=[theta[0],theta[1],theta[2]], x=X_train, z=X_train) + \
                theta[3]**2 * np.eye(len(X_train))
            
            
            K += 1e-6 * np.eye(*K.shape)
            L = cholesky(K)
        

            mu_un, _ = posterior_predictive(x_unlabeled, X_train, Y_train, l1=theta[0], l2=theta[1], sigma_f=theta[2], sigma_y=theta[3])
            phyloss_poro = np.mean(poros(init_poro, mu_un))

            log_loss = np.sum(np.log(np.diagonal(L))) + \
                   0.5 * Y_train.T.dot(lstsq(L.T, lstsq(L, Y_train)[0])[0]) + \
                   0.5 * len(X_train) * np.log(2*np.pi)
        
            # print(500000*phyloss_poro,log_loss, theta)
            return 500000*phyloss_poro + log_loss

        if naive:
            return nll_naive
        else:
            return nll_stable

    
    # Optimization
    res = minimize(nll_fn(trainX, trainY, x_unlabeled, init_poro), x0 = [.1, .1, .1, 1e-3], 
                   bounds=((1e-5, None), (1e-5, None), (1e-5, None),(1e-7, None)),
                    method='L-BFGS-B')
    
#     print(f'After parameter optimization: l1={res.x[0]:.5f} l2={res.x[1]:.5f} sigma_f={res.x[2]:.5f}')
#     print(np.exp(res.x[0]),np.exp(res.x[1]), np.exp(res.x[2]))
    mu_s, cov_s = posterior_predictive(testX, trainX, trainY, *res.x)
    
    RMSE = []
    for ii in range(int(nsim)):
        samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 1)
        RMSE.append(root_mean_squared_error(testY, samples))
        
        print("RMSE:", root_mean_squared_error(testY, samples))


    RMSE = []
    for ii in range(int(nsim)):
        samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 1)
        RMSE.append(root_mean_squared_error(testY, samples))
        
        print("RMSE:", root_mean_squared_error(testY, samples))

#     return samples, RMSE
    return RMSE

In [2]:
mean_rmses=[]
std_rmses=[]
for ii in ([5,10,15,20,30]):
    test_rmse = pass_arg(1,50, ii)
    mean_rmse = np.mean(test_rmse)
    std_rmse = np.std(test_rmse)
    mean_rmses.append(mean_rmse)
    std_rmses.append(std_rmse)

tr_Size: 5




RMSE: 0.016412071690600622
RMSE: 0.015635332931227326
RMSE: 0.015435295685587058
RMSE: 0.020962162944846117
RMSE: 0.01919575237496423
RMSE: 0.021164275705192366
RMSE: 0.019146090992351053
RMSE: 0.015792679392809574
RMSE: 0.020684835696496453
RMSE: 0.015362988390504711
RMSE: 0.01611694926107186
RMSE: 0.02393729482671284
RMSE: 0.023816844795969207
RMSE: 0.02211793863655151
RMSE: 0.025892827856918803
RMSE: 0.015070830659711933
RMSE: 0.015291823183537535
RMSE: 0.01853706811929761
RMSE: 0.014462837421753983
RMSE: 0.015379637863317449
RMSE: 0.016387134939032308
RMSE: 0.018240720418088732
RMSE: 0.020258044928782845
RMSE: 0.019654957884162317
RMSE: 0.014540601935301234
RMSE: 0.02000458862756367
RMSE: 0.019163975873393465
RMSE: 0.020005295437418816
RMSE: 0.01717103482270023
RMSE: 0.01702137406019109
RMSE: 0.02388169292551761
RMSE: 0.023657032757602794
RMSE: 0.017896558938459324
RMSE: 0.023841020300320315
RMSE: 0.02305901975880227
RMSE: 0.014463664018344091
RMSE: 0.024222525244129444
RMSE: 0.014



RMSE: 0.022136781514408833
RMSE: 0.025826825582971485
RMSE: 0.041902775284299386
RMSE: 0.02510905888201999
RMSE: 0.02190792662589942
RMSE: 0.022858916677862363
RMSE: 0.025324857514797395
RMSE: 0.028180496872388484
RMSE: 0.023220280390706834
RMSE: 0.02056616309811056
RMSE: 0.028397146408566522
RMSE: 0.025178373919666215
RMSE: 0.02953853931661119
RMSE: 0.026513540833087886
RMSE: 0.024864051562616063
RMSE: 0.02514809509800361
RMSE: 0.03306768153064278
RMSE: 0.02620476318599672
RMSE: 0.028928776934042527
RMSE: 0.03863615775674751
RMSE: 0.029651623820534333
RMSE: 0.037149654165196506
RMSE: 0.02051504960119097
RMSE: 0.023476065859725115
RMSE: 0.02490523818405401
RMSE: 0.02581242782085304
RMSE: 0.032903884706565106
RMSE: 0.027899227411386535
RMSE: 0.025997268378496027
RMSE: 0.032677191837594655
RMSE: 0.027299000757914912
RMSE: 0.036434733240291464
RMSE: 0.020812972898982682
RMSE: 0.023448077883229473
RMSE: 0.02271406294470637
RMSE: 0.02216620439826946
RMSE: 0.02148758347490934
RMSE: 0.0310321



RMSE: 0.02535434592471112
RMSE: 0.0260586820990922
RMSE: 0.019639648478873138
RMSE: 0.01942118388471454
RMSE: 0.02242275080784647
RMSE: 0.019521798024463403
RMSE: 0.023641434170339544
RMSE: 0.02050653259058473
RMSE: 0.02520831614429359
RMSE: 0.026686643200485003
RMSE: 0.02500306682130456
RMSE: 0.022968560913452098
RMSE: 0.027908922902179226
RMSE: 0.027460418622189252
RMSE: 0.029234031466095392
RMSE: 0.029162005610610962
RMSE: 0.032492335711742225
RMSE: 0.02652106938180153
RMSE: 0.02451273072335821
RMSE: 0.020965827333195547
RMSE: 0.02074708658979768
RMSE: 0.030063870129090235
RMSE: 0.02083807383493902
RMSE: 0.025342995119208192
RMSE: 0.025268133278438732
RMSE: 0.023991868228753797
RMSE: 0.02989219345298879
RMSE: 0.02340363400668046
RMSE: 0.029287040946715547
RMSE: 0.018018419037360744
RMSE: 0.024944357037120387
RMSE: 0.027065891085742064
RMSE: 0.02718108160237113
RMSE: 0.028856265831120737
RMSE: 0.03448776144074567
RMSE: 0.031386623158655196
RMSE: 0.026253077754595736
RMSE: 0.025180111



RMSE: 0.019311742410967902
RMSE: 0.021921628849234952
RMSE: 0.02783926933222161
RMSE: 0.021611622778368766
RMSE: 0.02304333078415535
RMSE: 0.01870202757054808
RMSE: 0.020998536440851413
RMSE: 0.020392046317860842
RMSE: 0.025742739799538297
RMSE: 0.020208380839829832
RMSE: 0.01965865082444535
RMSE: 0.02222731786547079
RMSE: 0.020190280873231277
RMSE: 0.018654693573851987
RMSE: 0.02094584269670194
RMSE: 0.021880608441937205
RMSE: 0.024942238597272685
RMSE: 0.018480600536875575
RMSE: 0.026072602323194537
RMSE: 0.019284689677401375
RMSE: 0.021592205096091912
RMSE: 0.024527725181328622
RMSE: 0.022532203417867432
RMSE: 0.023216221884951895
RMSE: 0.01806776085165412
RMSE: 0.02475151579176292
RMSE: 0.020081642481429195
RMSE: 0.021877055063058058
RMSE: 0.026483067891251478
RMSE: 0.023983360888491928
RMSE: 0.025426082319884757
RMSE: 0.02407291238921117
RMSE: 0.01917710030126644
RMSE: 0.018092522333443765
RMSE: 0.022112324807397624
RMSE: 0.01695972000351397
RMSE: 0.02088861370322523
RMSE: 0.02056



RMSE: 0.018538710387197902
RMSE: 0.017840475580751372
RMSE: 0.018126444116799933
RMSE: 0.019136335686455133
RMSE: 0.019830326202197508
RMSE: 0.019248231046417593
RMSE: 0.02026410177030685
RMSE: 0.01913838891654585
RMSE: 0.01968416281998671
RMSE: 0.02094719412058828
RMSE: 0.01808900942331259
RMSE: 0.01923549245371473
RMSE: 0.01802368206155001
RMSE: 0.02082540299058159
RMSE: 0.01908392235357878
RMSE: 0.018234002585016146
RMSE: 0.019900908365564535
RMSE: 0.021998723963374208
RMSE: 0.021514604416244944
RMSE: 0.021087444955822984
RMSE: 0.02176079307241127
RMSE: 0.019508700095652764
RMSE: 0.020396717952979315
RMSE: 0.01947425055025196
RMSE: 0.01919837068152196
RMSE: 0.019326318152424443
RMSE: 0.01971137869715115
RMSE: 0.017902778337879694
RMSE: 0.018817375035114978
RMSE: 0.01913757757529994
RMSE: 0.019493025359160664
RMSE: 0.0191685860903915
RMSE: 0.019425582552543265
RMSE: 0.019970155030736167
RMSE: 0.01941186900035874
RMSE: 0.018573661214905907
RMSE: 0.019440665271430034
RMSE: 0.0174465177

In [3]:
mean_rmses

[0.019680287151681092,
 0.026330479677938015,
 0.024241694357950084,
 0.021800031949894057,
 0.019818269248780895]

In [4]:
std_rmses

[0.004639653160830472,
 0.00406660048479454,
 0.0041254337913148725,
 0.0026375730760001674,
 0.0011028689857341442]

In [22]:
Xx = np.random.uniform(size=(3, 2))
ss, rmse = pass_arg(Xx, 100, 30)

tr_Size: 30




RMSE: 0.01887178119891527
RMSE: 0.022422125472662455
RMSE: 0.01709485051420006
RMSE: 0.01894643997926025
RMSE: 0.024926161122790655
RMSE: 0.02088521855032978
RMSE: 0.02123508813025256
RMSE: 0.017469613535328905
RMSE: 0.02082993830179254
RMSE: 0.019368945066724246
RMSE: 0.021151389805499684
RMSE: 0.016658314991575485
RMSE: 0.021130084199330715
RMSE: 0.01908215000508387
RMSE: 0.01939684132909302
RMSE: 0.020203666011849638
RMSE: 0.01970623097466492
RMSE: 0.0190532637895618
RMSE: 0.018216483685318384
RMSE: 0.01974181719310583
RMSE: 0.02034516659833612
RMSE: 0.018584352201264587
RMSE: 0.02271928636080908
RMSE: 0.017779981884677682
RMSE: 0.024036591140256997
RMSE: 0.019111905320128877
RMSE: 0.02280856459857227
RMSE: 0.017998203325819247
RMSE: 0.018034947546228416
RMSE: 0.019466857744585567
RMSE: 0.021510992271884966
RMSE: 0.01890676622351696
RMSE: 0.020457298453895458
RMSE: 0.01941486616834205
RMSE: 0.020304294711812124
RMSE: 0.019659313038087448
RMSE: 0.017278456134758792
RMSE: 0.0187872673

In [23]:
np.mean(rmse)

0.02002908291162017