In [1]:
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments and discards them."""
    return None


# Replace the library hook so warnings raised through warnings.warn are dropped for this session.
warnings.warn = warn

In [2]:
import autograd.numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import math
import scipy.stats as st

In [3]:
def obse_pred_plot(X_mea, X_pred):
    """Scatter predicted values against measured values on a new figure.

    Arguments
    X_mea   -- measured values (x axis); their min/max set the span of the reference line
    X_pred  -- predicted values (y axis), plotted point-for-point against X_mea

    Return:
    ax -- the matplotlib axes holding the plot
    """
    label_size = 15
    fig, ax = plt.subplots()

    # dashed identity line: a perfect model would put every marker on it
    identity = np.linspace(np.min(X_mea), np.max(X_mea), 100)
    ax.plot(identity, identity, 'r--', label='$x=y$')
    ax.plot(X_mea, X_pred, 'o')

    ax.set_title('Predictions vs Measurements', fontsize=label_size)
    ax.set_xlabel('Measured', fontsize=label_size)
    ax.set_ylabel('Predicted', fontsize=label_size)
    ax.tick_params(labelsize=label_size)
    fig.tight_layout()
    return ax

In [4]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from pymanopt.manifolds import Stiefel
from pymanopt.optimizers import ConjugateGradient, SteepestDescent, TrustRegions
from pymanopt import Problem
import pymanopt
from sklearn.model_selection import train_test_split
from scipy.linalg import cholesky, solve_triangular, cho_solve

class grf:
    """Gaussian ridge function: Gaussian-process regression on projected inputs ``X @ M``.

    The projection matrix ``M`` (d x m_ridge) is optimised on a Stiefel manifold with
    pymanopt, while the kernel hyper-parameters are fitted by scikit-learn's
    ``GaussianProcessRegressor``. ``grf_fit`` alternates between the two steps and
    ``__call__`` repeats that from several random starts.
    """

    def __init__(self, X_train, X_test, Y_train, Y_test, m_ridge, n_restart=20, tol=1e-2):
        """
        X_train    -- training inputs, 2-D with one row per sample
        X_test     -- held-out inputs used to score a projection (same number of columns)
        Y_train    -- training outputs (the notebook passes 1-D arrays)
        Y_test     -- held-out outputs
        m_ridge    -- ridge function input dimension
        n_restart  -- number of times to restart fitting and pick the model with the lowest objective function value
        tol        -- relative change of the cost below which the alternating iteration stops
        """
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = Y_train
        self.y_test = Y_test
        self.n_restart = n_restart
        self.tol = tol
        dim = X_train.shape[1] # original dimension
        self.manifold = Stiefel(dim, m_ridge)
        # initialize projection matrix M (placeholder; grf_fit draws an orthonormal start)
        self.M = np.random.rand(dim, m_ridge)
        # initialize covariance kernel
        self.kernel = 1.0 * RBF(length_scale=[1 for _ in range(m_ridge)]) + WhiteKernel(noise_level=1.0)

    def create_cost(self):
        """Build the pymanopt cost function for the current kernel.

        The returned ``cost(M)`` is ``0.5 * ||y_test - g_test||^2 / N_test`` where ``g_test``
        is the GP posterior mean at the projected test inputs. The kernel is written out
        with ``np`` operations (instead of calling ``self.kernel``) so that autograd can
        differentiate it with respect to ``M``.
        """
        @pymanopt.function.autograd(self.manifold)
        def cost(M):
            U_train = self.X_train @ M
            U_test = self.X_test @ M

            N_train = self.X_train.shape[0]
            N_test = self.X_test.shape[0]

            params = self.kernel.get_params()
            lengthscales = params['k1__k2__length_scale'] # rbf lengthscale
            sigma2_f = params['k1__k1__constant_value'] # rbf variance
            sigma2_n = params['k2__noise_level'] # noise variance
            L_inv = np.diag(1. / lengthscales.reshape(-1))

            U_train_tilde = U_train @ L_inv
            # covariance on training data
            G = sigma2_f * np.exp(-0.5*(np.sum(U_train_tilde**2,1).reshape(-1,1) + np.sum(U_train_tilde**2,1) -
                                       2 * np.dot(U_train_tilde, U_train_tilde.T)))

            G = G + sigma2_n * np.eye(N_train)
            b = np.linalg.solve(G, self.y_train)

            U_test_tilde = U_test @ L_inv
            # covariance of testing and training data K(U_test, U_train)
            K_test = sigma2_f * np.exp(-0.5*(np.sum(U_test_tilde**2,1).reshape(-1,1) + np.sum(U_train_tilde**2,1) -
                                       2 * np.dot(U_test_tilde, U_train_tilde.T)))
            g_test = K_test @ b
            r = 0.5 * np.linalg.norm(self.y_test - g_test) ** 2 / N_test
            return r

        return cost

    def pred(self, X_test_pred, return_var=False):
        """
        X_test_pred: test points (original input space) at which to evaluate the ridge function
        return_var: whether to also return the posterior variance at the test points

        Uses the stored projection ``self.M`` and kernel ``self.kernel``.

        Return:
        g_test: predictions of posterior mean using ridge function
        var_test: posterior variance at test points (only when return_var is True)
        """
        U_train = self.X_train @ self.M
        U_test = X_test_pred @ self.M

        G = self.kernel(U_train)
        L_ = cholesky(G, lower=True, check_finite=False) # lower triangular
        b = cho_solve((L_, True), self.y_train, check_finite=False)

        K_test = self.kernel(U_test, U_train) # covariance of testing and training data
        g_test = K_test @ b # predicted posterior mean

        if not return_var:
            return g_test

        v = solve_triangular(L_, K_test.T, lower=True, check_finite=False)
        var_test = self.kernel.diag(U_test).copy() - np.einsum("ij,ji->i", v.T, v)
        # Round-off can push a variance slightly below zero; callers take its square
        # root, which would produce NaN, so clamp at zero.
        var_test = np.maximum(var_test, 0.0)
        return g_test.squeeze(), var_test.squeeze()

    def set_XY(self, X_new, Y_new):
        """
        Update GPR model dataset by appending new observations to the training data
        """
        self.X_train = np.vstack((self.X_train, X_new))
        self.y_train = np.hstack((self.y_train, Y_new.squeeze()))

    @staticmethod
    def BIC(gpr):
        """
        Return BIC using log-likelihood of a fitted GaussianProcessRegressor:
        log-marginal-likelihood minus 0.5 * (n_features + 2) * log(n_samples)
        """
        return gpr.log_marginal_likelihood_value_ - 0.5 * (gpr.n_features_in_ + 2) * math.log(gpr.y_train_.shape[0])

    def grf_fit(self):
        """Run one alternating fit from a random orthonormal starting projection.

        Each pass fits the kernel hyper-parameters on the projected training data and then
        optimises the projection on the manifold with that kernel held fixed. Iteration stops
        once the relative change of the cost between passes is at most ``self.tol``.

        Return:
        (M, gpr, r, bic, n_iter, E_var) where ``M`` is the projection the returned ``gpr``
        was fitted on, ``r`` the cost at that projection, ``bic`` the BIC of ``gpr``,
        ``n_iter`` the number of passes and ``E_var`` the mean predictive variance on the test set.
        """
        last_r = 1e10
        err = np.inf
        d, m = self.M.shape
        n_iter = 0

        # re-initialize projection matrix M with orthonormal columns (Q factor of a Gaussian matrix)
        V = np.random.randn(d, m)
        q = np.linalg.qr(V)[0]
        self.M = q.copy()

        while err > self.tol:
            self.M_pred = self.M.copy()
            n_iter += 1
            U_train = self.X_train @ self.M_pred

            # prior covariance
            ker = 1.0 * RBF(length_scale=[1.0 for _ in range(m)], length_scale_bounds=(1e-5, 1e5)) \
              + WhiteKernel(noise_level=1e-4, noise_level_bounds=(1e-5,1e2)) # noise_level: iid noise variance

            gpr = GaussianProcessRegressor(kernel=ker, n_restarts_optimizer=30, alpha=1e-10, normalize_y=False)
            # n_restarts_optimizer: number of optimizations for hyper-parameters
            # alpha: adding to diagonal of covariance matrix to prevent numerical issue during fitting
            gpr.fit(U_train, self.y_train)
            g_test_pred, std_test = gpr.predict(self.X_test@self.M_pred, return_std=True)
            # expectation of variance
            E_var = np.mean(std_test ** 2)

            self.kernel = gpr.kernel_ # posterior kernel

            my_cost = self.create_cost()
            problem = Problem(manifold=self.manifold, cost=my_cost)
            optimizer = TrustRegions(verbosity=0)
            M_new = optimizer.run(problem).point

            # The cost is evaluated at the projection gpr was fitted on (self.M still equals
            # self.M_pred here), so the returned (M_pred, gpr, r) describe the same model.
            r = my_cost(self.M)
            err = np.abs(last_r - r) / last_r
            last_r = r
            self.M = M_new.copy()
        bic = self.BIC(gpr)
        return self.M_pred, gpr, r, bic, n_iter, E_var

    def __call__(self):
        """Fit from ``n_restart`` random starts and keep the restart with the lowest cost.

        The winning projection and kernel are stored on ``self.M`` / ``self.kernel``.

        Return:
        (M_opt, gpr_opt, r_min, E_var_min) of the best restart

        Raises:
        ValueError   -- if ``n_restart`` is smaller than 1
        RuntimeError -- if no restart produced a cost below infinity (e.g. every cost was NaN)
        """
        if self.n_restart < 1:
            raise ValueError("n_restart must be at least 1")

        M_opt = None
        gpr_opt = None
        E_var_min = np.inf
        r_min = np.inf
        for i in range(self.n_restart):
            M, gpr, r, bic, n_iter, E_var = self.grf_fit()
            print(i, r, E_var)
            if r < r_min:
                M_opt = M
                gpr_opt = gpr
                r_min = r
                E_var_min = E_var

        if M_opt is None:
            raise RuntimeError("no restart produced a finite cost")

        self.M = M_opt
        self.kernel = gpr_opt.kernel_
        return M_opt, gpr_opt, r_min, E_var_min

## Testing linear ridge function
Paper section 4.1

In [None]:
# Synthetic linear ridge problem: Y depends on X only through the projection X @ Ureal
d = 10   # dimension of input
m = 2    # ridge subspace dimension
N = 100  # number of samples
X = 2 * np.random.rand(N, d) - 1 # x in [-1,1]

# ground-truth projection: orthonormal columns from the QR factorisation of a Gaussian matrix
Ureal = np.random.randn(d, m)
q = np.linalg.qr(Ureal)[0]
Ureal = q.copy() # orthogonal

# outputs are the sum of the projected coordinates
U_data = X @ Ureal
Y = np.sum(U_data, axis=1)

# half of the samples train the model, the other half score the projection
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state=20)

grf_test = grf(X_train, X_test, y_train, y_test, m, n_restart=10, tol=1e-1)
results_grf = grf_test()
M_final, gpr_final, r_final, E_var_final = results_grf
obse_pred_plot(y_test, grf_test.pred(X_test))

In [None]:
# sanity check: size of the held-out output array
y_test.shape

In [None]:
# grf.__call__ returns (M_opt, gpr_opt, r_min, E_var_min); the fitted projection and kernel
# are stored on grf_test itself, so pred() only needs the query points.
M_final, gpr_final, r_final, E_var_final = results_grf
obse_pred_plot(y_test, grf_test.pred(X_test))

In [None]:
# results_grf has four entries (M, gpr, r, E_var); the BIC is not among them,
# so compute it from the selected GaussianProcessRegressor.
bic = grf.BIC(results_grf[1])
r = results_grf[2]
print(f'cost={r}')
print(f'BIC = {bic}')

In [None]:
# verify sklearn predict method to calculate posterior mean:
# grf.pred (manual Cholesky solve) and gpr.predict on the projected inputs should agree,
# i.e. the markers should lie on the dashed identity line
M_final = results_grf[0]
gpr_final = results_grf[1]
obse_pred_plot(grf_test.pred(X_test), gpr_final.predict(X_test @ M_final))

In [None]:
# plot ridge function: held-out outputs against the two projected coordinates
ax = plt.figure().add_subplot(projection='3d')
# M_final is the projection returned by grf_test(); the name M_opt only exists inside grf.__call__
U_test_final = X_test @ M_final # using optimized M
ax.scatter(U_test_final[:,0], U_test_final[:,1], y_test, c=y_test)
# ax.invert_yaxis()
ax.set_xlabel('$m_1$')
ax.set_ylabel('$m_2$')
ax.set_zlabel('$f$')
plt.show()

### Turbomachinery case study

In [None]:
# load data (whitespace-delimited text files read from the working directory)
# NOTE(review): this overwrites the X_train/X_test/y_train/y_test of the linear ridge test above
X_train = np.loadtxt('bladeA_cs_training_inputs.dat')
X_test = np.loadtxt('bladeA_cs_validation_inputs.dat')
y_train = np.loadtxt('bladeA_cs_training_outputs.dat')
y_test = np.loadtxt('bladeA_cs_validation_outputs.dat')
# NOTE(review): the fit below is disabled and uses an older signature; the current
# constructor is grf(X_train, X_test, Y_train, Y_test, m_ridge, ...) and grf() returns 4 values
# grf_test = grf(X_train, y_train, 2, n_restart=10, tol=1e-3)
# results_grf = grf_test()
# M_final, gpr_final, r_final, bic_final, n_final = results_grf
# obse_pred_plot(grf_test.y_test, grf_test.pred(grf_test.X_test))

In [None]:
# plot ridge function: test outputs of grf_test against its two projected coordinates
# NOTE(review): the fit in the previous cell is commented out, so grf_test here is still
# whichever model was assigned last (the linear ridge test on a fresh run) — confirm intent
ax = plt.figure().add_subplot(projection='3d')
U_test_final = grf_test.X_test @ grf_test.M 
ax.scatter(U_test_final[:,0], U_test_final[:,1], grf_test.y_test, c=grf_test.y_test)
# ax.invert_yaxis()
ax.set_xlabel('$m_1$')
ax.set_ylabel('$m_2$')
ax.set_zlabel('$f$')
plt.show()

### Bayesian Optimization for chiller model calibration

In [None]:
from dymola.dymola_interface import DymolaInterface
# keep the name defined even if the constructor below raises, so the closing cell still works
dymola = None
dymola = DymolaInterface(showwindow=True)
# Raw string: "\J", "\M", "\D" and "\p" are not valid escape sequences, and Python warns about
# them in ordinary string literals. The resulting path text is unchanged.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
dymola.openModel(path=r"C:\Jiacheng Ma\Modelica libraries\DynamicVCC\DynamicVCC\package.mo",changeDirectory=False)

In [None]:
# shut down the Dymola session (if one is open) and mark it as closed
if dymola is not None:
    dymola.close()
    dymola = None

In [None]:
def L_HX(theta_in):
    """Simulate the chiller cycle model in Dymola and score it against the measurements.

    Arguments
    theta_in -- sequence of values assigned, in order, to the model inputs u[1], u[2], ...

    Return:
    cost      -- sum over the 7 outputs of the squared relative L2 error between y[i] and
                 y_mea[i] (first 10 samples omitted); None if the simulation failed
    outputVar -- trajectories read from dsres.mat, y[1..7] followed by y_mea[1..7];
                 None if the simulation failed

    Uses the notebook-level ``dymola`` interface object.
    """
    problem = "DynamicVCC.Examples.Chiller.Cycle"
    startTime = 2000
    stopTime = 3500
    outputInterval = 10
    method = "Dassl"
    tolerance = 0.0001
    initialNames = ['u[{}]'.format(i) for i in range(1,len(theta_in)+1)]
    initialValues = theta_in
    dymola.experimentSetupOutput(events=False)
    result, finalVar = dymola.simulateExtendedModel(problem=problem,
                                          startTime=startTime,
                                          stopTime=stopTime,
                                          outputInterval=outputInterval,
                                          method=method,
                                          tolerance=tolerance,
                                          initialNames=initialNames,
                                          initialValues=initialValues)
    if not result:
        # Report the failure and let the caller decide what to do. Calling exit() here
        # would tear down the whole Jupyter session over one bad parameter sample.
        print(theta_in)
        print("Simulation failed. Below is the translation log.")
        log = dymola.getLastErrorLog()
        print(log)
        return None, None

    Nrows = dymola.readTrajectorySize("dsres.mat")
    outputNames = ['y[{}]'.format(i) for i in range(1,8)] + ['y_mea[{}]'.format(i) for i in range(1,8)]
    outputVar = dymola.readTrajectory("dsres.mat", outputNames, Nrows)
    pred = np.array(outputVar[:7])
    Mea = np.array(outputVar[7:])
    # normalised error per output, omit some initialization points
    ner = np.linalg.norm(pred[:,10:] - Mea[:,10:], axis=1) / np.linalg.norm(Mea[:,10:], axis=1)
    W = np.eye(ner.shape[0])  # output weights (identity: every output counts equally)
    cost = np.dot(ner.T,W.dot(ner))
    return cost, outputVar

# Objective function to minimize
def J_calib(u, lb, ub):
    """Calibration objective (to be maximised): negative log of the simulation cost.

    u    -- Scaled HTC
    lb   -- HTC lower bound
    ub   -- HTC upper bound

    The scaled point is mapped to lb + u * (ub - lb) and rounded to one decimal before
    it is passed to L_HX.

    Return:
    -log(cost), or None when L_HX reports a failed simulation. The callers in this notebook
    already cope with that: the sampling loop stores it as NaN and filters it out, and
    BGOmaximize drops the candidate.
    """
    u_truescale = np.round(lb + u * (ub - lb),1)
    cost, _ = L_HX(list(u_truescale))
    # `is None` rather than truthiness: a cost of exactly 0.0 is a valid (perfect) result,
    # and exiting here would kill the Jupyter session.
    if cost is None:
        return None
    return -np.log(cost)

In [None]:
# Test dymola model with one hand-picked parameter vector (10 values, one per u[i])
theta_test = [5e4, 8e4, 5e4, 2e4, 131146, 8e4, 5e4, 2e4, 123886.44, 100]
# theta_test = [789625.0, 888625.0, 344125.0, 712787.5, 83605.6, 616375.0, 566875.0, 888625.0, 702787.5, 147888.0, 126.2]
cost, outputVar = L_HX(theta_test)

In [None]:
# quick look at the second simulated output y[2]
# (presumably discharge pressure, going by the labels in the transient-response plot — confirm)
fig, ax = plt.subplots()
ax.plot(outputVar[1])

Lower and upper bounds for calibration parameters

In [None]:
# earlier 11-parameter bounds, kept for reference
# lb = np.array([1e4, 1e4, 1e4, 1e3, 65573, 1e4, 1e4, 1e4, 1e3, 61943, 50]) # Lower bounds of input space
# ub = np.array([1e6, 1e6, 1e6, 1e6, 196719, 1e6, 1e6, 1e6, 1e6, 247770, 150]) # Upper bounds of input space
# chiller grey-box: 10 calibration parameters, lb[i] <-> ub[i] bound the same parameter
lb = np.array([1233.3, 1233.3, 1233.3, 1365.4, 65573, 8867.3, 8867.3, 6118.9, 61943, 50])
ub = np.array([123330, 123330, 123330, 136540, 196719, 886730, 886730, 611890, 247770, 150])

Use Latin Hypercube designs to generate random samples

In [None]:
from pyDOE import lhs

# Initial design: Latin hypercube samples of the scaled input space, one column per parameter
np.random.seed(123456) # repeatable
n_init = 300 # Number of data points

# Generate scaled samples of the input space
# X_normalize = np.random.rand(n_init, len(lb))
X_normalize = lhs(len(lb), n_init, 'c')

# Evaluate the calibration objective at every sample
Y = np.zeros(n_init)
for i, u_i in enumerate(X_normalize):
    Y[i] = J_calib(u_i, lb, ub)
    print(i+1, Y[i])

# Drop samples whose objective is NaN, keeping inputs and outputs aligned
keep = ~np.isnan(Y)
X_normalize = X_normalize[keep,:]
Y = Y[keep]

# Plot function values
fig, ax = plt.subplots()
ax.plot(Y, 'kx', markersize=10, markeredgewidth=2)
ax.set_xlabel('$n$')
ax.set_ylabel('$J(u)$')


In [None]:
# save data as comma-separated rows: first column is the objective Y, the rest are the scaled inputs
np.savetxt('YX_ChillerCycle.txt', np.hstack((Y[:,None], X_normalize)), delimiter=',')

Gaussian ridge function for calibration parameter space

In [5]:
# reload the saved samples: column 0 is the objective, the remaining columns are the scaled inputs
YX_data = np.loadtxt('YX_ChillerCycle.txt', delimiter=',')
Ydata = YX_data[:,0]
Xdata = YX_data[:,1:]

# split data
# X_normalize, X_test_normalize, Y, Y_test = train_test_split(Xdata, Ydata, test_size=0.3, random_state=10)
# X_normalize = Xdata
# Y = Ydata

# Xdata10 = np.delete(Xdata,5,axis=1)

# normalize training data
# NOTE(review): the output scaler is fitted on all samples before the train/test split
from sklearn import preprocessing
scaler_y = preprocessing.StandardScaler().fit(Ydata.reshape(-1,1))
Y_scaled = scaler_y.transform(Ydata.reshape(-1,1))

# inputs are used as loaded, without further scaling
X_scaled = Xdata
# scaler_X = preprocessing.StandardScaler().fit(X_normalize)
# X_scaled = scaler_X.transform(X_normalize)

# 50/50 split; y_train / y_test keep the (n, 1) column shape produced by the scaler
X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y_scaled, test_size=0.5, random_state=10)

In [7]:
# fit a 3-dimensional ridge model on the chiller data; [:,0] flattens the (n, 1) scaled outputs
# each printed row is: restart index, cost r, mean predictive variance on the test set
grf_HX = grf(X_train, X_test, y_train[:,0], y_test[:,0], 3, n_restart=20, tol=5e-2)
results_HX = grf_HX()
M_final, gpr_final, r_final, E_var_final = results_HX

0 0.14942944720721785 0.5545754809915254
1 0.079290091294924 0.3806922690144886
2 0.15820083101223834 0.5754040297619986
3 0.09022397654902108 0.4625586393917616
4 0.14167678333489694 0.5271341625491619
5 0.12861551234545188 0.5082286311041155
6 0.1597895929655612 0.568730217365808
7 0.45087955786360034 0.9023212318198148
8 0.2030521423502812 0.4639087369155697
9 0.13206502858339894 0.49984209779277383
10 0.14386159119756295 0.33340963733094264
11 0.12323428381287808 0.38563840714191705
12 0.16365537673638522 0.5510888105940076
13 0.1572269594432318 0.5776087907205357
14 0.1267067595337228 0.5010076644148534
15 0.15228769794456462 0.5174638696858549
16 0.15828105887853763 0.5582650635313449
17 0.15784045373354005 0.5688666857590431
18 0.1665393653933118 0.5588774766381163
19 0.1562246237041931 0.5682727080116606


In [8]:
# summary of the selected model: fitted kernel, projection matrix, cost and mean test variance
print(f'kernel[{M_final.shape[1]}]={grf_HX.kernel}')
print(f'M{M_final.shape[1]}={grf_HX.M}')
print(f'r={r_final}')
print(f'E_var={E_var_final}')

kernel[3]=2.07**2 * RBF(length_scale=[0.319, 0.53, 1.18]) + WhiteKernel(noise_level=0.305)
M3=[[ 0.30905016  0.19487734  0.34874473]
 [-0.14194462  0.01235287 -0.08324151]
 [ 0.2525642   0.16558666  0.18679346]
 [-0.58107838 -0.2976114  -0.00755912]
 [ 0.07008129  0.28041956  0.01400916]
 [-0.00673038 -0.13087691 -0.01437159]
 [ 0.38527326  0.26066977 -0.36987492]
 [-0.08893276 -0.12576332  0.2751395 ]
 [ 0.51332277 -0.79172864  0.14332404]
 [-0.11949768 -0.17472503 -0.44948924]
 [-0.20932699  0.09479787  0.6332493 ]]
r=0.079290091294924
E_var=0.3806922690144886


In [9]:
# predictions are mapped back to the original output scale before plotting;
# [:,None] restores the column shape the scaler expects
# performance on training data
obse_pred_plot(scaler_y.inverse_transform(y_train), scaler_y.inverse_transform(grf_HX.pred(X_train)[:,None]))
# performance on test data
obse_pred_plot(scaler_y.inverse_transform(y_test), scaler_y.inverse_transform(grf_HX.pred(X_test)[:,None]))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<AxesSubplot:title={'center':'Predictions vs Measurements'}, xlabel='Measured', ylabel='Predicted'>

In [None]:
# same training-data check, using sklearn's predict on the projected inputs
obse_pred_plot(scaler_y.inverse_transform(y_train), scaler_y.inverse_transform(gpr_final.predict(X_train@M_final)[:,None]))

Run over all possible dimensions to determine ridge space dimension

In [None]:
# fit one model per ridge dimension m = 1, 2, 3; entry k of each list belongs to m = k + 1
# NOTE: grf_HX, results_HX, M_final, gpr_final, r_final and E_var_final are overwritten on
# every pass and end up holding the m = 3 model
grf_all = []
M_all = []
gpr_all = []
r_all = []
E_var_all = []
for i in range(1, 4):
    grf_HX = grf(X_train, X_test, y_train[:,0], y_test[:,0], i, n_restart=20, tol=5e-2)
    results_HX = grf_HX()
    M_final, gpr_final, r_final, E_var_final = results_HX
    grf_all.append(grf_HX)
    M_all.append(M_final)
    gpr_all.append(gpr_final)
    r_all.append(r_final)
    print(f'm={i} Expectation of posterior variance on test data: {E_var_final}')
    print(f'r={r_final}')
    E_var_all.append(E_var_final)


In [None]:
# cost and mean test variance for every ridge dimension, in order m = 1, 2, 3
print(f'r={r_all}')
print(f'E_var={E_var_all}')

In [None]:
# fitted kernel of every model, labelled by ridge dimension
for i in range(len(grf_all)):
    print(f'kernel[{i+1}]={grf_all[i].kernel}')

# projection matrix of every model, labelled by ridge dimension
for i in range(len(grf_all)):
    print(f'M[{i+1}]={M_all[i]}')

Plot results for different ridge space dimension

In [None]:
# error and variance expectation for test data, one point per ridge dimension
fig, ax = plt.subplots(figsize=(9,6), dpi=100)
x = range(1, len(r_all)+1)
ln1 = ax.plot(x, r_all, '-s', markersize=8, label='Prediction error $r$')
ax.set_ylabel('$r$', fontsize=18, fontweight='bold')
ax.set_xlabel('Ridge subspace dimension', fontsize=15, fontweight='bold')
plt.xticks(x, fontsize=12, fontweight='bold')
plt.yticks(fontsize=12, fontweight='bold')
ax.grid()
# second y axis for the variance; raw strings because "\m" is not a valid escape sequence
ax1 = ax.twinx()
ax1.set_ylabel(r'$\mathbb{E}[V]$', fontsize=18, fontweight='bold')
ln2 = ax1.plot(x, E_var_all, 'r-o', markersize=8, label=r'Variance expectation $\mathbb{E}[V]$')
# one legend for the lines of both axes
lns = ln1+ln2
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='best', fontsize=14)
plt.yticks(fontsize=12, fontweight='bold')



In [None]:
# performance only on training data, for the m = 3 model
# (use grf_all[2] for both the model and its data, matching the cells that follow)
obse_pred_plot(scaler_y.inverse_transform(y_train), scaler_y.inverse_transform(grf_all[2].pred(grf_all[2].X_train)[:,None]))

In [None]:
# compare gpr.predict() method and grf.pred() method on the training inputs of the m = 3 model
# (both mapped back to the original output scale; agreement puts the markers on the identity line)
y_train_predict = scaler_y.inverse_transform(gpr_all[2].predict(grf_all[2].X_train @ grf_all[2].M)[:,None])
y_train_pred = scaler_y.inverse_transform(grf_all[2].pred(grf_all[2].X_train)[:,None])
obse_pred_plot(y_train_pred, y_train_predict)
# for using gpr.predict, input should be projected onto M_final, which is used for training

In [None]:
# performance only on testing data, for the m = 3 model, in the original output scale
obse_pred_plot(scaler_y.inverse_transform(y_test), scaler_y.inverse_transform(grf_all[2].pred(X_test)[:,None]))

In [None]:
# GP weight vector b = G^{-1} y for the m = 3 model.
# grf_bo was never defined in this notebook; grf_all[2] is the model later handed to the
# Bayesian optimisation — NOTE(review): confirm this is the intended model.
grf_bo = grf_all[2]
# use the model's own training inputs so G and y_train always have matching sizes
U_train = grf_bo.X_train @ grf_bo.M
G = grf_bo.kernel(U_train)
b = np.linalg.solve(G, grf_bo.y_train[:,None])

In [None]:
# plot ridge space (only for 2D): grf_all[1] is the m = 2 model
ax = plt.figure().add_subplot(projection='3d')
U_test = X_test @ grf_all[1].M # using optimized M
ax.scatter(U_test[:,0], U_test[:,1], y_test, c=y_test)
# ax.plot_surface(U_test[:,0], U_test[:,1], grf_HX.y_test)
# ax.invert_yaxis()
ax.set_xlabel('$u_1$')
ax.set_ylabel('$u_2$')
ax.set_zlabel('$y$')
plt.show()

In [None]:
# grf.__call__ does not return a BIC, so derive it from the selected regressor
bic_final = grf.BIC(gpr_final)
print(f'r={r_final}')
print(f'BIC={bic_final}')

In [None]:
# hyper-parameters of the highest-dimensional model in the sweep
# (grf_all holds three models, so index 3 does not exist; take the last entry instead)
kernel_final = grf_all[-1].kernel
print(kernel_final)
lengthscale_final = kernel_final.get_params()['k1__k2__length_scale'] # rbf lengthscale
sigma2_f_final = kernel_final.get_params()['k1__k1__constant_value'] # rbf variance
sigma2_n_final = kernel_final.get_params()['k2__noise_level'] # noise variance
print(lengthscale_final, sigma2_f_final, sigma2_n_final)

In [None]:
# select the m = 3 model (third entry of the sweep)
grf_test = grf_all[2]

### Bayesian optimization

In [None]:
def BGOmaximize(f, gpr, X_design, alpha, f_params=None, alpha_params=None, max_it=15, plot=False,
                y_scaler=None):
    """Optimize a function using Bayesian global optimization
    Arguments
    f              -- The function to optimize; returns None (or NaN) when an evaluation fails
    gpr            -- Surrogate model providing pred(X, return_var=True), y_train and set_XY(x, y)
    X_design       -- The set of candidate points to evaluate the function for identifying the optimal point
    alpha          -- Information acquisition function, called as alpha(mean, std, ymax, **alpha_params)
    f_params       -- Extra keyword arguments passed to f
    alpha_params   -- Extra parameters to the information acquisition function
    max_it         -- The maximum number of iterations
    plot           -- Whether or not to plot function evaluations v.s. iterations at max_it
    y_scaler       -- Object whose transform() maps raw outputs to the scale the model was trained on;
                      defaults to the notebook-level scaler_y

    Return:
    (gpr, af_all, x_all, y_all): the updated model, and for every successful evaluation the
    acquisition value, the evaluated point and the raw objective value
    """
    # fresh dicts per call instead of shared mutable defaults
    f_params = {} if f_params is None else f_params
    alpha_params = {} if alpha_params is None else alpha_params
    if y_scaler is None:
        y_scaler = scaler_y

    af_all = [] # Store values of acquisition function
    x_all = []
    y_all = []
    for count in range(max_it):
        # Using GPR model to get posterior mean and variance at given design points
        g, var = gpr.pred(X_design, return_var=True) # posterior mean and variance
        # Evaluate information acquisition function
        af_values = alpha(g, np.sqrt(var.squeeze()), gpr.y_train.max(), **alpha_params)
        # Find index of the next point to evaluate
        i_opt = np.argmax(af_values)
        # Evaluate the function and stack the new data point to observations
        x_new = X_design[i_opt,:] # original input space, not ridge function input space
        y_new = f(x_new, **f_params)

        # Explicit failure test: an objective of exactly 0.0 is a valid observation,
        # while NaN must not be added to the training data.
        failed = y_new is None or np.isnan(y_new).any()
        if failed:
            # drop the candidate so it is not proposed again
            X_design = np.delete(X_design, i_opt, axis=0)
            print(x_new, y_new)
        else:
            x_all.append(x_new)
            y_all.append(y_new)
            af_all.append(af_values[i_opt])
            # Update GPR with the observation expressed in the model's output scale
            y_new_scaled = y_scaler.transform(np.asarray(y_new, dtype=float).reshape(-1,1))
            gpr.set_XY(x_new, y_new_scaled)
            print(count+1, x_new, y_new_scaled.squeeze())

    if plot:
        fig, ax = plt.subplots()
        ax.plot(y_all, '-*', markersize=10, markeredgewidth=2)
        ax.set_xticks(range(1,max_it+1,5))
        ax.set_xlabel('Iterations')
        ax.set_ylabel('$f(x)$')

    return gpr, af_all, x_all, y_all

# maximum upper interval
def mui(m, sigma, ymax, psi=1.96):
    """Upper-interval acquisition: predictive mean plus ``psi`` standard deviations.

    Arguments
    m       -- The predictive mean at the test points.
    sigma   -- The predictive standard deviation at the test points.
    ymax    -- Unused; accepted so all acquisition functions share one call signature.
    psi     -- Multiplier applied to sigma.
    """
    half_width = psi * sigma
    return m + half_width

# Expected improvement acquisition function
def ExpectedImprovement(m, sigma, ymax):
    """Return the expected improvement.

    Arguments
    m       -- The predictive mean at the test points.
    sigma   -- The predictive standard deviation at the test points.
    ymax    -- The maximum observed value so far.

    Points with sigma <= 0 get an expected improvement of 0. Scalars and arrays are both
    accepted; the result is always a NumPy array.
    """
    m = np.asarray(m, dtype=float)
    sigma = np.asarray(sigma, dtype=float)
    positive = ~(sigma <= 0.)
    # Use a dummy sigma of 1 where sigma <= 0 so the division never hits zero;
    # those entries are overwritten with 0 below.
    safe_sigma = np.where(positive, sigma, 1.)
    u = (m - ymax) / safe_sigma
    ei = safe_sigma * (u * st.norm.cdf(u) + st.norm.pdf(u))
    return np.where(positive, ei, 0.)

In [None]:
# surrogate for the Bayesian optimisation: the m = 3 model of the sweep
grf_test = grf_all[2]

In [None]:
# candidate set: uniform random points in the scaled input space [0, 1]^len(lb)
# NOTE(review): no random seed is set here, so the candidate set differs between runs
n_design = int(1e6)
max_it=50 # number of iterations
X_design_normalize = np.random.rand(n_design, len(lb))
# mui is the acquisition function; lb/ub are forwarded to J_calib. grf_test is updated in place.
gpr_grf, af_all, x_all, y_all = BGOmaximize(J_calib, grf_test, X_design_normalize, mui, f_params={'lb':lb,'ub':ub}, max_it=max_it, plot=1)

In [None]:
# Plot BO iterations: raw objective value of every successful evaluation, numbered from 1
fig, ax = plt.subplots()
ax.plot(range(1,len(y_all)+1), y_all, '-*', linewidth=1.5, markersize=8)
ax.set_ylabel(r'$J(\theta)$', fontsize=18, fontweight='bold')
ax.set_xlabel('Iterations', fontsize=14, fontweight='bold')
plt.xticks(fontsize=12, fontweight='bold')
plt.yticks(fontsize=12, fontweight='bold')
ax.grid()

In [None]:
# raw objective values collected during the optimisation
print(y_all)

In [None]:
# The last evaluated point is not necessarily the best one: pick the evaluation with the
# largest objective value.
i_best = int(np.argmax(y_all))
x_optimal = x_all[i_best]
# Round to one decimal, as J_calib does, so these are exactly the parameters that were simulated
u_optimal = np.round(lb + x_optimal * (ub - lb),1)
print(u_optimal)

In [None]:
# re-run the simulation at the selected parameters to obtain its trajectories
cost, outputVar = L_HX(list(u_optimal))

In [None]:
# quick look at the first simulated output y[1]
# (presumably suction pressure, going by the labels in the transient-response plot — confirm)
fig, ax = plt.subplots()
ax.plot(outputVar[0])

### Plot chiller transient response

In [None]:
fig, ax = plt.subplots(4,1, figsize=(8,10))
n_skip = 10  # initialization samples left out of every trace
# Build the time axis from the trajectory length instead of a fixed sample count, so the
# plot works for any simulated time span. Samples are 10 s apart and the first plotted one
# is at t = 2100 s (start time 2000 s plus the skipped samples, per L_HX).
time = 2100 + 10 * np.arange(len(outputVar[0]) - n_skip)

# (y label, unit divisor, [(predicted index, measured index, quantity name), ...]) per panel;
# outputVar holds y[1..7] followed by y_mea[1..7]
panels = [
    ('Pressure [kPa]', 1e3, [(0, 7, 'Suction'), (1, 8, 'Discharge')]),                       # pressures
    ('Temperature [K]', 1.0, [(2, 9, 'Evaporator'), (3, 10, 'Condenser')]),                  # water temperatures
    ('Temperature difference [K]', 1.0, [(4, 11, 'Superheat'), (5, 12, 'Subcooling')]),      # subcooling and superheat
    ('Power [kW]', 1e3, [(6, 13, None)]),                                                    # Power
]
for panel_ax, (ylabel, scale, traces) in zip(ax, panels):
    for i_pred, i_mea, quantity in traces:
        label_pred = 'Predicted' if quantity is None else f'{quantity} (Predicted)'
        label_mea = 'Measured' if quantity is None else f'{quantity} (Measured)'
        panel_ax.plot(time, np.array(outputVar[i_pred][n_skip:]) / scale, '-', linewidth=1.5, label=label_pred)
        panel_ax.plot(time, np.array(outputVar[i_mea][n_skip:]) / scale, '--', linewidth=1.5, label=label_mea)
    panel_ax.set_ylabel(ylabel)
    panel_ax.grid()
    panel_ax.legend(fontsize='small', loc='best')
ax[3].set_xlabel('Time [s]');


In [None]:
# number of samples between t = 2100 and t = 48000 at a spacing of 10
(48000-2100)/10+1

### Manual implementation of the Matérn kernel

In [None]:
def dist(X1, X2, length_scale):
    """Pairwise Euclidean distances ||xi - xj|| after dividing each column by its length scale.

    Arguments
    X1            -- array of shape (n1, d)
    X2            -- array of shape (n2, d)
    length_scale  -- one length scale per column (array or list), or a single value for all columns

    Return:
    (n1, n2) matrix of scaled distances
    """
    # np.asarray lets lists and plain floats through; a single value broadcasts over all columns
    inv_ell = 1. / np.asarray(length_scale, dtype=float).reshape(-1)
    X1_tilde = X1 * inv_ell
    X2_tilde = X2 * inv_ell
    sq_dist = (np.sum(X1_tilde**2,1).reshape(-1,1) + np.sum(X2_tilde**2,1)
               - 2 * np.dot(X1_tilde, X2_tilde.T))
    # Round-off can make the squared distance of (near-)identical points slightly negative,
    # which would turn into NaN under the square root.
    return np.sqrt(np.maximum(sq_dist, 0.))
def Matern52(X1, X2, length_scale):
    """Matern kernel with nu = 5/2 between the rows of X1 and X2.

    With r the length-scaled distance from ``dist``, returns
    (1 + sqrt(5) r + 5 r^2 / 3) * exp(-sqrt(5) r).
    """
    scaled = np.sqrt(5) * dist(X1, X2, length_scale)
    polynomial = 1.0 + scaled + scaled**2 / 3.0
    return polynomial * np.exp(-scaled)

In [None]:
# Matern is not imported anywhere else in the notebook
from sklearn.gaussian_process.kernels import Matern

# small toy problem for checking the manual Matern kernel against scikit-learn
# NOTE(review): this overwrites the notebook-level X_train, X_test and y_train
X_train = np.random.rand(10,2)
X_test = np.random.rand(5,2)
y_train = np.sum(X_train, axis=1)
ker = Matern(length_scale=[1., 1.], nu=2.5)
gpr = GaussianProcessRegressor(kernel=ker, n_restarts_optimizer=20, alpha=1e-9, normalize_y=False) 
gpr.fit(X_train, y_train)

In [None]:
# evaluate the manual kernel and scikit-learn's fitted kernel on the same inputs,
# using the fitted length scales, so the two matrices can be compared
lengthscale_post = gpr.kernel_.get_params()['length_scale']
K_manual = Matern52(X_test, X_train, lengthscale_post)
K_sklearn = gpr.kernel_(X_test, X_train)

In [None]:
# Refit the kernel hyper-parameters on the projected training data of the last model in the sweep.
# grf_all has no index 4, and the notebook-level X_train / y_train may have been replaced
# by the toy Matern data, so read everything from the model itself.
# NOTE(review): confirm which model this scratch cell was meant to use.
grf_last = grf_all[-1]
n_ridge = grf_last.M.shape[1]
U_train = grf_last.X_train @ grf_last.M
ker = 1.0 * RBF(length_scale=[1. for _ in range(n_ridge)], length_scale_bounds=(1e-7, 1e6)) \
              + WhiteKernel(noise_level=1e-4, noise_level_bounds=(1e-6,1e-1)) # noise_level: iid noise variance
gpr = GaussianProcessRegressor(kernel=ker, n_restarts_optimizer=50, alpha=1e-10, normalize_y=False) 
gpr.fit(U_train, grf_last.y_train);

In [None]:
# fitted kernel (with optimised hyper-parameters) of the regressor above
gpr.kernel_