In [None]:
import warnings


def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments and does nothing."""
    return None


# Install the no-op so that every later ``warnings.warn(...)`` call is silenced.
warnings.warn = warn

In [None]:
import autograd.numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import math

In [None]:
def obse_pred_plot(X_mea, X_pred):
    """Scatter predictions against measurements with the identity line for reference.

    X_mea   -- measured (reference) values, plotted on the x-axis
    X_pred  -- predicted values, plotted on the y-axis

    A new figure is created on every call; nothing is returned.
    """
    fig, ax = plt.subplots()
    # Identity line spanning the measured range: points on it are perfect predictions.
    _x = np.linspace(np.min(X_mea), np.max(X_mea), 100)
    ax.plot(_x, _x, 'r--', label='$x=y$')
    ax.plot(X_mea, X_pred, 'o')
    ax.set_title('Predictions vs Measurements', fontsize=15)
    ax.set_xlabel('Measured', fontsize=15)
    ax.set_ylabel('Predicted', fontsize=15)
    ax.tick_params(labelsize=15)
    # The identity line carries a label, so draw the legend that displays it.
    ax.legend()
    fig.tight_layout()


In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from pymanopt.manifolds import Stiefel
from pymanopt.optimizers import ConjugateGradient, SteepestDescent, TrustRegions
from pymanopt import Problem
import pymanopt
from sklearn.model_selection import train_test_split

class grf:
    """Gaussian ridge function model: a GP regressor on inputs projected by a matrix M.

    M has shape (dim, m_ridge) and is optimized on the Stiefel manifold. Fitting
    alternates between (1) fitting the GP hyper-parameters on ``X_train @ M`` and
    (2) optimizing M to minimize the mean squared prediction error on the
    held-out split (see ``grf_fit``). Calling the instance runs ``n_restart``
    independent fits and keeps the one with the lowest cost.
    """

    def __init__(self, X, Y, m_ridge, n_restart=20, tol=1e-2, test_size=0.5):
        """
        X          -- input data
        Y          -- output data
        m_ridge    -- ridge function input dimension
        n_restart  -- number of times to restart fitting and pick the model with the lowest objective function value
        tol        -- error tolerance of cost to stop iteration
        test_size  -- size to split data to train and test sets [0, 1]
        """
        # Fixed random_state: the train/test split is the same on every construction.
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=20)
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.n_restart = n_restart
        self.tol = tol
        dim = X.shape[1]  # original dimension
        self.manifold = Stiefel(dim, m_ridge)
        # initialize projection matrix M with orthonormal columns (Q factor of a random matrix)
        V = np.random.randn(dim, m_ridge)
        self.M = np.linalg.qr(V)[0].copy()
        # Placeholder only: replaced by the fitted kernel (gpr.kernel_) in grf_fit.
        self.kernel = RBF

    @staticmethod
    def _rbf_cov(A, B, sigma2_f):
        """Squared-exponential covariance between the rows of A and B.

        A and B must already be divided by the lengthscales; sigma2_f is the
        signal variance multiplying the exponential.
        """
        sq_dist = (np.sum(A**2, 1).reshape(-1, 1) + np.sum(B**2, 1)
                   - 2 * np.dot(A, B.T))
        return sigma2_f * np.exp(-0.5 * sq_dist)

    def create_cost(self):
        """Build the autograd-differentiable cost of a projection matrix M.

        The cost is ``0.5 * ||y_test - g_test||^2 / N_test`` where ``g_test`` is
        the GP posterior mean at the held-out inputs, computed with the
        hyper-parameters of ``self.kernel`` as they are when this method is
        called (they are read once here, not on every cost evaluation).
        """
        hyper = self.kernel.get_params()
        # atleast_1d: a single lengthscale may be stored as a scalar.
        lengthscales = np.atleast_1d(hyper['k1__k2__length_scale'])  # rbf lengthscale
        sigma2_f = hyper['k1__k1__constant_value']  # rbf variance
        sigma2_n = hyper['k2__noise_level']  # noise variance
        L_inv = np.diag(1. / lengthscales)

        @pymanopt.function.autograd(self.manifold)
        def cost(M):
            U_train_tilde = (self.X_train @ M) @ L_inv
            U_test_tilde = (self.X_test @ M) @ L_inv

            # covariance on training data, with the noise variance on the diagonal
            G = self._rbf_cov(U_train_tilde, U_train_tilde, sigma2_f)
            G = G + sigma2_n * np.eye(self.X_train.shape[0])
            b = np.linalg.solve(G, self.y_train)

            # covariance of testing and training data K(U_test, U_train)
            K_test = self._rbf_cov(U_test_tilde, U_train_tilde, sigma2_f)
            g_test = K_test @ b
            return 0.5 * np.linalg.norm(self.y_test - g_test)**2 / self.X_test.shape[0]
        return cost

    def pred(self, X_test_pred, return_var=False):
        """
        X_test_pred: test points to evaluate ridge function outputs

        Return:
        g_test: predictions of posterior mean using ridge function
        var_test: posterior variance at test points (only if return_var is True)
        """
        U_train = self.X_train @ self.M
        U_test = X_test_pred @ self.M

        G = self.kernel(U_train)
        K_test_train = self.kernel(U_test, U_train)  # covariance of testing and training data
        g_test = K_test_train @ np.linalg.solve(G, self.y_train)  # predicted posterior mean

        if not return_var:
            return g_test

        # Only the diagonal of the posterior covariance is needed, so compute it
        # directly instead of forming the full N_test x N_test matrix.
        b_test = np.linalg.solve(G, K_test_train.T)
        var_test = self.kernel.diag(U_test) - np.sum(K_test_train * b_test.T, axis=1)
        return g_test, var_test

    def set_XY(self, X_new, Y_new):
        """
        Update GPR model dataset: append X_new / Y_new to the training split.
        The model is not refitted here.
        """
        self.X_train = np.vstack((self.X_train, X_new))
        self.y_train = np.hstack((self.y_train, Y_new))

    @staticmethod
    def BIC(gpr):
        """
        Return BIC using log-likelihood.

        Computed as ``log_marginal_likelihood - 0.5 * (n_features + 2) * log(n_samples)``
        from the attributes of a fitted sklearn GaussianProcessRegressor, i.e. in
        this sign convention larger values are better.
        """
        return gpr.log_marginal_likelihood_value_ - 0.5 * (gpr.n_features_in_ + 2) * math.log(gpr.y_train_.shape[0])

    def grf_fit(self):
        """Run one alternating fit from a fresh random M.

        Each iteration fits the GP hyper-parameters on ``X_train @ M`` and then
        optimizes M with steepest descent on the Stiefel manifold. Iteration
        stops when the relative change of the cost drops to ``self.tol`` or below.

        Returns (M_guess, gpr, r, bic, n_iter). ``M_guess`` is the projection the
        returned ``gpr`` was fitted on (the M entering the last iteration), so
        ``gpr.predict(X @ M_guess)`` is self-consistent. ``r`` is the cost at the
        M leaving the last iteration, which is what ``self.M`` holds on return.
        """
        last_r = 1e10
        err = np.inf
        d, m = self.M.shape
        n_iter = 0

        # re-initialize projection matrix M
        V = np.random.randn(d, m)
        self.M = np.linalg.qr(V)[0].copy()

        while err > self.tol:
            M_guess = self.M.copy()
            n_iter += 1
            U_train = self.X_train @ M_guess
            # prior covariance: scaled anisotropic RBF plus iid noise (noise_level is the noise variance)
            ker = 1.0 * RBF(length_scale=[1 for _ in range(m)], length_scale_bounds=(1e-7, 1e7)) \
                + WhiteKernel(noise_level=1e-4, noise_level_bounds=(1e-8, 1e2))
            # n_restarts_optimizer: number of optimizations for hyper-parameters
            # alpha: added to the diagonal of the covariance matrix to prevent numerical issues during fitting
            gpr = GaussianProcessRegressor(kernel=ker, n_restarts_optimizer=20, alpha=1e-8, normalize_y=False)
            gpr.fit(U_train, self.y_train)

            self.kernel = gpr.kernel_  # posterior kernel

            my_cost = self.create_cost()
            problem = Problem(manifold=self.manifold, cost=my_cost)
            optimizer = SteepestDescent(verbosity=0)
            # NOTE(review): no initial point is passed, so the optimizer picks its own
            # starting M rather than continuing from M_guess -- confirm this is intended.
            M_new = optimizer.run(problem).point
            self.M = M_new.copy()

            r = my_cost(self.M)
            err = np.abs(last_r - r) / last_r
            last_r = r
        bic = self.BIC(gpr)
        return M_guess, gpr, r, bic, n_iter

    def __call__(self):
        """Run ``n_restart`` fits and keep the one with the lowest cost.

        Returns (M_opt, gpr_opt, r_min, bic_opt, n_final) of the best restart.
        The instance is synchronized with that result (``self.M`` and
        ``self.kernel``), so ``pred`` uses the same model that is returned
        instead of whatever the last restart left behind.

        Raises RuntimeError if no restart produced a cost below infinity
        (for example n_restart == 0 or every cost was NaN).
        """
        best = None
        r_min = np.inf
        for _ in range(self.n_restart):
            M, gpr, r, bic, n_iter = self.grf_fit()
            if r < r_min:
                best = (M.copy(), gpr, r, bic, n_iter)
                r_min = r
        if best is None:
            raise RuntimeError("grf: no restart produced a finite cost; nothing to return")

        M_opt, gpr_opt, r_min, bic_opt, n_final = best
        self.M = M_opt.copy()
        self.kernel = gpr_opt.kernel_
        return M_opt, gpr_opt, r_min, bic_opt, n_final

## Testing linear ridge function
Paper section 4.1

In [None]:
# Synthetic linear ridge test: y is the sum of the m coordinates of x after
# projection onto a random orthonormal basis Ureal.
np.random.seed(0) # seed NumPy's global RNG so the data and the random initial M are repeatable
d = 10 # dimension of input
m = 2 # ridge subspace dimension
N = 100
X = np.random.rand(N, d) * 2 - 1 # x in [-1,1]
X_test = np.random.rand(50, d) * 2 - 1
# true projection: Q factor of a random matrix, so its columns are orthonormal
Ureal = np.random.randn(d, m)
q = np.linalg.qr(Ureal)[0]
Ureal = q.copy() # orthogonal
# training and testing data
U_data = X @ Ureal
U_test = X_test @ Ureal
Y = np.sum(U_data, axis=1)
y_test = np.sum(U_test, axis=1)
# fit with the same ridge dimension m that generated the data
grf_test = grf(X, Y, m, n_restart=1, tol=1e-2, test_size=0.3)
results_grf = grf_test()
obse_pred_plot(y_test, grf_test.pred(X_test))

In [None]:
# Shape of the training split kept by the model after train_test_split.
grf_test.X_train.shape

In [None]:
# grf.__call__ returns (M_opt, gpr_opt, r_min, bic_opt, n_final):
# the cost is element 2 and the BIC is element 3 (element 4 is the iteration count).
bic = results_grf[3]
r = results_grf[2]
print(f'cost={r}')
print(f'BIC = {bic}')

In [None]:
# Cross-check the posterior mean: grf.pred (manual computation) on one axis,
# sklearn's gpr.predict on the projected inputs on the other.
M_final, gpr_final = results_grf[:2]
obse_pred_plot(
    grf_test.pred(X_test),
    gpr_final.predict(X_test @ M_final),
)

In [None]:
# verify posterior variance: sklearn returns the posterior mean and standard deviation
# at the projected test points
y_pred, std = gpr_final.predict(X_test @ M_final, return_std=True)

In [None]:
# Same comparison on the full data set X: manual posterior mean (grf.pred) vs gpr.predict.
# If the two agree, gpr.predict can be used for both mean and variance.
M_opt, gpr_opt = results_grf[:2]
obse_pred_plot(
    grf_test.pred(X),
    gpr_opt.predict(X @ M_opt),
)

In [None]:
# 3-D scatter of the test outputs over the two learned ridge coordinates
U_test_final = X_test @ M_opt # using optimized M
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(U_test_final[:,0], U_test_final[:,1], y_test, c=y_test)
# ax.invert_yaxis()
ax.set(xlabel='$m_1$', ylabel='$m_2$', zlabel='$f$')
plt.show()

### Bayesian Optimization

In [None]:
from dymola.dymola_interface import DymolaInterface

# Start a Dymola session and load the DynamicVCC package.
# `dymola` is reset to None first so the name exists even if the constructor fails.
dymola = None
dymola = DymolaInterface()
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
# NOTE(review): machine-specific absolute path -- adjust for your installation.
dymola.openModel(path=r"C:\Jiacheng Ma\Modelica libraries\DynamicVCC\DynamicVCC\package.mo",changeDirectory=False)

In [None]:
def L_HX(theta_in):
    """Simulate the heat-exchanger test model for one parameter set and score it.

    theta_in -- sequence of values assigned to the model inputs u[1] .. u[len(theta_in)]

    Returns (cost, outputVar):
      cost      -- sum over the three outputs of the squared relative L2 error
                   between y[k] and y_mea[k], skipping the first 10 samples
      outputVar -- trajectories read from dsres.mat: y[1..3] followed by y_mea[1..3]
    Returns (None, None) when the simulation fails, after printing the inputs and
    the Dymola error log, so callers can skip the point instead of losing the session.

    Uses the module-level ``dymola`` session.
    """
    problem = "DynamicVCC.Examples.Tests.Test_ShellTubeHX"
    startTime = 2000
    stopTime = 3500
    outputInterval = 10
    method = "Dassl"
    tolerance = 0.0001
    initialNames = ['u[{}]'.format(i) for i in range(1,len(theta_in)+1)]
    initialValues = theta_in
    dymola.experimentSetupOutput(events=False)
    result, finalVar = dymola.simulateExtendedModel(problem=problem,
                                          startTime=startTime,
                                          stopTime=stopTime,
                                          outputInterval=outputInterval,
                                          method=method,
                                          tolerance=tolerance,
                                          initialNames=initialNames,
                                          initialValues=initialValues)
    if not result:
        print(theta_in)
        print("Simulation failed. Below is the translation log.")
        log = dymola.getLastErrorLog()
        print(log)
        return None, None

    Nrows = dymola.readTrajectorySize("dsres.mat")
    outputNames = ['y[{}]'.format(i) for i in range(1,4)] + ['y_mea[{}]'.format(i) for i in range(1,4)]
    outputVar = dymola.readTrajectory("dsres.mat", outputNames, Nrows)
    pred = np.array(outputVar[:3])
    Mea = np.array(outputVar[3:])
    # relative error per output; omit some initialization points
    ner = np.linalg.norm(pred[:,10:] - Mea[:,10:], axis=1) / np.linalg.norm(Mea[:,10:], axis=1)
    # identity weights: every output contributes equally to the quadratic cost
    W = np.eye(ner.shape[0])
    cost = np.dot(ner.T,W.dot(ner))
    return cost, outputVar

# Objective function to minimize
def J_calib(u, lb, ub):
    """Calibration objective (to be maximized): negative log of the simulation cost.

    u    -- Scaled HTC
    lb   -- HTC lower bound
    ub   -- HTC upper bound

    The scaled input is mapped to ``lb + u * (ub - lb)``, rounded to one decimal,
    and passed to L_HX. Returns ``-log(cost)``, or None when L_HX reports no usable
    cost (None or 0). None becomes NaN when stored in a float array, which the
    sampling loop filters out, and is falsy for ``if not y_new`` checks.
    """
    u_truescale = np.round(lb + u * (ub - lb),1)
    cost, outputVar = L_HX(list(u_truescale))
    if not cost:
        # Failed simulation: report "no value" instead of terminating the session.
        return None
    return -np.log(cost)

In [None]:
# Shut down the Dymola session if one is open and clear the handle.
# NOTE(review): L_HX uses the module-level `dymola`; after this cell has run, the
# session has to be reopened before any later cell calls L_HX / J_calib.
if dymola is not None:
    dymola.close()
    dymola = None

In [None]:
# Test dymola model: one simulation with a hand-picked parameter vector
# (five values, assigned to the model inputs u[1]..u[5] by L_HX)
theta_test = [5e4, 5e4, 5e4, 5e4, 131146]
cost, outputVar = L_HX(theta_test)

Use Latin Hypercube designs to generate random samples

In [None]:
# Build the initial data set: Latin hypercube design in the unit cube,
# one objective evaluation per design point.
np.random.seed(12345) # repeatable
n_init = 200 # Number of data points
lb = np.array([1e4,1e4,1e4,1e3,65573]) # Lower bounds of input space
ub = np.array([1e6,1e6,1e6,1e6,196719]) # Upper bounds of input space

from pyDOE import lhs

# Scaled samples of the input space ('c' is the criterion passed to lhs)
X_normalize = lhs(len(lb), n_init, 'c')

# Objective value at each sample
Y = np.zeros(n_init)
for i in range(n_init):
    Y[i] = J_calib(X_normalize[i,:], lb, ub)
    print(i+1, Y[i])

# Drop the samples whose objective is NaN (right-hand side is evaluated before either name is rebound)
X_normalize, Y = X_normalize[~np.isnan(Y),:], Y[~np.isnan(Y)]

# Plot function values
fig, ax = plt.subplots()
ax.plot(Y,'kx',markersize=10, markeredgewidth=2)
ax.set_xlabel('$n$')
ax.set_ylabel('$J(u)$')

# save data: first column is Y, remaining columns are the scaled inputs
np.savetxt('YX_chillerCond.txt', np.column_stack((Y, X_normalize)), delimiter=',')

Gaussian ridge function for calibration parameter space

In [None]:
# Reload the saved samples: first column is the objective, the rest are the scaled inputs
YX_data = np.loadtxt('YX_chillerCond.txt', delimiter=',')
Y, X_normalize = YX_data[:,0], YX_data[:,1:]

# Standardize outputs and inputs; the fitted scalers are kept for inverse transforms later
from sklearn import preprocessing
scaler_y = preprocessing.StandardScaler()
Y_scaled = scaler_y.fit_transform(Y[:,None])

scaler_X = preprocessing.StandardScaler()
X_scaled = scaler_X.fit_transform(X_normalize)

In [None]:
# Fit a 2-D Gaussian ridge function to the standardized calibration data (30 restarts),
# then plot predictions on all samples in the original output units.
grf_HX = grf(X_scaled, Y_scaled[:,0], 2, n_restart=30, tol=1e-2)
results_HX = grf_HX()
M_final, gpr_final = results_HX[:2]
obse_pred_plot(
    Y,
    scaler_y.inverse_transform(grf_HX.pred(X_scaled).reshape(-1, 1)),
)

In [None]:
# Fitted kernel hyper-parameters, in order: RBF lengthscales, RBF variance, noise variance
lengthscales, sigma2_f, sigma2_n = (
    gpr_final.kernel_.get_params()[key]
    for key in ('k1__k2__length_scale', 'k1__k1__constant_value', 'k2__noise_level')
)
# Diagonal matrix of inverse lengthscales, used to rescale projected inputs
L_inv = np.diag(1. / lengthscales)

In [None]:
# Manual RBF cross-covariance between random test points and the GP's training inputs,
# for comparison with the kernel evaluated by sklearn.
U_train = gpr_final.X_train_
U_test = np.random.rand(20,2)
# rescale both point sets by the inverse lengthscales
U_train_tilde = U_train @ L_inv 
U_test_tilde = U_test @ L_inv
# cross-covariance K(U_test, U_train): sigma2_f * exp(-0.5 * squared scaled distance)
G = sigma2_f * np.exp(-0.5*(np.sum(U_test_tilde**2,1).reshape(-1,1) + np.sum(U_train_tilde**2,1) - 
                           2 * np.dot(U_test_tilde, U_train_tilde.T)))

In [None]:
# Same test/train cross-covariance evaluated by the fitted sklearn kernel (reference for G above)
G_est = gpr_final.kernel_(U_test, U_train)

In [None]:
# Training-set fit: measured vs predicted, both mapped back to the original output units
y_train = scaler_y.inverse_transform(grf_HX.y_train.reshape(-1, 1))
y_train_pred = scaler_y.inverse_transform(
    grf_HX.pred(grf_HX.X_train).reshape(-1, 1)
)
obse_pred_plot(y_train, y_train_pred)

In [None]:
# gpr.predict() versus grf.pred() on the training inputs, in original output units.
# gpr.predict expects inputs already projected onto M_final, the matrix used for training.
y_train_predict = scaler_y.inverse_transform(
    gpr_final.predict(grf_HX.X_train @ M_final).reshape(-1, 1)
)
y_train_pred = scaler_y.inverse_transform(
    grf_HX.pred(grf_HX.X_train).reshape(-1, 1)
)
obse_pred_plot(y_train_predict, y_train_pred)

In [None]:
# Held-out split only: measured vs predicted, mapped back to the original output units
X_test_scaled, y_test_scaled = grf_HX.X_test, grf_HX.y_test
obse_pred_plot(
    scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)),
    scaler_y.inverse_transform(grf_HX.pred(X_test_scaled).reshape(-1, 1)),
)

In [None]:
# Cost (element 2) and BIC (element 3) of the best restart returned by grf.__call__
print(f'r={results_HX[2]}')
print(f'BIC={results_HX[3]}')

In [None]:
# Clear the regressor's hyper-parameter optimizer setting.
# NOTE(review): presumably so later fit() calls keep the kernel parameters fixed -- confirm.
gpr_final.optimizer=None

In [None]:
# Display the fitted kernel's parameters (lengthscales, signal variance, noise level, bounds)
gpr_final.kernel_.get_params()

### Bayesian optimization

In [None]:
def BGOmaximize(f, gpr, X_design, alpha, f_params=None, alpha_params=None, max_it=15, optimize_it=100, plot=False, M=None):
    """Optimize a function using Bayesian global optimization
    Arguments
    f              -- The function to optimize; called as f(x, **f_params) with x in the full input space
    gpr            -- Gaussian process regression model (sklearn-style: predict, fit, X_train_,
                      y_train_, kernel_) approximating the objective on the projected inputs
    X_design       -- Candidate points, already projected (rows are u = M.T x), among which the
                      next evaluation point is chosen
    alpha          -- Information acquisition function, called as alpha(mean, std, y_max, **alpha_params)
    f_params       -- Extra keyword arguments to f (default: none)
    alpha_params   -- Extra parameters to the information acquisition function (default: none)
    max_it         -- The maximum number of iterations
    optimize_it    -- Accepted for backward compatibility; currently not used
    plot           -- Whether or not to plot function evaluations v.s. iterations at max_it
    M              -- Projection matrix (d x m); defaults to the notebook-level ``M_final``

    Returns (af_all, x_all, y_all): acquisition values, evaluated points (d x 1 columns, after
    clipping to [0, 1]) and objective values of the successful evaluations.

    Side effects: sets gpr.n_restarts_optimizer to 0 and refits gpr in place after each successful
    evaluation, starting from its current fitted kernel.
    NOTE(review): the points passed to f are clipped to [0, 1], i.e. f is assumed to take inputs
    in normalized units, and y is appended to gpr.y_train_ unchanged -- confirm that gpr was
    trained in the same input/output scaling.
    """
    f_params = {} if f_params is None else f_params
    alpha_params = {} if alpha_params is None else alpha_params
    if M is None:
        M = M_final  # projection matrix defined at notebook level

    af_all = [] # Store values of acquisition function
    x_all = []
    y_all = []
    gpr.n_restarts_optimizer = 0
    for count in range(max_it):
        if X_design.shape[0] == 0:
            break  # every candidate has been discarded
        # Using GPR model to get posterior mean and standard deviation at the design points
        m, sigma = gpr.predict(X_design, return_std=True)
        # Evaluate information acquisition function
        af_values = alpha(m, sigma, gpr.y_train_.max(), **alpha_params)
        # Find index of the next point to evaluate
        i = np.argmax(af_values)
        # Back-project u to the input space: minimum-norm solution of M.T x = u.
        # (M @ M.T is rank-deficient for m < d, so it cannot be used in a linear solve.)
        u_target = X_design[i, :].reshape(-1, 1)
        x_new = np.clip(np.linalg.lstsq(M.T, u_target, rcond=None)[0], 0., 1.)
        y_new = f(x_new.squeeze(), **f_params)
        print(count+1, x_new, y_new)
        if y_new is None or np.isnan(y_new):
            # Failed evaluation: drop this candidate so it is not selected again
            X_design = np.delete(X_design, i, axis=0)
            continue

        x_all.append(x_new)
        y_all.append(y_new)
        af_all.append(af_values[i])
        # Projection of the point that was actually evaluated (after clipping)
        u_new = M.T @ x_new
        # Update GPR: refit on the augmented data, starting from the fitted kernel
        gpr.kernel = gpr.kernel_
        gpr.fit(np.vstack((gpr.X_train_, u_new.T)),
                np.hstack((gpr.y_train_, np.atleast_1d(y_new))))

    if plot:
        fig, ax = plt.subplots()
        ax.plot(range(1, len(y_all) + 1), y_all, '-*', markersize=10, markeredgewidth=2)
        ax.set_xticks(range(1,max_it+1,2))
        ax.set_xlabel('Iterations')
        ax.set_ylabel('$f(x)$')

    return af_all, x_all, y_all

# maximum upper interval
def mui(m, sigma, ymax, psi=1.96):
    """Upper-bound acquisition: posterior mean plus psi posterior standard deviations.

    m      -- posterior mean
    sigma  -- posterior standard deviation
    ymax   -- current best observation; accepted for a uniform acquisition signature, not used
    psi    -- multiplier on sigma
    """
    upper_bound = psi * sigma + m
    return upper_bound

In [None]:
# Candidate set for Bayesian optimization: uniform samples in the unit cube,
# projected onto the learned ridge directions.
# NOTE(review): grf_HX was fitted on standardized inputs/outputs (X_scaled, Y_scaled), while the
# candidates here are projected without scaler_X and J_calib returns unscaled values -- confirm
# the scaling that gpr_final is expected to see.
n_design = int(1e6)
max_it=20 # number of iterations
X_design_normalize = np.random.rand(n_design, len(lb))
U_design = X_design_normalize @ M_final
# no optimizer restarts when the GP is refitted
gpr_final.n_restarts_optimizer = 0
af_all, x_all, y_all = BGOmaximize(J_calib, gpr_final, U_design, mui, f_params={'lb':lb,'ub':ub},max_it=max_it,
                                   optimize_it=500, plot=1)

In [None]:
# Largest target value currently stored in the GP's training data
gpr_final.y_train_.max()

In [None]:
# One acquisition step done by hand, for inspection
m, sigma = gpr_final.predict(U_design, return_std=True) # posterior mean and standard deviation
# Evaluate information acquisition function
af_values = mui(m, sigma, gpr_final.y_train_.max())
# Find index of the next point to evaluate
i = np.argmax(af_values)
# Back-project the selected design point to the input space: minimum-norm solution of
# M_final.T x = u. (M_final @ M_final.T is rank-deficient, so it cannot be used in a linear solve.)
x_new = np.linalg.lstsq(M_final.T, U_design[i, :].reshape(-1,1), rcond=None)[0]