## Example using constrained GP model
This is the code used to produce the 'corroded pipeline' example in the paper _'Gaussian processes with linear operator inequality constraints'_, https://arxiv.org/abs/1901.03134

### Imports

In [1]:
### Basic imports ###
import sys, os

# For plotting
import plotly
import plotly.plotly as pltly
import plotly.graph_objs as go
import plotly.offline as pltlyoff
from IPython.display import display, HTML

# This is for plotting as static images (to show on e.g. GitHub)
import plotly.io as pio
from IPython.display import Image

# Numerics
import numpy as np
import scipy as sp
import itertools
import pyDOE
import pandas as pd

### Custom files ###

# Path to custom plotly module 'GPPlotly' for plotting 
# can be downloaded at https://github.com/cagrell/gp_plotly
dir_GPPlotly = 'C:\\Data\\git repos\\gp_plotly\\'
sys.path.append(dir_GPPlotly) 

# Path to the constrained GP moule 
# can be downloaded at https://github.com/cagrell/gp_constr
dir_gp_constr = 'C:\\Data\\git repos\\gp_constr\\'
sys.path.append(dir_gp_constr) 

# Import
from GPPlotly.plottingfunctions import PlotGP2d, add_traces_to_fig, gp_diagnostics, pred_vs_error_perc
from GPConstr.model import kernel_RBF, GPmodel, Constraint

### Setup notebook ###
pltlyoff.init_notebook_mode(connected=True)
print('Python version', sys.version)

Loading constrained GP module from C:\Data\git repos\gp_constr
Loading R wrapper...
Running R from rpy2: R version 3.4.3 (2017-11-30)


Python version 3.6.3 (v3.6.3:2c5fed8, Oct  3 2017, 18:11:49) [MSC v.1900 64 bit (AMD64)]


## 1. Define function for generating synthetic test/training data
The simplified pipe burst capacity in RP-F101

$P_{cap}(\sigma_{u}, D, t, d, l) = 1.05\frac{2t \sigma_{u}}{D - t} \frac{1 - d/t}{1 - \frac{d/t}{Q}}$, $Q = \sqrt{1 + 0.31\frac{l^{2}}{Dt}}$

We have the following constraints

1.) $0 \leq P_{cap} \leq 1.05\frac{t \sigma_{u}}{D - t}$

2.) $\frac{\partial P_{cap}}{\partial \sigma_{u}} > 0$

3.) $\frac{\partial P_{cap}}{\partial D} < 0$

4.) $\frac{\partial P_{cap}}{\partial t} > 0$

5.) $\frac{\partial P_{cap}}{\partial d} < 0$

6.) $\frac{\partial P_{cap}}{\partial l} < 0$

for $\sigma_{u} \in [450, 550]$ (MPa), $t \in [5, 30]$ (mm), $D \in [10t, 50t]$ (mm), $d \in [0, t]$ (mm) and $l \in [0, 1000]$ (mm).

Let $\textbf{x}$ denote the transformed input vector $\textbf{x} = [x_{1}, \dots, x_{5}]$ where $x_{1} = (\sigma_{u} - 450)/(550-450)$, $x_{2} = (D/t - 10)/(50 - 10)$, $x_{3} = (t - 5)/(30-5)$, $x_{4} = d/t$ and $x_{5} = l/1000$.  

We will make use of the function

$f(\textbf{x}) = P_{cap}(\textbf{x})$ with the corresponding constraints

1.) $0 \leq f(\textbf{x}) \leq 1.05\frac{2t \sigma_{u}}{D - t}$ 
$= 1.05 \cdot 2\frac{x_{1}(550-450)+450}{x_{2}(50-10)+10}$
$= 10.5\frac{x_{1} + 9}{4x_{2} + 1}$

2.) $\frac{\partial f}{\partial x_{1}} > 0$

3.) $\frac{\partial f}{\partial x_{2}} < 0$

4.) $\frac{\partial f}{\partial x_{3}} > 0$

5.) $\frac{\partial f}{\partial x_{4}} < 0$

6.) $\frac{\partial f}{\partial x_{5}} < 0$

for $\textbf{x} \in [0, 1]^{5}$.


In [2]:
# Function to emulate/estimate
def burst_cap(su, D, t, d, l):
    """ 
    Simplified burst capacity equation in DNV GL RP-F101 
    
    Input:    
    su =  Ultimate tensile strength [MPa]
    D  =  Pipe diameter             [mm]
    t  =  Pipe wall thickness       [mm]
    d  =  Defect depth              [mm]
    l  =  Defect length             [mm]

    """
    
    p0 = 1.05*2*t*su/(D-t)               # Un-corroded capacity
    Q = np.sqrt(1 + 0.31*(l**2)/(D*t))   # Length factor
    d_t = d/t                            # Relative depth
    R = (1-d_t)/(1 - d_t/Q)              # Capacity reduction factor due to the defect
    
    p_cap = p0*R                         # Capacity of pipe with defect
    
    return p_cap

# Ranges of input variables
rng = {
    'su':(450, 550),
    't': (5, 30),
    'D_t': (10, 50),
    'd_t': (0, 1),
    'l': (0, 1000),
}

def phys_to_x(su, D, t, d, l):
    """ Transform from physical to standardized domain """
    
    x = np.array([
        (su - rng['su'][0])/(rng['su'][1] - rng['su'][0]),
        (D/t - rng['D_t'][0])/(rng['D_t'][1] - rng['D_t'][0]),
        (t - rng['t'][0])/(rng['t'][1] - rng['t'][0]),
        (d/t - rng['d_t'][0])/(rng['d_t'][1] - rng['d_t'][0]),
        (l - rng['l'][0])/(rng['l'][1] - rng['l'][0])
    ])
    
    return x

def x_to_phys(x):
    """ Transform from standardized to physical domain """
    su = x[0]*(rng['su'][1] - rng['su'][0]) + rng['su'][0]
    D_t = x[1]*(rng['D_t'][1] - rng['D_t'][0]) + rng['D_t'][0]
    t = x[2]*(rng['t'][1] - rng['t'][0]) + rng['t'][0]
    d_t = x[3]*(rng['d_t'][1] - rng['d_t'][0]) + rng['d_t'][0]
    l = x[4]*(rng['l'][1] - rng['l'][0]) + rng['l'][0]
    
    return su, D_t*t, t, d_t*t, l

def fun(x):
    """ function used to generate data """
    x5 = np.array([0.5]*5) # 5-dim input
    x5[:len(x)] = x # Update first n elements of x5
    
    p_cap = burst_cap(*x_to_phys(x5))
    return p_cap

def fun_mult(X):
    """ For multiple x in array X """
    return np.array([fun(X[i,:]) for i in range(X.shape[0])])

In [None]:
# Generate some samples to plot

data = []

for _ in range(1000):
    x = np.random.uniform(size = 3)
    y = fun(x)
    
    x5 = np.array([0.5]*5) # 5-dim input
    x5[:len(x)] = x # Update first n elements of x5
    data.append(list(x_to_phys(x5)) + [y])
    #data.append(list(x_to_phys(x)) + [y])
    
df = pd.DataFrame(data)
df.columns = ['su', 'D', 't', 'd', 'l', 'y_true']
#df['y_mean'] = df['y_true'] 
#df['y_var'] = np.ones(df.shape[0])*0.001
df['D_t'] = df['D'] / df['t'] 

df.head()

In [None]:
# Histogram of output
%matplotlib inline
df.hist(column = 'y_true')

In [None]:
import seaborn as sns

sns.pairplot(df[df['y_true'] < 400])

## 2. Emulation (no noise) in $\mathbb{R}^{4}$

### 2.1. Generate synthetic training data

In [3]:
# Design data 
n_samples = 10
input_dim = 3

x_design = pyDOE.lhs(input_dim, samples = n_samples, criterion = 'maximin', iterations = 1000)
#y_design = fun_mult(x_design) # No Noise

noise_std = 2
y_design = fun_mult(x_design) + np.random.normal(0, noise_std, n_samples) # With noise


### 2.2. Define GP model (without constraints)

In [4]:
# Set up model
ker = kernel_RBF(variance = 1, lengthscale = [1]*input_dim)
#model = GPmodel(kernel = ker, likelihood = 1E-6, mean = 0) # No Noise
model = GPmodel(kernel = ker, likelihood = 1, mean = 0) # With noise

# Training data
model.X_training = x_design
model.Y_training = y_design

In [5]:
# Optimize
model.kernel.variance = 1
model.likelihood = 1
model.kernel.lengthscale = [1]*input_dim
model.optimize(include_constraint = False, fix_likelihood = False)
print(model)

..Running optimization for unconstrained GP ... DONE - Total time: 0.197 seconds
----- GP model ----- 
 mean = 0 
 likelihood = 4.55799382957736 
 kernel: 
   type = RBF 
   input dim = 3 
   lenghtscale = [2.55161511e+04 5.15623108e-01 7.05739028e+00] 
   variance = 2058.3055437881408 
 constraint: 
   No constraints 
---------------------


#### Test the unconstrained model 

In [6]:
# Helper function - put test data in dataframe
def get_diagnostics_df(x_test, y_test, mean, var):
    df = pd.DataFrame(x_test)
    df.columns = ['x' + str(i+1) for i in range(x_test.shape[1])]
    df['y_true'] = y_test
    df['y_mean'] = mean
    df['y_var'] = var
    
    return df

In [7]:
# Test data
n_test = 100
x_test = np.random.rand(n_test, input_dim) # Uniform samples
y_test = fun_mult(x_test)

# Run unconstrained model
mean_unconstr, var_unconstr = model.calc_posterior_unconstrained(x_test, full_cov = False)
mean_unconstr = np.array(mean_unconstr).flatten()
var_unconstr = np.array(var_unconstr).flatten()

# Collect data in df
df_unconstr = get_diagnostics_df(x_test, y_test, mean_unconstr, var_unconstr)


..Running calculation of K_w ... SKIP - (cached)
..Running calculation of Cholesky factor for K_w ... SKIP - (cached)
..Calculating f* | Y ... DONE - Total time: 0.002 seconds


In [8]:
# View first couple of records of dataframe with testing data
df_unconstr.head()

Unnamed: 0,x1,x2,x3,y_true,y_mean,y_var
0,0.717828,0.320467,0.244217,27.70312,24.771981,2.147918
1,0.211704,0.740406,0.876343,17.309821,20.561843,2.310085
2,0.761446,0.871282,0.921895,17.489369,18.297977,4.066268
3,0.56358,0.887058,0.776673,16.03409,17.720581,2.964932
4,0.936686,0.348845,0.824947,31.296919,26.796989,2.165749


In [9]:
# Plot diagnostics
#figs = gp_diagnostics(df_unconstr, y_name = 'y', subplots = True)
#for fig in figs:
#    pltlyoff.iplot(fig, filename='')

figs = gp_diagnostics(df_unconstr, y_name = 'y', subplots = False)
pltlyoff.iplot(figs[1], filename='')

In [10]:
# Some functions for test evaluation

def fQ2(y_pred, y_test):
    """
    Compute Q2 = determination coefficient (R2) computed from a test sample (y_pred, y_test)
    """

    # Sum of squares of residuals
    SS_res = ((y_pred - y_test)**2).sum()

    # Total sum of squares
    y_test_avg = y_test.mean()
    SS_tot = ((y_test - y_test_avg)**2).sum()

    #Q2
    Q2 = 1 - SS_res/SS_tot

    return Q2

def fPVA(y_pred, y_pred_var, y_test):
    """
    Return Predictive Variance Adequation (PVA)
    """
    # Sum of sqares of scaled residuals
    SS_res_var = (((y_pred - y_test)**2)/(y_pred_var**2)).sum()

    PVA = np.abs(np.log((1/y_pred.shape[0])*SS_res_var))
    
    return PVA

In [11]:
# Compute Q2 and PVA
Q2 = fQ2(mean_unconstr, y_test)
PVA = fPVA(mean_unconstr, var_unconstr, y_test)

print('Q2', Q2)
print('PVA', PVA)

Q2 0.9376193805627216
PVA 0.3092589703718982


#### Plot a 1D slice of the model

In [None]:
plot_x_dim = 1 # Dimension to plot
x_base_val = 0.1 # All other x has this value

# Test input
px_test = np.linspace(0, 1, 100)
px_test_arr = np.ones((len(px_test), input_dim))*x_base_val
px_test_arr[:,plot_x_dim] = px_test

# True function
y_true = fun_mult(px_test_arr)

# From GP
mean_unconstr, cov_unconstr = model.calc_posterior_unconstrained(px_test_arr, full_cov = True)
mean_unconstr = np.array(mean_unconstr).flatten()
var_unconstr = np.diagonal(cov_unconstr)

num_samples = 10
show_samplepaths = True
samplepaths_unconstr = []
if show_samplepaths: samplepaths_unconstr = np.random.multivariate_normal(mean_unconstr, cov_unconstr, num_samples).T

fig_unconstr_slice = PlotGP2d(x_mean = px_test, mean = mean_unconstr, var = var_unconstr,
                        x_true = px_test, y_true = y_true,
                        samplepaths = samplepaths_unconstr,
                        title = 'f(x) as a function of x[{0}] where x[i] = {1} for i != {0}'.format(plot_x_dim, x_base_val), xrange = [0, 1], smoothing = False)

pltlyoff.iplot(fig_unconstr_slice, filename='')

### 2.2.3. Include the constraints

In [12]:
def constant_function(val):
    """ Return the constant function"""
    def fun(x):
        return np.array([val]*x.shape[0])
    
    return fun

In [13]:
# Create derivative constraints
num_constr = input_dim # Set number of constraints

constr_deriv = [
    Constraint(LB = constant_function(0), UB = constant_function(float('Inf'))),
    Constraint(LB = constant_function(float('-Inf')), UB = constant_function(0)),
    Constraint(LB = constant_function(0), UB = constant_function(float('Inf'))),
    Constraint(LB = constant_function(float('-Inf')), UB = constant_function(0)),
    Constraint(LB = constant_function(float('-Inf')), UB = constant_function(0))
]

constr_deriv = constr_deriv[:num_constr]

In [14]:
# Add constraints to model
model.constr_deriv = constr_deriv
model.constr_likelihood = 1E-6

In [15]:
print(model)

----- GP model ----- 
 mean = 0 
 likelihood = 4.55799382957736 
 kernel: 
   type = RBF 
   input dim = 3 
   lenghtscale = [2.55161511e+04 5.15623108e-01 7.05739028e+00] 
   variance = 2058.3055437881408 
 constraint: 
   df/dx_1 [0], df/dx_2 [0], df/dx_3 [0] 
   constr_likelihood = 1e-06 
---------------------


In [None]:
# Find initial seed-set of XV points
#model.reset()
#df = model.initiate_XV_LHS(n_lhs = 100, p_target = 0.9, nu = 0, add_points_to_model = True, lhs_iterations = 100)
#print(model)
#print('constr prob', model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 1000))

In [16]:
# Search for a suitable set of virtual observation locations where the constraint is imposed -- finite search
# Finite dim searches
Omega = np.random.uniform(size = (1000, input_dim))
df = model.find_XV_subop(p_target = 0.9, Omega = Omega, batch_size = 1000, sampling_alg = 'minimax_tilting', num_samples = 1000,
                         max_iterations = 10, print_intermediate = True)

Searching for points XV s.t. P(a - nu < Lf < b + nu) > p_target = 0.9 for Lf = [df/dx_1, df/dx_2, df/dx_3] and nu = 1.2815515655446004e-06 ...
i = 2, XV[1] = [0.6361293  0.6434924  0.00131754], prob = 0.3812943014149657, acc. rate = 1, optimization time = 0.309 seconds
i = 2, XV[2] = [0.93164387 0.99856008 0.15200119], prob = 0.14457739214445287, acc. rate = 1, optimization time = 1.230 seconds
i = 1, XV[3] = [0.8678472  0.00562614 0.96134228], prob = 0.5003131930533155, acc. rate = 0.3235187106951008, optimization time = 1.331 seconds
i = 1, XV[4] = [0.93164387 0.99856008 0.15200119], prob = 0.543212798274548, acc. rate = 0.1618380453265765, optimization time = 1.219 seconds
i = 3, XV[5] = [0.01963152 0.99989369 0.90741471], prob = 0.8030848453840539, acc. rate = 0.08702484286313883, optimization time = 1.283 seconds
i = 1, XV[6] = [0.1147988  0.50623862 0.0185873 ], prob = 0.8427798528154834, acc. rate = 0.06954931453949922, optimization time = 1.112 seconds
i = 2, XV[7] = [0.3870549

In [18]:
Omega = np.random.uniform(size = (10000, input_dim))
df = model.find_XV_subop(p_target = 0.9, Omega = Omega, batch_size = 1500, sampling_alg = 'minimax_tilting', num_samples = 1000,
                         max_iterations = 10, print_intermediate = True)

Searching for points XV s.t. P(a - nu < Lf < b + nu) > p_target = 0.9 for Lf = [df/dx_1, df/dx_2, df/dx_3] and nu = 1.2815515655446004e-06 ...
DONE - Found 0 points. Min. constraint prob = 0.9329186351736822. Total time spent = 12.576 seconds


In [17]:
df

Unnamed: 0,num_Xv,update_constr,Xv[1],Xv[2],Xv[3],pc_2,pc_3,pc_4,acc_rate
0,0,2,0.636129,0.643492,0.001318,0.500296,0.381294,0.536503,1.0
1,1,2,0.931644,0.99856,0.152001,0.50048,0.144577,0.408998,1.0
2,2,1,0.867847,0.005626,0.961342,0.500313,0.880523,0.79306,0.323519
3,3,1,0.931644,0.99856,0.152001,0.543213,0.878243,0.801271,0.161838
4,4,3,0.019632,0.999894,0.907415,0.844264,0.881803,0.803085,0.087025
5,5,1,0.114799,0.506239,0.018587,0.84278,0.86206,0.922311,0.069549
6,6,2,0.387055,0.663702,0.984465,0.886654,0.847297,0.923288,0.055196
7,7,1,0.019632,0.999894,0.907415,0.887996,0.948126,0.932312,0.047066
8,8,1,0.755719,0.001756,0.098023,0.888169,0.947846,0.934734,0.038759
9,9,3,0.867847,0.005626,0.961342,0.95512,0.942204,0.927605,0.031872


In [19]:
# Search for a suitable set of virtual observation locations where the constraint is imposed
df = model.find_XV_subop(bounds =  [(0, 1)]*input_dim, p_target = 0.9, 
                         max_iterations = 3,
                         min_prob_unconstr_xv = -1, opt_method = 'differential_evolution')

Searching for points XV s.t. P(a - nu < Lf < b + nu) > p_target = 0.9 for Lf = [df/dx_1, df/dx_2, df/dx_3] and nu = 1.2815515655446004e-06 ...
DONE - Found 0 points. Min. constraint prob = 0.9286118828405283. Total time spent = 5.913 seconds


In [None]:
# Search for a suitable set of virtual observation locations where the constraint is imposed
df = model.find_XV_subop(bounds =  [(0, 1)]*input_dim, p_target = 0.9, 
                         max_iterations = 10, moment_approximation = False, num_samples = 100,
                         min_prob_unconstr_xv = -1, opt_method = 'shgo', print_intermediate = False)

In [None]:
print(model)
model.reset()
print('constr prob', model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 1000))

In [None]:
def print_constr_loglik(model, intermediate = True):
    v_loglik_unconstr = model._loglik_unconstrained() # P(Y)
    v_loglik_constr = np.log(model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 1000)) # P(C|Y)
    v_loglik_constr_cond = np.log(model.constrprob_Xv(posterior = False, algorithm = 'minimax_tilting', n = 1000)) # P(C)
    
    if intermediate:
        print('P(Y)', v_loglik_unconstr)
        print('P(C|Y) {} ({})'.format(v_loglik_constr, np.exp(v_loglik_constr)))
        print('P(C) {} ({})'.format(v_loglik_constr_cond, np.exp(v_loglik_constr_cond)))
        print('P(Y|C) {}'.format(v_loglik_unconstr + v_loglik_constr - v_loglik_constr_cond))
    
    print('P(Y, C)', v_loglik_unconstr + v_loglik_constr)

In [None]:
model.reset()
print_constr_loglik(model)

In [None]:
model._EM_update(fix_likelihood = False, bounds = None, n = 100, verbatim = True)
print(model)
print_constr_loglik(model)

In [None]:
# Optimize constrained

bounds = [(1e-1, None)]*6

model._optimize_constrained(fix_likelihood = True, conditional = False, opt_method = 'L-BFGS-B', 
                            algorithm = 'minimax_tilting', n = 100, bounds = bounds)
print(model)

In [None]:
# Optimize
model.optimize(include_constraint = False, fix_likelihood = True)
print(model)

In [None]:
#ker = kernel_RBF(variance = 1, lengthscale = [1]*input_dim)

model.kernel.variance = 1
model.kernel.lengthscale = [1]*input_dim

print(model)

In [None]:
t0 = time.time()
p = model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 1000)
print('p', p)
print('time', time.time() - t0)

In [None]:
t0 = time.time()
p = model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 1000)
print('p', p)
print('time', time.time() - t0)

## Plot likelihood

In [None]:
def loglik_unconstr(x):
    i = 1
    
    tmp = model.kernel.lengthscale[i]
    model.kernel.lengthscale[i] = x
    
    model.reset()
    res = model._loglik_unconstrained()
    
    model.kernel.lengthscale[i] = tmp
    model.reset()
    
    return res

def loglik_constr(x):
    i = 1
    
    tmp = model.kernel.lengthscale[i]
    model.kernel.lengthscale[i] = x
    
    model.reset()
    
    v_loglik_unconstr = model._loglik_unconstrained() # P(Y)
    v_loglik_constr = np.log(model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 100)) # P(C|Y)
            
    res = v_loglik_unconstr + v_loglik_constr # P(Y, C)
    
    model.kernel.lengthscale[i] = tmp
    model.reset()
    
    return res

In [None]:
# Optimize unconstrained
model.optimize(include_constraint = False, fix_likelihood = True)
print(model)

In [None]:
# Compute likelihood
#x = np.linspace(5, 40, 20)
x = np.linspace(0.1, 0.35, 30)
v_loglik_unconstr = np.array([loglik_unconstr(xx) for xx in x])

In [None]:
import time
t0 = time.time()
v_loglik_constr = np.array([loglik_constr(xx) for xx in x])
print(time.time() - t0)

In [None]:
t0 = time.time()
p = model.constrprob_Xv(posterior = True, algorithm = 'minimax_tilting', n = 1000)
print('p', p)
print('time', time.time() - t0)

In [None]:
# Plot likelihood
trace_P_Y = go.Scatter(x = x, y = v_loglik_unconstr, mode = 'lines', name = 'ln P(Y)')
trace_P_YC = go.Scatter(x = x, y = v_loglik_constr, mode = 'lines', name = 'ln P(Y, C)')

data = [trace_P_Y, trace_P_YC]
layout = go.Layout(title = 'log-likelihood', xaxis=dict(title = 'lengthscale'), yaxis=dict(title = 'log likelihood'))
fig = go.Figure(data = data, layout = layout)
pltlyoff.iplot(fig, filename='')

In [None]:
pltlyoff.iplot(fig, filename='')

In [None]:

# Set optimization bounds manually
bound_lik = None
bound_ker_var = (100, 5000)
bound_ker_len = [(0.1, 10)]*input_dim

bounds = [bound_lik] if bound_lik is not None else []
bounds = bounds + [bound_ker_var] + bound_ker_len



In [None]:
# Optimize constrained
#model.constr_likelihood = 1E-1
#model.likelihood = 0.1
#model.kernel.lengthscale = [1, 1, 1, 1, 1]
#model.kernel.variance = 1

#model.reset()
#model.optimize(include_constraint = True, conditional = True, fix_likelihood = True, bound_min = 0.1, pc_alg = 'minimax_tilting', n = 10)


# Set optimization bounds manually
bound_lik = None
bound_ker_var = (900, 5000)
bound_ker_len = [(0.1, 10)]*input_dim

bounds = [bound_lik] if bound_lik is not None else []
bounds = bounds + [bound_ker_var] + bound_ker_len

# Optimize
model.reset()
model._optimize_constrained(fix_likelihood = True, opt_method = 'differential_evolution', algorithm = 'minimax_tilting',
                            n = 10, conditional = True, bounds = bounds)

print(model)

In [None]:
print(model)

In [None]:
print(model)

In [None]:
model.constr_likelihood = 1E-1
model.likelihood = 0.1
model.kernel.lengthscale = [1, 1, 1, 1, 1]

print(model.kernel.likelihood)
print(model.kernel)

In [None]:
# Optimize hyperparameters
#model.optimize(include_constraint = True, fix_likelihood = True, n = 10)
#opt_args = {'options' : {'maxtime ':10}}
opt_args = {}
model._optimize_constrained(fix_likelihood = True, opt_method = 'shgo', algorithm = 'minimax_tilting', n = 10, opt_args = opt_args)
print(model)

In [None]:
model.kernel.variance = 10000
model.reset()
print(model)

In [None]:
# Print constraint probability P(C)
model.constrprob_Xv(n = 10)

#### Test the constrained model 

In [21]:
percentiles = [0.025, 0.5, 0.975]
mean, var, perc, mode, samples = model.calc_posterior_constrained(x_test, compute_mode = False, num_samples = 10000, save_samples = 30, algorithm = 'minimax_tilting', resample = False)
mean = np.array(mean).flatten()
lower = perc[0]
upper = perc[2]
var = np.array(var).flatten()

..Running calculation of K_w ... SKIP - (cached)
..Running calculation of Cholesky factor for K_w ... SKIP - (cached)
..Running preparation step 1 - dependence on (XS, X) ... DONE - time: 0.002 seconds
..Running preparation step 2 - dependence on (XV, X) ... SKIP - (cached)
..Running preparation step 3 - dependence on (XS, XV, X) ... DONE - time: 0.002 seconds
..sampling 10000 times from truncated constraint distribution C~|C, Y DONE - time: 0.150 seconds
..sampling 10000 times from constrained GP f*|C, Y DONE - time: 0.085 seconds
..computing statistics from samples DONE - time: 0.042 seconds
 DONE - Total time: 0.289 seconds


In [22]:
pltlyoff.iplot(figs[1], filename='')

In [23]:
fig = pred_vs_error_perc(mean, lower, upper, y_test, 95, title = ' With constraints')
pltlyoff.iplot(fig, filename='')
print('P(C)', model.constrprob_Xv(n = 100))

P(C) 0.030861417317692576


In [24]:
# Compute Q2 and PVA
Q2 = fQ2(mean, y_test)
PVA = fPVA(mean, var, y_test)

print('Q2', Q2)
print('PVA', PVA)

Q2 0.9516040563990925
PVA 0.5240033881642178


In [None]:
# Show plot as static image
Image(pio.to_image(fig, width=700, height=500, scale=1, format='png'))

#### Plot a 1D slice of the constrained model

In [None]:
# Plot constrained
mean, var, perc, mode, samples = model.calc_posterior_constrained(px_test_arr, compute_mode = False, num_samples = 1000, algorithm = 'minimax_tilting', resample = False)

p_lower = perc[0]
p_upper = perc[2]
p_label = '[p{}, p{}] conf.'.format(10, 90)

samplepaths_Z = []
if show_samplepaths: 
    samplepaths_Z = np.array(samples)

fig_constrained_1 = PlotGP2d(x_mean = px_test, mean = np.array(mean).flatten(),
                        samplepaths =  samplepaths_Z,
                        x_true = px_test, y_true = y_true,
                        p_lower = p_lower, p_upper = p_upper, p_label = p_label,
                        title = 'f(x) as a function of x[{0}] where x[i] = {1} for i != {0}'.format(plot_x_dim, x_base_val))

pltlyoff.iplot(fig_unconstr_slice, filename='')
pltlyoff.iplot(fig_constrained_1, filename='')

In [None]:
# Show plot as static image
Image(pio.to_image(fig_constrained_1, width=700, height=500, scale=1, format='png'))

In [None]:
y_pred = mean
var = var
y_test = y_test

Q2 (mean, y_test)

PVA(y_pred, var, y_test)


In [None]:
def Q2(y_pred, y_test):
    """
    Compute Q2 = determination coefficient (R2) computed from a test sample (y_pred, y_test)
    """

    # Sum of squares of residuals
    SS_res = ((y_pred - y_test)**2).sum()

    # Total sum of squares
    y_test_avg = y_test.mean()
    SS_tot = ((y_test - y_test_avg)**2).sum()

    #Q2
    Q2 = 1 - SS_res/SS_tot

    return Q2

def PVA(y_pred, y_pred_var, y_test):
    """
    Return Predictive Variance Adequation (PVA)
    """
    # Sum of sqares of scaled residuals
    SS_res_var = (((y_pred - y_test)**2)/(var**2)).sum()

    PVA = np.abs(np.log((1/y_pred.shape[0])*SS_res_var))
    
    return PVA



In [None]:
import time

t0 = time.time()
p_c, x_min = model._argmin_pc_subop_finite(1, 0, Omega, batch_size = 10000, verbatim = True)
print(time.time() - t0)

In [None]:
t0 = time.time()
p_c, x_min = model._argmin_pc_subop_finite(1, 0, Omega, batch_size = 2000, verbatim = True)
print(time.time() - t0)

In [None]:
Omega = np.random.uniform(size = (1000, input_dim))
tests = [100, 150, 1000]

for k in range(len(tests)):
    t0 = time.time()
    p_c, x_min = model._argmin_pc_subop_finite(1, 0, Omega, batch_size = tests[k], verbatim = False)
    t = time.time() - t0
    
    print('size = {}, time = {}'.format(tests[k], t))

In [None]:
Minimum probability = 0.9560970340294394, at x = [0.64544941 0.00645169 0.96327622 0.03918661 0.4968492 ]

In [None]:
batch_size = 8

assert batch_size <= Omega.shape[0], 'batch_size must be less than number of elements in Omega'

num_intervals, rem = np.divmod(Omega.shape[0], batch_size)

pc = []
for j in range(num_intervals): 
    pc += list(model._constrprob_xs_1(Omega[j*batch_size:(j+1)*batch_size], i = 1, nu = 0))

if rem != 0:
    pc += list(model._constrprob_xs_1(Omega[-rem:], i = 1, nu = 0))

print(pc)


In [None]:
pc = np.array(pc)

pc.argmin()

In [None]:
[0.5178513368409448, 0.5192353752860701, 0.5070947980832128, 0.5072131006293549, 0.5167014232261302, 0.5201002825557193, 0.5089747632329253, 0.5167196372476984, 0.5196718297398271, 0.5214311630911044]