In [1]:
import gpytorch
import numpy as np
import pandas as pd
import torch

from bo_functions import best_error_advanced
from bo_functions import calc_ei_advanced
from bo_functions import ExactGPModel
from bo_functions import train_GP_model
from bo_functions import calc_GP_outputs
from bo_functions import calc_y_expected

from bo_plotters import plotter_adv
from bo_plotters import basic_plotter
from bo_plotters import ei_plotter_basic
from bo_plotters import y_plotter_adv
from bo_plotters import stdev_plotter_adv
from bo_plotters import error_plotter_adv
from bo_plotters import ei_plotter_adv

# Plotting
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.tri as mtri
import matplotlib

In [2]:
# Load the experimental dataset from CSV (first row is the header).
exp_data_doc = "exp_data.csv"
exp_data = np.array(pd.read_csv(exp_data_doc, sep=",", header=0))

# Column 1 holds the experimental x values, column 2 the measured y values.
Xexp, Yexp = exp_data[:, 1], exp_data[:, 2]

# Number of experimental points
n = Xexp.shape[0]
print(n)

# Parameter set labelled as the true Theta for this problem
Theta_True = np.array([1, -1])

5


In [3]:
# Load the GP training set from CSV (first row is the header).
train_data_doc = "train_3_in_data.csv"
train_data = np.array(pd.read_csv(train_data_doc, sep=",", header=0))

# Columns 1-2 are the theta values alone; columns 1-3 are the full GP input
# (the point p = [Theta1, Theta2, x]); column 4 is the training target.
train_theta = train_data[:, 1:3]
gp_inputs = train_data[:, 1:4]
gp_targets = train_data[:, 4]

# The GP works on torch tensors
train_p = torch.tensor(gp_inputs)
train_y = torch.tensor(gp_targets)
print(train_p, train_y, sep="\n")

tensor([[ 1.8567e+00,  7.6583e-01,  1.2417e+00],
        [-1.5359e+00, -1.4589e+00,  4.0733e-01],
        [ 1.1929e+00, -1.2549e+00, -1.8451e+00],
        [-1.9292e+00,  8.9024e-01,  1.6229e+00],
        [ 2.7640e-01,  6.0036e-02,  1.7340e+00],
        [ 6.7009e-01, -6.7689e-01,  1.1550e+00],
        [-2.6864e-01,  6.1656e-01, -1.7169e+00],
        [ 8.2577e-01,  1.3867e+00, -3.5378e-01],
        [ 1.0064e+00,  3.3627e-01,  6.2007e-01],
        [-1.6244e-03,  1.2003e-01,  7.8550e-01],
        [-1.3450e+00,  4.2636e-01, -8.2567e-01],
        [-9.8063e-01, -1.3159e-01, -4.0761e-01],
        [-1.0562e+00, -1.9284e+00,  3.4419e-01],
        [-1.7425e+00,  1.0433e+00, -1.5221e-01],
        [-1.3687e+00,  1.9121e+00,  2.0146e-01],
        [-1.6021e-01, -7.7287e-01, -6.7441e-01],
        [ 1.2909e+00,  1.7409e+00, -1.3370e+00],
        [-7.1961e-01, -9.3770e-01,  1.4750e+00],
        [-5.2986e-01,  1.5590e+00,  9.2303e-01]], dtype=torch.float64)
tensor([  5.4011,  -0.8001, -12.7549,   3.4883,

In [4]:
# Initialize the likelihood and the GP model.
# The Gaussian likelihood assumes a homoskedastic noise model: p(y | f) = f + noise
likelihood = gpytorch.likelihoods.GaussianLikelihood()

# Use the simplest form of GP model (exact inference). ExactGPModel is the
# class defined in bo_functions; it is built from the training inputs,
# the training targets, and the likelihood created above.
model = ExactGPModel(train_p, train_y, likelihood)

In [5]:
# Set the number of training iterations and train the GP.
# train_GP_model comes from bo_functions; it is handed the model, likelihood,
# training inputs/targets and the iteration count.
# NOTE(review): presumably this optimizes the model hyperparameters in place — confirm in bo_functions.
iterations = 500
train_GP_model(model,likelihood, train_p, train_y, iterations)

In [6]:
# Switch to evaluation (predictive posterior) mode before making predictions.
# Put the model in evaluation mode
model.eval()
# Put the likelihood in evaluation mode; the trailing ';' suppresses the cell's repr output
likelihood.eval();

In [7]:
# Build the grid of candidate (Theta1, Theta2) values.
p = 10  # Points per axis. Any bigger than 10 and the kernel just dies
lower, upper = -2, 2
Theta1 = np.linspace(lower, upper, p)
Theta2 = np.linspace(lower, upper, p)

# theta_mesh has shape (2, p, p): theta_mesh[0] holds Theta1 values and
# theta_mesh[1] holds Theta2 values at every grid node.
theta_mesh = np.stack(np.meshgrid(Theta1, Theta2))

# Flatten the grid into a (p*p, 2) tensor with one [Theta1, Theta2] pair per row.
theta_pairs = np.transpose(theta_mesh).reshape(-1, 2)
theta_space = torch.tensor(theta_pairs)

In [8]:
# Split the stacked mesh into its Theta1 and Theta2 coordinate grids
# (first and second entries along the leading axis).
theta1_mesh, theta2_mesh = theta_mesh

In [9]:
# Compute the expected improvement (EI) at every (Theta1, Theta2) grid node.
#
# For each node the GP is evaluated at the points p = [Theta1, Theta2, Xexp[k]]
# for every experimental x. The negative squared differences between the
# experimental y values and the GP means give a per-node "best error", and the
# EI contributions from calc_ei_advanced are summed over the experiments.

#Define f_bar: the experimental y values the GP predictions are compared to
f_bar = Yexp #(1xn)
#Will compare the rigorous solution and approximation later (multidimensional integral over each experiment using a sparse grid)

#Create an array in which to store expected improvement values
EI = np.zeros((p,p)) #(p1 x p2)
# Loop over the first mesh axis
for i in range(p):
    # Loop over the second mesh axis
    for j in range(p):
        # Evaluate the GP exactly once per experimental point and keep the
        # results, so the error pass and the EI pass share the same predictions
        # instead of repeating every GP call.
        model_means = []
        model_variances = []
        for k in range(n):
            #Evaluate GP at a point p = [Theta1,Theta2,Xexp]
            eval_point = np.array([[theta1_mesh[i,j], theta2_mesh[i,j], Xexp[k]]])
            GP_Outputs = calc_GP_outputs(model, likelihood, eval_point[0:1])
            # Index 3 is used as the GP mean and index 1 as the GP variance
            model_means.append(GP_Outputs[3].numpy()[0])
            model_variances.append(GP_Outputs[1].numpy()[0])

        #Negative squared error between each experimental value and the GP mean
        error = np.zeros(n)
        for k in range(n):
            error[k] = -(f_bar[k] - model_means[k])**2

        #Define best_error as the negated maximum value in the error array,
        #i.e. the smallest squared error over the experiments at this node
        # NOTE(review): best_error is recomputed from GP predictions at every
        # candidate theta; best_error_advanced is imported but unused — confirm
        # this per-node definition is the intended one.
        best_error = -max(error)

        #Sum the EI contribution of each experimental point given the best error
        for k in range(n):
            EI[i,j] += calc_ei_advanced(best_error, model_means[k], model_variances[k], Yexp[k])

print(EI)
print(np.max(EI))
print(np.argmax(EI))

# Locate the maximum on the 2-D grid from the flat argmax rather than relying
# on a hard-coded mesh index, then report the matching theta values.
best_i, best_j = np.unravel_index(np.argmax(EI), EI.shape)
print("The test parameter set that gives the highest expected imrovement is \nTheta1, Theta2 = \n")
print(theta1_mesh[best_i, best_j], theta2_mesh[best_i, best_j])

[[1.87762272e-01 4.52375169e-01 1.81228375e-01 2.04051831e-03
  6.30671154e-01 2.71424624e+00 3.01976867e+00 3.34045691e+00
  3.64757492e+00 3.88345846e+00]
 [2.47552278e-02 1.02264517e-01 4.37033933e-01 5.85000099e-05
  4.48207971e-01 9.29343006e-01 1.10714949e+00 1.31538449e+00
  1.54694807e+00 1.77785400e+00]
 [3.20539068e-05 1.09430622e-03 3.66317621e-02 3.69948215e-02
  4.99775967e-02 7.70218406e-02 1.20658231e-01 1.84763339e-01
  2.73816795e-01 3.91745596e-01]
 [2.17647659e-04 4.51695462e-04 2.46560755e-04 1.31375171e-02
  2.21227084e-05 1.04665672e-02 3.89082943e-03 4.13048099e-04
  1.63981357e-05 2.89725450e-03]
 [1.80064002e-02 8.99494493e-02 8.15836347e-02 7.94322862e-03
  1.46725833e-03 6.45120845e-03 4.28394103e-01 4.25978087e-01
  2.67484947e-01 1.19892213e-01]
 [1.00800745e-01 3.17254512e-01 3.18375092e-01 1.56345698e-01
  3.67764039e-02 9.30139524e-04 2.81658657e-07 2.62012539e-02
  6.73906208e-02 8.41862858e-02]
 [1.40441756e-01 4.19092002e-01 4.74529401e-01 3.64826954e

In [10]:
# Plot the expected improvement over the theta mesh (presumably with the
# training thetas overlaid, given plot_train=True — verify in bo_plotters).
# NOTE(review): `Theta_true` (lowercase "t") is never defined in this notebook;
# only `Theta_True` is, so this call raises NameError as written. Check the
# signature of ei_plotter_basic in bo_plotters and pass the intended argument.
ei_plotter_basic(theta_mesh, EI, Theta_true, Theta_True,train_theta,plot_train=True)

NameError: name 'Theta_true' is not defined

## Analysis of Expected Improvement
 - Expected Improvement is largest farther from the edges
  - This is rational because you can't explore any further than the edges
 - Expected Improvement increases as error decreases
  - This is rational because as error decreases, more exploitation is possible
 - This means we are most likely to sample in the middle, farthest from the edges


## Analysis of Standard Deviation
 - The GP estimates that the standard deviation is lowest at points that were directly tested
  - This can be rationalized by the way that the contour plot is drawn
 - Standard deviation is smallest away from the edges and larger towards them
  - This is rationalized by the fact that, at the boundaries, the GP has fewer neighboring points to be trained and tested with
 - The more points that get tested, the more the standard deviations will decrease

## Analysis of Error Magnitude
 - The GP emulator is most inaccurate when all values of $\bar{p}$ are at their maximum. 
  - In general, the GP is less accurate at extreme points. This is rationalized by the fact that, at the boundaries, the GP has fewer neighboring points to be trained and tested with
 - The GP emulator is most accurate when x is at its maximum, but $\bar{\Theta}= 0$
  - This is rationalized by the fact that multiple terms become zero if any of the values of $\bar{p}$ are zero 
 - GP error is mostly very high; as more iterations are added, the error should decrease

## Analysis of GP Emulator (Model y)
 - The GP emulator correctly captures that y increases as $\bar{p}$ increases. This tells us that this GP emulator model could be viable
  - The GP emulator correctly estimates where the lowest y is achieved, but not the actual value of y
  - The GP emulator slightly mistakes where the most positive value of y is, and does not predict the actual value of y
 - The model in its current state is inaccurate; BO should increase the accuracy of the emulator