# Gaussian Process Surrogate Modeling for Molecular Dynamics Optimization

## Imports and Functions

In [None]:
# Math Packages
import numpy as np
import torch as torch
from scipy import interpolate
from scipy.optimize import minimize
import time as time
import pandas as pd

# Plotting
import matplotlib.pyplot as plt  
from matplotlib.pyplot import figure

# Data saving packages
from pickle import dump, load

# Parallelization
from dask import config as cfg
import dask
from dask.distributed import Client, progress
from multiprocessing import Pool
import multiprocessing as mp
import os
mp.set_start_method('fork')
os.environ["OMP_NUM_THREADS"] = "1" 
import emcee

In [None]:
def se_kernel(x1, x2, l, width):
    """
    Evaluates the squared exponential kernel between every row of x1 and every row of x2.

    Parameters
    ----------

    x1: Tensor [N,D]
        First set of N feature vectors with D dimensions each

    x2: Tensor [M,D]
        Second set of M feature vectors with D dimensions each

    l: Tensor [D]
        Lengthscale hyper parameter; every input dimension is divided by its own lengthscale

    width: Float
        Width hyper parameter; the kernel is scaled by width**2

    Returns
    -------

    K: Tensor [N,M]
        Kernel matrix with K[i,j] = width**2 * exp(-|x1[i]/l - x2[j]/l|**2 / 2)
    """
    scaled_1 = x1 / l
    scaled_2 = x2 / l
    # Pairwise Euclidean distances between the lengthscale-normalised inputs
    dist = torch.cdist(scaled_1, scaled_2, p=2)
    amplitude = width**2
    return amplitude * torch.exp(-(dist**2) / 2)

def surrogate(Xi, Xd, l, width, y, KddInv):
    """
    Computes the classic gaussian process mean estimate at the query points Xi, using a
    constant prior mean of 1.

    Parameters
    ----------

    Xi: Tensor [η*M,D]
        Feature vector for M potential samples at η r evaluations with D dimensions each. This
        corresponds to the points you wish to infer.

    Xd: Tensor [N,D]
        Feature vector for N samples with D dimensions each. This corresponds to the points you trained on.

    l: Tensor [D]
        Lengthscale hyper parameter.

    width: Float
        Width hyper parameter

    y: Tensor [N,1]
        Output feature vector corresponding to the Xd training set.

    KddInv: Tensor [N,N]
        This is the inverted kernel matrix of the training set Xd

    Returns
    -------

    μ: Tensor [η*M,1]
        The mean estimate 1 + Kid @ KddInv @ (y - 1), one row per row of Xi.

    """
    # The prior mean is the constant 1, so no pair-potential evaluation is needed here. The
    # previous implementation built a potential-of-mean-force term that was never used; it has
    # been removed so the function no longer depends on the module-level `r` and `kbT`.
    Kid = se_kernel(Xi, Xd, l, width)

    return 1 + (Kid @ KddInv @ (y - 1))

def local_surrogate(Xi, Xd, l, width, y, KddInv, μd):
    """
    Computes the subset gaussian process estimate of g(r) for a set of pair potential parameters,
    using exp(-V/kbT) of an (n-6) Mie-type pair potential V as the prior mean.

    The radial grid `r` and the thermal energy `kbT` are read from module scope.

    Parameters
    ----------

    Xi: Tensor [M,D]
        Feature vector for M samples with D dimensions each. This corresponds to the points you wish
        to infer. Columns 0, 1 and 2 are used as (n, σ, ϵ) of the pair potential.

    Xd: Tensor [N,D]
        Feature vector for N samples with D dimensions each. This corresponds to the points you trained on.

    l: Tensor [D]
        Lengthscale hyper parameter.

    width: Float
        Width hyper parameter

    y: Tensor [N,η]
        Training outputs, one row per training sample in Xd.

    KddInv: Tensor [N,N]
        This is the inverted kernel matrix of the training set Xd

    μd: Tensor [N,η]
        Prior mean evaluated at the training samples; it is subtracted from y.

    Returns
    -------

    μ: Tensor [η,M]
        The mean estimate for each g(r) at each potential. The g(r)'s are organized in vertical lines
        where the column dimension indexes the potential parameters.

    """
    # Prior mean: one row per query potential, evaluated on the grid r
    potential_rows = []
    for n, s, e in zip(Xi[:, 0], Xi[:, 1], Xi[:, 2]):
        prefactor = (n / (n - 6)) * ((n / 6) ** ((6) / (n - 6)))
        potential_rows.append(prefactor * e * ((s / r) ** n - (s / r) ** 6))
    prior = torch.exp(-torch.stack(potential_rows) / kbT)

    # GP correction: kernel weights applied to the training residuals
    weights = se_kernel(Xi, Xd, l, width) @ KddInv
    return (prior + weights @ (y - μd)).T

## Importing the training data

In [None]:
# Import the training data generated by 1_sample_gen.ipynb

# Radial grid on which every RDF is tabulated
rmin = 0.0118331810091873
rmax = 15.512161254882812
rnum = 73

k   = 3.29982916e-27 # Boltzmann constant,  [kcal/K/particle]
# NOTE(review): the accepted Avogadro constant is 6.02214e23; 6.0223e23 may be a typo — confirm
# before changing, since it feeds kbT and therefore every prior mean below.
av  = 6.0223e23      # Avogadro number,     [particle/mol]
T   = 42.2           # temperature,         [K]
kbT = k * av * T

# Context managers make sure the pickle files are closed once they have been read
with open('training_data/xs.p', 'rb') as training_file:
    input_dict = load(training_file)
xd = input_dict['xs'].reshape(3*320,3)

with open('training_data/rdfs.p', 'rb') as training_file:
    input_dict = load(training_file)
model_rdf = input_dict['rdfs']

r = torch.linspace(rmin,rmax,rnum)

In [None]:
experimentalCSVFilename = 'exp_data/ne_42K_rdf.csv'

data = pd.read_csv(experimentalCSVFilename)
r_exp_raw = np.array(data['r'])
rdf_exp_raw = np.array(data[' g'])  # this column name is looked up with a leading space

rmin = 0.0118331810091873
rmax = 15.512161254882812
rnum = 73

# Interpolate the experimental data to make it consistent with the simulations.
# Note that this rebinds r as a tensor built from a numpy linspace.
r  = torch.tensor(np.linspace(rmin, rmax, num=rnum))
rdf_exp_i = interpolate.splrep(r_exp_raw, rdf_exp_raw, s=0)  # s=0: interpolating spline, no smoothing
rdf_exp = torch.from_numpy(interpolate.splev(r, rdf_exp_i, der=0))

print("Old length: ", len(r_exp_raw))
print("New length: ", len(r))

figure(figsize = (12,10),dpi=80)
plt.title("Experimental")
plt.scatter(r_exp_raw,rdf_exp_raw,alpha=0.4)
plt.plot(r, rdf_exp)
plt.xlim(rmin,rmax)
# Raw string: "\A" is not a valid Python escape sequence and warns in a normal string literal
plt.xlabel(r"$\AA$")
plt.show()

In [None]:
# Visualization of training set

# Every training RDF drawn on the shared radial grid r
figure(figsize = (12,10),dpi=80)
plt.title("GP Training Set")
for rdf in model_rdf:
    plt.plot(r,rdf,alpha=0.4)
plt.xlim(rmin,rmax)
# r is a distance, so the axis unit is Å (as in the experimental plot), not Å^-1.
# Raw string because "\A" is not a valid Python escape sequence.
plt.xlabel(r"$\AA$")
plt.show()

# Pairwise scatter plots of the sampled potential parameters (columns of xd: n, σ, ϵ)
fig, axs = plt.subplots(2, 2, figsize=(14, 8))
plt.suptitle('Potential Parameter Distributions')
axs[0,0].scatter(xd[:, 0], xd[:, 1],label="Samples")
axs[0,0].set_xlabel('n')
axs[0,0].set_ylabel('σ')
axs[0,1].scatter(xd[:, 0], xd[:, 2],label="Samples")
axs[0,1].set_xlabel('n')
axs[0,1].set_ylabel('ϵ')
axs[1,0].scatter(xd[:, 1], xd[:, 2],label="Samples")
axs[1,0].set_xlabel('σ')
axs[1,0].set_ylabel('ϵ')
plt.show()

## Basic Matrices for GP Calculations

In [None]:
n = len(xd)
η = len(r)

# Inputs and outputs for the subset GP: one row per potential
Xd = torch.tensor(xd).float()
y = model_rdf.float()

# Classic GP design matrix: one row (n,σ,ϵ,r) per (potential, radius) pair. Rows are ordered
# potential-major, i.e. row i*η + j pairs potential i with r[j]. Built with tensor ops instead
# of an n*η Python loop, which also avoids reusing `k` (the Boltzmann constant) as a counter.
XdClassic = torch.cat(
    [Xd[:, :3].repeat_interleave(η, dim=0), r.float().repeat(n).unsqueeze(1)],
    dim=1,
)
# Matching outputs as a column vector [n*η, 1]; clone so it does not share storage with y
yClassic = y[:n, :η].reshape(n*η, 1).clone()

index = torch.arange(0,len(xd),1)

## Choosing the hyper-parameters based on LOO and LMLH

In [None]:
def g_i(i, θ):
    """
    Leave-one-out style objective for a single output column of the training set.

    Uses the module-level training inputs `Xd` and outputs `y`.

    Parameters
    ----------

    i: Int
        Index of the output column of y (row of y.T) to evaluate.

    θ: Sequence of 5 values
        Hyper parameters: θ[0:3] are the lengthscales, θ[3] is the kernel width and θ[4] is the
        value added to the diagonal of the kernel matrix.

    Returns
    -------

    g: Tensor (scalar)
        Objective value; smaller is better when used for hyper parameter selection.
    """
    lengthscales = torch.tensor([θ[0],θ[1],θ[2]]).float()
    kernel_width = torch.tensor(θ[3]).float()
    noise = torch.tensor(θ[4]).float()

    n_train = len(Xd)
    Kdd = se_kernel(Xd,Xd,lengthscales,kernel_width) + torch.eye(n_train)*noise
    # Invert through the Cholesky factor of the kernel matrix
    KddInv = torch.cholesky_inverse(torch.linalg.cholesky(Kdd))

    inv_diag = torch.diagonal(KddInv, 0)
    # (KddInv @ y_i) scaled by the square root of the diagonal of the inverse
    scaled_resid = (KddInv @ y.T[i])/torch.sqrt(inv_diag)

    fit_term = (1/(2*n_train))*(scaled_resid@scaled_resid)
    variance_term = (1/(2*n_train))*torch.sum(torch.log(inv_diag))

    return fit_term - variance_term + (1/2)*np.log(2*np.pi)

def g(θ):
    """
    Leave-one-out style objective summed over every output column of the training set.

    Uses the module-level training inputs `Xd` and outputs `y`. The number of output columns
    is taken from y instead of being fixed at 73.

    Parameters
    ----------

    θ: Sequence or Tensor of 5 values
        Hyper parameters: θ[0:3] are the lengthscales, θ[3] is the kernel width and θ[4] is the
        value added to the diagonal of the kernel matrix.

    Returns
    -------

    g: Tensor (scalar)
        Objective value; smaller is better when used for hyper parameter selection.
    """
    # as_tensor avoids the copy-construct warning raised by torch.tensor(<tensor>) when θ is
    # already a tensor row (as it is for the dask search), and still accepts plain sequences.
    θ = torch.as_tensor(θ).float()
    l, w, σn = θ[:3], θ[3], θ[4]

    n_train = len(Xd)
    n_out = y.shape[1]

    Kdd = se_kernel(Xd,Xd,l,w) + torch.eye(n_train)*σn
    KddInv = torch.linalg.inv(Kdd)

    KddInv_ii = torch.diagonal(KddInv, 0)
    logKddInv_ii = torch.log(KddInv_ii)

    # (KddInv @ y).T is [n_out, n_train]; the diagonal broadcasts across the output rows
    scaled_resid = (KddInv @ y).T/torch.sqrt(KddInv_ii)

    fit_term = (1/(2*n_train))*torch.sum(scaled_resid**2)
    variance_term = (n_out/(2*n_train))*torch.sum(logKddInv_ii)

    return fit_term - variance_term + (n_out/2)*np.log(2*np.pi)

In [None]:
def compute_loo(hyperParamOptions,j_0, j_last):
    """
    Computes explicit leave-one-out predictions of the subset GP for the hyper parameter sets
    with indices j_0 <= j < j_last and saves them to disk.

    Uses the module-level training data `Xd`, `y`, the radial grid `r` and `kbT`.

    Parameters
    ----------

    hyperParamOptions: Tensor [T,5]
        Candidate hyper parameters; each row is (three lengthscales, width, diagonal noise).

    j_0: Int
        First hyper parameter index to evaluate (inclusive).

    j_last: Int
        Last hyper parameter index to evaluate (exclusive).

    Returns
    -------

    looμArr: Tensor [j_last - j_0, N, η]
        Leave-one-out prediction of every training sample for every evaluated hyper parameter set.

    hyperParamOptions: Tensor [T,5]
        The full, unsliced input (the pickle written to disk stores only rows j_0:j_last).
    """
    n_train = len(Xd)
    looμArr = torch.zeros((j_last - j_0,n_train,len(r)))

    # The prior mean of the training set does not depend on the hyper parameters,
    # so it is computed once instead of once per hyper parameter set.
    Vd = torch.stack([((n/(n-6))*((n/6)**((6)/(n-6))))*e*((s/r)**n - (s/r)**6) for n,s,e in zip(Xd[:,0],Xd[:,1],Xd[:,2])])
    μd = torch.exp(-Vd/kbT).float()

    all_rows = torch.arange(n_train)

    for j in range(j_0,j_last):

        # Calculate Kdd for local GP with hyper parameter index j
        arr = torch.as_tensor(hyperParamOptions[j]).float()
        l, w, σn = arr[:3], arr[3], arr[4]
        Kdd = se_kernel(Xd,Xd,l,w) + torch.eye(n_train)*σn

        # Leave index i out from training and predict it using Local GP
        for i in range(n_train):
            keep = all_rows != i

            # Drop row and column i from the kernel matrix, then invert what is left
            KddInv_i = torch.linalg.inv(Kdd[keep][:, keep])

            Xi = Xd[i].unsqueeze(dim=0)

            # local_surrogate returns one column per query point; there is a single query here
            looμ = local_surrogate(Xi,Xd[keep],l,w,y[keep],KddInv_i,μd[keep])
            looμArr[j - j_0, i] = looμ[:, 0]

    output_dict = dict(looμArr = looμArr, hyperParamOptions = hyperParamOptions[j_0:j_last])
    with open('training_data/hyperparameter/PMF/hyperParams'+str(j_0)+'.p', 'wb') as out_file:
        dump(output_dict, out_file)

    return looμArr, hyperParamOptions

In [None]:
# Configure dask and start the client used by the hyper parameter search below.
cfg.set({'distributed.scheduler.worker-ttl': None}) # Setting worker-ttl to None stops dask from complaining when the sims take a long time.
client = Client(n_workers=80) # NOTE(review): 80 workers is hard-coded — presumably sized for the original compute node; confirm before running elsewhere.
# Example dashboard URL from the original run (cluster specific):
# https://ondemand.chpc.utah.edu/rnode/notch149.ipoib.int.chpc.utah.edu/8787/status
client

In [None]:
try:
    print("Attempting to load previously calculated hyper parameters")

    # Reuse a cached search when one exists; the context manager closes the file after reading
    with open('training_data/hyperparameterG/PMF/hyperParams.p', 'rb') as cache_file:
        input_dict = load(cache_file)
    results = input_dict['results']
    hyperParamOptions = input_dict['hyperParamOptions']
    print("Success!!!")

except Exception as load_error:
    # Ordinary failures (missing or unreadable cache, missing key) trigger a fresh search.
    # KeyboardInterrupt / SystemExit are deliberately no longer caught.
    print("Failed")
    print("Reason:", repr(load_error))

    # Uniform sampling ranges for the three lengthscales, the kernel width and the noise
    ℓ_nmin = 0.5
    ℓ_nmax = 4

    ℓ_σmin = 0.01
    ℓ_σmax = 0.05

    ℓ_ϵmin = 0.001
    ℓ_ϵmax = 0.01

    w_min = 1e-4
    w_max = 1e-1

    σn_min = 1e-4
    σn_max = 1e-2


    # Creates a set of hyper parameters to compare
    trials = 5_000
    hyperParamOptions = torch.zeros((trials,5))
    hyperParamOptions[:,0] = (ℓ_nmax - ℓ_nmin) * torch.rand(trials) + ℓ_nmin
    hyperParamOptions[:,1] = (ℓ_σmax - ℓ_σmin) * torch.rand(trials) + ℓ_σmin
    hyperParamOptions[:,2] = (ℓ_ϵmax - ℓ_ϵmin) * torch.rand(trials) + ℓ_ϵmin
    hyperParamOptions[:,3] = (w_max  - w_min)  * torch.rand(trials) + w_min
    hyperParamOptions[:,4] = (σn_max - σn_min) * torch.rand(trials) + σn_min

    # Queue up function calls into dask: one delayed evaluation of g per candidate
    lazy_results = [dask.delayed(g)(hyperParamOptions[i]) for i in range(trials)]

    print("Queued Lazy Results")

    results = dask.compute(*lazy_results)

    output_dict = dict(results = results, hyperParamOptions = hyperParamOptions) # logMarginalLHArr = logMarginalLHArr
    with open('training_data/hyperparameterG/PMF/hyperParams.p', 'wb') as cache_file:
        dump(output_dict, cache_file)

    print("Done")

In [None]:
# Index of the hyper parameter set with the smallest objective value
torch.tensor(results).argmin()

In [None]:
print('Optimal hyperparameters based on the LOO error')
# Row of hyperParamOptions whose objective value is the minimum
hyperParamOptions[torch.tensor(results).argmin()]

In [None]:
#outdated method
# try:
#     print("Attempting to load previously calculated hyper parameters")
    
#     from pickle import load
#     input_dict = load(open('training_data/hyperparameter/PMF/hyperParams.p', 'rb'))
#     results = input_dict['results']
#     hyperParamOptions = input_dict['hyperParamOptions']
#     print("Success!!!")
    
# except:
#     print("Failed")
    
#     # Creates a set of hyper parameters to compare
#     hyperParamOptions = torch.zeros((trials,5))
#     hyperParamOptions[:,0] = (ℓ_nmax - ℓ_nmin) * torch.rand(trials) + ℓ_nmin
#     hyperParamOptions[:,1] = (ℓ_σmax - ℓ_σmin) * torch.rand(trials) + ℓ_σmin
#     hyperParamOptions[:,2] = (ℓ_ϵmax - ℓ_ϵmin) * torch.rand(trials) + ℓ_ϵmin
#     hyperParamOptions[:,3] = (w_max  - w_min)  * torch.rand(trials) + w_min
#     hyperParamOptions[:,4] = (σn_max - σn_min) * torch.rand(trials) + σn_min

#     # Storage for the leave one out prediction of local GP
#     looμArr = torch.zeros((len(hyperParamOptions),len(Xd),len(r))) 
    
#     # Queue up function calls into dask
#     lazy_results = []
#     for i in range(int(trials/50)):
#         j_0 = (i*50)
#         j_last = ((i+1)*50)
#         lazy_results.append(dask.delayed(compute_loo)(hyperParamOptions,j_0, j_last))

#     print("Queued Lazy Results")

#     results = dask.compute(*lazy_results)
        
#     output_dict = dict(results = results, hyperParamOptions = hyperParamOptions) # logMarginalLHArr = logMarginalLHArr
#     dump(output_dict, open('training_data/hyperparameter/PMF/hyperParams.p', 'wb'))
    
#     print("Done")

In [None]:
#outdated method
# # Storage for the leave one out prediction of local GP
# looμArr = torch.zeros((len(hyperParamOptions),len(Xd),len(r))) 

# for i in range(int(len(hyperParamOptions)/50)):
#     j_0 = (i*50)
#     j_last = ((i+1)*50)
#     looμArr[j_0:j_last] = results[i][0]

In [None]:
# outdated method
# # Compute the leave one out error for each parameter
# LooErr = torch.zeros(5000)
# for i in range(len(hyperParamOptions[:5000])):
#     LooErr[i] = torch.sum((looμArr[i] - y)**2)
# # Grab the one with the minimum error 
# LooIndex = torch.argmin(LooErr)

# print("Hyperparameters corresponding to the minimum leave-one-out error: ", hyperParamOptions[LooIndex])
# print("Average error per training example per point corresponding to the minimum leave-one-out error: ", np.sqrt(LooErr[LooIndex].item()*(1/960)*(1/len(r))))

## Validation

In [None]:
nsims = 320
ndims = 3

# Held-out test set; context managers make sure the pickle files are closed after reading
with open('testing_data/xs_test.p', 'rb') as test_file:
    input_dict = load(test_file)
xs_test = input_dict['xs_test']

with open('testing_data/rdfs_test.p', 'rb') as test_file:
    input_dict = load(test_file)
rdfs_test_bruh = input_dict['rdfs']

In [None]:
# Hyper parameters used for validation: three lengthscales, kernel width, diagonal noise
# (presumably the optimum found by the search above — confirm against the saved results).
arr = [3.3110e+00, 4.5928e-02, 9.8081e-03, 9.4022e-02, 7.2281e-04]
l = torch.tensor(arr[:3]).float()
w, σn = (torch.tensor(value).float() for value in arr[3:])

# Kernel matrix of the training set and its inverse
Kdd = se_kernel(Xd,Xd,l,w) + torch.eye(len(Xd))*σn
KddInv = torch.linalg.inv(Kdd)
Xi = xs_test.float()

# Pair potential of every training sample on the grid r, one row per sample
Vd = torch.stack([
    ((n/(n-6))*((n/6)**((6)/(n-6))))*e*((s/r)**n - (s/r)**6)
    for n,s,e in zip(Xd[:,0],Xd[:,1],Xd[:,2])
])
μd = torch.exp(-Vd/kbT).float() #PMF prior mean

In [None]:
# Subset GP prediction for every test potential; the result has one column per row of Xi
GPOut = local_surrogate(Xi=Xi, Xd=Xd, l=l, width=w, y=y, KddInv=KddInv, μd=μd)

In [None]:
# Squared prediction error, one row per test potential and one column per r value
sq_err = (GPOut.T - rdfs_test_bruh)**2

RMSE = torch.sqrt(torch.mean(sq_err,dim=0))  # RMSE at every r, averaged over test potentials
RMSE_total = torch.sqrt(torch.mean(sq_err))  # RMSE over all test potentials and r values

# Both lines print the overall RMSE; the first recombines the per-r values as a consistency
# check. The number of r values is taken from RMSE instead of being fixed at 73.
print(torch.sqrt(torch.sum(RMSE**2/RMSE.numel())))
print(RMSE_total)

In [None]:
# RMSE of every test potential over the radial grid. Vectorised so the number of test
# potentials and r values follows the data instead of the literals 320 and 73.
sq_err_per_point = (GPOut.T - rdfs_test_bruh)**2
err = torch.sqrt(torch.sum(sq_err_per_point, dim=1)/sq_err_per_point.shape[1]).float()

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(12, 9))

rmse_reference = 0.03  # RMSE level drawn as a dashed reference line in every panel

# Panels (a)-(c): RMSE of every test potential against each potential parameter.
# Panel labels (a)-(c) are currently disabled; their former positions were
# (10.8, 0.076), (2.92, 0.076) and (0.087, 0.076).
parameter_panels = [
    (axs[0,0], 0, 'λ'),
    (axs[0,1], 1, 'σ (Å)'),
    (axs[1,0], 2, 'ϵ (kcal/mol)'),
]
for ax, column, xlabel in parameter_panels:
    values = xs_test[:,column]
    ax.scatter(values, err, color = 'r', alpha = 0.6, edgecolors = 'k')
    # Tensor min/max instead of the builtin min/max, which iterate element by element
    ax.hlines(rmse_reference,xmin=values.min().item(),xmax=values.max().item(),color='k',linestyle='dashed')
    ax.set_xlabel(xlabel, fontsize = 16)
    ax.set_ylabel('RMSE', fontsize = 16)
#axs[1,0].vlines(.15,0,0.15,color='g',linestyle='dashed')

# Panel (d): RMSE as a function of r
axs[1,1].plot(r,RMSE, color = 'k', label = 'Mean', linestyle = '-')
axs[1,1].set_xlim(0, r[-1])
axs[1,1].set_xlabel('r (Å)', fontsize = 16)
axs[1,1].set_ylabel('RMSE', fontsize = 16)
axs[1,1].hlines(rmse_reference,xmin=rmin,xmax=rmax,color='k',linestyle='dashed')
axs[1,1].text(13.6, 0.095, '(d)', fontsize = 16)
# plt.savefig('rmse', dpi = 600)
plt.show()

In [None]:
# Plot prediction against truth for the 15 test potentials with the largest RMSE,
# in order of increasing error.
worst_cases = torch.argsort(err)[-15:]
predictions = GPOut.T
for case in worst_cases:
    plt.plot(r,predictions[case],label="LGP Model")
    plt.plot(r,rdfs_test_bruh[case],label="True")
    plt.title(xs_test[case])
    plt.legend()
    plt.show()

## Timing Code

In [None]:
try:
    # Reuse cached timings when available; the context manager closes the file after reading
    with open('training_data/ClassicGPTimesRDF', 'rb') as timing_file:
        input_dict = load(timing_file)
    evaluationTimesClassic = input_dict['evaluationTimesClassic']
    inversionTimesClassic = input_dict['inversionTimesClassic']

    print("Loaded previously computed times..")

    print("Average inversion time for classic GP:", np.mean(inversionTimesClassic))
    print()

    print("Average evaluation time for classic GP:", np.mean(evaluationTimesClassic))
    print()

# Get time taken for regular GP
except Exception as load_error:
    # Ordinary failures (missing or unreadable cache, missing key) trigger a fresh measurement.
    # KeyboardInterrupt / SystemExit are deliberately no longer caught.
    print("Could not load cached classic GP timings:", repr(load_error))

    evaluationTimesClassic = []
    inversionTimesClassic = []
    N_trialsClassic = 20

    print("Timing Kdd inversion...")

    # The loop variable is not called `n`, so the training-set size stored in `n` is preserved
    for trial in range(N_trialsClassic):

        # perf_counter is the clock intended for measuring short durations
        t1 = time.perf_counter()

        # No need to apply good hps here, we only care about the time.
        # The timed region covers both building the kernel matrix and inverting it.
        KddClassic = se_kernel(XdClassic,XdClassic,torch.ones(len(XdClassic[0])),1) + 2*torch.eye(len(XdClassic))
        KddInvClassic = torch.linalg.inv(KddClassic)

        t2 = time.perf_counter()

        inversionTimesClassic.append(t2-t1)
    print("Average inversion time for classic GP:", np.mean(inversionTimesClassic))
    print()

    print("Timing surrogate evaluation...")

    # Query points: the first len(r) rows of the classic design matrix
    Xi = XdClassic[:len(r)]

    for trial in range(N_trialsClassic):

        t1 = time.perf_counter()

        μ = surrogate(Xi,XdClassic,torch.ones(len(XdClassic[0])),1,yClassic,KddInvClassic)

        t2 = time.perf_counter()

        evaluationTimesClassic.append(t2-t1)

    print("Average evaluation time for classic GP:", np.mean(evaluationTimesClassic))
    print()

    output_dict = dict(evaluationTimesClassic = evaluationTimesClassic, inversionTimesClassic = inversionTimesClassic)
    with open('training_data/ClassicGPTimesRDF', 'wb') as timing_file:
        dump(output_dict, timing_file)

In [None]:
def local_surrogate2(Xi, Xd, l, width, y, KddInv, μd, μ=None):
    """
    Computes the subset gaussian process estimate of g(r) for a set of pair potential parameters,
    optionally with a precomputed prior mean for the query points.

    Parameters
    ----------

    Xi: Tensor [M,D]
        Feature vector for M samples with D dimensions each. This corresponds to the points you wish to infer.

    Xd: Tensor [N,D]
        Feature vector for N samples with D dimensions each. This corresponds to the points you trained on.

    l: Tensor [D]
        Lengthscale hyper parameter.

    width: Float
        Width hyper parameter

    y: Tensor [N,η]
        Training outputs, one row per training sample in Xd.

    KddInv: Tensor [N,N]
        This is the inverted kernel matrix of the training set Xd

    μd: Tensor [N,η]
        Prior mean evaluated at the training samples; it is subtracted from y.

    μ: Tensor [M,η], optional
        Prior mean evaluated at the query points Xi. When omitted, the prior mean is computed
        from Xi by delegating to local_surrogate. Previously this value was read from an
        undeclared global named μ, which could be undefined or hold an unrelated tensor.

    Returns
    -------

    μ: Tensor [η,M]
        The mean estimate for each g(r) at each potential. The g(r)'s are organized in vertical lines
        where the column dimension indexes the potential parameters.

    """
    if μ is None:
        # No precomputed prior supplied: use the full evaluation, which builds it from Xi
        return local_surrogate(Xi, Xd, l, width, y, KddInv, μd)

    Kid = se_kernel(Xi, Xd, l, width)
    return (μ +(Kid @ KddInv) @ (y-μd)).T

In [None]:
try:
    # Reuse cached timings when available; the context manager closes the file after reading
    with open('training_data/SubsetGPTimesRDF', 'rb') as timing_file:
        input_dict = load(timing_file)
    evaluationTimesSubset = input_dict['evaluationTimesSubset']
    inversionTimesSubset = input_dict['inversionTimesSubset']

    print("Loaded previously computed times..")
    print()

    print("Average inversion time for subset GP:", np.mean(inversionTimesSubset))
    print()

    print("Average evaluation time for subset GP:", np.mean(evaluationTimesSubset))
    print()

# Get time taken for subset GP
except Exception as load_error:
    # Ordinary failures (missing or unreadable cache, missing key) trigger a fresh measurement.
    # KeyboardInterrupt / SystemExit are deliberately no longer caught.
    print("Could not load cached subset GP timings:", repr(load_error))

    evaluationTimesSubset = []
    inversionTimesSubset = []

    N_trials = 1000

    print("Timing Kdd inversion...")
    # The loop variable is not called `n`, so the training-set size stored in `n` is preserved
    for trial in range(N_trials):

        # perf_counter is the clock intended for measuring short durations
        t1 = time.perf_counter()

        # The timed region covers both building the kernel matrix and inverting it
        KddSubset = se_kernel(Xd,Xd,l,w) + σn*torch.eye(len(Xd))
        KddInvSubset = torch.linalg.inv(KddSubset)

        t2 = time.perf_counter()

        inversionTimesSubset.append(t2-t1)
    print("Average inversion time for subset GP:", np.mean(inversionTimesSubset))
    print()

    # Single query point: the first training potential, as a [1,3] tensor
    XiSubset = Xd[:1, :3].clone()

    print("Timing surrogate evaluation...")
    for trial in range(N_trials):

        t1 = time.perf_counter()

        μ = local_surrogate2(XiSubset,Xd,l,w,y,KddInvSubset,μd)

        t2 = time.perf_counter()

        evaluationTimesSubset.append(t2-t1)


    print("Average evaluation time for subset GP:", np.mean(evaluationTimesSubset))
    print()

    output_dict = dict(evaluationTimesSubset = evaluationTimesSubset, inversionTimesSubset = inversionTimesSubset)
    with open('training_data/SubsetGPTimesRDF', 'wb') as timing_file:
        dump(output_dict, timing_file)

In [None]:
numMCMCSamps = 100_000
numGridSamps = 50**4

# Average wall-clock times measured above
meanEvalSubset = np.mean(evaluationTimesSubset)
meanEvalClassic = np.mean(evaluationTimesClassic)
meanInvSubset = np.mean(inversionTimesSubset)
meanInvClassic = np.mean(inversionTimesClassic)

# Reference cost used for the "vs Sim" ratios (presumably seconds per simulation — confirm)
simTime = 1251

# Note these assume no overhead.
mcmcSubsetMins = np.round(numMCMCSamps*meanEvalSubset/60,4)
mcmcClassicMins = np.round(numMCMCSamps*meanEvalClassic/60,4)
print("Expected time for model calls during MCMC with subset: " + str(mcmcSubsetMins) +  " Mins")
print("Expected time for model calls during MCMC with regular gp: " + str(mcmcClassicMins) +  " Mins")
print()
gridSubsetHours = np.round(numGridSamps*meanEvalSubset/60/60,4)
gridClassicHours = np.round(numGridSamps*meanEvalClassic/60/60,4)
print("Expected time for model calls for grid with subset: " + str(gridSubsetHours) +  " Hours")
print("Expected time for model calls for grid with regular gp: " + str(gridClassicHours) + " Hours")
print()
print("Evaluation Speed up Classic vs Subset:", meanEvalClassic/meanEvalSubset)
print("Inversion Speed up Classic vs Subset:", meanInvClassic/meanInvSubset)
print("Evaluation Speed up Classic vs Sim:", simTime /meanEvalClassic)
print("Evaluation Speed up Subset vs Sim:", simTime /meanEvalSubset)
print()