In [4]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import dill
import gzip
import logging

sys.path.append(os.path.abspath('../../surmise/emulationmethods'))
from AKSGP import Emulator

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # logging.FileHandler('emulator_train.log', mode='w'),  # Log to file
        logging.StreamHandler()  # Log to console
    ]
)
logger = logging.getLogger(__name__)

ModuleNotFoundError: No module named 'dill'

# Mock data

In [7]:
from sklearn.datasets import make_friedman1

X, y = make_friedman1(n_samples=50, noise=0.0, random_state=0)
print(X.shape)
print(y.shape)


(50, 10)
(50,)


# Train and predict

### AKSGP

In [8]:
emu = Emulator(X, Y_mean=y, Y_std=None)
emu.fit(kernel='RBF', nrestarts=5, seed=13)

INFO:AKSGP:Training GPs with RBF kernel...

INFO:AKSGP:  Standardizing input space...
INFO:AKSGP:  Standardizing data...
INFO:AKSGP:Kernel after GP training for output dimension 0:
13.1**2 * RBF(length_scale=[4.09, 4.57, 9.73, 45.3, 92.2, 1e+03, 1e+03, 1e+03, 1e+03, 1e+03])
  Log-marginal-likelihood: 20.46249684566002



In [9]:
Xtest, ytest = make_friedman1(n_samples=10, noise=0.0, random_state=0)
means, std_devs = emu.predict(Xtest)
print("% error:\n", (1.0 - means/ytest.reshape(-1, 1))*100)

% error:
 [[ 7.30261573e-07]
 [ 2.78903603e-05]
 [-5.03119549e-05]
 [ 8.59131366e-07]
 [ 3.25264520e-05]
 [ 1.14952714e-08]
 [-1.13978809e-06]
 [-7.49448623e-06]
 [ 7.31229521e-08]
 [-1.35619645e-05]]


### PCGP / PCSK 

In [24]:
from surmise.emulation import emulator
import surmise
print(surmise.__version__)

import numpy as np

f = y.reshape(1, y.shape[0])
ftest = ytest.reshape(1, ytest.shape[0])
prior = {'min': np.min(y),
         'max': np.max(y)
        }# should not be required but required in PCGPR, rename possibly..

# !! PCGPR has hard-coded bugs... abandoning for now

xloc=np.array((1,))

emus = {}
for method in ['PCGP', 'PCSK']:
    if method == 'PCGPR':
        args = {'prior': prior}
    elif method == 'PCSK':
        args = {'simsd': 1e-3 * np.ones_like(f)}
    else:
        args= {}
    emus[method] = emulator(x=xloc, theta=X, f=f, method=method, args=args)

for method, emu in emus.items():
    print(method)
    pred = emu.predict(x=xloc, theta=Xtest)
    print('squared error: {:.6E}'.format(np.mean((pred.mean() - ftest)**2)))

0.2.2.dev3+g39c3ec4
PCGP
squared error: 2.837052E-05
PCSK
squared error: 8.272982E-11


## Pb-Pb dataset

In [5]:
from surmise.emulation import emulator
import numpy as np
# Load training data
train_dir = 'simulation_data/Grad_Pb-Pb-2760GeV/train'
X = np.loadtxt(os.path.join(train_dir, 'X.txt'))
Ymean = np.loadtxt(os.path.join(train_dir, 'Ymean.txt'))
Ystd = np.loadtxt(os.path.join(train_dir, 'Ystd.txt'))


# Load testing data
test_dir = 'simulation_data/Grad_Pb-Pb-2760GeV/test'

Xval = np.loadtxt(os.path.join(test_dir, 'X.txt'))
Ymeanval = np.loadtxt(os.path.join(test_dir, 'Ymean.txt'))
Ystdval = np.loadtxt(os.path.join(test_dir, 'Ystd.txt'))

In [12]:
Ymean.shape

(485, 110)

In [13]:
# x refers to the observable indices

xloc = np.arange(Ymean.shape[1])

In [16]:
emus = {}
for method in ['PCGP', 'PCSK']:
    if method == 'PCGPR':
        args = {'prior': prior}
    elif method == 'PCSK':
        args = {'simsd': Ystd.T[:10,:100]}  # truncated for speed
    else:
        args= {}
    emus[method] = emulator(x=xloc[:10], theta=X[:100], f=Ymean.T[:10,:100], method=method, args=args)

In [19]:
for method, emu in emus.items():
    print(method)
    pred = emu.predict(x=xloc[:10], theta=Xval)
    predmean = pred.mean()
    predvar = pred.var()
    print('squared error: {:.6E}'.format(np.mean((predmean - Ymeanval.T[:10])**2)))

PCGP
squared error: 4.281714E+03
PCSK
squared error: 7.834373E+03


# LCGP

In [13]:
import lcgp

np.random.seed(0)

n = X.shape[0]
train_inds = np.random.choice(np.arange(n), int(4*n/5)) 
test_inds = np.setdiff1d(np.arange(n), train_inds)

In [14]:
theta_train = X[train_inds]
f_train = Ymean[train_inds]
theta_test = X[test_inds]
f_test = Ymean[test_inds]

xloc = np.arange(Ymean.shape[1])

In [None]:
model = lcgp.LCGP(y=f_train.T, x=theta_train)

model.fit(verbose=1)