Check all hdf5 groups and attributes with:

h5dump -n 1 lalsuitetest.hdf5

This prints the description attribute:

h5dump -a description lalsuitetest.hdf5 

In [1]:
%pylab inline

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

np.set_printoptions(precision=6, linewidth=110)

Populating the interactive namespace from numpy and matplotlib


In [2]:
import h5py

In [3]:
sys.path.insert(0, '../../src')

import waveform as wave
import waveformset as ws
import trainingset as train
import taylorf2 as f2
import gaussianprocessregression as gpr
import designofexperiment as doe
import lalwaveform
import plotparams
import greedy
import empiricalinterpolation as eim
import surrogate
import diagnostics
import uncertaintysampling as us

import imp
imp.reload(wave)
imp.reload(ws)
imp.reload(train)
imp.reload(f2)
imp.reload(gpr)
imp.reload(doe)
imp.reload(lalwaveform)
imp.reload(greedy)
imp.reload(eim)
imp.reload(surrogate)
imp.reload(diagnostics)
imp.reload(us)

import constants
imp.reload(constants)
from constants import *




# Construct surrogate in way that can be directly converted to lalsuite code

In [41]:
def kernel(x1, x2, hyperparams):
    """Matern covariance function for n-dimensional data.
    
    Parameters
    ----------
    x1 : array with shape ndim
    x2 : array with shape ndim
    hyperparams : array with shape ndim+2 [sigma_f, ls0, ls1, ..., sigma_n]
        sigma_f : Approximately the range (ymax-ymin) of values that the data takes.
            sigma_f^2 called the signal variance.
        sigma_n : Noise term. The uncertainty in the y values of the data.
        lsi : Length scales for the variation in dimension i.
    
    Returns
    -------
    covariance : float
    """
    sigma_f = hyperparams[0]
    sigma_n = hyperparams[-1]
    ls = hyperparams[1:-1]
    ndim = len(ls)
    
    # Noise nugget for diagonal elements
    if np.array_equal(x1, x2):
        nugget = sigma_n**2
    else:
        nugget = 0.0
    #nugget = sigma_n**2
    
    # r**2
    rsq = np.sum(np.array([(x1[i]-x2[i])**2 / ls[i]**2 for i in range(ndim)]))
    r = np.sqrt(rsq)
    
    # nu = 5/2 Matern covariance
    matern = (1. + np.sqrt(5.)*r + 5.*r**2/3.) * np.exp(-np.sqrt(5.)*r)
    
    # Full covariance
    # You must include the nugget to agree with scikit-learn when the points x1, x2 are exactly the same
    return sigma_f**2 * matern + nugget

In [42]:
def gp_predict(xst, hyperparams, x_train, Kinv_dot_y):
    """Interpolate the function at the point xst using Gaussian process regression.
    
    Parameters
    ----------
    xst : array of shape ndim.
        Point x_* where you want to evaluate the function.
    hyperparams : array with shape ndim+2 [sigma_f, ls0, ls1, ..., sigma_n].
        Hyperparameters for the GPR kernel.
    x_train : array of shape (n_train, ndim).
        Training set points.
    Kinv_dot_y : array of shape n_train.
        The interpolating weights at each training set point.
    
    Returns
    -------
    yst : float
        Interpolated value at the point xst.
    """
    # Evaluate vector K_*
    Kst = np.array([kernel(xst, x, hyperparams) for x in x_train])

    # Evaluate y_*
    return np.dot(Kst, Kinv_dot_y)

In [43]:
def extract_data_from_scikit_learn(gp):
    """Extract the data in the scikit-learn GaussianProcessRegressor class 
    that you need for the lalsuite version.
    """
    # hyperparams = np.array([sigma_f, lq, ls1, ls2, llam1, llam2, sigma_n])
    hyperparams = gpr.get_hyperparameters(gp)
    
    # The training data
    x_train = gp.X_train_
    y_train = gp.y_train_
    
    # Evaluate K
    K = np.array([[kernel(x1, x2, hyperparams) for x2 in x_train] for x1 in x_train])
    
    # Evaluate K^{-1}
    Kinv = np.linalg.inv(K)
    
    # Evaluate (K^{-1})_{ij} y_j (array of length nparams).
    Kinv_dot_y = np.dot(Kinv, y_train)
    
    return hyperparams, x_train, Kinv_dot_y

# Load scikit-learn (python) version of surrogate

In [57]:
# Bamp_filename = '../../data/teobtest40hz/B_amp_corners_lhd.hdf5'
# Bphase_filename = '../../data/teobtest40hz/B_phase_corners_lhd.hdf5'
# gp_amp_filename = '../../data/teobtest40hz/gp_amp_corners_lhd.hdf5'
# gp_phase_filename = '../../data/teobtest40hz/gp_phase_corners_lhd.hdf5'
# sur = surrogate.GPSurrogate.load(Bamp_filename, Bphase_filename, gp_amp_filename, gp_phase_filename)

# Bamp_filename = '../../data/teobtest40hz/B_amp_lhd_uncsamp.hdf5'
# Bphase_filename = '../../data/teobtest40hz/B_phase_lhd_uncsamp.hdf5'
# gp_amp_filename = '../../data/teobtest40hz/gp_amp_lhd_uncsamp.hdf5'
# gp_phase_filename = '../../data/teobtest40hz/gp_phase_lhd_uncsamp.hdf5'
# sur = surrogate.GPSurrogate.load(Bamp_filename, Bphase_filename, gp_amp_filename, gp_phase_filename)

nodes_filename = '../../data/TEOBv4_20hz/nodes_corners_lhd.hdf5'
gp_amp_filename = '../../data/TEOBv4_20hz/gp_spline_amp_corners_lhd.hdf5'
gp_phase_filename = '../../data/TEOBv4_20hz/gp_spline_phase_corners_lhd.hdf5'
sur = surrogate.GPSplineSurrogate.load(nodes_filename, gp_amp_filename, gp_phase_filename, order=3, npoints=10000)

In [60]:
# Is there an alpha = 1.0e-10 term that in the scikit-learn version?

#Random point:
x = np.array([0.8, 0.2, 0.1, 1000, 2000])

# Point exactly in training set:
#x = np.array([1.0, 0.5, 0.5, 0.0, 0.0])

for i in range(len(sur.damp_gp_list)):
    gp = sur.damp_gp_list[i]
    gp.alpha = 0.0
    #gp.alpha_ *= 0.0
    #gp = sur.dphase_gp_list[0]
    hyperparams, x_train, Kinv_dot_y = extract_data_from_scikit_learn(gp)

    a = gp.predict(np.atleast_2d(x))[0]
    b = gp_predict(x, hyperparams, x_train, Kinv_dot_y)

    sigma_n = hyperparams[-1]
    print sigma_n
    print a, b, np.abs(b/a-1.), np.abs(b-a)

1.3958165518e-08
4.00576269014e-05 4.12811965884e-05 0.0305452364919 1.22356968701e-06
1.8430731096e-08
2.08984179775e-05 2.08714869997e-05 0.00128866107671 2.69309778124e-08
2.59302853552e-08
-2.62373865108e-05 -2.57074071238e-05 0.0201993970239 5.29979387001e-07
3.66657857757e-08
-0.000121183001163 -0.000121590807709 0.00336521246248 4.07806545755e-07
5.2113487004e-08
-0.000295432835249 -0.000294990661164 0.00149669918753 4.42174084486e-07
7.47213151587e-08
-0.000604401841992 -0.000604198128245 0.000337050174692 2.03713746427e-07
1.08349956077e-07
-0.00114002820377 -0.00113935558392 0.000590002812053 6.72619846043e-07
1.59785421116e-07
-0.00206757719942 -0.00206754142377 1.73031741685e-05 3.57756483882e-08
2.42822932456e-07
-0.00368443034664 -0.00368353143151 0.00024397669351 8.9891513344e-07
3.91881918394e-07
-0.00660817334476 -0.00660782749768 5.23362601699e-05 3.45847079419e-07
7.40771443708e-07
-0.0122385645405 -0.0122388243537 2.12290529418e-05 2.59813134562e-07
2.15307674445e-0

In [59]:
gp.alpha, gp.alpha_

(0.0,
 array([ 0.008289,  1.056433, -2.037547, -0.146928, -0.250674,  0.198302,  1.386065, -0.482063,  1.354522,
         0.985686,  1.203306, -0.272722,  0.80703 ,  0.189341, -1.02601 , -0.652048,  2.641279,  0.967909,
         0.855306,  0.942681,  1.803817, -0.599145, -0.767567, -2.05593 ,  0.848163, -1.188955, -0.769386,
        -2.349791,  1.150535,  0.580535,  0.888147,  3.0479  ,  1.479267, -0.979917, -1.364467, -1.082161,
        -0.150424, -1.554823,  0.014291,  0.881482,  0.008576,  2.185979, -2.153663,  0.605395, -2.087776,
        -2.839429, -1.635372, -0.421846,  0.721603,  1.123598,  0.223683,  1.947749,  1.766341,  0.247211,
        -3.929996,  0.445216,  1.274742,  0.832831,  1.032034, -0.784586, -1.435076, -1.108991,  0.834153,
        -0.650453,  0.366962, -0.490938,  0.626358,  2.609278,  0.989507,  0.732702,  0.610112, -2.25056 ,
         0.304925, -0.37478 , -1.895059, -0.722218,  0.551543, -1.277697, -1.178906, -0.864252,  0.588309,
        -0.171976, -0.799866, -

# Generate hdf5 file for lalsuite version

In [139]:
# def lalsuite_surrogate_format(filename, sur):
#     """Write data to an hdf5 file format that can be read by the 
#     lalsuite version of the code.
#     """
#     f = h5py.File(filename, libver='latest')
    
#     namp = len(sur.Bamp)
#     nphase = len(sur.Bphase)
    
#     f.attrs['description'] = \
# '''
# ********************************************************************************
# Data for TEOBv4_ROM reduced order model (aligned-spin BNS with tidal interactions).

# See B. Lackey, et al. arXiv:xxxx.xxxx.

# Parameter ranges:
# * 1/3 <= q <= 1
# * -0.4 <= spin_1z <= 0.4
# * -0.4 <= spin_2z <= 0.4
# * 0.1 <= lambda_1 <= 3000
# * 0.1 <= lambda_2 <= 3000
# * flow >= xxHz

# This ROM was built using the TEOBv4 waveform.

# The hyperparameters for the Gaussian process regression associated with each 
# basis function are listed in the order
# [sigma_f, l_q, l_spin1z, l_spin2z, l_lambda1, l_lambda2, sigma_n]
# where sigma_f is approximately the function range, sigma_n is the noise/tollerance, 
# and l_i is the correlation length scale for the parameter i.
# ********************************************************************************
# '''
    
#     # Frequency samples
#     f['mf'] = sur.mf
    
#     # Training set samples.
#     # They are the same for all basis functions so pick amp_0
#     gp = sur.damp_gp_list[0]
#     x_train = gp.X_train_
#     f['x_train'] = x_train
    
#     print 'Writing amplitude bases...'
#     for i in range(namp):
#         print i,
#         groupname = 'delta_ln_a_' + str(i)
#         group = f.create_group(groupname)
        
#         group.attrs['mf_node'] = sur.mf_amp[i]
#         group['basis'] = sur.Bamp[i].amp
    
#         gp = sur.damp_gp_list[i]
#         hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
#         group['hyperparameters'] = hyperparameters
#         group['kinv_dot_y'] = kinv_dot_y
        
#     print '\nWriting phase bases...'
#     for i in range(nphase):
#         print i,
#         groupname = 'delta_phi_' + str(i)
#         group = f.create_group(groupname)
        
#         group.attrs['mf_node'] = sur.mf_phase[i]
#         group['basis'] = sur.Bphase[i].phase
        
#         gp = sur.dphase_gp_list[i]
#         hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
#         group['hyperparameters'] = hyperparameters
#         group['kinv_dot_y'] = kinv_dot_y
        
#     f.close()

In [30]:
# def lalsuite_surrogate_format(filename, sur):
#     """Write data to an hdf5 file format that can be read by the 
#     lalsuite version of the code.
#     """
#     f = h5py.File(filename, libver='latest')
    
#     namp = len(sur.Bamp)
#     nphase = len(sur.Bphase)
    
#     f.attrs['description'] = \
# '''
# ********************************************************************************
# Data for TEOBv4_ROM reduced order model (aligned-spin BNS with tidal interactions).

# See B. Lackey, M. Puerrer, A. Taracchini. arXiv:xxxx.xxxx.

# Parameter ranges:
# * 1/3 <= q <= 1
# * -0.4 <= spin_1z <= 0.4
# * -0.4 <= spin_2z <= 0.4
# * 0.1 <= lambda_1 <= 3000
# * 0.1 <= lambda_2 <= 3000
# * flow >= xxHz

# This ROM was built using the TEOBv4 waveform.

# The hyperparameters for the Gaussian process regression associated with each 
# basis function are listed in the order
# [sigma_f, l_q, l_spin1z, l_spin2z, l_lambda1, l_lambda2, sigma_n]
# where sigma_f is approximately the function range, sigma_n is the noise/tollerance, 
# and l_i is the correlation length scale for the parameter i.
# ********************************************************************************
# '''
#     # Bounds
#     f['q_bounds'] = np.array([1./3., 1])
#     f['chi1_bounds'] = np.array([-0.4, 0.4])
#     f['chi2_bounds'] = np.array([-0.4, 0.4])
#     f['lambda1_bounds'] = np.array([0.1, 3000])
#     f['lambda2_bounds'] = np.array([0.1, 3000])

#     # Frequency samples
#     f['mf'] = sur.mf
    
#     # Training set samples.
#     # They are the same for all basis functions so pick amp_0
#     gp = sur.damp_gp_list[0]
#     x_train = gp.X_train_
#     f['x_train'] = x_train
    
#     print 'Writing amplitude bases...'
#     nodes_amp = []
#     B_amp = []
#     hyp_amp = []
#     kinv_dot_y_amp = []
#     for i in range(namp):
#         nodes_amp.append(sur.mf_amp[i])
#         B_amp.append(sur.Bamp[i].amp)
#         gp = sur.damp_gp_list[i]
#         hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
#         hyp_amp.append(hyperparameters)
#         kinv_dot_y_amp.append(kinv_dot_y)
    
#     f['EI_nodes_amp'] = np.array(nodes_amp)
#     f['B_amp'] = np.array(B_amp)
#     f['hyp_amp'] = np.array(hyp_amp)
#     f['kinv_dot_y_amp'] = np.array(kinv_dot_y_amp)
    
#     print f['EI_nodes_amp'][:].shape
#     print f['B_amp'][:].shape
#     print f['hyp_amp'][:].shape
#     print f['kinv_dot_y_amp'][:].shape
    
    
#     print '\nWriting phase bases...'
#     nodes_phase = []
#     B_phase = []
#     hyp_phase = []
#     kinv_dot_y_phase = []
#     for i in range(nphase):
#         nodes_phase.append(sur.mf_phase[i])
#         B_phase.append(sur.Bphase[i].phase)
#         gp = sur.dphase_gp_list[i]
#         hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
#         hyp_phase.append(hyperparameters)
#         kinv_dot_y_phase.append(kinv_dot_y)
    
#     f['EI_nodes_phi'] = np.array(nodes_phase)
#     f['B_phi'] = np.array(B_phase)
#     f['hyp_phi'] = np.array(hyp_phase)
#     f['kinv_dot_y_phi'] = np.array(kinv_dot_y_phase)
    
#     print f['EI_nodes_phi'][:].shape
#     print f['B_phi'][:].shape
#     print f['hyp_phi'][:].shape
#     print f['kinv_dot_y_phi'][:].shape
    
#     f.close()

In [20]:
def lalsuite_spline_surrogate_format(filename, sur):
    """Write data to an hdf5 file format that can be read by the 
    lalsuite version of the code.
    """
    f = h5py.File(filename, libver='latest')
    
    namp = len(sur.damp_gp_list)
    nphase = len(sur.dphase_gp_list)
    
    f.attrs['description'] = \
'''
********************************************************************************
Data for TEOBv4_surrogate surrogate (aligned-spin BNS with tidal interactions).

See B. Lackey, M. Puerrer, A. Taracchini. arXiv:xxxx.xxxx.

Parameter ranges:
* 1/3 <= q <= 1
* -0.5 <= spin_1z <= 0.5
* -0.5 <= spin_2z <= 0.5
* 0 <= lambda_1 <= 5000
* 0 <= lambda_2 <= 5000

This surrogate was built using the TEOBv4 waveform.

The first spline node for dphase is not listed since it is the same as the first
node for damp, and dphase = 0 for the first node.

The hyperparameters for the Gaussian process regression associated with each 
basis function are listed in the order
[sigma_f, l_q, l_spin1z, l_spin2z, l_lambda1, l_lambda2, sigma_n]
where sigma_f is approximately the function range, sigma_n is the noise/tolerance, 
and l_i is the correlation length scale for the parameter i.
********************************************************************************
'''
    # Bounds
    f['q_bounds'] = np.array([1./3., 1])
    f['chi1_bounds'] = np.array([-0.5, 0.5])
    f['chi2_bounds'] = np.array([-0.5, 0.5])
    f['lambda1_bounds'] = np.array([0, 5000])
    f['lambda2_bounds'] = np.array([0, 5000])

    # Nodes for splines.
    f['spline_nodes_amp'] = sur.mf_amp
    f['spline_nodes_phase'] = sur.mf_phase
    
    print f['spline_nodes_amp'][:].shape
    print f['spline_nodes_phase'][:].shape
    
    # Training set samples.
    # They are the same for all basis functions so pick amp_0
    gp = sur.damp_gp_list[0]
    x_train = gp.X_train_
    f['x_train'] = x_train
    
    print 'Writing amplitude bases...'
    hyp_amp = []
    kinv_dot_y_amp = []
    for i in range(namp):
        gp = sur.damp_gp_list[i]
        hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
        hyp_amp.append(hyperparameters)
        kinv_dot_y_amp.append(kinv_dot_y)
    
    f['hyp_amp'] = np.array(hyp_amp)
    f['kinv_dot_y_amp'] = np.array(kinv_dot_y_amp)
    
    print f['hyp_amp'][:].shape
    print f['kinv_dot_y_amp'][:].shape
    
    
    print '\nWriting phase bases...'
    hyp_phase = []
    kinv_dot_y_phase = []
    for i in range(nphase):
        gp = sur.dphase_gp_list[i]
        hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
        hyp_phase.append(hyperparameters)
        kinv_dot_y_phase.append(kinv_dot_y)
    
    f['hyp_phi'] = np.array(hyp_phase)
    f['kinv_dot_y_phi'] = np.array(kinv_dot_y_phase)
    
    print f['hyp_phi'][:].shape
    print f['kinv_dot_y_phi'][:].shape
    
    f.close()

In [22]:
filename = '../../data/TEOBv4_20hz/TEOBv4_surrogate.hdf5'
lalsuite_spline_surrogate_format(filename, sur)

(20,)
(19,)
Writing amplitude bases...
(20, 7)
(20, 159)

Writing phase bases...
(19, 7)
(19, 159)


## Testing

In [23]:
f = h5py.File(filename, libver='latest')

In [27]:
print f.attrs['description']

print f['q_bounds'][:]
print f['chi1_bounds'][:]
print f['chi2_bounds'][:]
print f['lambda1_bounds'][:]
print f['lambda2_bounds'][:]

print f['spline_nodes_amp'][:]
print f['spline_nodes_phase'][:]

print f['x_train'][:].shape


********************************************************************************
Data for TEOBv4_surrogate surrogate (aligned-spin BNS with tidal interactions).

See B. Lackey, M. Puerrer, A. Taracchini. arXiv:xxxx.xxxx.

Parameter ranges:
* 1/3 <= q <= 1
* -0.5 <= spin_1z <= 0.5
* -0.5 <= spin_2z <= 0.5
* 0 <= lambda_1 <= 5000
* 0 <= lambda_2 <= 5000

This surrogate was built using the TEOBv4 waveform.

The first spline node for dphase is not listed since it is the same as the first
node for damp, and dphase = 0 for the first node.

The hyperparameters for the Gaussian process regression associated with each 
basis function are listed in the order
[sigma_f, l_q, l_spin1z, l_spin2z, l_lambda1, l_lambda2, sigma_n]
where sigma_f is approximately the function range, sigma_n is the noise/tolerance, 
and l_i is the correlation length scale for the parameter i.
********************************************************************************

[ 0.333333  1.      ]
[-0.5  0.5]
[-0.5  0.5]
[  

In [28]:
print f['hyp_amp'][:][0]
print f['kinv_dot_y_amp'][:][0]

[  8.504062e-04   1.864503e+00   2.848990e+00   4.000000e+00   2.000000e+04   2.000000e+04   1.395817e-08]
[ -4.568730e+03  -5.169784e+04  -2.599941e+04   6.030558e+03  -3.116244e+04  -1.118356e+04  -2.638726e+04
  -7.906101e+04   3.115567e+04   3.551327e+04   6.959043e+04   2.281821e+04   5.845552e+04  -4.021202e+03
   1.995547e+04   2.826619e+04  -5.343637e+04  -1.274982e+04  -2.510644e+04  -2.993501e+04   1.136397e+04
   1.870288e+04   2.036711e+04   1.462085e+04  -2.363326e+04  -1.338883e+04  -7.248580e+03  -1.675821e+04
   1.835998e+04   1.967001e+04   2.359662e+04   1.560386e+04  -2.240806e+05  -6.470501e+04  -5.964531e+03
   9.001074e+03   8.826554e+04   2.558425e+04  -2.506301e+04  -1.562433e+04   4.634344e+04   8.082604e+04
  -5.280035e+04   4.238102e+04  -1.001640e+04  -5.483428e+04   8.490203e+04  -7.662924e+03   1.437318e+05
  -1.970871e+04   1.747141e+03  -3.733946e+04  -1.901198e+04   6.135409e+01  -6.969534e+03   4.089527e+02
  -1.518616e+05   3.368554e+04  -2.810648e+04

In [29]:
print f['hyp_phi'][:][0]
print f['kinv_dot_y_phi'][:][0]

[  4.085846e-03   1.125107e+00   1.160540e+00   3.001637e+00   1.717073e+04   2.184499e+04   4.904680e-05]
[ -1.208280e+03   7.964910e+02  -2.849333e+03  -2.345224e+03  -1.713935e+04   1.215501e+04   2.980847e+03
  -7.141455e+03   1.311195e+04  -8.818122e+03  -6.644801e+03   6.428997e+03   2.686756e+04  -2.060254e+03
   2.633957e+03  -3.574480e+03   3.667716e+03   1.248802e+02  -1.658871e+03   2.570703e+03  -1.973641e+03
  -8.285946e+02  -2.093941e+02   1.153182e+02  -1.889238e+03   7.070393e+02   1.989203e+03  -1.294392e+03
  -1.552302e+03   1.097635e+03  -2.046796e+02  -2.197122e+03  -1.337939e+04  -4.718471e+01   5.391549e+02
   8.009263e+03   3.564986e+03   5.934810e+03   2.908948e+03  -6.640595e+03  -1.208667e+03  -5.910303e+03
  -1.491329e+04  -3.088618e+03   7.203001e+03   5.461170e+03  -2.045209e+03  -9.405765e+03  -7.873692e+03
  -4.740142e+03  -2.773159e+04   8.255029e+03  -2.537253e+02  -9.962986e+03   6.879945e+03  -9.011212e+03
   4.610227e+03   2.592324e+04  -8.876536e+03

In [30]:
f.close()