Check all hdf5 groups and attributes with:

h5dump -n 1 lalsuitetest.hdf5

This prints the description attribute:

h5dump -a description lalsuitetest.hdf5 

In [1]:
%pylab inline

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

np.set_printoptions(precision=6, linewidth=110)

Populating the interactive namespace from numpy and matplotlib


In [2]:
import h5py

In [3]:
sys.path.insert(0, '../../src')

import waveform as wave
import waveformset as ws
import trainingset as train
import taylorf2 as f2
import gaussianprocessregression as gpr
import designofexperiment as doe
import lalwaveform
import plotparams
import greedy
import empiricalinterpolation as eim
import surrogate
import diagnostics
import uncertaintysampling as us

import imp
imp.reload(wave)
imp.reload(ws)
imp.reload(train)
imp.reload(f2)
imp.reload(gpr)
imp.reload(doe)
imp.reload(lalwaveform)
imp.reload(greedy)
imp.reload(eim)
imp.reload(surrogate)
imp.reload(diagnostics)
imp.reload(us)

import constants
imp.reload(constants)
from constants import *




# Construct surrogate in way that can be directly converted to lalsuite code

In [4]:
def kernel(x1, x2, hyperparams):
    """Matern covariance function for n-dimensional data.
    
    Parameters
    ----------
    x1 : array with shape ndim
    x2 : array with shape ndim
    hyperparams : array with shape ndim+2 [sigma_f, ls0, ls1, ..., sigma_n]
        sigma_f : Approximately the range (ymax-ymin) of values that the data takes.
            sigma_f^2 called the signal variance.
        sigma_n : Noise term. The uncertainty in the y values of the data.
        lsi : Length scales for the variation in dimension i.
    
    Returns
    -------
    covariance : float
    """
    sigma_f = hyperparams[0]
    sigma_n = hyperparams[-1]
    ls = hyperparams[1:-1]
    ndim = len(ls)
    
    # Noise nugget for diagonal elements
    if np.array_equal(x1, x2):
        nugget = sigma_n**2
    else:
        nugget = 0.0
    
    # r**2
    rsq = np.sum(np.array([(x1[i]-x2[i])**2 / ls[i]**2 for i in range(ndim)]))
    r = np.sqrt(rsq)
    
    # nu = 5/2 Matern covariance
    matern = (1. + np.sqrt(5.)*r + 5.*r**2/3.) * np.exp(-np.sqrt(5.)*r)
    
    # Full covariance
    # You must include the nugget to agree with scikit-learn when the points x1, x2 are exactly the same
    return sigma_f**2 * matern + nugget

In [5]:
def gp_predict(xst, hyperparams, x_train, Kinv_dot_y):
    """Interpolate the function at the point xst using Gaussian process regression.
    
    Parameters
    ----------
    xst : array of shape ndim.
        Point x_* where you want to evaluate the function.
    hyperparams : array with shape ndim+2 [sigma_f, ls0, ls1, ..., sigma_n].
        Hyperparameters for the GPR kernel.
    x_train : array of shape (n_train, ndim).
        Training set points.
    Kinv_dot_y : array of shape n_train.
        The interpolating weights at each training set point.
    
    Returns
    -------
    yst : float
        Interpolated value at the point xst.
    """
    # Evaluate vector K_*
    Kst = np.array([kernel(xst, x, hyperparams) for x in x_train])

    # Evaluate y_*
    return np.dot(Kst, Kinv_dot_y)

In [6]:
def extract_data_from_scikit_learn(gp):
    """Extract the data in the scikit-learn GaussianProcessRegressor class 
    that you need for the lalsuite version.
    """
    # hyperparams = np.array([sigma_f, lq, ls1, ls2, llam1, llam2, sigma_n])
    hyperparams = gpr.get_hyperparameters(gp)
    
    # The training data
    x_train = gp.X_train_
    y_train = gp.y_train_
    
    # Evaluate K
    K = np.array([[kernel(x1, x2, hyperparams) for x2 in x_train] for x1 in x_train])
    
    # Evaluate K^{-1}
    Kinv = np.linalg.inv(K)
    
    # Evaluate (K^{-1})_{ij} y_j (array of length nparams).
    Kinv_dot_y = np.dot(Kinv, y_train)
    
    return hyperparams, x_train, Kinv_dot_y

# Load scikit-learn (python) version of surrogate

In [7]:
Bamp_filename = '../../data/teobtest40hz/B_amp_corners_lhd.hdf5'
Bphase_filename = '../../data/teobtest40hz/B_phase_corners_lhd.hdf5'
gp_amp_filename = '../../data/teobtest40hz/gp_amp_corners_lhd.hdf5'
gp_phase_filename = '../../data/teobtest40hz/gp_phase_corners_lhd.hdf5'
sur = surrogate.GPSurrogate.load(Bamp_filename, Bphase_filename, gp_amp_filename, gp_phase_filename)

# Bamp_filename = '../../data/teobtest40hz/B_amp_lhd_uncsamp.hdf5'
# Bphase_filename = '../../data/teobtest40hz/B_phase_lhd_uncsamp.hdf5'
# gp_amp_filename = '../../data/teobtest40hz/gp_amp_lhd_uncsamp.hdf5'
# gp_phase_filename = '../../data/teobtest40hz/gp_phase_lhd_uncsamp.hdf5'
# sur = surrogate.GPSurrogate.load(Bamp_filename, Bphase_filename, gp_amp_filename, gp_phase_filename)

In [8]:
#Random point:
#x = np.array([0.8, 0.2, 0.1, 1000, 2000])

# Point exactly in training set:
x = np.array([3.333333e-01, -4.000000e-01, -4.000000e-01, 1.000000e-01, 1.000000e-01])

for i in range(len(sur.dphase_gp_list)):
    gp = sur.dphase_gp_list[i]
    #gp = sur.dphase_gp_list[0]
    hyperparams, x_train, Kinv_dot_y = extract_data_from_scikit_learn(gp)

    a = gp.predict(np.atleast_2d(x))[0]
    b = gp_predict(x, hyperparams, x_train, Kinv_dot_y)

    #sigma_n = hyperparams[-1]
    #print sigma_n
    print np.abs(b/a-1.)

5.42263345338e-10
1.16229470493e-10
1.42235012657e-09
5.21262366604e-10
9.73295777307e-10
6.05887562344e-10
3.14612225161e-10
2.41429098935e-10
2.05440109369e-10
4.93759477749e-11


# Generate hdf5 file for lalsuite version

In [139]:
# def lalsuite_surrogate_format(filename, sur):
#     """Write data to an hdf5 file format that can be read by the 
#     lalsuite version of the code.
#     """
#     f = h5py.File(filename, libver='latest')
    
#     namp = len(sur.Bamp)
#     nphase = len(sur.Bphase)
    
#     f.attrs['description'] = \
# '''
# ********************************************************************************
# Data for TEOBv4_ROM reduced order model (aligned-spin BNS with tidal interactions).

# See B. Lackey, et al. arXiv:xxxx.xxxx.

# Parameter ranges:
# * 1/3 <= q <= 1
# * -0.4 <= spin_1z <= 0.4
# * -0.4 <= spin_2z <= 0.4
# * 0.1 <= lambda_1 <= 3000
# * 0.1 <= lambda_2 <= 3000
# * flow >= xxHz

# This ROM was built using the TEOBv4 waveform.

# The hyperparameters for the Gaussian process regression associated with each 
# basis function are listed in the order
# [sigma_f, l_q, l_spin1z, l_spin2z, l_lambda1, l_lambda2, sigma_n]
# where sigma_f is approximately the function range, sigma_n is the noise/tollerance, 
# and l_i is the correlation length scale for the parameter i.
# ********************************************************************************
# '''
    
#     # Frequency samples
#     f['mf'] = sur.mf
    
#     # Training set samples.
#     # They are the same for all basis functions so pick amp_0
#     gp = sur.damp_gp_list[0]
#     x_train = gp.X_train_
#     f['x_train'] = x_train
    
#     print 'Writing amplitude bases...'
#     for i in range(namp):
#         print i,
#         groupname = 'delta_ln_a_' + str(i)
#         group = f.create_group(groupname)
        
#         group.attrs['mf_node'] = sur.mf_amp[i]
#         group['basis'] = sur.Bamp[i].amp
    
#         gp = sur.damp_gp_list[i]
#         hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
#         group['hyperparameters'] = hyperparameters
#         group['kinv_dot_y'] = kinv_dot_y
        
#     print '\nWriting phase bases...'
#     for i in range(nphase):
#         print i,
#         groupname = 'delta_phi_' + str(i)
#         group = f.create_group(groupname)
        
#         group.attrs['mf_node'] = sur.mf_phase[i]
#         group['basis'] = sur.Bphase[i].phase
        
#         gp = sur.dphase_gp_list[i]
#         hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
#         group['hyperparameters'] = hyperparameters
#         group['kinv_dot_y'] = kinv_dot_y
        
#     f.close()

In [30]:
def lalsuite_surrogate_format(filename, sur):
    """Write data to an hdf5 file format that can be read by the 
    lalsuite version of the code.
    """
    f = h5py.File(filename, libver='latest')
    
    namp = len(sur.Bamp)
    nphase = len(sur.Bphase)
    
    f.attrs['description'] = \
'''
********************************************************************************
Data for TEOBv4_ROM reduced order model (aligned-spin BNS with tidal interactions).

See B. Lackey, M. Puerrer, A. Taracchini. arXiv:xxxx.xxxx.

Parameter ranges:
* 1/3 <= q <= 1
* -0.4 <= spin_1z <= 0.4
* -0.4 <= spin_2z <= 0.4
* 0.1 <= lambda_1 <= 3000
* 0.1 <= lambda_2 <= 3000
* flow >= xxHz

This ROM was built using the TEOBv4 waveform.

The hyperparameters for the Gaussian process regression associated with each 
basis function are listed in the order
[sigma_f, l_q, l_spin1z, l_spin2z, l_lambda1, l_lambda2, sigma_n]
where sigma_f is approximately the function range, sigma_n is the noise/tollerance, 
and l_i is the correlation length scale for the parameter i.
********************************************************************************
'''
    # Bounds
    f['q_bounds'] = np.array([1./3., 1])
    f['chi1_bounds'] = np.array([-0.4, 0.4])
    f['chi2_bounds'] = np.array([-0.4, 0.4])
    f['lambda1_bounds'] = np.array([0.1, 3000])
    f['lambda2_bounds'] = np.array([0.1, 3000])

    # Frequency samples
    f['mf'] = sur.mf
    
    # Training set samples.
    # They are the same for all basis functions so pick amp_0
    gp = sur.damp_gp_list[0]
    x_train = gp.X_train_
    f['x_train'] = x_train
    
    print 'Writing amplitude bases...'
    nodes_amp = []
    B_amp = []
    hyp_amp = []
    kinv_dot_y_amp = []
    for i in range(namp):
        nodes_amp.append(sur.mf_amp[i])
        B_amp.append(sur.Bamp[i].amp)
        gp = sur.damp_gp_list[i]
        hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
        hyp_amp.append(hyperparameters)
        kinv_dot_y_amp.append(kinv_dot_y)
    
    f['EI_nodes_amp'] = np.array(nodes_amp)
    f['B_amp'] = np.array(B_amp)
    f['hyp_amp'] = np.array(hyp_amp)
    f['kinv_dot_y_amp'] = np.array(kinv_dot_y_amp)
    
    print f['EI_nodes_amp'][:].shape
    print f['B_amp'][:].shape
    print f['hyp_amp'][:].shape
    print f['kinv_dot_y_amp'][:].shape
    
    
    print '\nWriting phase bases...'
    nodes_phase = []
    B_phase = []
    hyp_phase = []
    kinv_dot_y_phase = []
    for i in range(nphase):
        nodes_phase.append(sur.mf_phase[i])
        B_phase.append(sur.Bphase[i].phase)
        gp = sur.dphase_gp_list[i]
        hyperparameters, x_train, kinv_dot_y = extract_data_from_scikit_learn(gp)
        hyp_phase.append(hyperparameters)
        kinv_dot_y_phase.append(kinv_dot_y)
    
    f['EI_nodes_phi'] = np.array(nodes_phase)
    f['B_phi'] = np.array(B_phase)
    f['hyp_phi'] = np.array(hyp_phase)
    f['kinv_dot_y_phi'] = np.array(kinv_dot_y_phase)
    
    print f['EI_nodes_phi'][:].shape
    print f['B_phi'][:].shape
    print f['hyp_phi'][:].shape
    print f['kinv_dot_y_phi'][:].shape
    
    f.close()

In [31]:
filename = '../../data/teobtest40hz/TEOBv4_ROM.hdf5'
lalsuite_surrogate_format(filename, sur)

Writing amplitude bases...
(10,)
(10, 10000)
(10, 7)
(10, 159)

Writing phase bases...
(10,)
(10, 10000)
(10, 7)
(10, 159)


## Testing

In [32]:
f = h5py.File(filename, libver='latest')

In [34]:
print f['q_bounds'][:]
print f['chi1_bounds'][:]
print f['chi2_bounds'][:]
print f['lambda1_bounds'][:]
print f['lambda2_bounds'][:]

print f['mf'][:].shape
print f['x_train'][:].shape

[ 0.333333  1.      ]
[-0.4  0.4]
[-0.4  0.4]
[  1.000000e-01   3.000000e+03]
[  1.000000e-01   3.000000e+03]
(10000,)
(159, 5)


In [35]:
print f['EI_nodes_amp'][:]
print f['B_amp'][:].shape
print f['hyp_amp'][:][0]
print f['kinv_dot_y_amp'][:][0]

[ 0.003913  0.006297  0.00784   0.010067  0.013135  0.014564  0.018796  0.024249  0.027764  0.03    ]
(10, 10000)
[  2.417314e-02   1.611811e+00   3.200000e+00   3.200000e+00   1.199960e+04   1.199960e+04   9.511342e-05]
[ -1067.249711  -1186.107412  -5428.985905   -674.106178   -788.595096  -1173.681862  -1429.308318
  -1037.734476    254.822732    759.516248    294.557383   -710.243144    -35.218616    626.51537
   -123.363184   -428.368031  -1129.598357  -1692.43766   -1176.430816  -3289.984829   -138.213424
   -357.846508     90.734105    758.337567  -1137.13851     976.612925   -366.175835  -1120.443659
    394.884596   -855.368516    149.018832    488.628885   6301.598234  -3356.968241   4731.38433
  -3126.656469 -10555.674419    111.945535 -14780.414484   2818.862766  -1073.284494  -3057.707827
  -3060.77412     187.452186   5874.016447   1420.843806    453.172055   5951.008894   4769.943345
   6131.923492   1325.573405   3616.315512  16954.691907 -11753.266465   1182.200541  -2

In [36]:
print f['EI_nodes_phi'][:]
print f['B_phi'][:].shape
print f['hyp_phi'][:][0]
print f['kinv_dot_y_phi'][:][0]

[ 0.004953  0.007806  0.008551  0.009609  0.010371  0.011522  0.013772  0.02072   0.025745  0.03    ]
(10, 10000)
[  3.396457e+00   1.379969e+00   2.744254e+00   6.029316e+00   2.999900e+04   2.999900e+04   3.233302e-06]
[   7.71708    12.586714  -38.91106    14.689018    2.698471  -25.424575  -11.384529   10.503386    1.692011
   -0.927593   23.390454   20.476781  -21.332099  -34.734909   36.552586   19.444565  -47.409264   -4.433327
   -0.50682   -20.828924   -0.496367   10.859524   -0.416603    6.797257   -4.095638   13.842454   -0.480773
   -2.825646  -12.275552   -6.702231  -16.71071     1.296175  -31.835968    1.289071  -64.902055  -24.75757
   60.941454  -11.727459  129.004328    5.138491 -122.578181  180.126304   21.736462  -27.423158   22.971783
   15.983228  -62.819208 -120.863653    8.727839   -6.208731   91.155902  -57.176721  -56.727319   19.81419
   93.390594   -2.840643  -99.221425   29.576798    1.727601 -142.141618  -51.111255  -64.370205  107.373857
  -59.91821  -108.

In [37]:
f.close()

In [19]:
# print f['delta_ln_a_19'].attrs['mf_node']
# print f['delta_ln_a_19/hyperparameters'][:]
# print f['delta_ln_a_19/kinv_dot_y'][:]
# print f['delta_ln_a_19/basis'][:]

In [20]:
# print f['delta_phi_19'].attrs['mf_node']
# print f['delta_phi_19/hyperparameters'][:]
# print f['delta_phi_19/kinv_dot_y'][:]
# print f['delta_phi_19/basis'][:]