In [2]:
###
###  Generate whitened data "white_h_<RATE>_dm<DM>.h5" from "bbh_<RATE>_dm<DM>_<tag>.h5"
###

from scipy.interpolate import interp1d
import pycbc.noise
import pycbc.psd
import pycbc.waveform
import pylab
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.mlab as mlab
import numpy as np
import seaborn

from scipy import signal
from scipy.signal import butter, filtfilt, iirdesign, zpk2tf, freqz

#
# Define PSD
#
def aLIGO_PSD(flow = 10.0, FMAX=8000, delta_f = 1./16., SEED=None):
    """Return PyCBC's ``aLIGOZeroDetHighPower`` PSD sampled on a uniform grid.

    Parameters
    ----------
    flow : float
        Forwarded to PyCBC as the low-frequency cutoff.
    FMAX : float
        Highest frequency covered by the grid.
    delta_f : float
        Frequency spacing of the grid.
    SEED : unused
        Accepted but never read by this function.

    Returns
    -------
    The object returned by ``pycbc.psd.aLIGOZeroDetHighPower`` with
    ``int(FMAX / delta_f) + 1`` samples.
    """
    # +1 so that both the 0 Hz bin and the FMAX bin are part of the grid.
    n_bins = int(FMAX / delta_f) + 1
    return pycbc.psd.aLIGOZeroDetHighPower(n_bins, delta_f, flow)

def aLIGO_PSD_inp(flow = 10.0, FMAX=8000, delta_f = 1./16., SEED=None):
    """Return an interpolator over PyCBC's ``aLIGOZeroDetHighPower`` PSD.

    The PSD is generated on a uniform grid of ``int(FMAX / delta_f) + 1``
    samples.  Every sample smaller than 1e-60 is overwritten with 1 before the
    interpolator is built, so that dividing by ``sqrt(PSD)`` never divides by
    zero.

    Parameters
    ----------
    flow : float
        Forwarded to PyCBC as the low-frequency cutoff.
    FMAX : float
        Highest frequency covered by the grid.
    delta_f : float
        Frequency spacing of the grid.
    SEED : unused
        Accepted but never read by this function.

    Returns
    -------
    scipy.interpolate.interp1d
        Callable mapping frequency to PSD value on the range of the grid.
    """
    n_bins = int(FMAX / delta_f) + 1
    design_psd = pycbc.psd.aLIGOZeroDetHighPower(n_bins, delta_f, flow)
    grid = design_psd.sample_frequencies

    # Patch (near-)zero bins in place to avoid the 1/0 problem downstream.
    too_small = design_psd.data < 1.e-60
    design_psd.data[too_small] = 1

    return interp1d(grid, design_psd)

def LIGO_PSD_inp(FMAX=8192, delta_f = 1./16.):
    """Return an interpolator over a closed-form, three-term PSD model.

    The model is evaluated on ``int(FMAX / delta_f) + 1`` evenly spaced
    frequencies between 0 and ``FMAX`` (both included) and is the sum of

    * a term that falls off with frequency, ``(1e-22 * (18 / (0.1 + f))**2)**2``,
    * a constant term, ``(0.7e-23)**2``,
    * a term that grows with frequency, ``((f / 2000) * 4e-23)**2``.

    Parameters
    ----------
    FMAX : float
        Upper end of the frequency grid.
    delta_f : float
        Determines the number of grid points via ``int(FMAX / delta_f) + 1``.

    Returns
    -------
    scipy.interpolate.interp1d
        Callable mapping a frequency in ``[0, FMAX]`` to the model PSD.
    """
    n_bins = int(FMAX / delta_f) + 1
    grid = np.linspace(0, FMAX, n_bins)

    falling_term = (1.e-22*(18./(0.1+grid))**2)**2
    constant_term = 0.7e-23**2
    rising_term = ((grid/2000.)*4.e-23)**2

    # Summed left to right, in the same order as the terms are listed above.
    model_psd = falling_term + constant_term + rising_term
    return interp1d(grid, model_psd)

def PSD_from(timeseries, RATE=8192, alpha=0.1):
    """Estimate the PSD of ``timeseries`` and return it as an interpolator.

    The estimate comes from ``matplotlib.mlab.psd`` using segments of
    ``NFFT = RATE`` samples (one second of data when ``RATE`` is the true
    sampling rate), a Tukey window with shape parameter ``alpha`` and an
    overlap of ``NFFT // 8`` samples between segments.

    Parameters
    ----------
    timeseries : array-like
        Samples whose PSD is estimated.
    RATE : int
        Sampling rate passed to ``mlab.psd`` as ``Fs``; also the segment length.
    alpha : float
        Shape parameter of the Tukey window.

    Returns
    -------
    f_ss : ndarray
        Frequencies at which the PSD was estimated.
    psd_inp : scipy.interpolate.interp1d
        Callable mapping a frequency within ``f_ss`` to the estimated PSD.
    """
    NFFT = int(RATE)
    # Floor division keeps the overlap an integer under Python 3 as well
    # (plain `/` only yielded an int on Python 2).
    overlap = NFFT // 8
    # The window functions live in scipy.signal.windows; the top-level
    # `signal.tukey` alias is not available in current SciPy releases.
    window = signal.windows.tukey(NFFT, alpha=alpha)

    ss_psd, f_ss = mlab.psd(timeseries, Fs=RATE, NFFT=NFFT,
                            noverlap=overlap, window=window)
    psd_inp = interp1d(f_ss, ss_psd)

    return f_ss, psd_inp


from scipy.signal import butter, filtfilt, iirdesign, zpk2tf, freqz

def whiten(strain, interp_psd, dt, bp=0):
    """Whiten a time series by dividing its spectrum by ``sqrt(PSD)``.

    The real FFT of ``strain`` is divided by ``sqrt(interp_psd(f))`` at each
    FFT frequency, scaled by ``sqrt(2 * dt)`` and transformed back to the time
    domain.  With ``bp`` set, the result is additionally band-passed to
    10-800 Hz with a zero-phase (forward-backward) 4th-order Butterworth filter
    and divided by ``sqrt(bandwidth / Nyquist)``.

    Parameters
    ----------
    strain : array-like
        Time-domain samples, evenly spaced by ``dt``.
    interp_psd : callable
        Maps an array of frequencies to PSD values; it must accept every
        frequency from 0 up to the Nyquist frequency ``1 / (2 * dt)``.
    dt : float
        Sample spacing in seconds.
    bp : bool or int
        When truthy, apply the 10-800 Hz band-pass after whitening.

    Returns
    -------
    ndarray
        Whitened (and optionally band-passed) series of length ``len(strain)``.
    """
    Nt = len(strain)
    freqs = np.fft.rfftfreq(Nt, dt)

    hf = np.fft.rfft(strain)

    norm = np.sqrt(dt*2)
    white_hf = hf / np.sqrt(interp_psd(freqs)) * norm
    # Pass n explicitly so an odd-length input keeps its length.
    white_ht = np.fft.irfft(white_hf, n=Nt)

    if bp:
        # Derive the Nyquist frequency from this call's own sample spacing
        # rather than from a notebook-level RATE global, which may be unset or
        # belong to a different data set.
        nyquist = 0.5 / dt

        ## Bandpassing with 4th-order Butterworth filter
        fband = [10., 800.]
        bb, ab = butter(4, [fband[0] / nyquist, fband[1] / nyquist], btype='band')
        normalization = np.sqrt((fband[1] - fband[0]) / nyquist)
        white_ht = filtfilt(bb, ab, white_ht) / normalization

    return white_ht
            

In [6]:
###
###  I made two train/test datasets with sampling rate 8192/4096:
###     bbh_test_xxxx.h5
###     bbh_train_xxxx.h5
###

###  Update: whitened noise is exactly Gaussian(0,1), so there is no need to prepare it in advance.
### 
###
import h5py
# Placeholder only: get_whiten_template() reads the real sampling rate from
# each data file's 'srate' attribute into its own local variable.
RATE = 0

# Fraction of the 2-second window at which the merger is placed.
MERGER_IDX = 0.66

# Design PSD on its native grid, the grid itself, and an interpolating version
# that can be evaluated at arbitrary FFT frequencies.
aligo_psd = aLIGO_PSD()
f_aligo_psd = aligo_psd.sample_frequencies
aligo_psd_inp = aLIGO_PSD_inp()

def get_whiten_template(h5name, A=1.0):
    """Read waveforms from an HDF5 file and return whitened 1-second templates.

    For every entry under ``/waveform`` the plus/cross polarisations are
    placed into a zero-padded 2-second window so that the merger sample sits
    at the fraction ``MERGER_IDX`` of the window, whitened with the
    module-level ``aligo_psd_inp``, trimmed by half a second on each side and
    rescaled so that the peak of ``sqrt(hp**2 + hc**2)`` equals
    ``A * sqrt(2 * ln 2)``.

    Parameters
    ----------
    h5name : str
        Path of the HDF5 file.  It must hold a ``/waveform`` group with an
        ``srate`` attribute and one sub-group per waveform carrying the
        attributes ``m`` (two masses) and ``midx`` (merger sample index) and
        the datasets ``hp`` and ``hc``.
    A : float
        Amplitude factor applied to the normalised templates.

    Returns
    -------
    xhp, xhc : list of ndarray
        Whitened, normalised plus / cross templates, one per waveform.
    y1, y2 : list
        First and second mass of each waveform, in the same order.
    """
    xhp, xhc = [], []
    y1, y2 = [], []

    # The context manager closes the file even when a waveform fails to process.
    with h5py.File(h5name, 'r') as f:
        # Local sampling rate of THIS file (independent of the notebook-level
        # RATE); a plain int so it can size arrays and bound slices.
        rate = int(f["/waveform"].attrs.get('srate'))
        dt = 1.0 / rate

        W = rate * 2                      ## fix 2-sec strain
        merger_idx = W * MERGER_IDX       ## put merger at MERGER_IDX position of the window
        # Floor division keeps the slice bounds integral on Python 3;
        # (-rate) // 2 reproduces the Python 2 meaning of `-RATE/2`.
        keep_from, keep_to = rate // 2, (-rate) // 2

        for co, i in enumerate(f["/waveform"].keys()):
            i = int(i)
            key = 'waveform/%d' % i
            kp = 'waveform/%d/hp' % i
            kc = 'waveform/%d/hc' % i

            m1 = f[key].attrs['m'][0]
            m2 = f[key].attrs['m'][1]
            midx = f[key].attrs['midx']   # merger idx is usually close to the last array index.
            hp = f[kp][:]
            hc = f[kc][:]

            # Progress report every 100 waveforms.
            if co % 100 == 0:
                print ("process ...", m1, m2, co, i)
                print (midx)

            y1.append(m1)
            y2.append(m2)

            # First template sample copied into the window; clamp to 0 when
            # the template is too short to reach back that far.
            idx0 = max(int(midx - merger_idx + 1), 0)

            # NOTE(review): the slice end is exclusive, so with len(hp)-1 the
            # final sample of the template is never copied -- confirm intended.
            minidx = min(len(hp)-1, idx0+W)
            n_copy = minidx - idx0

            # Zero-padded 2-second windows holding the shifted signal.
            shifted_hp = np.zeros(W)
            shifted_hc = np.zeros(W)
            shifted_hp[0:n_copy] = hp[idx0:minidx]
            shifted_hc[0:n_copy] = hc[idx0:minidx]

            whiten_hp = whiten(shifted_hp, aligo_psd_inp, dt)
            whiten_hc = whiten(shifted_hc, aligo_psd_inp, dt)

            # Drop half a second at each end: keep only the central 1 second.
            tmphp = whiten_hp[keep_from:keep_to]
            tmphc = whiten_hc[keep_from:keep_to]

            # Scale so the peak of the (hp, hc) envelope is A*sqrt(2 ln 2),
            # i.e. comparable to unit-variance Gaussian noise.
            maxf = A * np.sqrt(2*np.log(2.)) / np.max(np.sqrt(tmphp**2 + tmphc**2))

            xhp.append(tmphp * maxf)
            xhc.append(tmphc * maxf)

    return xhp, xhc, y1, y2

import h5py as h5
import numpy as np

###################################### Edit here
# Sampling rate and "dm" tag used to build the input and output file names.
RATE = 4096   # alternative used before: 8192
DM = 2        # alternative used before: 1
#######################################
OUTFILE = "white_h_%d_dm%d.h5" % (RATE, DM)

# The context manager flushes and closes the output file even if one of the
# source files is missing or fails to process.
with h5.File(OUTFILE, "w") as save:
    # Builtin int/float are what the removed np.int / np.float aliases meant.
    save.attrs.create('srate', RATE, dtype=int)
    save.attrs.create('merger_idx', MERGER_IDX, dtype=float)

    for tag in ['train', 'val', 'test']:
        SRC = "bbh_%d_dm%d_%s.h5" % (RATE, DM, tag)
        xhp, xhc, y1, y2 = get_whiten_template(SRC)

        # Stored as 32-bit floats: templates first, then the two mass labels.
        save.create_dataset("%s_hp" % tag, data=xhp, dtype='f')
        save.create_dataset("%s_hc" % tag, data=xhc, dtype='f')
        save.create_dataset("%s_m1" % tag, data=y1, dtype='f')
        save.create_dataset("%s_m2" % tag, data=y2, dtype='f')


('process ...', 19, 61, 0, 241)
10162
('process ...', 57, 77, 100, 629)
10346
('process ...', 19, 43, 200, 232)
10614
('process ...', 9, 71, 300, 86)
9023
('process ...', 23, 71, 400, 303)
10063
('process ...', 33, 37, 500, 411)
10797
('process ...', 61, 61, 600, 640)
10461
('process ...', 20.0, 56.0, 0, 241)
10322
('process ...', 70.0, 74.0, 100, 650)
10343
('process ...', 62.0, 64.0, 200, 623)
10438
('process ...', 24.0, 36.0, 300, 286)
10821
('process ...', 20.0, 40.0, 400, 233)
10703
('process ...', 22.0, 24.0, 500, 253)
11009
('process ...', 54.0, 76.0, 600, 591)
10354
('process ...', 19.5, 63.5, 0, 241)
10117
('process ...', 39.5, 75.5, 100, 482)
10291
('process ...', 63.5, 67.5, 200, 628)
10410
('process ...', 33.5, 41.5, 300, 405)
10745
('process ...', 5.5, 7.5, 400, 1)
11415
('process ...', 9.5, 59.5, 500, 84)
9539
('process ...', 19.5, 29.5, 600, 224)
10923
