# LIGO Project
## PyCBC GW Match

In [101]:
# Filter the LIGO .gwf data file using the pyCBC software package
# Based on Alex Nitz's sample code
# Some lines added by Hao Liu to parameterize the program and for outputs

# --- Run configuration -------------------------------------------------------

# Directory where the .gwf data files are located.
directory = 'Data/'

# Suffix of the data file name, not including ".gwf".
suffix = '_LOSC_4_V2-1126257414-4096'

# Low-frequency cutoff (Hz) used for matching; normally chosen to be the same
# as highpass_freq.
match_low_frequency_cutoff = 15

# Corner frequency (Hz) of the initial high-pass filter.
highpass_freq = 15

# Seconds removed from the left / right end of the time series after the
# initial high pass.
highpass_edge_cut_left = 16
highpass_edge_cut_right = 16

# Length of the edge cut applied at both ends before the max-SNR search.
# NOTE(review): the original comment ended with a stray "2016" whose meaning
# is unclear -- confirm whether it was intentional.
snr_edge_cut = 2

# Segment length used when estimating the PSD by Welch's averaging.
psd_segment_length = 16

import lal as _lal
from pycbc.frame import read_frame
from pycbc.filter import highpass
from pycbc.psd import interpolate, inverse_spectrum_truncation
from pycbc.types.timeseries import load_timeseries
from pycbc.types.frequencyseries import load_frequencyseries
from pycbc.types.timeseries import TimeSeries
from pycbc.waveform import get_td_waveform
from pycbc.filter import matched_filter
from pycbc.waveform import apply_fseries_time_shift
from pycbc.filter import sigma

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.signal import tukey
from scipy.interpolate import interp1d
from scipy.stats import skew, kurtosis
from scipy.signal import butter, filtfilt

In [102]:
print('Start to read and high-pass the data, will cut some edge after that.')

# Total length (s) of the window sliced out of each frame file.
t_slice = 96

# GPS time on which the slice is centred.
gps_center = 1126259462
slice_start = gps_center - t_slice/2
slice_end = gps_center + t_slice/2

# strain_i: full data as read from the frame file
# strain:   sliced, high-passed and edge-cropped time series
# stilde:   frequency-domain version of `strain`
strain, strain_i, stilde = {}, {}, {}
for ifo in ['H', 'L']:
    # Locate and read the detector frame file.
    channel_name = '%s1:LOSC-STRAIN'  % ifo
    fname = directory + '%s-%s1' % (ifo, ifo) + suffix + '.gwf'
    strain_i[ifo] = read_frame(fname, channel_name)

    # Keep only the window around gps_center.
    strain[ifo] = strain_i[ifo].time_slice(slice_start, slice_end)

    print(' ')
    print('****************************************************')
    print("%s-file is: %s" %(ifo, fname))
    # Note: the range reported here is that of the slice, not of the whole file.
    print("%s-file is from %s to %s, duration: %s"
        %(ifo, strain[ifo].start_time, strain[ifo].end_time, strain[ifo].duration) )

    # Optionally save a copy of the unfiltered strain data to disk (disabled):
    #strain[ifo].numpy().tofile(directory + '%s-%s1' % (ifo, ifo) + suffix + '_unfilt.bin')

    # High-pass to remove the strong low-frequency signal, then crop the
    # edges corrupted by the filter. Only the slice is processed; the full
    # series in strain_i is left unfiltered.
    strain[ifo] = highpass(strain[ifo], highpass_freq).crop(
        highpass_edge_cut_left, highpass_edge_cut_right)

    # Report the resulting time span.
    print("%s-strain after high-pass is now from %s to %s, duration: %s"
        %(ifo, strain[ifo].start_time, strain[ifo].end_time, strain[ifo].duration) )

    # Frequency-domain version of the conditioned data.
    stilde[ifo] = strain[ifo].to_frequencyseries()

Start to read and high-pass the data, will cut some edge after that.
 
****************************************************
H-file is: Data/H-H1_LOSC_4_V2-1126257414-4096.gwf
H-file is from 1126259414 to 1126259510, duration: 96.0
H-strain after high-pass is now from 1126259430 to 1126259494, duration: 64.0
 
****************************************************
L-file is: Data/L-L1_LOSC_4_V2-1126257414-4096.gwf
L-file is from 1126259414 to 1126259510, duration: 96.0
L-strain after high-pass is now from 1126259430 to 1126259494, duration: 64.0


In [103]:
print('Calculate the PSD from cut strain...')

# PSD estimator to use:
#   "pycbc" - TimeSeries.psd() followed by inverse spectrum truncation
#   "losc"  - matplotlib's mlab.psd(), interpolated onto the data frequency grid
psd_method = "losc"
psds = {}

if psd_method == "pycbc":
    print('Using PyCBC method.')
elif psd_method == "losc":
    print('Using LOSC method.')
else:
    # Previously any other value printed "Using LOSC method." and then computed
    # nothing, leaving `psds` empty for the cells that follow.
    raise ValueError("Unknown psd_method %r (expected 'pycbc' or 'losc')" % (psd_method,))

# NOTE: this cell overwrites strain[ifo] with the whitened time series, so
# re-running it without first re-running the cell that builds `strain` would
# estimate the PSD from already-whitened data.
for ifo in ['H', 'L']:

    if psd_method == "pycbc":
        # Calculate the PSD by a Welch-style estimator (with the pyCBC timeseries.psd() method)
        # Then interpolate the PSD to the desired frequency step.
        psds[ifo] = interpolate(strain[ifo].psd(int(psd_segment_length)), stilde[ifo].delta_f)

        # Smooth to the desired corruption length.
        # The filter length is a sample count; sample_rate is a float, so the
        # product is cast to int.
        max_filter_len = int(psd_segment_length * strain[ifo].sample_rate)
        psds[ifo] = inverse_spectrum_truncation(psds[ifo],
                                                max_filter_len,
                                                low_frequency_cutoff=highpass_freq,
                                                trunc_method='hann')

        # Whiten strain
        strain[ifo] = (stilde[ifo] / psds[ifo]**0.5 ).to_timeseries()
        #strain[ifo].save('Results/'+'%s' % (ifo) + '_STRAIN_PYCBC.npy')

    elif psd_method == "losc":
        # Calculate the PSD by a Welch-style estimator (with the mlab.psd() function)
        # Then interpolate, using numpy's interp function, to the desired frequency step.

        fs = strain[ifo].sample_rate
        # Segment length and overlap are sample counts and must be integers
        # (4 s segments with 50% overlap).
        NFFT = int(4*fs)
        psd_window = np.blackman(NFFT)
        NOVL = NFFT // 2

        Pxx, freqs = mlab.psd(strain[ifo], Fs = fs, NFFT = NFFT, window=psd_window, noverlap=NOVL)
        datafreqs = stilde[ifo].sample_frequencies
        psd_tmp = np.interp(datafreqs, freqs, Pxx)

        # Save the (frequency, PSD) columns to disk, then read the file back
        # through PyCBC to obtain the PSD as a frequency series.
        np.save('Results/'+'%s' %(ifo) + '_PSD_LOSC.npy',np.column_stack((datafreqs,psd_tmp)))

        psds[ifo] = load_frequencyseries('Results/'+'%s' % (ifo) + '_PSD_LOSC.npy')

        # Whiten strain
        strain[ifo] = (stilde[ifo] / psds[ifo]**0.5 ).to_timeseries()

        #strain[ifo].save('Results/'+'%s' % (ifo) + '_STRAIN_LOSC.npy')
        #stilde[ifo].save('Results/'+'%s' % (ifo) + '_FREQSS_LOSC.npy')

print('PSD ready.')

Calculate the PSD from cut strain...
Using LOSC method.
PSD ready.


In [104]:
print('Start matching: ')

# Detector whose data define the sampling grid (delta_t, length, delta_f) of
# the template. The original code read these through the loop variable `ifo`
# left over from a previous cell; naming the detector explicitly removes that
# hidden dependency.
# NOTE(review): assumes H and L share the same sample rate and length, as the
# printed time spans suggest -- confirm.
ref_ifo = 'H'

# Generate the time-domain template (plus and cross polarisations).
hp, hc = get_td_waveform(   approximant="SEOBNRv4",
                            mass1=41.743, mass2=29.237,
                            spin1z = 0.355,      spin2z = -0.769,
                            f_lower=match_low_frequency_cutoff, delta_t = strain[ref_ifo].delta_t)

# Pad both polarisations with 800 zeros at the end.
hp.append_zeros(800)
hc.append_zeros(800)

# Taper the plus polarisation with a Tukey window, then extend it to the
# length of the data.
# NOTE(review): recent SciPy releases expose tukey under scipy.signal.windows;
# the notebook-level import may need updating there.
window = tukey(len(hp), alpha=0.3)
hp._data = hp._data*window
hp.resize(len(strain[ref_ifo]))

# Frequency-domain template on the same frequency grid as the data.
hp = hp.to_frequencyseries(delta_f=stilde[ref_ifo].delta_f)

max_snr, max_time, max_phase = {}, {}, {}
for ifo in ['H', 'L']:
    snr = matched_filter(hp, stilde[ifo], psd=psds[ifo], low_frequency_cutoff=match_low_frequency_cutoff)
    # Discard the edges of the SNR series before searching for the peak.
    snr = snr.crop(snr_edge_cut, snr_edge_cut)

    _, idx = snr.abs_max_loc()

    # The complex SNR at the peak
    max_snr[ifo] = snr[idx]

    # The time of the peak
    max_time[ifo] = float(idx) / snr.sample_rate + snr.start_time
    # The phase of the complex SNR at the peak
    max_phase[ifo] = np.angle(max_snr[ifo])

    print(' ')
    print('****************************************************')
    print('%s: Consider SNR only from %s to %s, duration: %s' %(ifo, snr.start_time, snr.end_time, snr.duration))
    print('%s-SNR: %s'   %(ifo, np.absolute(max_snr[ifo])))
    print('%s-time: %s'  %(ifo, max_time[ifo]))
    print('%s-phase: %s' %(ifo, max_phase[ifo]))

# Aliases used by the following cell.
tmax, hmax, smax = max_time, hp, max_snr

# Quadrature sum of the per-detector peak SNRs.
# dict.values() is a view on Python 3; it must be materialised as a list
# before building the array, otherwise abs() fails on the view object.
network_snr = (abs(np.array(list(max_snr.values()))) ** 2.0).sum() ** 0.5
print('network SNR: %s' %(network_snr))

Start matching: 
 
****************************************************
H: Consider SNR only from 1126259432 to 1126259492, duration: 60.0
H-SNR: 19.41861457384273
H-time: 1126259461.0078125
H-phase: 2.259860844987864
 
****************************************************
L: Consider SNR only from 1126259432 to 1126259492, duration: 60.0
L-SNR: 13.46008298327633
L-time: 1126259461.000732422
L-phase: -0.6555751192325359
network SNR: 23.62745068525469


In [105]:
# Sample rate (Hz) assumed for the band-pass design, the edge cut and the
# saved time axis.
# NOTE(review): hard-coded; presumably matches the sample rate of the loaded
# data -- confirm.
sample_rate_hz = 4096.

# Number of samples removed from each end after band-passing (16 s).
i_c = 16 * int(sample_rate_hz)

# 4th-order Butterworth band-pass, 15-350 Hz, with corner frequencies
# normalised to the Nyquist frequency.
nq = sample_rate_hz / 2.
b, a = butter(4, [15./nq, 350./nq], btype = 'bandpass')

# Suffix of the output files for each PSD method; None means nothing is saved.
method_tag = {'pycbc': 'PYCBC', 'losc': 'LOSC'}.get(psd_method)

# NOTE: this cell replaces strain[ifo] by its band-passed, sliced array, so it
# must not be re-run without first re-running the preceding cells.
wt, h = {}, {}
for ifo in ['H', 'L']:
    # Shift the template to the maximum time at this sample rate
    dt =  tmax[ifo] - stilde[ifo].start_time
    inj = apply_fseries_time_shift(hmax, dt)

    # Template normalisation for this detector's PSD (computed once and reused
    # for both the scaling and the printed relative amplitude).
    template_sigma = sigma(hmax, psd=psds[ifo], low_frequency_cutoff=match_low_frequency_cutoff)

    # Scale the template to the SNR and phase
    inj /= template_sigma
    inj *= smax[ifo]

    amp1 = np.absolute(smax[ifo] / template_sigma)
    print('%s relative amplitude: %s' %(ifo, amp1*100))

    # Subtract from the data
    stilde2 = stilde[ifo] - inj

    # Whiten the residual and the scaled template
    hoft = (stilde2 / psds[ifo] ** 0.5).to_timeseries()
    wh = (inj / psds[ifo] ** 0.5).to_timeseries()

    print('Bandpassing and slicing.')

    # Zero-phase band-pass, then drop i_c samples from both ends.
    strain[ifo] = filtfilt(b, a, strain[ifo])[i_c:-i_c]
    wt[ifo] = filtfilt(b, a, hoft)[i_c:-i_c]
    h[ifo] = filtfilt(b, a, wh)[i_c:-i_c]

    if method_tag is not None:
        out_prefix = 'Results/' + '%s' % (ifo)

        # Save a copy of the strain to disk.
        # The pycbc branch used to write '_STRAIN_PYCB.npy'; the tag is now
        # 'PYCBC', consistent with the residual and template files.
        np.save(out_prefix + '_STRAIN_%s.npy' % method_tag, strain[ifo])

        # save a copy of the residual strain to disk
        np.save(out_prefix + '_RESIDUAL_%s.npy' % method_tag, wt[ifo])

        # save a copy of the template to disk
        np.save(out_prefix + '_TEMPLATE_%s.npy' % method_tag, h[ifo])

# Save a copy of time
# Built from integer sample indices so the number of samples always equals
# len(strain['H']), independent of floating-point rounding of the step.
t = np.arange(len(strain['H'])) / sample_rate_hz
np.save('Results/sample_times.npy', t)

H relative amplitude: 0.13069831058557527
Bandpassing and slicing.
L relative amplitude: 0.10485061770529915
Bandpassing and slicing.
