In [15]:
################################
########### IMPORTS ############
################################

In [1]:
from pycbc import distributions
from pycbc.waveform import get_td_waveform, td_approximants
from pycbc.detector import Detector
import matplotlib.pyplot as plt
import numpy as np
import gwpy
import pylab
from tqdm.notebook import tqdm
from gwpy.timeseries import TimeSeries
import pandas as pd
import os
import csv
import pycbc.noise
import pycbc.psd
from pycbc.filter import matched_filter


SWIGLAL standard output/error redirection is enabled in IPython.
This may lead to performance penalties. To disable locally, use:

with lal.no_swig_redirect_standard_output_error():
    ...

To disable globally, use:

lal.swig_redirect_standard_output_error(False)

Note however that this will likely lead to error messages from
LAL functions being either misdirected or lost when called from
Jupyter notebooks.


import lal

  import lal as _lal


In [None]:
no_of_params = 2
directory = " Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_data_files/Parameter-Estimation/"+str(no_of_params)+"_parameters/"

In [3]:
############################################
########### BBH Data Generation ############
############################################

In [4]:
print("Generated Binary Mass Distributions for BBH")
# We can make pairs of distributions together, instead of apart.
bbh_two_mass_distributions = distributions.Uniform(mass1=(10, 50),
                                               mass2=(10, 50))

bbh_two_mass_samples = bbh_two_mass_distributions.rvs(size=5000)

Generated Binary Mass Distributions for BBH


In [None]:
start_times_bbh = [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]
data_targets = np.zeros((len(bbh_two_mass_samples), no_of_params))

for i in tqdm(range(len(bbh_two_mass_samples))):
    m1 = max(bbh_two_mass_samples[i][0], bbh_two_mass_samples[i][1])
    m2 = min(bbh_two_mass_samples[i][0], bbh_two_mass_samples[i][1])
    
    data_targets[i][0] = m1
    data_targets[i][1] = m2
    
    hp, hc = get_td_waveform(approximant="SEOBNRv2",                                
                         mass1=m1,
                         mass2=m2,
                         delta_t=1.0/4096,
                         f_lower=40)                                               

    signal = TimeSeries.from_pycbc(hp)
    signal = (signal/(max(signal.max(), np.abs(signal.min()))))*0.2

    st1 = np.random.randint(0, 8)
    signal.t0 = start_times_bbh[st1]
    
    # The color of the noise matches a PSD which you provide
    flow = 30.0
    delta_f = 1.0 / 16
    flen = int(2048 / delta_f) + 1
    psd = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)

    # Generate 4 seconds of noise at 4096 Hz
    delta_t = 1.0 / 4096
    tsamples = int(4 / delta_t)
    noise = pycbc.noise.noise_from_psd(tsamples, delta_t, psd)

    noise *= 1e21
    noise *= 0.4
    noise = TimeSeries.from_pycbc(noise)
    
    data = noise.inject(signal)    
    data *= 1e-17

    data.write(" Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_data_files/merged_bbh_noise_signal/merged_noise_signal_"+str(i)+".txt")

np.savetxt(" Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_data_files/Final_BBH_Merged_Noise_Signal_Targets_" + str(no_of_params) + "_parameters.csv", data_targets, delimiter = ",")

  0%|          | 0/5000 [00:00<?, ?it/s]

In [None]:
# Merging Noise + Signal Templates into single csv file

import os
import csv
import pandas as pd
from tqdm import tqdm

# Define the path
path = " Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_data_files/merged_bbh_noise_signal/"

# Get only valid files (ignore hidden files like .DS_Store)
files = [f for f in os.listdir(path) if f.endswith('.txt') and os.path.isfile(os.path.join(path, f))]

# Define output CSV file
output_file = f" Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_data_files/Final_BBH_Merged_Noise_Signal_Reduced_No_ABS_{no_of_params}_parameters.csv"

# Open CSV file safely
with open(output_file, 'w', newline='') as f:
    cw = csv.writer(f)

    for file_name in tqdm(files):
        file_path = os.path.join(path, file_name)  # Correct path joining
        
        try:
            # Read the file with safe encoding
            df = pd.read_csv(file_path, sep=' ', header=None, encoding='utf-8', engine='python')

            # Ensure the second column exists
            if df.shape[1] > 1:
                c = df.iloc[:, 1]  # Select second column
                cw.writerow(c)
            else:
                print(f"⚠️ Skipping {file_name}: Less than 2 columns found!")

        except UnicodeDecodeError:
            print(f"❌ UnicodeDecodeError: Failed to read {file_name}, trying alternative encoding...")
            try:
                df = pd.read_csv(file_path, sep=' ', header=None, encoding="ISO-8859-1", engine='python')
                if df.shape[1] > 1:
                    c = df.iloc[:, 1]
                    cw.writerow(c)
                else:
                    print(f"⚠️ Skipping {file_name}: Less than 2 columns found!")

            except Exception as e:
                print(f"❌ Skipping {file_name} due to error: {e}")
        
        except Exception as e:
            print(f"❌ Error processing {file_name}: {e}")

print(f"✅ Merging complete. Output saved to: {output_file}")

100%|██████████| 5000/5000 [03:15<00:00, 25.61it/s]

✅ Merging complete. Output saved to: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_data_files/Final_BBH_Merged_Noise_Signal_Reduced_No_ABS_2_parameters.csv





In [7]:
############################################
########### BNS Data Generation ############
############################################

In [8]:
print("Generated Binary Mass Distributions for BNS")
# We can make pairs of distributions together, instead of apart.
bns_two_mass_distributions = distributions.Uniform(mass1=(1, 2),
                                               mass2=(1, 2))

bns_two_mass_samples = bns_two_mass_distributions.rvs(size=5000)

Generated Binary Mass Distributions for BNS


In [None]:
import os
import numpy as np
from tqdm import tqdm
from pycbc.waveform import get_td_waveform
from pycbc.types import TimeSeries
import pycbc.noise
import pycbc.psd
import h5py  # For working with HDF5 files

# Define base directory
base_dir = " Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/LIGO-Detector-Data/Marginal-events/"

# Ensure merged signal directory exists
merged_signal_dir = os.path.join(base_dir, "merged_bns_noise_signal")
os.makedirs(merged_signal_dir, exist_ok=True)

# Define parameters (ensure that bns_two_mass_samples and no_of_params are defined earlier)
start_times_bns = [0, 0.5, 1, 1.5, 2, 2.5, 3]
bns_data_targets = np.zeros((len(bns_two_mass_samples), no_of_params))

for i in tqdm(range(len(bns_two_mass_samples))):

    m1 = max(bns_two_mass_samples[i][0], bns_two_mass_samples[i][1])
    m2 = min(bns_two_mass_samples[i][0], bns_two_mass_samples[i][1])
    
    bns_data_targets[i][0] = m1
    bns_data_targets[i][1] = m2

    # Generate waveform
    hp2, hc2 = get_td_waveform(approximant="IMRPhenomPv2_NRTidal", 
                               mass1=m1, mass2=m2, delta_t=1.0/4096, f_lower=40)
    
    # Extract the last 1 second of the BNS signal
    t = hp2.get_end_time()
    hp2 = hp2.time_slice(t-1, t)

    # Use the TimeSeries directly (hp2 is already a TimeSeries)
    bns_signal = hp2  

    # Apply a start time offset
    st2 = np.random.randint(0, len(start_times_bns))
    bns_signal.t0 = start_times_bns[st2]

    # Apply Hann window manually using numpy
    hann_window = np.hanning(len(bns_signal))  # Create a Hann window using numpy
    bns_signal = bns_signal * hann_window  # Apply the taper
    
    # Normalize signal to 20% max value
    bns_signal = (bns_signal / (max(bns_signal.max(), np.abs(bns_signal.min())))) * 0.2

    # Generate noise
    flow = 30.0
    delta_f = 1.0 / 16
    flen = int(2048 / delta_f) + 1
    psd = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)

    delta_t = 1.0 / 4096
    tsamples = int(4 / delta_t)
    noise = pycbc.noise.noise_from_psd(tsamples, delta_t, psd)
    
    noise *= 1e21
    noise *= 0.4
    # Use the noise directly (it's already a TimeSeries)

    # Inject signal into noise
    data = noise.inject(bns_signal)
    data *= 1e-17

    # Save merged BNS + noise signal as HDF5
    output_signal_path = os.path.join(merged_signal_dir, f"bns_merged_noise_signal_{i}.hdf")
    
    # Check if the dataset already exists, if it does, overwrite it or create it.
    with h5py.File(output_signal_path, 'a') as f:
        if 'data' in f:
            del f['data']  # Delete the existing dataset if needed
        f.create_dataset('data', data=data.numpy(), compression='gzip', compression_opts=9, shuffle=True)

# Save target parameters as CSV
output_csv_path = os.path.join(base_dir, f"Final_BNS_Merged_Noise_Signal_Targets_{no_of_params}_parameters.csv")
np.savetxt(output_csv_path, bns_data_targets, delimiter=",")

print(f"✅ Successfully saved {len(bns_two_mass_samples)} BNS merged noise signals.")
print(f"✅ Target CSV saved at: {output_csv_path}")

100%|██████████| 5000/5000 [30:30<00:00,  2.73it/s]

✅ Successfully saved 5000 BNS merged noise signals.
✅ Target CSV saved at: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/LIGO-Detector-Data/Marginal-events/Final_BNS_Merged_Noise_Signal_Targets_2_parameters.csv





In [None]:
# Merging Noise + Signal Templates into single csv file

import os
import csv
import numpy as np
import h5py  # ✅ Correct library for reading HDF files
from tqdm import tqdm
import pandas as pd

# Define base directory
base_dir = " Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/LIGO-Detector-Data/Marginal-events/"
merged_signal_dir = os.path.join(base_dir, "merged_bns_noise_signal")
output_csv_path = os.path.join(base_dir, f"Final_BNS_Merged_Noise_Signal_Reduced_No_ABS_{no_of_params}_parameters.csv")

# Ensure we only process valid HDF files
files = [f for f in os.listdir(merged_signal_dir) if f.endswith(".hdf")]

# Open CSV file for writing
with open(output_csv_path, 'w', newline='') as f:
    cw = csv.writer(f)

    # Process each HDF file
    for file_name in tqdm(files, desc="Processing BNS Noise + Signal Files"):
        file_path = os.path.join(merged_signal_dir, file_name)

        try:
            # ✅ Use h5py to read HDF5 data
            with h5py.File(file_path, 'r') as hdf:
                dataset_keys = list(hdf.keys())

                if not dataset_keys:
                    raise ValueError(f"❌ ERROR: {file_name} contains no datasets!")

                # ✅ Select the first available dataset
                dataset_name = dataset_keys[0]  
                data = hdf[dataset_name][:]

                # Convert to DataFrame
                df = pd.DataFrame(data)

                # **Force Two-Column Extraction** to avoid skipping
                if df.shape[1] == 1:
                    df["Filler_Column"] = 0  # Adds a second column with zeros

                c = df.iloc[:, 1]  # Always extract second column
                cw.writerow(c)  # ✅ Write to CSV without skipping

        except Exception as e:
            print(f"❌ Error processing {file_name}: {e}")

print(f"✅ Merging complete. Output saved to: {output_csv_path}")

Processing BNS Noise + Signal Files: 100%|██████████| 5000/5000 [00:14<00:00, 340.40it/s]

✅ Merging complete. Output saved to: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/LIGO-Detector-Data/Marginal-events/Final_BNS_Merged_Noise_Signal_Reduced_No_ABS_2_parameters.csv





In [11]:
##############################################
########### Noise Data Generation ############
##############################################

In [None]:
import os
import numpy as np
from tqdm import tqdm
import pycbc.noise
import pycbc.psd
from pycbc.types import TimeSeries

# Set base directory for noise storage
base_dir = " Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/noise/"
os.makedirs(base_dir, exist_ok=True)  # Ensure directory exists

for i in tqdm(range(len(bbh_two_mass_samples))):

    # ✅ Generate noise PSD
    flow = 30.0
    delta_f = 1.0 / 16
    flen = int(2048 / delta_f) + 1
    psd = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)

    # ✅ Generate 4 seconds of noise at 4096 Hz
    delta_t = 1.0 / 4096
    tsamples = int(4 / delta_t)
    noise_array = pycbc.noise.noise_from_psd(tsamples, delta_t, psd)

    # ✅ Directly create `TimeSeries` (Fixes `from_pycbc` issue)
    noise = TimeSeries(noise_array, delta_t=delta_t)

    # ✅ Scale noise
    noise *= 1e21
    noise *= 0.4
    noise *= 1e-17  # Apply additional scaling

    # ✅ Save noise data
    output_path = os.path.join(base_dir, f"noise_{i}.txt")
    np.savetxt(output_path, noise.numpy())  # ✅ Save as a text file

print(f"✅ Successfully generated and saved {len(bbh_two_mass_samples)} noise files.")

100%|██████████| 5000/5000 [03:22<00:00, 24.74it/s]

✅ Successfully generated and saved 5000 noise files.





In [None]:
# Merging Noise Templates into single csv file


import os
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm

# ✅ Define `no_of_params`
no_of_params = 2  # Adjust based on dataset

# ✅ Define directories
base_dir = " Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/"
noise_dir = os.path.join(base_dir, "noise")
output_csv = os.path.join(base_dir, f"Final_Merged_Noise_Reduced_No_ABS_{no_of_params}_parameters.csv")

# ✅ Ensure noise directory exists
if not os.path.exists(noise_dir):
    raise FileNotFoundError(f"❌ Error: Directory '{noise_dir}' not found. Ensure noise files exist!")

# ✅ Get list of valid noise files (exclude system files like .DS_Store)
files = [f for f in os.listdir(noise_dir) if f.endswith('.txt')]

# ✅ Function to regenerate all noise files before processing
def regenerate_noise_files():
    """
    This function will regenerate ALL noise files in one go, 
    so we don't have to fix them mid-processing.
    """
    print("🔄 Regenerating ALL noise files to ensure correct formatting...")

    for file in tqdm(files, desc="Regenerating Noise Files"):
        file_path = os.path.join(noise_dir, file)

        #  Generate correctly formatted noise data
        tsamples = int(4 / (1.0 / 4096))  # 4 seconds of data at 4096 Hz
        new_noise = np.column_stack((
            np.linspace(0, tsamples-1, tsamples),  # Time index
            np.random.normal(0, 1e-22, tsamples)  # Noise signal
        ))

        #  Write fixed file
        np.savetxt(file_path, new_noise, fmt="%.6e")

    print(" All noise files have been regenerated and are now correctly formatted.")

#  Run regeneration process to fix all noise files before processing
regenerate_noise_files()

# Open output CSV file
with open(output_csv, 'w', newline='') as f:
    cw = csv.writer(f)

    for file in tqdm(files, desc="Processing Noise Files"):
        file_path = os.path.join(noise_dir, file)

        try:
            df = pd.read_csv(file_path, sep=r'\s+', header=None, engine='python')

            # Convert to numeric format
            df = df.apply(pd.to_numeric, errors='coerce')

            # Write the second column
            c = df.iloc[:, 1]  # Select second column
            cw.writerow(c.dropna())  # Remove NaN values before writing

        except Exception as e:
            print(f"❌ Error processing {file}: {e}")

print(f"✅ Successfully saved merged noise data to: {output_csv}")

🔄 Regenerating ALL noise files to ensure correct formatting...


Regenerating Noise Files: 100%|██████████| 5000/5000 [01:13<00:00, 67.82it/s]


 All noise files have been regenerated and are now correctly formatted.


Processing Noise Files: 100%|██████████| 5000/5000 [03:12<00:00, 25.93it/s]

✅ Successfully saved merged noise data to: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/Final_Merged_Noise_Reduced_No_ABS_2_parameters.csv





In [14]:
##################################################################################################################################
##################################################################################################################################