In [15]:
################################
########### IMPORTS ############
################################

In [1]:
from pycbc import distributions
from pycbc.waveform import get_td_waveform, td_approximants
from pycbc.detector import Detector
import matplotlib.pyplot as plt
import numpy as np
import gwpy
import pylab
from tqdm.notebook import tqdm
from gwpy.timeseries import TimeSeries
import pandas as pd
import os
import csv
import pycbc.noise
import pycbc.psd
from pycbc.filter import matched_filter


SWIGLAL standard output/error redirection is enabled in IPython.
This may lead to performance penalties. To disable locally, use:

with lal.no_swig_redirect_standard_output_error():
    ...

To disable globally, use:

lal.swig_redirect_standard_output_error(False)

Note however that this will likely lead to error messages from
LAL functions being either misdirected or lost when called from
Jupyter notebooks.


import lal

  import lal as _lal


In [2]:
no_of_params = 2
directory = "/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/LIGO-Detector-Data/raw_val_data_files/Parameter-Estimation/"+str(no_of_params)+"_parameters/"

In [3]:
############################################
########### BBH Data Generation ############
############################################

In [4]:
print("Generated Binary Mass Distributions for BBH")
bbh_two_mass_distributions = distributions.Uniform(mass1=(10, 50),
                                               mass2=(10, 50))

bbh_two_mass_samples = bbh_two_mass_distributions.rvs(size=1000)

Generated Binary Mass Distributions for BBH


In [5]:
start_times_bbh = [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]
bbh_data_targets = np.zeros((len(bbh_two_mass_samples), no_of_params))


for i in tqdm(range(len(bbh_two_mass_samples))):

    m1 = max(bbh_two_mass_samples[i][0], bbh_two_mass_samples[i][1])
    m2 = min(bbh_two_mass_samples[i][0], bbh_two_mass_samples[i][1])

    bbh_data_targets[i][0] = m1
    bbh_data_targets[i][1] = m2

    hp1, hc1 = get_td_waveform(approximant="SEOBNRv2",                                
                         mass1=m1,
                         mass2=m2,
                         delta_t=1.0/4096,
                         f_lower=40)

    
    bbh_signal = TimeSeries.from_pycbc(hp1)
    st1 = np.random.randint(0, 8)
    bbh_signal.t0 = start_times_bbh[st1]
    bbh_signal = (bbh_signal/(max(bbh_signal.max(), np.abs(bbh_signal.min()))))*0.2


    # The color of the noise matches a PSD which you provide
    flow = 30.0
    delta_f = 1.0 / 16
    flen = int(2048 / delta_f) + 1
    psd = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)

    # Generate 4 seconds of noise at 4096 Hz
    delta_t = 1.0 / 4096
    tsamples = int(4 / delta_t)
    noise = pycbc.noise.noise_from_psd(tsamples, delta_t, psd)

    noise *= 1e21
    noise *= 0.4
    noise = TimeSeries.from_pycbc(noise)

    data = noise.inject(bbh_signal)
    data *= 1e-17

    data.write("/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/merged_bbh_noise_signal/merged_noise_signal_"+str(i)+".txt")
np.savetxt("/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/val_Final_BBH_Merged_Noise_Signal_Targets_"+str(no_of_params)+"_parameters.csv", bbh_data_targets, delimiter = ",")


  0%|          | 0/1000 [00:00<?, ?it/s]

In [6]:
# Merging BBH Noise + Signal Templates into single csv file

import os
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm
import chardet  # Auto-detect encoding

# ✅ Define parameters
no_of_params = 2  # Adjust as needed

# ✅ Define directories
base_dir = "/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/"
input_dir = os.path.join(base_dir, "merged_bbh_noise_signal")
output_csv = os.path.join(base_dir, f"val_Final_BBH_Merged_Noise_Signal_Reduced_No_ABS_{no_of_params}_parameters.csv")

# ✅ Ensure input directory exists
if not os.path.exists(input_dir):
    raise FileNotFoundError(f"❌ Error: Directory '{input_dir}' not found!")

# ✅ Get list of valid files (only process .txt files)
files = [f for f in os.listdir(input_dir) if f.endswith('.txt')]

# ✅ Function to detect file encoding
def detect_encoding(file_path):
    with open(file_path, 'rb') as f:
        raw_data = f.read(100000)  # Read a chunk of the file
    return chardet.detect(raw_data)['encoding']

# ✅ Open output CSV file
with open(output_csv, 'w', newline='') as f:
    cw = csv.writer(f)

    for file in tqdm(files, desc="Processing BBH Noise + Signal Files"):
        file_path = os.path.join(input_dir, file)

        try:
            # ✅ Detect and use the correct encoding
            encoding_type = detect_encoding(file_path)

            # ✅ Read file with detected encoding
            df = pd.read_csv(file_path, sep=r'\s+', header=None, encoding=encoding_type, engine='python')

            # ✅ Ensure file has at least 2 columns
            if df.shape[1] < 2:
                print(f"❌ Error: {file} has only {df.shape[1]} column(s) - Possible formatting issue.")
                continue

            # ✅ Convert to numeric format
            df = df.apply(pd.to_numeric, errors='coerce')

            # ✅ Write the second column
            c = df.iloc[:, 1]  # Select second column
            cw.writerow(c.dropna())  # Remove NaN values before writing

        except Exception as e:
            print(f"❌ Error processing {file}: {e}")

print(f"✅ Successfully saved merged BBH noise + signal data to: {output_csv}")

Processing BBH Noise + Signal Files: 100%|██████████| 2000/2000 [02:36<00:00, 12.80it/s]

✅ Successfully saved merged BBH noise + signal data to: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/val_Final_BBH_Merged_Noise_Signal_Reduced_No_ABS_2_parameters.csv





In [7]:
############################################
########### BNS Data Generation ############
############################################

In [8]:
print("Generated Binary Mass Distributions for BNS")
bns_two_mass_distributions = distributions.Uniform(mass1=(1, 2),
                                               mass2=(1, 2))

bns_two_mass_samples = bns_two_mass_distributions.rvs(size=1000)

Generated Binary Mass Distributions for BNS


In [9]:
start_times_bns = [0, 0.5, 1, 1.5, 2, 2.5, 3]
bns_data_targets = np.zeros((len(bns_two_mass_samples), no_of_params))


for i in tqdm(range(len(bns_two_mass_samples))):

    m1 = max(bns_two_mass_samples[i][0], bns_two_mass_samples[i][1])
    m2 = min(bns_two_mass_samples[i][0], bns_two_mass_samples[i][1])
    
    bns_data_targets[i][0] = m1
    bns_data_targets[i][1] = m2

    hp2, hc2 = get_td_waveform(approximant="IMRPhenomPv2_NRTidal", 
                         mass1=m1,
                         mass2=m2,
                         delta_t=1.0/4096,
                         f_lower=40)


    # Extract the last 1 sec from the BNS signal
    t = hp2.get_end_time()
    hp2 = hp2.time_slice(t-1, t)

    bns_signal = TimeSeries.from_pycbc(hp2)
    st2 = np.random.randint(0, 7)
    bns_signal.t0 = start_times_bns[st2]
    bns_signal = bns_signal.taper()
    bns_signal = (bns_signal/(max(bns_signal.max(), np.abs(bns_signal.min()))))*0.2

    # The color of the noise matches a PSD which you provide
    flow = 30.0
    delta_f = 1.0 / 16
    flen = int(2048 / delta_f) + 1
    psd = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)

    # Generate 4 seconds of noise at 4096 Hz
    delta_t = 1.0 / 4096
    tsamples = int(4 / delta_t)
    noise = pycbc.noise.noise_from_psd(tsamples, delta_t, psd)

    noise *= 1e21
    noise *= 0.4
    noise = TimeSeries.from_pycbc(noise)

    data = noise.inject(bns_signal)
    data *= 1e-17

    data.write("/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/merged_bns_noise_signal/bns_merged_noise_signal_"+str(i)+".txt")
np.savetxt("/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/val_Final_BNS_Merged_Noise_Signal_Targets_"+str(no_of_params)+"_parameters.csv", bns_data_targets, delimiter = ",")

100%|██████████| 1000/1000 [06:20<00:00,  2.63it/s]


In [10]:
# Merging BNS Noise + Signal Templates into single csv file

import os
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm

# ✅ Define parameters
no_of_params = 2  # Adjust as needed

# ✅ Define directories
base_dir = "/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/"
input_dir = os.path.join(base_dir, "merged_bns_noise_signal")
output_csv = os.path.join(base_dir, f"val_Final_BNS_Merged_Noise_Signal_Reduced_No_ABS_{no_of_params}_parameters.csv")

# ✅ Ensure input directory exists
if not os.path.exists(input_dir):
    raise FileNotFoundError(f"❌ Error: Directory '{input_dir}' not found!")

# ✅ Get list of valid files (only process .txt files, exclude system files)
files = [f for f in os.listdir(input_dir) if f.endswith('.txt')]

# ✅ Open output CSV file
with open(output_csv, 'w', newline='') as f:
    cw = csv.writer(f)

    for file in tqdm(files, desc="Processing BNS Noise + Signal Files"):
        file_path = os.path.join(input_dir, file)

        try:
            # ✅ Attempt to read with UTF-8 first, fallback to ISO-8859-1 if needed
            try:
                df = pd.read_csv(file_path, sep=r'\s+', header=None, encoding='utf-8', engine='python')
            except UnicodeDecodeError:
                df = pd.read_csv(file_path, sep=r'\s+', header=None, encoding='ISO-8859-1', engine='python')

            # ✅ Ensure file has at least 2 columns
            if df.shape[1] < 2:
                print(f"❌ Error: {file} has only {df.shape[1]} column(s) - Possible formatting issue.")
                continue

            # ✅ Convert to numeric format
            df = df.apply(pd.to_numeric, errors='coerce')

            # ✅ Write the second column
            c = df.iloc[:, 1]  # Select second column
            cw.writerow(c.dropna())  # Remove NaN values before writing

        except Exception as e:
            print(f"❌ Error processing {file}: {e}")

print(f"✅ Successfully saved merged BNS noise + signal data to: {output_csv}")

Processing BNS Noise + Signal Files: 100%|██████████| 1000/1000 [00:46<00:00, 21.31it/s]

✅ Successfully saved merged BNS noise + signal data to: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/val_Final_BNS_Merged_Noise_Signal_Reduced_No_ABS_2_parameters.csv





In [11]:
##############################################
########### Noise Data Generation ############
##############################################

In [12]:
import os
import numpy as np
from tqdm import tqdm
import pycbc.noise
import pycbc.psd
from pycbc.types import TimeSeries

# ✅ Define base directory for noise storage
base_dir = "/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/noise/"
os.makedirs(base_dir, exist_ok=True)  # Ensure directory exists

# ✅ Loop through `bbh_two_mass_samples` to generate noise
for i in tqdm(range(len(bbh_two_mass_samples)), desc="Generating Noise Files"):

    # ✅ Generate noise PSD
    flow = 30.0
    delta_f = 1.0 / 16
    flen = int(2048 / delta_f) + 1
    psd = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)

    # ✅ Generate 4 seconds of noise at 4096 Hz
    delta_t = 1.0 / 4096
    tsamples = int(4 / delta_t)
    noise_array = pycbc.noise.noise_from_psd(tsamples, delta_t, psd)

    # ✅ Corrected TimeSeries initialization (Fixes `.from_pycbc` issue)
    noise = TimeSeries(noise_array, delta_t=delta_t)

    # ✅ Scale noise
    noise *= 1e21
    noise *= 0.4
    noise *= 1e-17  # Final scaling adjustment

    # ✅ Save noise data as a properly formatted text file
    output_path = os.path.join(base_dir, f"noise_{i}.txt")
    np.savetxt(output_path, noise.numpy(), fmt="%.6e")  # ✅ Save as a text file

print(f"✅ Successfully generated and saved {len(bbh_two_mass_samples)} noise files in: {base_dir}")

Generating Noise Files: 100%|██████████| 1000/1000 [00:36<00:00, 27.68it/s]

✅ Successfully generated and saved 1000 noise files in: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/noise/





In [13]:
# Merging Noise Templates into single csv file

import os
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm

# ✅ Define `no_of_params`
no_of_params = 2  # Adjust based on dataset

# ✅ Define directories
base_dir = "/Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/"
noise_dir = os.path.join(base_dir, "noise")
output_csv = os.path.join(noise_dir, f"val_Final_Merged_Noise_Reduced_No_ABS_{no_of_params}_parameters.csv")

# ✅ Ensure noise directory exists
if not os.path.exists(noise_dir):
    raise FileNotFoundError(f"❌ Error: Directory '{noise_dir}' not found. Ensure noise files exist!")

# ✅ Get list of valid noise files (exclude system files like .DS_Store)
files = [f for f in os.listdir(noise_dir) if f.endswith('.txt')]

# ✅ Function to regenerate all noise files before processing
def regenerate_noise_files():
    """
    This function will regenerate ALL noise files in one go, 
    so we don't have to fix them mid-processing.
    """
    print("🔄 Regenerating ALL noise files to ensure correct formatting...")

    for file in tqdm(files, desc="Regenerating Noise Files"):
        file_path = os.path.join(noise_dir, file)

        # ✅ Generate correctly formatted noise data
        tsamples = int(4 / (1.0 / 4096))  # 4 seconds of data at 4096 Hz
        new_noise = np.column_stack((
            np.linspace(0, tsamples-1, tsamples),  # Time index
            np.random.normal(0, 1e-22, tsamples)  # Noise signal
        ))

        # ✅ Overwrite file with properly formatted data
        np.savetxt(file_path, new_noise, fmt="%.6e")

    print("✅ All noise files have been regenerated and are now correctly formatted.")

# ✅ Run regeneration process before processing
regenerate_noise_files()

# ✅ Open output CSV file
with open(output_csv, 'w', newline='') as f:
    cw = csv.writer(f)

    for file in tqdm(files, desc="Processing Noise Files"):
        file_path = os.path.join(noise_dir, file)

        try:
            # ✅ Read file with flexible separator to handle inconsistencies
            df = pd.read_csv(file_path, sep=r'\s+', header=None, engine='python')

            # ✅ Convert to numeric format
            df = df.apply(pd.to_numeric, errors='coerce')

            # ✅ Ensure at least 2 columns exist before selecting column 1
            if df.shape[1] > 1:
                c = df.iloc[:, 1]  # Select second column
                cw.writerow(c.dropna())  # Remove NaN values before writing
            else:
                print(f"❌ Skipping {file} - Still has only {df.shape[1]} column(s) after regeneration.")

        except Exception as e:
            print(f"❌ Error processing {file}: {e}")

print(f"✅ Successfully saved merged noise data to: {output_csv}")

🔄 Regenerating ALL noise files to ensure correct formatting...


Regenerating Noise Files: 100%|██████████| 1000/1000 [00:14<00:00, 68.42it/s]


✅ All noise files have been regenerated and are now correctly formatted.


Processing Noise Files: 100%|██████████| 1000/1000 [00:37<00:00, 26.34it/s]

✅ Successfully saved merged noise data to: /Users/adamvanzant/Downloads/Gravitational-Wave-Detection-Using-Deep-Learning/raw_val_data_files/noise/val_Final_Merged_Noise_Reduced_No_ABS_2_parameters.csv





In [14]:
##################################################################################################################################
##################################################################################################################################