# Preparing Noise dataset

In this article, we generate baseline ("noise") epochs, concatenate them with
the event epochs we generated previously, and produce `X` and `y`, the datasets
for our machine learning pipeline.

In [195]:
import importlib
import pickle
import re

from pathlib import Path

import numpy as np
import pandas as pd

from gwpy.timeseries import TimeSeries

In [196]:
# Project-local modules. They are reloaded on every run of this cell so that
# edits made to them on disk take effect without restarting the kernel.
import utils
import conf

# Same reload order as before (conf first, then utils). A loop has no trailing
# expression, so the cell prints nothing.
for _module in (conf, utils):
  importlib.reload(_module)

# Extracting epochs from event strains

We use the same strain files as for the cataloged events and extract epochs
that do not overlap with `[event_timestamp - dT, event_timestamp + dT]`.

In [221]:
# Event catalog; the sampling loop reads the "h1_strain", "l1_strain" and
# "timestamp" columns from each row.
catalog_path = conf.DATA_DIR / "catalog.csv"
catalog = pd.read_csv(catalog_path)

In [222]:
# Accumulator for the sampled baseline epochs (filled by the sampling loop).
noise_epochs = list()

In [223]:
# For every catalogued event, draw one random baseline ("noise") window of
# length 2 * conf.dT from the same H1/L1 strain files. A candidate window is
# rejected and redrawn when it
#   * overlaps [event_ts - conf.dT, event_ts + conf.dT], or
#   * contains NaNs in either detector's data.
#
# NOTE(review): np.random is not seeded in this cell, so the sampled windows
# differ between runs unless a seed is set elsewhere.
for i, row in catalog.iterrows():
  print(f"Processing noise {i}/{len(catalog)}")

  h1f, l1f = Path(row["h1_strain"]), Path(row["l1_strain"])

  h1s = TimeSeries.read(h1f, format='hdf5.losc')
  l1s = TimeSeries.read(l1f, format='hdf5.losc')

  # Start time of the strain file.
  t0 = utils.get_file_timestamp(h1f)

  # Time of the event itself.
  # Assumes row["timestamp"] is on the same time axis as
  # utils.get_file_timestamp() -- TODO confirm.
  event_ts = row["timestamp"]

  while True:
    # Centre of the candidate window, kept conf.dT away from both file edges.
    # 4096 is presumably the strain file duration in seconds -- TODO confirm.
    t = t0 + np.random.randint(conf.dT, 4096 - conf.dT)
    start, end = t - conf.dT, t + conf.dT

    # Reject windows that touch the event window. The check compares against
    # the event time (not the file start) and restarts the draw immediately,
    # so an overlapping window can never be accepted by the NaN check below.
    if not ((end < event_ts - conf.dT) or (start > event_ts + conf.dT)):
      print("Baseline overlaps with event. Recreating...")
      continue

    # Crop only once the window is known to be clear of the event.
    h1_ts = h1s.crop(start, end, copy=True)
    l1_ts = l1s.crop(start, end, copy=True)

    if (np.isnan(h1_ts).any() or np.isnan(l1_ts).any()):
      print("Detected NaNs. Recreating...")
      continue

    break

  # Drop the full-length strains before loading the next pair of files; the
  # crops were taken with copy=True.
  del h1s, l1s

  noise_epochs.append([h1_ts, l1_ts])

Processing noise 0/88
Processing noise 1/88
Processing noise 2/88
Detected NaNs. Recreating...
Processing noise 3/88
Processing noise 4/88
Processing noise 5/88
Processing noise 6/88
Processing noise 7/88
Processing noise 8/88
Processing noise 9/88
Processing noise 10/88
Processing noise 11/88
Processing noise 12/88
Processing noise 13/88
Processing noise 14/88
Detected NaNs. Recreating...
Processing noise 15/88
Processing noise 16/88
Processing noise 17/88
Detected NaNs. Recreating...
Processing noise 18/88
Processing noise 19/88
Processing noise 20/88
Processing noise 21/88
Processing noise 22/88
Processing noise 23/88
Processing noise 24/88
Detected NaNs. Recreating...
Processing noise 25/88
Processing noise 26/88
Processing noise 27/88
Processing noise 28/88
Processing noise 29/88
Processing noise 30/88
Processing noise 31/88
Processing noise 32/88
Processing noise 33/88
Processing noise 34/88
Processing noise 35/88
Detected NaNs. Recreating...
Processing noise 36/88
Processing noi

In [224]:
# Persist the sampled noise epochs. Despite the ".npy" suffix, the file is
# written with pickle.
noises_path = conf.DATA_DIR / "noises.npy"
with noises_path.open("wb") as sink:
  pickle.dump(noise_epochs, sink)

# Merging event and noise datasets

In [225]:
# Load the event epochs from the pickle file under conf.DATA_DIR.
# NOTE: pickle.load can execute arbitrary code from the file; only use it on
# trusted, locally generated data.
events_path = conf.DATA_DIR / "events.npy"
with events_path.open("rb") as source:
  event_epochs = pickle.load(source)

In [226]:
# Feature set: event epochs first, noise epochs after. The label vector y
# must follow the same ordering.
epochs = event_epochs + noise_epochs
X = epochs  # both names refer to the same list

# Sanity check: sizes of each part and of the merged dataset.
(len(event_epochs), len(noise_epochs), len(X))

(88, 88, 176)

In [227]:
# Persist the merged epochs. Despite the ".npy" suffix, the file is written
# with pickle.
features_path = conf.DATA_DIR / "X.npy"
with features_path.open("wb") as sink:
  pickle.dump(X, sink)

In [228]:
# Labels in the same order as X: 1 for every event epoch, then 0 for every
# noise epoch.
n_events, n_noise = len(event_epochs), len(noise_epochs)
y = np.array([1] * n_events + [0] * n_noise)

# Sanity check: one label per epoch.
len(y)

176

In [229]:
# Persist the label vector. Despite the ".npy" suffix, the file is written
# with pickle.
labels_path = conf.DATA_DIR / "y.npy"
with labels_path.open("wb") as sink:
  pickle.dump(y, sink)