In [1]:
import os
from glob import glob

import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Root directory of the competition data; every CSV / .npy path below is built from it.
INPUT_DIR = "../input/g2net-gravitational-wave-detection"
# Row tables for the two splits, read from CSV files under INPUT_DIR.
train_df = pd.read_csv(f"{INPUT_DIR}/training_labels_paths.csv")
test_df = pd.read_csv(f"{INPUT_DIR}/test_paths.csv")
# Directory that the exploratory glob cells below inspect for saved files.
savedir = "../input/filtered_train"

In [27]:
import gwpy
from gwpy.timeseries import TimeSeries
from scipy.cluster.vq import whiten
from gwpy.signal import filter_design
from tqdm.notebook import tqdm

def save_files(rg_n, df=train_df, out_dir="../input/filtered"):
    """Filter, normalise and save the recordings for the selected rows of ``df``.

    For every positional row index in ``rg_n`` the array stored at the row's
    ``path`` is loaded and its first three channels are each band-passed
    (50-250 Hz) and notch-filtered, then scaled by their standard deviation.
    The array is transposed so the channel axis comes last, divided by its
    largest absolute value, and written to ``{out_dir}/{id}.npy``.

    Parameters
    ----------
    rg_n : iterable of int
        Positional (``iloc``) row indices of ``df`` to process.
    df : pandas.DataFrame, optional
        Table with ``path`` and ``id`` columns. Defaults to ``train_df``.
    out_dir : str, optional
        Directory that receives the ``.npy`` files; created when missing.
        The default keeps the location this function has always written to.

    Notes
    -----
    Assumes each input file holds a 2-D array with three channels on the
    first axis, sampled at 2048 Hz -- TODO confirm against the raw data.
    """
    sample_rate = 2048
    # Channels 0 and 1 have 60 Hz harmonics removed, channel 2 has 50 Hz
    # harmonics removed (presumably different mains frequencies per detector
    # site -- TODO confirm the channel order).
    notch_sets = ((60, 120, 180), (60, 120, 180), (50, 100, 150))

    # The band-pass design does not depend on the channel, so build it once.
    bp = filter_design.bandpass(50, 250, sample_rate)
    filts = [
        filter_design.concatenate_zpks(
            bp, *[filter_design.notch(line, sample_rate) for line in notch_freqs]
        )
        for notch_freqs in notch_sets
    ]

    # np.save does not create parent directories.
    os.makedirs(out_dir, exist_ok=True)

    for index in tqdm(rg_n):
        path, ident = df.iloc[index][["path", "id"]]
        ts_data = np.load(path).astype(np.float32)

        for channel, zpk in enumerate(filts):
            ts = TimeSeries(ts_data[channel], sample_rate=sample_rate)
            hfilt = ts.filter(zpk, filtfilt=True)
            # scipy.cluster.vq.whiten only divides by the standard deviation;
            # it is not a spectral whitening step.
            ts_data[channel] = np.asarray(whiten(hfilt))

        ts_data = np.transpose(ts_data, [1, 0])
        # Peak-normalise, but leave an all-zero recording untouched instead
        # of turning it into NaNs through a division by zero.
        peak = np.abs(ts_data).max()
        if peak > 0:
            ts_data /= peak
        np.save(f"{out_dir}/{ident}.npy", ts_data)

In [99]:
# List everything currently stored in `savedir`.
paths = glob(f"{savedir}/*")
# Not the last expression of the cell, so this line displays nothing.
test_df.shape
# Look for a saved file whose name starts with the first test id.
glob(f"{savedir}/{test_df.id[0]}*")

[]

In [102]:
# Compare the number of files found on disk with the number of training rows.
print(len(paths))
len(train_df)

560000


560000

In [4]:
def make_missing_files(df):
    """Regenerate the filtered files that are missing for the rows of ``df``.

    The split is detected from the first row: a ``target`` of ``0.5`` selects
    ``../input/filtered_test``, anything else ``../input/filtered_train``.
    When the number of entries in that directory equals the number of rows,
    nothing else is checked. Otherwise every row whose ``{id}.npy`` file is
    absent is reported and its positional index is passed to ``save_files``
    together with ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Non-empty table with ``id`` and ``target`` columns (plus whatever
        ``save_files`` reads).

    Notes
    -----
    ``save_files`` chooses its own output directory, which may differ from
    the directory inspected here.
    """
    if df.iloc[0].target == 0.5:
        savedir = "../input/filtered_test"
        print("test set")
    else:
        savedir = "../input/filtered_train"
        print("train set")

    paths = glob(f"{savedir}/*")
    print(f"Paths: {len(paths)}, Rows: {df.shape[0]}")
    if len(paths) == df.shape[0]:
        print("all files are present")
        return

    # Compare by file stem so the check is independent of the path separator
    # glob returns on the current OS, and use a set for O(1) lookups.
    present = {os.path.splitext(os.path.basename(p))[0] for p in paths}

    missed_rows = []
    for pos, ident in enumerate(df["id"].astype(str)):
        if ident not in present:
            missed_rows.append(pos)
            print(f"missed {ident}")

    # Pass `df` explicitly; the default of save_files is the training table.
    save_files(missed_rows, df)

In [118]:
# Run the completeness check for both splits; the function picks the
# directory to inspect from the `target` value of the first row.
make_missing_files(train_df)
make_missing_files(test_df)

train set
Paths: 560000, Rows: 560000
all files are present
test set
Paths: 226000, Rows: 226000
all files are present


In [145]:
# Pre-allocate one (4096, 3) float32 slot and one integer label per training row.
# NOTE: with the 560000 rows reported above, train_x alone needs about 27.5 GB.
train_x = np.zeros((train_df.shape[0], 4096, 3), dtype=np.float32)
# `np.int` was only an alias of the builtin `int` and was removed in NumPy 1.24.
train_y = np.zeros(train_df.shape[0], dtype=int)

In [146]:
# Display the training table (columns: id, target, path).
train_df

Unnamed: 0,id,target,path
0,00000e74ad,1,../input/g2net-gravitational-wave-detection/tr...
1,00001f4945,0,../input/g2net-gravitational-wave-detection/tr...
2,0000661522,0,../input/g2net-gravitational-wave-detection/tr...
3,00007a006a,0,../input/g2net-gravitational-wave-detection/tr...
4,0000a38978,1,../input/g2net-gravitational-wave-detection/tr...
...,...,...,...
559995,ffff9a5645,1,../input/g2net-gravitational-wave-detection/tr...
559996,ffffab0c27,0,../input/g2net-gravitational-wave-detection/tr...
559997,ffffcf161a,1,../input/g2net-gravitational-wave-detection/tr...
559998,ffffd2c403,0,../input/g2net-gravitational-wave-detection/tr...


In [5]:
# Load the previously saved test array; a later cell writes back to this same path.
test_x = np.load(f"{INPUT_DIR}/test_x.npy")

In [16]:
# Inspect the final 17 entries; all-zero blocks in the output are slots that
# hold no data yet (presumably the rows re-filtered below -- TODO confirm).
test_x[-17:]

array([[[-0.06244404,  0.03822626,  0.02098865],
        [-0.32932013, -0.4944385 ,  0.01208142],
        [-0.49958375, -0.8705459 , -0.02977327],
        ...,
        [-0.41248697, -0.11056531,  0.40311646],
        [-0.28653347, -0.1056959 ,  0.22778332],
        [-0.05929917, -0.0802836 ,  0.03178055]],

       [[ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       ...,

       [[ 0.        ,  0.        ,  0.        ],
        [ 0

In [29]:
# Re-create the filtered files for the last 16 test rows. The original call
# had lost its first argument; 16 matches the progress bar total of this cell
# and the `iloc[-16:]` slice used by the cell that loads these files.
save_files(range(max(len(test_df) - 16, 0), len(test_df)), test_df)

  0%|          | 0/16 [00:00<?, ?it/s]

Unnamed: 0,id,target,path
0,00005bced6,0.5,../input/g2net-gravitational-wave-detection/te...
1,0000806717,0.5,../input/g2net-gravitational-wave-detection/te...
2,0000ef4fe1,0.5,../input/g2net-gravitational-wave-detection/te...
3,00020de251,0.5,../input/g2net-gravitational-wave-detection/te...
4,00024887b5,0.5,../input/g2net-gravitational-wave-detection/te...
...,...,...,...
225995,ffff4125f1,0.5,../input/g2net-gravitational-wave-detection/te...
225996,ffff9d32a6,0.5,../input/g2net-gravitational-wave-detection/te...
225997,ffff9f4c1f,0.5,../input/g2net-gravitational-wave-detection/te...
225998,ffffa19693,0.5,../input/g2net-gravitational-wave-detection/te...


In [45]:
# Copy the re-filtered recordings of the last 16 test rows into `test_x`.
# Positions are used for both the DataFrame lookup and the array slot, so the
# loop no longer relies on `test_df` carrying a default 0..n-1 index.
for ind in range(max(len(test_df) - 16, 0), len(test_df)):
    test_x[ind] = np.load(f"../input/filtered/{test_df['id'].iloc[ind]}.npy")


In [47]:
# Disabled alternatives for persisting the training arrays:
# np.savez(f"{INPUT_DIR}/dataset.npz", train_x, train_y, test_x)
# np.save(f"{INPUT_DIR}/train_x.npy", train_x)
# np.save(f"{INPUT_DIR}/train_y.npy", train_y)
# Overwrites the same test_x.npy file that `test_x` was loaded from earlier.
np.save(f"{INPUT_DIR}/test_x.npy", test_x)

In [46]:
# Display the patched array (the notebook truncates the repr).
test_x

array([[[ 0.1246516 ,  0.05904157,  0.05949322],
        [ 0.18227789,  0.12275852,  0.22884452],
        [ 0.20864534,  0.17730013,  0.2786787 ],
        ...,
        [-0.04188983,  0.3825923 , -0.07423549],
        [ 0.04799047,  0.22818792, -0.05167435],
        [ 0.07813493,  0.03292836,  0.03669486]],

       [[-0.01073111,  0.18248162, -0.00180754],
        [ 0.22839126, -0.18469845, -0.02156874],
        [ 0.32300073, -0.45050958, -0.0229334 ],
        ...,
        [-0.06835276,  0.39612588, -0.30983418],
        [-0.04245393,  0.24756773, -0.15696232],
        [-0.01998452, -0.03911129,  0.04262162]],

       [[ 0.06984168,  0.00831802, -0.00564737],
        [ 0.11111585,  0.03480548,  0.15226942],
        [ 0.14903949,  0.06390033,  0.26502427],
        ...,
        [ 0.20059855,  0.48006037, -0.41830468],
        [ 0.11511093,  0.21778609, -0.25183508],
        [-0.00485693, -0.05394083, -0.10547303]],

       ...,

       [[-0.00893921,  0.05271262, -0.02456646],
        [ 0