In [39]:
import pickle
import numpy as np
from scipy.signal import welch as psd_welch
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd

In [7]:
# Load the preprocessed dataset. Later cells unpack every element of `data`
# as a 3-item record `(pid, y, x)`: `pid` ends up in the "patient" column,
# `y` in the "Y" column, and `x` is the signal passed to the Welch PSD.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open pickle files that come from a trusted source.
with open("./data/preprocessed/all_data.pkl", "rb") as fp:
    data = pickle.load(fp)

In [8]:
# Named frequency bands as [start, end] edges; the values of this dict are the
# default bins over which the Welch PSD is averaged.
# NOTE(review): edges are presumably in Hz (same unit as Fs) — confirm.
FREQ_BANDS = {
    "delta": [0.5, 4.0],
    "theta": [4.0, 8.0],
    "alpha": [8.0, 12.0],
    "sigma": [12.0, 16.0],
    "beta": [16.0, 30.0],
    "gamma": [30.0, 40.0]
}

Fs = 100  # sampling frequency, passed to the Welch PSD as `fs`
Ts = 1/Fs  # sampling period (reciprocal of Fs)

In [27]:
def data_to_binned_psd(x, freq_bins=None, fs=None):
    """Return the mean Welch PSD of ``x`` inside each frequency band.

    Parameters
    ----------
    x : array_like
        Signal handed to ``scipy.signal.welch``.
        NOTE(review): the band masking indexes the PSD with a 1-D mask, so a
        1-D signal is assumed — confirm before passing other shapes.
    freq_bins : iterable of (start, end) pairs, optional
        Band edges, in the same unit as ``fs``. Both edges are inclusive, so a
        frequency sample lying exactly on an edge shared by two bands is
        counted in both. Any iterable is accepted (list, dict view,
        generator). When omitted, ``FREQ_BANDS.values()`` is looked up at call
        time, so re-running the cell that defines ``FREQ_BANDS`` takes effect
        without redefining this function.
    fs : float, optional
        Sampling frequency. When omitted, the notebook-level ``Fs`` is used.

    Returns
    -------
    numpy.ndarray
        One value per band, in the iteration order of ``freq_bins``. A band
        that contains no frequency sample yields ``nan``.
    """
    # Resolve defaults at call time instead of binding notebook globals when
    # the function is defined.
    if freq_bins is None:
        freq_bins = FREQ_BANDS.values()
    if fs is None:
        fs = Fs
    # Materialise so that len() also works for generators.
    freq_bins = list(freq_bins)

    freqs, x_psd = psd_welch(x, fs=fs, return_onesided=True)
    bins = np.zeros(len(freq_bins))
    for i, (freq_start, freq_end) in enumerate(freq_bins):
        mask = (freqs >= freq_start) & (freqs <= freq_end)
        x_psd_bin = x_psd[mask]
        # An empty band stays NaN (what np.mean would return) without
        # triggering NumPy's "mean of empty slice" warning.
        bins[i] = np.mean(x_psd_bin) if x_psd_bin.size else np.nan
    return bins

In [36]:
# Band-power features for every record in `data`:
#   Xpsd     - binned PSD values as returned by data_to_binned_psd
#   Xpsd_rel - the same values divided by their sum (relative band power)
Xpsd, Xpsd_rel = [], []
for _, _, x in tqdm(data):
    x_psd = data_to_binned_psd(x)
    Xpsd.append(x_psd)

    x_psd_rel = x_psd / x_psd.sum()
    Xpsd_rel.append(x_psd_rel)

100%|██████████████████████████████████████████████████████████████████████████| 97432/97432 [01:12<00:00, 1336.25it/s]


In [38]:
# Assemble one flat row per record: [pid, <binned PSD values...>, y].
Nsamples = len(data)
data_export = []
for i in tqdm(range(Nsamples)):
    pid, y, _ = data[i]
    x_psd = Xpsd[i]
    entry = [pid] + list(x_psd) + [y]
    data_export.append(entry)

100%|████████████████████████████████████████████████████████████████████████| 97432/97432 [00:00<00:00, 264045.61it/s]


In [45]:
# One row per record: patient id, one feature column per frequency band, then Y.
# The feature column names are derived from FREQ_BANDS instead of a literal 6,
# so they cannot drift from the number of band values each row carries.
feature_columns = [f"X{i}" for i in range(len(FREQ_BANDS))]
df = pd.DataFrame(data_export, columns=["patient", *feature_columns, "Y"])
# NOTE: to_csv also writes the DataFrame index as the first, unnamed column.
df.to_csv("./data/preprocessed/data.csv")