In [1]:
import numpy as np
import pandas as pd
import pyedflib
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

from database import Database

In [2]:
# Load the selected run of every chosen patient into `data`.
# Result: data[patient] is an array shaped (n_recordings, n_channels, n_samples).
root = "data/"
num_patients = 2  # Max is 109
run_tag = "R03"  # run selected by filename; "R07" / "R11" are possible additions

# os.listdir() returns entries in arbitrary order, so sort before slicing;
# otherwise which patients get loaded can differ between machines, while the
# cells below look patients up by fixed keys. Non-directory entries are dropped
# because listing them as a patient folder would raise.
patient_dirs = sorted(
    entry for entry in os.listdir(root)
    if os.path.isdir(os.path.join(root, entry))
)[:num_patients]

data = {}
loop = tqdm(patient_dirs)
for patient in loop:
    recordings = []
    # Sorted so that the recording axis has a stable order if more runs are enabled.
    for recording in sorted(os.listdir(os.path.join(root, patient))):
        # Skip the ".event" side-car files and every run other than the selected one.
        if recording.endswith(".event") or run_tag not in recording:
            continue
        with pyedflib.EdfReader(os.path.join(root, patient, recording)) as edf_file:
            # Number of signals in the file
            n = edf_file.signals_in_file

            # Names of the signals
            signal_labels = edf_file.getSignalLabels()

            # One row per signal; the length is taken from the first signal, so a
            # file whose signals differ in length fails on assignment below.
            signal = np.zeros((n, edf_file.getNSamples()[0]))
            for i in range(n):
                signal[i, :] = edf_file.readSignal(i)
        recordings.append(signal)
    data[patient] = np.array(recordings)




100%|██████████| 2/2 [00:00<00:00, 15.46it/s]


In [3]:
# Sanity check: print the shape of the stacked array held for each loaded patient.
for patient, recordings in data.items():
    print(recordings.shape)

(1, 64, 20000)
(1, 64, 19680)


In [4]:
def _seconds_to_samples(column, rate=160.0):
    """Convert a text column of second values into integer sample counts.

    The literal substrings ' sec' and "'" are stripped from every entry, the
    remainder is parsed as float, multiplied by `rate` and rounded to the
    nearest integer.

    column: pandas Series of strings.
    rate:   samples per second; 160.0 is presumably the recording's sampling
            rate -- TODO confirm against the EDF header.
    Returns an integer Series aligned with `column`.
    """
    seconds = (
        column
        .str.replace(' sec', '', regex=False)  # regex=False: literal match on every pandas version
        .str.replace("'", '', regex=False)
        .astype(float)
    )
    # Round before the int cast: astype(int) truncates, so a product such as
    # 655.9999999999999 caused by float representation would land one sample low.
    return (seconds * rate).round().astype(int)


def _load_t2_events(path):
    """Read an annotation spreadsheet and return only its 'T2' rows.

    The 'onset' and 'duration' columns are converted from text to sample counts.
    """
    events = pd.read_excel(path)
    events['onset'] = _seconds_to_samples(events['onset'])
    events['duration'] = _seconds_to_samples(events['duration'])
    return events[events['anno'] == 'T2']


p1df = _load_t2_events("P103.xlsx")
p2df = _load_t2_events("P203.xlsx")
p1df.head()

Unnamed: 0,onset,anno,duration
1,672,T2,656
7,4656,T2,656
9,5984,T2,656
15,9968,T2,656
19,12624,T2,656


In [5]:
def _extract_epochs(recordings, events, length=640):
    """Cut one window per event row out of a patient's recording array.

    recordings: 3-D array indexed (recording, channel, sample). It must hold
                exactly one recording, because the leading axis is removed
                with squeeze(0), which raises otherwise.
    events:     DataFrame with integer 'onset' and 'duration' columns,
                expressed in samples.
    length:     maximum number of samples kept per window.
    Returns a list of 2-D arrays (channel, sample), one per event row.
    """
    epochs = []
    for onset, duration in zip(events['onset'], events['duration']):
        window = recordings[:, :, onset:onset + duration].squeeze(0)
        # Trim to a common length so that every epoch has the same shape.
        epochs.append(window[:, :length])
    return epochs


p1_seqs = _extract_epochs(data['S001'], p1df)
p2_seqs = _extract_epochs(data['S002'], p2df)

# Every epoch must be complete; a window that is shorter than expected
# (short duration, or running past the end of the recording) fails here.
shape = (64, 640)
for seq in p1_seqs + p2_seqs:
    assert seq.shape == shape, seq.shape


In [6]:
# Spectrum of every epoch, computed along the last (sample) axis, which is
# np.fft.fft's default. Only the first 320 bins of each row are kept.
p1_ffts = [np.fft.fft(epoch)[:, :320] for epoch in p1_seqs]
p2_ffts = [np.fft.fft(epoch)[:, :320] for epoch in p2_seqs]

In [7]:
# Register the first six spectra of each subject under that subject's ID.
# Database is project-local; what add() does with them is not visible here.
db = Database()
for subject_id, spectra in (('S001', p1_ffts), ('S002', p2_ffts)):
    db.add(subject_id, spectra[:6])


In [8]:
# Check the 'S001' entry against the last spectrum of each subject:
# first S001's own, then S002's.
own_result = db.verify('S001', p1_ffts[-1])
other_result = db.verify('S001', p2_ffts[-1])
print(own_result, other_result)

True False
