In [8]:
import mne
import numpy as np
import glob
import os
import random
from utils.dataloader import read_edf_to_raw, calculate_neg_pos

In [9]:
seizures_dict = {"chb01_03": [[2996, 3036]],
                "chb01_04": [[1467, 1494]],
                "chb01_15": [[1732, 1772]],
                "chb01_16": [[1015, 1066]],
                "chb01_18": [[1720, 1810]],
                "chb01_21": [[327, 420]],
                "chb01_26": [[1862, 1963]],
                
                "chb02_16": [[130, 212]],
                "chb02_16+": [[2972, 3053]],
                "chb02_19": [[3369, 3378]],

                "chb03_01": [[362, 414]],
                "chb03_02": [[731, 796]],
                "chb03_03": [[432, 501]],
                "chb03_04": [[2162, 2214]],
                "chb03_34": [[1982, 2029]],
                "chb03_35": [[2592, 2656]],
                "chb03_36": [[1725, 1778]],

                "chb05_06": [[417, 532]], 
                "chb05_13": [[1086, 1196]],
                "chb05_16": [[2317, 2413]], 
                "chb05_17": [[2451, 2571]],
                "chb05_22": [[2348, 2465]],
                
                "chb06_01": [[1724, 1738], [7461, 7476], [13525, 13540]],
                "chb06_04": [[327, 347], [6211, 6231]],
                "chb06_09": [[12500, 12516]],
                "chb06_10": [[10833, 10845]],
                "chb06_13": [[506, 519]],
                "chb06_18": [[7799, 7811]],
                "chb06_24": [[9387, 9403]],

                "chb07_12": [[4920, 5006]],
                "chb07_13": [[3285, 3381]],
                "chb07_19": [[13688, 13831]],

                "chb08_02": [[2670, 2841]], 
                "chb08_05": [[2856, 3046]],
                "chb08_11": [[2988, 3211]], 
                "chb08_13": [[2417, 2577]],
                "chb08_21": [[2083, 2347]],
                
                "chb10_12": [[6313, 6348]],
                "chb10_20": [[6888, 6958]],
                "chb10_27": [[2382, 2447]],
                "chb10_30": [[3021, 3079]],
                "chb10_31": [[3801, 3877]],
                "chb10_38": [[4618, 4707]],
                "chb10_89": [[1383, 1437]],

                "chb23_06": [[3962, 4075]],
                "chb23_08": [[325, 345], [325, 345]],
                "chb23_09": [[2589, 2660],[6885, 6947], [8505, 8532], [9580, 9664]],

                "chb24_01": [[480, 505],[2451,2476]],
                "chb24_03": [[231, 260],[2883,2908]],
                "chb24_04": [[1088, 1120],[1411,1438],[1745, 1764]],
                "chb24_06": [[1229, 1253]],
                "chb24_07": [[38, 60]],
                "chb24_09": [[1745, 1764]],
                "chb24_11": [[3527, 3597]],
                "chb24_13": [[3288, 3304]],
                "chb24_14": [[1939, 1966]],
                "chb24_15": [[3552, 3569]],
                "chb24_17": [[3515, 3581]],
                "chb24_21": [[2804, 2872]],
                }

## Balanced Dataset by Undersampling

In [15]:
def calculate_neg_pos(y):
    unique, counts = np.unique(y, return_counts=True)
    dic = dict(zip(unique, counts))
    neg = dic[0]
    pos = dic[1]
    return neg, pos

def custom_fit_resample(x, y, rate_p=0.5):

    neg, pos = calculate_neg_pos(y)
    new_neg = (1-rate_p)/rate_p*(pos)

    while neg > new_neg:
        indice = random.randint(0, y.shape[0]-1)
        if y[indice] == 0:
            y = np.delete(y, indice, 0)
            x = np.delete(x, indice, 0)
            neg, pos = calculate_neg_pos(y)
    
    return x,y

In [11]:
################ INPUT YOUR DATASET PATH HERE
# DATASET_PATH = os.path.join(os.getcwd(), 'data') #ONLY FOR TESTING
DATASET_PATH = "/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0"
NEW_DATASET_PATH = "/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit-segments/3"


selected_files = dict()
for key in seizures_dict.keys():
    folder, fn = key.split('_')
    if folder in selected_files.keys():
        selected_files[folder].append(fn)
    else:
        selected_files[folder] = [fn]
    

In [12]:
selected_files

{'chb01': ['03', '04', '15', '16', '18', '21', '26'],
 'chb02': ['16', '16+', '19'],
 'chb03': ['01', '02', '03', '04', '34', '35', '36'],
 'chb05': ['06', '13', '16', '17', '22'],
 'chb06': ['01', '04', '09', '10', '13', '18', '24'],
 'chb07': ['12', '13', '19'],
 'chb08': ['02', '05', '11', '13', '21'],
 'chb10': ['12', '20', '27', '30', '31', '38', '89'],
 'chb23': ['06', '08', '09'],
 'chb24': ['01',
  '03',
  '04',
  '06',
  '07',
  '09',
  '11',
  '13',
  '14',
  '15',
  '17',
  '21']}

In [16]:
################ CHOOSE THE FILES THAT YOU WANT HERE
# selected_files = {
#                 'chb01': ['03','04','15','16','18','21','26'],
#                 'chb02': ['16'],
#                 'chb05': ['06', '13', '16', '17', '22'],
#                 'chb08': ['02','05','11','13','21']
#                 }

files = [os.path.join(DATASET_PATH, folder, f"{folder}_{fn}.edf") for folder, fn_list in selected_files.items() for fn in fn_list]

for file_path in files:
    print(file_path)

# parameters for epochs generation
epoch_time = 50
overlap = 40

# for each patient
for folder, fn_list in selected_files.items():

    # signals and labels for each patient
    signals = []
    labels = []
    # for each file of specific patitent
    for fn in fn_list:
        file_path = os.path.join(DATASET_PATH, folder, f"{folder}_{fn}.edf")
        # get filename: chbxx_xx.edf
        filename = os.path.split(file_path)[1]
        # remove .edf staying only chbxx_xx
        filename = os.path.splitext(filename)[0]
        
        raw = read_edf_to_raw(file_path)
        
        # start 10 epochs before the crises
        curr_time = seizures_dict[filename][0][0] - 100
        
        # Divide into epochs
        while curr_time <= max(raw.times) - epoch_time:

            # calculate window and get data to epoch array
            start_time = curr_time 
            if start_time < 0.:
                start_time = 0.
            end_time = curr_time + epoch_time

            # seizure flag for y
            aux = []
            if filename in seizures_dict:  # if file has seizure
                for seizure in seizures_dict[filename]:
                    if start_time > seizure[0] and start_time < seizure[1]:
                        aux.append(1)
                    if start_time + epoch_time > seizure[0] and start_time + epoch_time < seizure[1]:
                        aux.append(1)
                    if start_time < seizure[0] and end_time > seizure[0]:
                        aux.append(1)
                    if start_time < seizure[0] and end_time > seizure[1]:
                        aux.append(1)
                    else:
                        aux.append(0)
            else:    
                aux.append(0)
            
            # if the current time is inside at least one seizure interval -> 1; otherwise -> 0
            if 1 in aux:
                labels.append([1])
            else:
                labels.append([0])

            start, stop = raw.time_as_index([start_time, end_time])
            epoch = raw[:, start:stop][0]
            signals.append(epoch)

            # calculate next current time
            curr_time = curr_time + epoch_time - overlap
            # print("Section ", str(len(signals)), "; start: ", start, " ; stop: ", stop, "\r")
        
        # next iteration
        print(f"Finished labeling file {filename}")
    
        # fit and apply the transform
        x_over, y_over = custom_fit_resample(np.array(signals), np.array(labels))

        new_filename = filename + "_segments.npz"
        new_file_path = os.path.join(NEW_DATASET_PATH, new_filename)

        print(calculate_neg_pos(y_over))

        print(f"Saving file {new_filename}...")
        np.savez(new_file_path, x_over, y_over)
        
signals = np.array(signals)
labels = np.array(labels)

/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_03.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_04.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_15.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_16.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_18.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_21.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_26.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb02/chb02_16.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-scalp-eeg-database-1.0.0/chb02/chb02_16+.edf
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit/chb-mit-sca

  raw = mne.io.read_raw_edf(file_path, preload=True, verbose=False)


Finished labeling file chb01_03
(7, 7)
Saving file chb01_03_segments.npz...


  raw = mne.io.read_raw_edf(file_path, preload=True, verbose=False)


Finished labeling file chb01_04


In [None]:
print(calculate_neg_pos(y_over))

(2, 1)


## Counting features

In [None]:
def read_npz(file):
    arrays = np.load(file)
    x = arrays['arr_0']
    y = arrays['arr_1']
    return x, y

In [None]:
train_files = glob.glob(os.path.join(NEW_DATASET_PATH,"*.npz"))
# test_files = glob.glob(os.path.join(NEW_DATASET_PATH,'test',"*.npz"))

negatives = 0
positives = 0
for file in train_files:
    print(file)
    _, y = read_npz(file)

    unique, counts = np.unique(y, return_counts=True)
    dic = dict(zip(unique, counts))
    negatives += dic[0]
    try:
        positives += dic[1]
    except:
        pass

print(positives, negatives)

/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit-segments/2/chb01_segments.npz
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit-segments/2/chb02_segments.npz
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit-segments/2/chb03_segments.npz
/media/guisoares/guisoares-ext-hdd/Datasets/chb-mit-segments/2/chb05_segments.npz
38 87


In [None]:
dic

NameError: name 'dic' is not defined