In [1]:
import os
import mne
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

In [14]:
# Set directory of raw data (use data in sleep-cassette folder) and pre-processed data
rawDir = "./data/sleep-edf-database-expanded-1.0.0/sleep-cassette"
dest_path = "./data/preprocessed/all_data.pkl"

# Match each PSG file with its hypnogram (sleep stage annotation) file.
# Only the 2nd-night recording of each patient is used; it is identified by
# the character '2' at index 5 of the file name.
#
# os.listdir() returns entries in arbitrary order, so pairing must not rely on
# a PSG file being listed immediately before its hypnogram. The listing is
# sorted (deterministic output order) and every hypnogram is paired with the
# PSG file sharing the same 6-character recording prefix.
night2_files = sorted(
    file for file in os.listdir(rawDir)
    if len(file) > 5 and file[5] == '2'
)
psg_by_recording = {
    file[:6]: file for file in night2_files if file.endswith("PSG.edf")
}
# List of (psg_filename, hypnogram_filename) tuples; hypnograms without a
# matching PSG file are skipped.
psg_hyp = [
    (psg_by_recording[file[:6]], file)
    for file in night2_files
    if file.endswith("Hypnogram.edf") and file[:6] in psg_by_recording
]

# Accumulators: `all_epochs` is filled by the per-recording loop below.
rows = []
all_epochs = []

# For debugging with 1 patient, uncomment:
# psg_hyp = [('SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf')]
Npatients = len(psg_hyp)

# Channel name -> channel type, handed to raw.set_channel_types().
mapping = {
    'EOG horizontal': 'eog',
    'Resp oro-nasal': 'resp',
    'EMG submental': 'emg',
    'Temp rectal': 'misc',
    'Event marker': 'misc',
}

# Annotation description -> integer label used when extracting events.
# Stages 3 and 4 are deliberately given the same label (3).
annotation_desc_2_event_id = {
    'Sleep stage W': 0,
    'Sleep stage 1': 1,
    'Sleep stage 2': 2,
    'Sleep stage 3': 3,
    'Sleep stage 4': 3,
    'Sleep stage R': 4,
}

# Label names after stages 3 and 4 have been merged.
event_id = {
    'Sleep stage W': 0,
    'Sleep stage 1': 1,
    'Sleep stage 2': 2,
    'Sleep stage 3/4': 3,
    'Sleep stage R': 4,
}

# Epoch length in seconds, based on specs in referenced paper in proposal.
epoch_duration = 60.0

# For every (PSG, hypnogram) pair: load the recording, attach the sleep-stage
# annotations, cut the 'EEG Fpz-Cz' channel into fixed-length epochs and append
# one (patient_id, stage_label, samples) tuple per epoch to `all_epochs`.
for i, (psgFile, hypFile) in enumerate(psg_hyp):
    pID = psgFile[3:5]
    print(f"Patient {i:3d}/{Npatients}")

    # Load the raw signal and its sleep-stage annotations.
    raw = mne.io.read_raw_edf(os.path.join(rawDir, psgFile))
    annot = mne.read_annotations(os.path.join(rawDir, hypFile))
    raw.set_annotations(annot, emit_warning=False)
    raw.set_channel_types(mapping)

    # Double check that the sampling frequency is 100 Hz: both the header
    # value and the time stamp of sample 100 (which must be exactly 1 s).
    fs = int(raw.info['sfreq'])
    assert fs == 100, f"{psgFile}: unexpected sampling frequency {raw.info['sfreq']}"
    assert raw['EEG Fpz-Cz'][1][100] == 1, f"{psgFile}: sample 100 is not at t = 1 s"

    # Remove head and tail of the recording: keep from 30 minutes before the
    # onset of the second annotation to 30 minutes after the onset of the
    # second-to-last annotation.
    annot.crop(annot[1]['onset'] - 30 * 60, annot[-2]['onset'] + 30 * 60)
    raw.set_annotations(annot, emit_warning=False)

    # Split into epochs of `epoch_duration` seconds.
    events, _ = mne.events_from_annotations(
        raw, event_id=annotation_desc_2_event_id, chunk_duration=epoch_duration)
    tmax = epoch_duration - 1. / raw.info['sfreq']  # tmax is inclusive
    try:
        epochs = mne.Epochs(raw=raw, events=events, tmin=0., tmax=tmax,
                            baseline=None, picks=['EEG Fpz-Cz'])
    except Exception as ex:
        # Skip this recording. Falling through would reuse the `epochs`
        # object of the previous recording (appending its data again under
        # this patient's ID) or raise NameError on the first iteration.
        print(f"Skipping {psgFile}: {ex}")
        continue

    epochs.drop_bad()

    # get_data() returns (n_epochs, n_channels, n_times); exactly one channel
    # was picked. Index the channel axis explicitly instead of np.squeeze(),
    # which would also drop the epoch axis when only one epoch is present.
    epochs_data = epochs.get_data(picks="EEG Fpz-Cz")[:, 0, :]
    epochs_y = epochs.events[:, -1].flatten()

    # Push all epochs of this recording into the global list.
    patient_id = int(pID)
    all_epochs.extend(
        (patient_id, label, samples.flatten())
        for label, samples in zip(epochs_y, epochs_data)
    )

Patient   0/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4002E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
516 matching events found
No baseline correction applied
0 projection items activated
Loading data for 516 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 516 events and 6000 original time points ...
Patient   1/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4012E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4',

Not setting metadata
Not setting metadata
436 matching events found
No baseline correction applied
0 projection items activated
Loading data for 436 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 436 events and 6000 original time points ...
Patient  13/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4142E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
436 matching events found
No baseline correction applied
0 projection items activated
Loading data for 436 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 436 events and 6000 original time points ...
Patient  14/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9

EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
462 matching events found
No baseline correction applied
0 projection items activated
Loading data for 462 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 462 events and 6000 original time points ...
Patient  26/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4272F0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
493 matching events found
No baseline correction applied
0 projection items activated
L

Patient  38/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4402E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
486 matching events found
No baseline correction applied
0 projection items activated
Loading data for 486 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 486 events and 6000 original time points ...
Patient  39/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4412E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4',

481 matching events found
No baseline correction applied
0 projection items activated
Loading data for 481 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 481 events and 6000 original time points ...
Patient  51/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4532E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
490 matching events found
No baseline correction applied
0 projection items activated
Loading data for 490 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 490 events and 6000 original time points ...
Patient  52/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sle

Not setting metadata
Not setting metadata
825 matching events found
No baseline correction applied
0 projection items activated
Loading data for 825 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 825 events and 6000 original time points ...
Patient  64/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9417-project-21t2\data\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4662E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
Not setting metadata
940 matching events found
No baseline correction applied
0 projection items activated
Loading data for 940 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 940 events and 6000 original time points ...
Patient  65/77
Extracting EDF parameters from C:\Users\acidi\Coding\comp9

Not setting metadata
Not setting metadata
634 matching events found
No baseline correction applied
0 projection items activated
Loading data for 634 events and 6000 original time points ...
0 bad epochs dropped
Loading data for 634 events and 6000 original time points ...


In [15]:
# Persist the list of (patient_id, stage_label, samples) tuples to `dest_path`.
# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError; the guard covers a dest_path with no directory component.
dest_dir = os.path.dirname(dest_path)
if dest_dir:
    os.makedirs(dest_dir, exist_ok=True)

# Write-only binary mode is sufficient: the file is never read back here.
with open(dest_path, "wb") as fp:
    pickle.dump(all_epochs, fp)