### Load Raw EEG Dataset CHB-MIT

In [1]:
# Import the packages 
import mne
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn

In [2]:
#ِDataset patients Info based on paper "Shoeb (2010)"
patient_info = {
    'chb01': {'age': 11, 'gender': 'F'},
    'chb02': {'age': 11, 'gender': 'M'},
    'chb03': {'age': 14, 'gender': 'F'},
    'chb04': {'age': 22, 'gender': 'M'},
    'chb05': {'age': 7,  'gender': 'F'},
    'chb06': {'age': 1.5,'gender': 'F'},
    'chb07': {'age': 14.5,'gender': 'F'},
    'chb08': {'age': 3.5,'gender': 'M'},
    'chb09': {'age': 10, 'gender': 'F'},
    'chb10': {'age': 3,  'gender': 'M'},
    'chb11': {'age': 12, 'gender': 'F'},
    'chb12': {'age': 2,  'gender': 'F'},
    'chb13': {'age': 3,  'gender': 'F'},
    'chb14': {'age': 9,  'gender': 'M'},
    'chb15': {'age': 16, 'gender': 'M'},
    'chb16': {'age': 7,  'gender': 'F'},
    'chb17': {'age': 12, 'gender': 'F'},
    'chb18': {'age': 18, 'gender': 'F'},
    'chb19': {'age': 19, 'gender': 'F'},
    'chb20': {'age': 6,  'gender': 'F'},
    'chb21': {'age': 13, 'gender': 'F'},
    'chb22': {'age': 9,  'gender': 'F'},
    'chb23': {'age': 6,  'gender': 'F'}
}

#### Try to read annotations from chbXX-summary.txt

In [3]:

def Read_Patients_Annotations(summary_path, patient):
    seizure_annotations = {}
    start_sec = 0 
    end_sec = 0
    #Read Summary file
    try:
        if not os.path.exists(summary_path):
            raise Exception(f"Summary not found for {patient}")

        with open(summary_path, 'r') as f:
            lines = f.readlines()

        i = 0
        while i < len(lines):
            line = lines[i].strip()
            if line.startswith("File Name:"):
                file_name = line.split(":", 1)[1].strip()
                #file_name = file_name_full.replace(".edf", "")  # Remove .edf from key
                i += 1
                record_start_time = lines[i].strip().split(":", 1)[1].strip()
                i += 1
                record_end_time = lines[i].strip().split(":", 1)[1].strip()
                i += 1
                num_seizures = int(lines[i].strip().split(":", 1)[1].strip())
                seizures = []

                for _ in range(num_seizures):
                    i += 1
                    start_sec_str = lines[i].strip().split(":", 1)[1].strip()
                    start_sec = int(start_sec_str.replace(" seconds", ""))

                    i += 1
                    end_sec_str = lines[i].strip().split(":", 1)[1].strip()
                    end_sec = int(end_sec_str.replace(" seconds", ""))

                    seizures.append({"start": start_sec, "end": end_sec})

                # Save in dictionary
                seizure_annotations[file_name] = {
                    "record_start_time": record_start_time,
                    "record_end_time": record_end_time,
                    "num_seizures": num_seizures,
                    "seizures": seizures
                }
            i += 1

        return seizure_annotations
    
    except Exception as ex:
        print(f"Error reading summary file of patient{patient}: {ex}")

#summary_file = "D:\Master/Dissertation (Thesis)/Epileptic Seizure/Dataset/CHB-MIT/chb01/chb01-summary.txt"
#test = Read_Patients_Annotations(summary_file,'chb01')
#print(test)

In [4]:
# Method for load all data of selected_patients contains: edf_files, annotations, age, sex

def Load_All_Data(base_path, selected_patients):
    all_data = []
    all_edf_data = []

    # If there is no specific patints, read all patients data
    if selected_patients is None:
        selected_patients = sorted([d for d in os.listdir(base_path) if d.startswith('chb') and os.path.isdir(os.path.join(base_path, d))])

    for patient in selected_patients:
        patient_folder = os.path.join(base_path, patient)
        if not os.path.exists(patient_folder):
            print(f"Warning! Folder not found for patient {patient}")
            continue
        
        summary_file = os.path.join(patient_folder, f"{patient}-summary.txt")
        seizure_annotations = Read_Patients_Annotations(summary_file, patient)

        demographic_data = patient_info.get(patient, {'age': None, 'gender': None})

        #Load edf files and add to dictionary
        edf_files = sorted([f for f in os.listdir(patient_folder) if f.endswith('.edf')])

        for file in edf_files:
            file_path = os.path.join(patient_folder, file)
            annotations = seizure_annotations.get(file, {})    

            try:
                print(f"Loading: {file_path} ...")
                raw = mne.io.read_raw_edf(file_path, preload = True, verbose = False)

                if seizure_annotations and file in seizure_annotations:
                    all_edf_data.append({'fileName': file, 
                                         'raw': raw,
                                        'record_start_time': annotations.get("record_start_time", None),
                                        'record_end_time': annotations.get("record_end_time", None),
                                        'num_seizures': annotations.get("num_seizures", 0),
                                        'seizures':annotations.get("seizures", []) })
                else:
                    all_edf_data.append({'fileName': file, 
                                         'raw': raw})
            
                    
            except Exception as e:
                print(f"Error loading {file_path}:{e}")
            

            all_data.append({'patient': patient, 
                             'age': demographic_data['age'],
                             'gender': demographic_data['gender'],
                             'rawEEGdata': all_edf_data
                              }) 
 

    return all_data



In [14]:
# Selected patients based on paper
#selected_patients = ['chb01', 'chb02', 'chb03', 'chb05', 'chb07', 'chb08', 'chb10', 'chb11', 'chb13', 'chb14', 'chb18']
selected_patients = ['chb01']


base_path = 'D:\\Master\\Dissertation (Thesis)\\Epileptic Seizure\\Dataset\\CHB-MIT'
all_edf_data = Load_All_Data(base_path, selected_patients)

print(all_edf_data)

Loading: D:\Master\Dissertation (Thesis)\Epileptic Seizure\Dataset\CHB-MIT\chb01\chb01_01.edf ...
Loading: D:\Master\Dissertation (Thesis)\Epileptic Seizure\Dataset\CHB-MIT\chb01\chb01_02.edf ...


  raw = mne.io.read_raw_edf(file_path, preload = True, verbose = False)
  raw = mne.io.read_raw_edf(file_path, preload = True, verbose = False)


Loading: D:\Master\Dissertation (Thesis)\Epileptic Seizure\Dataset\CHB-MIT\chb01\chb01_03.edf ...
Loading: D:\Master\Dissertation (Thesis)\Epileptic Seizure\Dataset\CHB-MIT\chb01\chb01_04.edf ...


  raw = mne.io.read_raw_edf(file_path, preload = True, verbose = False)
  raw = mne.io.read_raw_edf(file_path, preload = True, verbose = False)


[{'patient': 'chb01', 'age': 11, 'gender': 'F', 'rawEEGdata': [{'fileName': 'chb01_01.edf', 'raw': <RawEDF | chb01_01.edf, 23 x 921600 (3600.0 s), ~161.7 MiB, data loaded>, 'record_start_time': '11:42:54', 'record_end_time': '12:42:54', 'num_seizures': 0, 'seizures': []}, {'fileName': 'chb01_02.edf', 'raw': <RawEDF | chb01_02.edf, 23 x 921600 (3600.0 s), ~161.7 MiB, data loaded>, 'record_start_time': '12:42:57', 'record_end_time': '13:42:57', 'num_seizures': 0, 'seizures': []}, {'fileName': 'chb01_03.edf', 'raw': <RawEDF | chb01_03.edf, 23 x 921600 (3600.0 s), ~161.7 MiB, data loaded>, 'record_start_time': '13:43:04', 'record_end_time': '14:43:04', 'num_seizures': 1, 'seizures': [{'start': 2996, 'end': 3036}]}, {'fileName': 'chb01_04.edf', 'raw': <RawEDF | chb01_04.edf, 23 x 921600 (3600.0 s), ~161.7 MiB, data loaded>, 'record_start_time': '14:43:12', 'record_end_time': '15:43:12', 'num_seizures': 1, 'seizures': [{'start': 1467, 'end': 1494}]}]}, {'patient': 'chb01', 'age': 11, 'gender

### Preprocessing raw data

In [None]:
def Preprocess_Patient_Records(patient_records):
    for file_data in patient_records['rawEEGdata']:
        raw = file_data['raw']

        #Remove 0 HZ DC component and low noise
        raw.filter(l_freq = 0.5, h_freq = None, verbose = False)

        #Remove electricity and harmonic noises:  57–63 + 117-123
        raw.notch_filter(freqs=np.arange(60, 124, 60), notch_widths=6, verbose=False)

                # ----- Z-score Normalization for each channel-----
        data = raw.get_data()
        mean = np.mean(data, axis=1, keepdims=True)
        std = np.std(data, axis=1, keepdims=True)
        normalized_data = (data - mean) / std
        raw._data = normalized_data

        file_data['raw'] = raw

    return patient_records



### Raw EEG Segmentation

In [None]:
'''
#Based on summary.txt file of this raw EEG
seizure_start = 2996
seizure_end = 3036
interictal_start = 600 # 10 min after record start
segment_duration = 10  # seconds
preictal_duration = 1800 # 30 * 60 =  30 min
sfreq = int(raw.info['sfreq'])
'''
# segment_length and overlap are based on paper
def Extract_Segments_From_Records(patient_records, segment_length=8, overlap=4):
    segments = []
    sfreq = int(patient_records['rawEEGdata'][0]['raw'].info['sfreq'])  # Assumption: All files have same frequncy sampling

    seg_samples = segment_length * sfreq
    step_samples = overlap * sfreq

    for file_data in patient_records['rawEEGdata']:
        raw = file_data['raw']
        data = raw.get_data()  # shape: (channels, samples)
        filename = file_data['fileName']
        seizures = file_data.get('seizures', [])
        total_samples = data.shape[1]
        total_duration_sec = total_samples / sfreq

        # ----- Extract preictal segments-----
        for sz in seizures:
            seizure_start = sz['start']
            pre_start = seizure_start - 2100  # 35 min before seizure
            pre_end = seizure_start - 300     # 5  min before seizure

            if pre_start < 0:
                continue  # Skip incorrect windows

            start_idx = int(pre_start * sfreq)
            end_idx = int(pre_end * sfreq)

            for i in range(start_idx, end_idx - seg_samples + 1, step_samples):
                segment = data[:, i:i + seg_samples]
                segments.append({
                    'data': segment,
                    'label': 1,
                    'patient': patient_records['patient'],
                    'filename': filename,
                    'start_time': i / sfreq,
                    'type': 'preictal'
                })

        # ----- calculate interictal durations-----
        interictal_ranges = []
        last_end = 0

        for sz in seizures:
            if sz['start'] - 14400 > last_end:
                interictal_ranges.append((last_end, sz['start'] - 14400))
            last_end = max(last_end, sz['end'] + 14400)

        if last_end < total_duration_sec:
            interictal_ranges.append((last_end, total_duration_sec))

        # ----- Extract interictal segments-----
        for irange in interictal_ranges:
            istart_idx = int(irange[0] * sfreq)
            iend_idx = int(irange[1] * sfreq)

            for i in range(istart_idx, iend_idx - seg_samples + 1, step_samples):
                segment = data[:, i:i + seg_samples]
                segments.append({
                    'data': segment,
                    'label': 0,
                    'patient': patient_records['patient'],
                    'filename': filename,
                    'start_time': i / sfreq,
                    'type': 'interictal'
                })

    return segments

In [None]:
all_segments = []
for record in all_edf_data:
    segs = Extract_Segments_From_Records(record)
    all_segments.extend(segs)

print(f"Total segments: {len(all_segments)}")


✅ Total segments: 8988


### Create Spectogram