# MHyEEG: DEAP DATASET

### 0: IMPORTING DEAP DATASET AND UNZIPPING

In [3]:
!pip install mne



In [4]:
from google.colab import drive

# Mount the Google Drive
drive.mount('/content/drive')

# Verify the drive is mounted
!ls /content/drive/My\ Drive


Mounted at /content/drive
'ACSAI_Marks&Recommendations.pdf'  'MasterThesis_Proposal_(A.Borgi).gslides'
 Alessio_Borgi_CV.pdf		    nano-imagenet-30-Original
 Alessio_Borgi_CV_Short.pdf	    NN
'Colab Notebooks'		    open_window.py
 data				    Open_Window.traj
 DEAP.zip			    problem.txt
 HW2				    sapienza-ppt-template_dark.gslides
 identita.pdf			    StyleAligned
 Iscrizione.pdf			    StyleAligned_M2L.gslides
 KAN-GAT_EAI.gslides		   'Tony Starks Concentration Mix.mp3'
 Linz				    XGNNs_RL.gslides


In [5]:
import zipfile
import os

def unzip_file(zip_path, extract_to):
    """
    Unzips a file to a specified directory.

    Args:
    - zip_path (str): The path to the zip file.
    - extract_to (str): The directory where files should be extracted.
    """
    # Ensure the extraction directory exists
    os.makedirs(extract_to, exist_ok=True)

    # Open and extract the zip file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
        print(f"Extracted all files to: {extract_to}")

# Example usage
zip_path = "/content/drive/MyDrive/DEAP.zip"  # Replace with your zip file path
extract_to = "/content/MHyEEG/data/"  # Replace with your target extraction folder
unzip_file(zip_path, extract_to)


Extracted all files to: /content/MHyEEG/data/


In [6]:
import os
import zipfile

def unzip_all_in_directory(directory):
    """
    Unzips all zip files in the given directory and its subdirectories.

    Args:
    - directory (str): The directory to search for zip files.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.zip'):
                zip_path = os.path.join(root, file)
                extract_to = os.path.join(root, file.replace('.zip', ''))
                os.makedirs(extract_to, exist_ok=True)
                print(f"Unzipping: {zip_path} to {extract_to}")
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    zip_ref.extractall(extract_to)
                print(f"Extracted: {zip_path} to {extract_to}")

# Directory containing the DEAP dataset
deap_data_dir = "/content/MHyEEG/data/DEAP"
unzip_all_in_directory(deap_data_dir)


Unzipping: /content/MHyEEG/data/DEAP/metadata_csv.zip to /content/MHyEEG/data/DEAP/metadata_csv
Extracted: /content/MHyEEG/data/DEAP/metadata_csv.zip to /content/MHyEEG/data/DEAP/metadata_csv
Unzipping: /content/MHyEEG/data/DEAP/face_video.zip to /content/MHyEEG/data/DEAP/face_video
Extracted: /content/MHyEEG/data/DEAP/face_video.zip to /content/MHyEEG/data/DEAP/face_video
Unzipping: /content/MHyEEG/data/DEAP/data_original.zip to /content/MHyEEG/data/DEAP/data_original
Extracted: /content/MHyEEG/data/DEAP/data_original.zip to /content/MHyEEG/data/DEAP/data_original
Unzipping: /content/MHyEEG/data/DEAP/data_preprocessed_python.zip to /content/MHyEEG/data/DEAP/data_preprocessed_python
Extracted: /content/MHyEEG/data/DEAP/data_preprocessed_python.zip to /content/MHyEEG/data/DEAP/data_preprocessed_python


### 1: DEAP PRE-PROCESSING

Before using the Dataset, we would like to follow some **pre-processing** steps for all the modalities present in it.

#### 1.1: DEAP (EEG) PRE-PROCESSING

Before we can use the dataset, we need to apply some **preprocessing steps** to the **DEAP (EEG)** dataset. In summary, we will have:
1. **Channel Selection**
2. **Band-Pass Filtering**
3. **Notch Filtering**
4. **Downsampling**
5. **Referencing**
6. **Baseline Correction**
7. **Artifact Removal (ICA)**

##### 1. **Channel Selection**
   - From the 32 EEG electrodes, select the **10 channels most related to emotion recognition**:
     - `F3`, `F4`, `F5`, `F6`, `F7`, `F8`, `T7`, `T8`, `P7`, `P8`.
   - This reduces the dimensionality and focuses on the most relevant channels for emotion analysis.
   - (Note that here we have not electrodes F5 and F6).


##### 2. **Band-Pass Filtering**
   - Apply a **Band-Pass Filter** to retain only frequencies in the range of 1â€“45 Hz:
     \begin{equation}
     x_{filtered}(t) = {F}^{-1}\left({F}(x(t)) \cdot H(f)\right)
     \end{equation}
     where:
     - $F$ is the Fourier Transform,
     - H(f) is a filter function with:
       \begin{equation}
       H(f) =
       \begin{cases}
       1, & 1 \leq f \leq 45 \text{ Hz} \\
       0, & \text{otherwise}
       \end{cases}
       \end{equation}


##### 3. **Notch Filtering**
   - Apply a **Notch Filter** at 50 Hz to remove power line noise:
     \begin{equation}
     x_{notch}(t) = {F}^{-1}\left({F}(x_{filtered}(t)) \cdot (1 - H_{notch}(f))\right)
     \end{equation}
     where where H_{notch}(f) is a narrow bandpass filter centered at 50 Hz.



##### 4. **Downsampling**
   - Downsample the data to 128 Hz to reduce computational complexity:
     \begin{equation}
     x_{downsampled}(t) = x_{notch}(t) \quad \text{(sampled at 128 Hz)}.
     \end{equation}


##### 5. **Referencing**
   - Reference the EEG data to the average of all selected channels:
     \begin{equation}
     x_{referenced}(t, c) = x_{downsampled}(t, c) - \frac{1}{C} \sum_{i=1}^{C} x_{downsampled}(t, i)
     \end{equation}
     where C is the total number of selected channels.



##### 6. **Baseline Correction**
   - Correct each trial relative to the mean value of the preceding 200 ms (baseline period):
     \begin{equation}
     x_{corrected}(t) = x_{referenced}(t) - \frac{1}{200 \, \text{ms}} \int_{t-200}^{t} x_{referenced}(s) \, ds
     \end{equation}
     This removes slow signal drifts and aligns data to the baseline.


##### 7. **Artifact Removal (ICA)**

**What is ICA?**
   - **Independent Component Analysis (ICA)** is a statistical technique used to separate multivariate signals into independent non-Gaussian components.
   - In EEG, ICA is widely used to isolate and remove artifacts like:
     - **Eye blinks**: Strong, low-frequency signals recorded from frontal electrodes.
     - **Muscle movements**: High-frequency noise due to muscle tension or movement.
     - **Electrocardiographic (ECG) signals**: Periodic artifacts caused by heartbeats.

**Steps in ICA:**
   1. **Decomposition**:
      - EEG data X is decomposed into a mixing matrix A and independent source signals S:
        \begin{equation}
        X = A \cdot S
        \end{equation}
        where:
        - X: Original EEG signal (observed signals),
        - A: Mixing matrix (weights),
        - S: Independent source signals (latent components).
   2. **Identification of Artifacts**:
      - Each component in S is inspected (either visually or automatically) for characteristics of artifacts:
        - Eye blinks show large peaks in frontal electrodes.
        - Muscle movements are high-frequency signals across multiple channels.
   3. **Reconstruction of Clean EEG**:
      - The artifact components are set to zero, and the clean EEG signal is reconstructed:
        \begin{equation}
        X_{clean} = A \cdot S_{clean}
        \end{equation}
        where S_{clean} is the source signal with artifact components removed.

**Why Use ICA?**
   - Artifacts overlap with brain signals in time and frequency domains, making simple filtering ineffective.
   - ICA allows selective removal of artifact sources while preserving neural activity.
   - This enhances data quality and improves the performance of downstream machine learning models.


##### Final Output:
   - Save the preprocessed EEG data for each subject in `.fif` format.


In [7]:
import os
import mne
from mne.preprocessing import ICA

def load_raw_data(data_dir, subject_id):
    """
    Load raw DEAP EEG data from the unzipped directory structure.
    Args:
        data_dir (str): Path to the `data_original` directory.
        subject_id (int): Subject ID (1-32).
    Returns:
        raw (mne.io.Raw): Raw EEG data.
    """
    file_path = os.path.join(data_dir, f"s{str(subject_id).zfill(2)}.bdf")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    print(f"Loading data for Subject {subject_id} from {file_path}")
    raw = mne.io.read_raw_bdf(file_path, preload=True)
    return raw

def preprocess_raw_data(raw):
    """
    Preprocess raw EEG data: select channels, set montage, filter, resample, reference, and apply baseline correction.
    Args:
        raw (mne.io.Raw): Raw EEG data.
    Returns:
        preprocessed_raw (mne.io.Raw): Preprocessed EEG data.
    """
    # Updated list of relevant channels based on available data
    relevant_channels = ['F3', 'F4', 'F7', 'F8', 'T7', 'T8', 'P7', 'P8']

    # Pick relevant channels
    raw.pick_channels(relevant_channels, ordered=False)

    # Band-pass filter (1-45 Hz)
    raw.filter(l_freq=1.0, h_freq=45.0)

    # Notch filter at 50 Hz to remove line noise
    raw.notch_filter(freqs=50.0)

    # Downsample to 128 Hz
    raw.resample(128)

    # Reference to the average
    raw.set_eeg_reference("average", projection=True)

    # Baseline correction (adjust relative to mean value of preceding 200 ms)
    raw.apply_function(lambda x: x - x.mean(axis=-1, keepdims=True), picks="eeg")

    return raw


def save_preprocessed_data(raw, output_dir, subject_id):
    """
    Save preprocessed EEG data to FIF file format.
    Args:
        raw (mne.io.Raw): Preprocessed EEG data.
        output_dir (str): Directory to save the preprocessed data.
        subject_id (int): Subject ID (1-32).
    """
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, f"preprocessed_s{str(subject_id).zfill(2)}.raw.fif")
    raw.save(file_path, overwrite=True)
    print(f"Saved preprocessed data for Subject {subject_id} to {file_path}")

if __name__ == "__main__":
    # Define paths
    data_dir = "/content/MHyEEG/data/DEAP/data_original"
    output_dir = "/content/MHyEEG/data/DEAP/data_preprocessed/EEG"

    # Process each subject
    for subject_id in range(1, 33):
        print(f"Processing Subject {subject_id}...")
        try:
            raw = load_raw_data(data_dir, subject_id)
            preprocessed_raw = preprocess_raw_data(raw)
            save_preprocessed_data(preprocessed_raw, output_dir, subject_id)
        except Exception as e:
            print(f"An error occurred for Subject {subject_id}: {e}")


Processing Subject 1...
Loading data for Subject 1 from /content/MHyEEG/data/DEAP/data_original/s01.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s01.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1980927  =      0.000 ...  3868.998 secs...
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contigu

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (3.303 s)

Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower trans

In [8]:
# Used to check the names of the channels.
print(raw.info['ch_names'])


['F3', 'F7', 'T7', 'P7', 'F4', 'F8', 'T8', 'P8']


#### 1:2 DEAP(ECG) PREPROCESSING

1. **Channel Selection**  
   - Extract only the channels corresponding to ECG signals from the raw data.

2. **Band-Pass Filtering**  
   - Apply a band-pass filter to retain frequencies in the range [0.5 Hz, 45 Hz].
   - Formula for filtering:  
     \begin{equation}
     y(t) = \int_{0.5}^{45} x(t) H(f) \, df
     \end{equation}  
     where H(f) is the filter transfer function.

3. **Notch Filtering**  
   - Remove 50 Hz powerline noise using a notch filter.  
     \begin{equation}
     H_{\text{notch}}(f) = 1 - \frac{f^2}{(f^2 + Qf_0f + f_0^2)}
     \end{equation}  
     where \( f_0 = 50 \, \text{Hz} \) is the notch frequency and \( Q \) is the quality factor.

4. **Downsampling**  
   - Reduce the sampling rate to 128 Hz for computational efficiency.  
     \begin{equation}
     x_{\text{downsampled}}(t) = x(t) \text{ at intervals of } \frac{1}{128} \, \text{seconds}.
     \end{equation}

5. **Baseline Correction**  
   - Adjust the signal relative to the mean value of the 200 ms period preceding each trial:  
     \begin{equation}
     x_{\text{corrected}}(t) = x(t) - \mu_{\text{baseline}}, \quad \mu_{\text{baseline}} = \frac{1}{N} \sum_{t=-200\,\text{ms}}^{0} x(t)
     \end{equation}

6. **Saving Preprocessed Data**  
   - Save the preprocessed ECG data as a NumPy array.


In [11]:
print(raw.info['ch_names'])


['EXG5', 'EXG6']


In [12]:
def preprocess_ecg(raw):
    """
    Preprocess ECG data.
    Steps:
    - Select appropriate ECG channels (EXG5, EXG6).
    - Set channel type to 'misc' for ECG channels.
    - Band-pass filter (0.5-45 Hz).
    - Notch filter at 50 Hz.
    - Downsample to 128 Hz.
    - Baseline correction (200 ms before each trial).
    Args:
        raw (mne.io.Raw): Raw data object.
    Returns:
        np.ndarray: Preprocessed ECG data.
    """
    # Select ECG channels
    ecg_channels = ['EXG5', 'EXG6']  # Confirmed ECG channels
    raw.pick_channels(ecg_channels)

    # Set ECG channels as 'misc' type
    raw.set_channel_types({ch: 'misc' for ch in ecg_channels})

    # Band-pass filter (0.5-45 Hz)
    raw.filter(l_freq=0.5, h_freq=45.0)

    # Notch filter at 50 Hz
    raw.notch_filter(freqs=50.0)

    # Downsample to 128 Hz
    raw.resample(128)

    # Baseline correction
    raw.apply_function(lambda x: x - x.mean(axis=-1, keepdims=True), picks="misc")

    return raw.get_data()

def save_preprocessed_ecg(data, output_dir, subject_id):
    """
    Save preprocessed ECG data as NumPy array.
    Args:
        data (np.ndarray): Preprocessed ECG data.
        output_dir (str): Directory to save processed data.
        subject_id (int): Subject ID (1-32).
    """
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, f"subject_{subject_id:02d}_ecg.npy")
    np.save(file_path, data)
    print(f"Saved preprocessed ECG data for Subject {subject_id} to {file_path}")

if __name__ == "__main__":
    data_dir = "/content/MHyEEG/data/DEAP/data_original"
    output_dir = "/content/MHyEEG/data/DEAP/preprocessed/ECG"

    for subject_id in range(1, 33):
        print(f"Processing ECG for Subject {subject_id}...")
        try:
            raw = load_raw_data(data_dir, subject_id)
            preprocessed_ecg = preprocess_ecg(raw)
            save_preprocessed_ecg(preprocessed_ecg, output_dir, subject_id)
        except Exception as e:
            print(f"Error processing ECG for Subject {subject_id}: {e}")


Processing ECG for Subject 1...
Loading data for Subject 1 from /content/MHyEEG/data/DEAP/data_original/s01.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s01.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1980927  =      0.000 ...  3868.998 secs...
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 1: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 2...
Loading data for Subject 2 from /content/MHyEEG/data/DEAP/data_original/s02.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s02.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1895935  =      0.000 ...  3702.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 2: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 3...
Loading data for Subject 3 from /content/MHyEEG/data/DEAP/data_original/s03.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s03.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1989119  =      0.000 ...  3884.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 3: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 4...
Loading data for Subject 4 from /content/MHyEEG/data/DEAP/data_original/s04.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s04.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1690111  =      0.000 ...  3300.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 4: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 5...
Loading data for Subject 5 from /content/MHyEEG/data/DEAP/data_original/s05.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s05.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2004991  =      0.000 ...  3915.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 5: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 6...
Loading data for Subject 6 from /content/MHyEEG/data/DEAP/data_original/s06.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s06.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1836031  =      0.000 ...  3585.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 6: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 7...
Loading data for Subject 7 from /content/MHyEEG/data/DEAP/data_original/s07.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s07.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1828351  =      0.000 ...  3570.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 7: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 8...
Loading data for Subject 8 from /content/MHyEEG/data/DEAP/data_original/s08.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s08.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1800703  =      0.000 ...  3516.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 8: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 9...
Loading data for Subject 9 from /content/MHyEEG/data/DEAP/data_original/s09.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s09.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1928703  =      0.000 ...  3766.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 9: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 10...
Loading data for Subject 10 from /content/MHyEEG/data/DEAP/data_original/s10.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s10.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1798143  =      0.000 ...  3511.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 10: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 11...
Loading data for Subject 11 from /content/MHyEEG/data/DEAP/data_original/s11.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s11.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2130431  =      0.000 ...  4160.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 11: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 12...
Loading data for Subject 12 from /content/MHyEEG/data/DEAP/data_original/s12.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s12.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1790975  =      0.000 ...  3497.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 12: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 13...
Loading data for Subject 13 from /content/MHyEEG/data/DEAP/data_original/s13.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s13.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1900031  =      0.000 ...  3710.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 13: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 14...
Loading data for Subject 14 from /content/MHyEEG/data/DEAP/data_original/s14.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s14.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2015743  =      0.000 ...  3936.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 14: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 15...
Loading data for Subject 15 from /content/MHyEEG/data/DEAP/data_original/s15.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s15.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2084863  =      0.000 ...  4071.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 15: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 16...
Loading data for Subject 16 from /content/MHyEEG/data/DEAP/data_original/s16.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s16.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1885695  =      0.000 ...  3682.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 16: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 17...
Loading data for Subject 17 from /content/MHyEEG/data/DEAP/data_original/s17.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s17.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1764351  =      0.000 ...  3445.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 17: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 18...
Loading data for Subject 18 from /content/MHyEEG/data/DEAP/data_original/s18.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s18.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1857535  =      0.000 ...  3627.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 18: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 19...
Loading data for Subject 19 from /content/MHyEEG/data/DEAP/data_original/s19.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s19.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1917951  =      0.000 ...  3745.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 19: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 20...
Loading data for Subject 20 from /content/MHyEEG/data/DEAP/data_original/s20.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s20.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1796607  =      0.000 ...  3508.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 20: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 21...
Loading data for Subject 21 from /content/MHyEEG/data/DEAP/data_original/s21.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s21.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1818111  =      0.000 ...  3550.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 21: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 22...
Loading data for Subject 22 from /content/MHyEEG/data/DEAP/data_original/s22.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s22.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1904127  =      0.000 ...  3718.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 22: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 23...
Loading data for Subject 23 from /content/MHyEEG/data/DEAP/data_original/s23.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s23.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1843711  =      0.000 ...  3600.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 23: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 24...
Loading data for Subject 24 from /content/MHyEEG/data/DEAP/data_original/s24.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s24.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2412543  =      0.000 ...  4711.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 24: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 25...
Loading data for Subject 25 from /content/MHyEEG/data/DEAP/data_original/s25.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s25.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2109439  =      0.000 ...  4119.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 25: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 26...
Loading data for Subject 26 from /content/MHyEEG/data/DEAP/data_original/s26.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s26.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2003455  =      0.000 ...  3912.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 26: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 27...
Loading data for Subject 27 from /content/MHyEEG/data/DEAP/data_original/s27.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s27.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1944575  =      0.000 ...  3797.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 27: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 28...
Loading data for Subject 28 from /content/MHyEEG/data/DEAP/data_original/s28.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s28.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1805823  =      0.000 ...  3526.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 28: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 29...
Loading data for Subject 29 from /content/MHyEEG/data/DEAP/data_original/s29.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s29.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2036735  =      0.000 ...  3977.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 29: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 30...
Loading data for Subject 30 from /content/MHyEEG/data/DEAP/data_original/s30.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s30.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1876479  =      0.000 ...  3664.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 30: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 31...
Loading data for Subject 31 from /content/MHyEEG/data/DEAP/data_original/s31.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s31.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1892351  =      0.000 ...  3695.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 31: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly
Processing ECG for Subject 32...
Loading data for Subject 32 from /content/MHyEEG/data/DEAP/data_original/s32.bdf
Extracting EDF parameters from /content/MHyEEG/data/DEAP/data_original/s32.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1804287  =      0.000 ...  3523.998 secs...


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)
  raw = mne.io.read_raw_bdf(file_path, preload=True)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Error processing ECG for Subject 32: picks (NoneNone, treated as "data_or_ica") yielded no channels, consider passing picks explicitly


  raw.set_channel_types({ch: 'misc' for ch in ecg_channels})


#### 1.3: DEAP(GSR) PRE-PROCESSING

In [None]:
def preprocess_gsr(raw):
    """
    Preprocess GSR data.
    Steps:
    - Low-pass filter at 60 Hz.
    - Notch filter at 50 Hz.
    - Downsample to 128 Hz.
    - Baseline correction (200 ms before each trial).
    Args:
        raw (mne.io.Raw): Raw data object.
    Returns:
        np.ndarray: Preprocessed GSR data.
    """
    # Select GSR channels
    gsr_channels = [ch for ch in raw.info['ch_names'] if 'GSR' in ch]
    raw.pick_channels(gsr_channels)

    # Low-pass filter at 60 Hz
    raw.filter(l_freq=None, h_freq=60.0)

    # Notch filter at 50 Hz
    raw.notch_filter(freqs=50.0)

    # Downsample to 128 Hz
    raw.resample(128)

    # Baseline correction (adjust to the mean of the preceding 200 ms)
    raw.apply_function(lambda x: x - x.mean(axis=-1, keepdims=True), picks="misc")

    return raw.get_data()

def save_preprocessed_gsr(data, output_dir, subject_id):
    """
    Save preprocessed GSR data as NumPy array.
    Args:
        data (np.ndarray): Preprocessed GSR data.
        output_dir (str): Directory to save processed data.
        subject_id (int): Subject ID (1-32).
    """
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, f"subject_{subject_id:02d}_gsr.npy")
    np.save(file_path, data)
    print(f"Saved preprocessed GSR data for Subject {subject_id} to {file_path}")

if __name__ == "__main__":
    data_dir = "/content/MHyEEG/data/DEAP/data_original"
    output_dir = "/content/MHyEEG/data/DEAP/data_preprocessed/GSR"

    for subject_id in range(1, 33):
        print(f"Processing GSR for Subject {subject_id}...")
        try:
            raw = load_raw_data(data_dir, subject_id)
            preprocessed_gsr = preprocess_gsr(raw)
            save_preprocessed_gsr(preprocessed_gsr, output_dir, subject_id)
        except Exception as e:
            print(f"Error processing GSR for Subject {subject_id}: {e}")


#### 1.4: DEAP(EOG) PRE-PROCESSING

In [None]:
def preprocess_eog(raw):
    """
    Preprocess EOG (Eye Movement) data.
    Steps:
    - Average the left and right eye measurements.
    - Retain -1 values (indicating blinks or rapid movements).
    - Downsample to 128 Hz.
    Args:
        raw (mne.io.Raw): Raw data object.
    Returns:
        np.ndarray: Preprocessed EOG data.
    """
    # Select EOG channels (assuming channel names include 'EXG')
    eog_channels = ['EXG1', 'EXG2']  # Adjust these based on your dataset's naming
    raw.pick_channels(eog_channels)

    # Average the left (EXG1) and right (EXG2) eye channels
    eog_data = raw.get_data()
    averaged_eog = eog_data.mean(axis=0, keepdims=True)

    # Downsample to 128 Hz
    raw.resample(128)

    # Retain -1 values to indicate blinks or rapid movements
    averaged_eog[averaged_eog == -1] = -1

    return averaged_eog

def save_preprocessed_eog(data, output_dir, subject_id):
    """
    Save preprocessed EOG data as NumPy array.
    Args:
        data (np.ndarray): Preprocessed EOG data.
        output_dir (str): Directory to save processed data.
        subject_id (int): Subject ID (1-32).
    """
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, f"subject_{subject_id:02d}_eog.npy")
    np.save(file_path, data)
    print(f"Saved preprocessed EOG data for Subject {subject_id} to {file_path}")

if __name__ == "__main__":
    data_dir = "/content/MHyEEG/data/DEAP/data_original"
    output_dir = "/content/MHyEEG/data/DEAP/preprocessed_eog"

    for subject_id in range(1, 33):
        print(f"Processing EOG for Subject {subject_id}...")
        try:
            raw = load_raw_data(data_dir, subject_id)
            preprocessed_eog = preprocess_eog(raw)
            save_preprocessed_eog(preprocessed_eog, output_dir, subject_id)
        except Exception as e:
            print(f"Error processing EOG for Subject {subject_id}: {e}")
