In [3]:
# 02_preprocessing.ipynb

# Author: Eryk Urbański
# Date: April 2025
# Description: Load raw EEG data, add artificial noise.

# Import packages

In [1]:
import pickle
import numpy as np
import pandas as pd

# Load datasets

### Load EEGBCI data

In [2]:
with open("../data/eeg_datasets.pkl", "rb") as f:
    eeg_datasets_dict = pickle.load(f)
    
datasets = eeg_datasets_dict["datasets"]

eegbci_dataset = next((d for d in datasets if d["name"] == "eegbci"), None)
if eegbci_dataset:
    print("Found:", eegbci_dataset)
else:
    print("Not found.")

original_eegbci_data = eegbci_dataset["data"]
original_eegbci_times = eegbci_dataset["times"]
original_eegbci_channel_names = eegbci_dataset["ch_names"]
original_eegbci_data_df = pd.DataFrame(original_eegbci_data.T, columns=original_eegbci_channel_names)
original_eegbci_data_df.head(5)

Found: {'name': 'eegbci', 'data': array([[-5.0e-06, -1.2e-05, -7.7e-05, ...,  0.0e+00,  0.0e+00,  0.0e+00],
       [ 2.0e-06, -2.4e-05, -7.8e-05, ...,  0.0e+00,  0.0e+00,  0.0e+00],
       [ 3.7e-05,  1.0e-06, -5.9e-05, ...,  0.0e+00,  0.0e+00,  0.0e+00],
       ...,
       [-4.8e-05, -4.2e-05, -4.2e-05, ...,  0.0e+00,  0.0e+00,  0.0e+00],
       [-3.9e-05, -3.1e-05, -2.9e-05, ...,  0.0e+00,  0.0e+00,  0.0e+00],
       [-3.9e-05, -3.4e-05, -2.7e-05, ...,  0.0e+00,  0.0e+00,  0.0e+00]]), 'times': array([0.0000000e+00, 6.2500000e-03, 1.2500000e-02, ..., 1.2498125e+02,
       1.2498750e+02, 1.2499375e+02]), 'ch_names': ['FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6', 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6', 'Fp1', 'Fpz', 'Fp2', 'AF7', 'AF3', 'AFz', 'AF4', 'AF8', 'F7', 'F5', 'F3', 'F1', 'Fz', 'F2', 'F4', 'F6', 'F8', 'FT7', 'FT8', 'T7', 'T8', 'T9', 'T10', 'TP7', 'TP8', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', 'P4', 'P6', 'P8', 'PO7', 'PO3', 'POz

Unnamed: 0,FC5,FC3,FC1,FCz,FC2,FC4,FC6,C5,C3,C1,...,P8,PO7,PO3,POz,PO4,PO8,O1,Oz,O2,Iz
0,-5e-06,2e-06,3.7e-05,3.9e-05,3e-05,2.6e-05,-1.6e-05,-1.4e-05,4e-06,1.8e-05,...,-2.1e-05,-8e-06,-3.5e-05,-4.5e-05,-6.6e-05,-3.9e-05,-3.3e-05,-4.8e-05,-3.9e-05,-3.9e-05
1,-1.2e-05,-2.4e-05,1e-06,-2e-06,-1.5e-05,-2.2e-05,-5.5e-05,-3.6e-05,-2.7e-05,-2.5e-05,...,-5e-05,-4e-05,-6.8e-05,-6.5e-05,-8.4e-05,-5.2e-05,-2.1e-05,-4.2e-05,-3.1e-05,-3.4e-05
2,-7.7e-05,-7.8e-05,-5.9e-05,-6.5e-05,-6.3e-05,-5.5e-05,-6.7e-05,-8.8e-05,-7.1e-05,-6.5e-05,...,-1.7e-05,-2.2e-05,-5e-05,-3.5e-05,-4.8e-05,-1.8e-05,-2e-05,-4.2e-05,-2.9e-05,-2.7e-05
3,-6.6e-05,-6.7e-05,-5e-05,-6.5e-05,-6e-05,-5.5e-05,-6.8e-05,-6.2e-05,-5.3e-05,-5.4e-05,...,-3.9e-05,-6e-05,-7.8e-05,-6.4e-05,-6.8e-05,-4.1e-05,-4.4e-05,-6.2e-05,-3.4e-05,-4.3e-05
4,-4.5e-05,-5.5e-05,-3.3e-05,-5.3e-05,-5.4e-05,-6.3e-05,-8.3e-05,-5.2e-05,-5e-05,-5.3e-05,...,-4.4e-05,-5.5e-05,-7e-05,-5.4e-05,-6.3e-05,-3.7e-05,-6e-05,-7e-05,-3.4e-05,-4.5e-05


# Add artificial noise

In [5]:
datasets_artnoise = []

In [6]:
noise_ratio = 0.8 # to be adjusted
for dataset in datasets:
    signal_std = np.std(dataset["data"])
    noise_std = signal_std * noise_ratio

    noisy_data = dataset["data"] + np.random.normal(0.0, noise_std, size=dataset["data"].shape)
    noisy_dataset_dict = {
        "name": dataset["name"],
        "noisy_data": noisy_data
    }
    datasets_artnoise.append(noisy_dataset_dict)

# Save data to pkl

In [12]:
with open("../data/eeg_datasets.pkl", "wb") as f:
    pickle.dump({"datasets": datasets, "datasets_artnoise": datasets_artnoise}, f)