In [21]:
# 02_preprocessing.ipynb

# Author: Eryk Urbański
# Date: April 2025
# Description: Load raw EEG data, standardize each channel, add artificial Gaussian noise, and save the result.

# Import packages

In [22]:
import os
import pickle

import numpy as np
import pandas as pd
from mne import make_ad_hoc_cov, set_eeg_reference
from mne.simulation import add_noise
from sklearn.preprocessing import StandardScaler

# Load datasets

### Load EEGBCI data

In [23]:
# Directory holding the pickled datasets. The EEG_DATA_DIR environment variable
# overrides the machine-specific default so the notebook can run on other
# machines. The value stays a plain string because later cells build file
# names with `data_path + "/..."`.
data_path = os.environ.get(
    "EEG_DATA_DIR",
    'C:/Users/blaze/Desktop/opsi_eeg_denoising/eeg-denoising/data',
)

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open pickles that were produced by this project.
with open(data_path + "/eeg_dataset_raw.pkl", "rb") as f:
    eeg_datasets_dict = pickle.load(f)

# Collection of recordings; the first entry is the one processed below.
eeg_datasets = eeg_datasets_dict["datasets_mne_format"]

# original_bci = eeg_datasets_dict["datasets_mne_format"][0]
# original_bci_channel_names = original_bci.ch_names
# original_bci_data, original_bci_times = original_bci[:]
# original_bci_data_df = pd.DataFrame(original_bci_data.T, columns=original_bci_channel_names)
# original_bci_data_df.head(5)

# Scale Signal

In [24]:
# Take the recording from the freshly loaded dictionary rather than from
# `eeg_datasets`: that name is rebound to a plain dict at the end of this cell,
# so indexing it with [0] would fail if the cell were run a second time.
first_recording = eeg_datasets_dict["datasets_mne_format"][0]

# presumably an MNE Raw object whose [:] yields (data, times) with data shaped
# (n_channels, n_samples) — TODO confirm
data, times = first_recording[:]

# StandardScaler standardizes column-wise, so transpose to put one channel per
# column, scale, then transpose back to the original orientation.
scaler = StandardScaler()
scaled_eeg_data = scaler.fit_transform(data.T).T

channel_names = first_recording.ch_names

# From here on `eeg_datasets` is the scaled-signal dictionary that gets saved.
eeg_datasets = {
    "channel_names": channel_names,
    "original_scaled_eeg": scaled_eeg_data
}

# Add artificial noise

In [25]:
def add_gaussian_noise(signal, noise_std=0.5, rng=None):
    """Return ``signal`` with zero-mean Gaussian noise added element-wise.

    Parameters
    ----------
    signal : array-like
        Values to perturb; the noise is drawn with the same shape.
    noise_std : float, default 0.5
        Standard deviation of the noise.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Random source to draw from. When omitted, NumPy's global generator is
        used (the original behaviour), so ``np.random.seed`` still controls
        the result.

    Returns
    -------
    The sum ``signal + noise``; the input is not modified.
    """
    # Both the global `np.random` module and Generator/RandomState objects
    # expose normal(loc, scale, size), so one call covers every case.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, size=np.shape(signal))
    return signal + noise

In [26]:
# Fixed seed so the synthetic noise is identical on every run of the notebook.
NOISE_SEED = 42
# Standard deviation of the added noise, applied to the scaled signals.
NOISE_STD = 0.5

np.random.seed(NOISE_SEED)

# One noisy copy per row of the scaled data, kept as a list of 1-D arrays.
artificial_noise_signals = [
    add_gaussian_noise(signal, noise_std=NOISE_STD) for signal in scaled_eeg_data
]

datasets_artificial_noise = {
    "channel_names": channel_names,
    "noisy_scaled_eeg": artificial_noise_signals
}

In [27]:
# noisy_data, _ = noisy[:]
# noisy_data.shape
# noisy_data_df = pd.DataFrame(noisy_data.T, columns=noisy.ch_names)
# noisy_data_df.head(5)

In [28]:
# Second approach
# sigma = 0.05 # Noise standard deviation
# noisy_signal = signal + sigma * np.random.randn(signal.size)
# or
# noisy_data = data + np.random.normal(0, 0.1, size=data.shape)

# Save data to pkl

In [29]:
# NOTE(review): this writes to the same file the notebook loads its raw input
# from, replacing the raw recordings with the processed dictionaries. A later
# fresh run would then load a dict where a sequence of recordings is expected.
# Consider a separate output file name — confirm with downstream notebooks.
output_file = data_path + "/eeg_dataset_raw.pkl"

# Scaled signals and their noisy counterparts, stored under the existing keys.
payload = {
    "datasets_mne_format": eeg_datasets,
    "datasets_artnoise_mne_format": datasets_artificial_noise,
}

with open(output_file, "wb") as out:
    pickle.dump(payload, out)