In [13]:
# 02_preprocessing.ipynb

# Author: Eryk Urbański
# Date: April 2025
# Description: Load raw EEG data, standardize each channel, add artificial noise, and save the result.

# Import packages

In [14]:
import pickle
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
from mne import make_ad_hoc_cov, set_eeg_reference
from mne.simulation import add_noise
import scipy.signal as signal_lib

# Load datasets

### Load EEGBCI data

In [15]:
# Directory that holds the pickled datasets.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the notebook elsewhere.
data_path = 'C:/Users/blaze/Desktop/opsi_eeg_denoising/eeg-denoising/data'

# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open(f"{data_path}/eeg_dataset_raw.pkl", "rb") as raw_file:
    eeg_datasets_dict = pickle.load(raw_file)

# Only the entry stored under "datasets_mne_format" is used below.
eeg_datasets = eeg_datasets_dict["datasets_mne_format"]

# NOISE TYPE applied later in the notebook: one of 'burst', 'pink', 'gaussian'.
noise = 'pink'

# Scale Signal

In [16]:
# Standardize the first dataset channel by channel.
# Assumes `eeg_datasets[0]` is an MNE Raw-like object whose `[:]` returns
# (data, times) with `data` shaped (channels, samples) -- TODO confirm.
# StandardScaler normalizes each column, so the array is transposed before
# fitting (one column per channel) and transposed back afterwards.
# NOTE(review): only dataset index 0 is processed; any further entries in
# `eeg_datasets` are ignored.
scaler = StandardScaler()
data, times = eeg_datasets[0][:]
scaled_eeg_data = scaler.fit_transform(data.T).T

channel_names = eeg_datasets[0].ch_names

# NOTE(review): this rebinds `eeg_datasets` from the loaded sequence to a
# plain dict, so re-running this cell without re-running the load cell fails
# on `eeg_datasets[0]`. The name is kept because the save cell reads it.
eeg_datasets = {
    "channel_names": channel_names,
    "original_scaled_eeg": scaled_eeg_data
}

# Add artificial noise

In [17]:
def add_gaussian_noise(signal, noise_std=0.5):
    """Return ``signal`` with zero-mean Gaussian noise added to every sample.

    Parameters
    ----------
    signal : numpy.ndarray
        Input array; the noise is drawn with the same shape (``signal.shape``).
    noise_std : float, optional
        Standard deviation of the added noise (default 0.5).

    Returns
    -------
    numpy.ndarray
        ``signal`` plus one fresh noise draw from NumPy's global random state.
    """
    return signal + np.random.normal(loc=0, scale=noise_std, size=signal.shape)

def add_emg_bursts(signal, noise_std=0.5, burst_duration=0.1, fs=256,
                   n_bursts=5, freq_range=(20, 60)):
    """Add short EMG-like noise bursts to a 1-D signal.

    Each burst is Gaussian noise with standard deviation ``noise_std``,
    amplitude-modulated by a sine whose frequency is drawn uniformly from
    ``freq_range``. Bursts are placed at random start positions and simply add
    up where they overlap. Random numbers come from NumPy's global state.

    Parameters
    ----------
    signal : array_like
        One-dimensional signal to corrupt.
    noise_std : float, optional
        Standard deviation of the Gaussian noise inside a burst (default 0.5).
    burst_duration : float, optional
        Length of one burst in seconds (default 0.1).
    fs : float, optional
        Sampling rate in Hz, used both to convert ``burst_duration`` into
        samples and to time the modulating sine (default 256).
        NOTE(review): confirm the default matches the recording's real rate.
    n_bursts : int, optional
        Number of bursts to insert (default 5, the previously fixed count).
    freq_range : tuple of float, optional
        ``(low, high)`` bounds in Hz for the modulation frequency
        (default ``(20, 60)``, the previously fixed band).

    Returns
    -------
    numpy.ndarray
        ``signal`` with the bursts added; the input is not modified.

    Raises
    ------
    ValueError
        If the signal is shorter than a single burst.
    """
    signal = np.asarray(signal)
    burst_len = int(burst_duration * fs)
    n_samples = len(signal)
    if burst_len > n_samples:
        raise ValueError(
            f"signal has {n_samples} samples but one burst needs {burst_len}"
        )

    # Accumulate the noise in a floating dtype: keep the input's own float
    # dtype, otherwise fall back to float64 so integer signals do not make
    # the in-place addition below fail on casting.
    if np.issubdtype(signal.dtype, np.floating):
        noise_dtype = signal.dtype
    else:
        noise_dtype = np.float64
    emg_noise = np.zeros(signal.shape, dtype=noise_dtype)

    sample_idx = np.arange(burst_len)
    low_hz, high_hz = freq_range

    for _ in range(n_bursts):
        # randint's upper bound is exclusive, hence the +1: a burst may end
        # exactly on the last sample, and a signal that is exactly one burst
        # long is accepted.
        start = np.random.randint(0, n_samples - burst_len + 1)
        burst = np.random.normal(0, noise_std, burst_len)
        burst *= np.sin(2 * np.pi * np.random.uniform(low_hz, high_hz) * sample_idx / fs)
        emg_noise[start:start + burst_len] += burst

    return signal + emg_noise

def add_pink_noise(signal, noise_std=0.5):
    """Add low-frequency ("pink"-style) noise to a 1-D signal.

    White Gaussian noise is passed through a first-order low-pass Butterworth
    filter with normalized cutoff 0.01, rescaled so its standard deviation
    equals ``noise_std``, and added to ``signal``.

    NOTE(review): a first-order low-pass yields a spectrum that is flat below
    the cutoff and rolls off above it, so this only approximates true 1/f
    pink noise.

    Parameters
    ----------
    signal : array_like
        One-dimensional signal; its length sets the number of noise samples.
    noise_std : float, optional
        Standard deviation of the noise after rescaling (default 0.5).

    Returns
    -------
    numpy.ndarray
        ``signal`` plus the filtered, rescaled noise.
    """
    n_samples = len(signal)
    numerator, denominator = signal_lib.butter(N=1, Wn=0.01, btype='low')
    white_noise = np.random.normal(0, 1, n_samples)
    coloured = signal_lib.lfilter(numerator, denominator, white_noise)
    # Normalize to unit standard deviation, then scale to the requested level.
    coloured = coloured / coloured.std() * noise_std
    return signal + coloured

In [18]:
# Map each supported noise type to the function that generates it.
noise_generators = {
    'burst': add_emg_bursts,
    'gaussian': add_gaussian_noise,
    'pink': add_pink_noise,
}

# Fail loudly on an unknown noise type. Without this check a typo in `noise`
# would either raise a NameError or, in a long-lived kernel, silently reuse a
# stale `noisy_signal_scaled` from an earlier run.
if noise not in noise_generators:
    raise ValueError(
        f"Unknown noise type {noise!r}; expected one of {sorted(noise_generators)}"
    )
selected_noise_fn = noise_generators[noise]

# NOTE(review): add_emg_bursts is called with its default `fs`; confirm that
# default equals the recording's sampling rate (EEGBCI recordings are
# natively sampled at 160 Hz -- TODO confirm whether the data was resampled).
artificial_noise_signals = []

# Corrupt every row of the scaled data independently, all at noise_std=0.5.
for signal in scaled_eeg_data:
    noisy_signal_scaled = selected_noise_fn(signal, noise_std=0.5)
    artificial_noise_signals.append(noisy_signal_scaled)

# Same layout as the clean dataset dict: channel names plus one noisy signal
# per row (kept as a list of arrays).
datasets_artificial_noise = {
    "channel_names": channel_names,
    "noisy_scaled_eeg": artificial_noise_signals
}

In [19]:
# noisy_data, _ = noisy[:]
# noisy_data.shape
# noisy_data_df = pd.DataFrame(noisy_data.T, columns=noisy.ch_names)
# noisy_data_df.head(5)

In [20]:
# Alternative approach (not used): add white Gaussian noise directly
# sigma = 0.05 # Noise standard deviation
# noisy_signal = signal + sigma * np.random.randn(signal.size)
# or
# noisy_data = data + np.random.normal(0, 0.1, size=data.shape)

# Save data to pkl

In [21]:
# NOTE(review): this writes back to the same file the notebook loaded from,
# replacing the raw content under "datasets_mne_format" with the scaled-data
# dict. A second Restart & Run All will then load that dict and fail in the
# scaling cell. Consider a separate output file once downstream readers are
# checked.
with open(f"{data_path}/eeg_dataset_raw.pkl", "wb") as out_file:
    processed_payload = {
        "datasets_mne_format": eeg_datasets,
        "datasets_artnoise_mne_format": datasets_artificial_noise,
    }
    pickle.dump(processed_payload, out_file)