Attempt 2: Using a better ICA (GPU rendered)

In [None]:
import mne
import numpy as np
from torcheeg.datasets import SEEDDataset
import os
from collections import defaultdict

# ---------------------------------------------------------------
# Load the SEED Dataset
# ---------------------------------------------------------------
# No online or label transform is configured, so indexing the dataset yields
# an (eeg_data, label) pair in which `label` is a mapping that carries the
# '_record_id' key read by the grouping loop below.
seed_io_path = 'C:/Users/tahir/Documents/EEg-based-Emotion-Recognition/.torcheeg/datasets_1733174610032_5iJyS'
dataset = SEEDDataset(
    io_path=seed_io_path,
    online_transform=None,
    label_transform=None,
    num_worker=6,
)

# ---------------------------------------------------------------
# Directory to Save the Cleaned Data
# ---------------------------------------------------------------
save_dir = 'C:/Users/tahir/Documents/EEg-based-Emotion-Recognition/cleaned_data'
os.makedirs(save_dir, exist_ok=True)  # no error if the directory already exists

# ---------------------------------------------------------------
# Group Samples by Record ID
# ---------------------------------------------------------------
# Maps each record id to the list of its samples, in dataset index order.
record_groups = defaultdict(list)

total_samples = len(dataset)
for idx in range(total_samples):
    eeg_data, label = dataset[idx]
    record_groups[label['_record_id']].append(eeg_data)

# ---------------------------------------------------------------
# Process Each Unique Record
# ---------------------------------------------------------------
def _scores_per_component(scores, n_components):
    """Collapse ``find_bads_*`` scores into one non-negative value per component.

    Parameters
    ----------
    scores : array-like
        Either one sequence holding a score per ICA component, a sequence of
        such sequences (one per reference channel), or an empty sequence.
    n_components : int
        Number of fitted ICA components.

    Returns
    -------
    numpy.ndarray
        Shape ``(n_components,)``: the largest absolute score of every
        component across the reference channels, or all zeros when ``scores``
        is empty.

    Raises
    ------
    ValueError
        If a row of ``scores`` does not hold exactly ``n_components`` values.
    """
    if scores is None or len(scores) == 0:
        return np.zeros(n_components)
    stacked = np.abs(np.atleast_2d(np.asarray(scores, dtype=float)))
    if stacked.shape[1] != n_components:
        raise ValueError(
            f"Expected {n_components} scores per row, got {stacked.shape[1]}"
        )
    return stacked.max(axis=0)


def _pick_worst_components(eog_indices, eog_scores, muscle_indices,
                           muscle_scores, n_components, max_exclude=2):
    """Return at most ``max_exclude`` flagged components, highest score first.

    The score arrays are indexed by component, not by position in the index
    lists, so each flagged component is looked up in its own detector's scores.
    A component flagged by both detectors appears once, with its larger score.

    Parameters
    ----------
    eog_indices, muscle_indices : sequence of int
        Component indices flagged by the EOG and muscle detectors.
    eog_scores, muscle_scores : array-like
        Scores as accepted by ``_scores_per_component``.
    n_components : int
        Number of fitted ICA components.
    max_exclude : int
        Upper bound on the number of components returned.

    Returns
    -------
    list of int
        Component indices ordered from highest to lowest score.
    """
    detections = (
        (eog_indices, _scores_per_component(eog_scores, n_components)),
        (muscle_indices, _scores_per_component(muscle_scores, n_components)),
    )
    severity = {}
    for flagged, per_component in detections:
        for comp in flagged:
            comp = int(comp)
            severity[comp] = max(severity.get(comp, 0.0), float(per_component[comp]))
    ranked = sorted(severity, key=severity.get, reverse=True)
    return ranked[:max_exclude]


# Settings that do not change between records, hoisted out of the loop.
sfreq = 200  # Original sampling frequency (200 Hz)
montage = mne.channels.make_standard_montage('standard_1005')
# Frontal electrodes used as a stand-in for dedicated EOG channels.
eog_virtual_channels = ['Fp1', 'Fp2']

for record_id, eeg_samples in record_groups.items():
    print(f"Processing record {record_id}...")

    # Join all samples of the record along the second axis.
    # Assumes every sample is (channels, time_points) — TODO confirm.
    eeg_data = np.hstack(eeg_samples)

    # Create MNE info object with placeholder channel names
    ch_names = [f'Ch{i+1}' for i in range(eeg_data.shape[0])]
    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types='eeg')

    # Create MNE Raw object
    raw = mne.io.RawArray(eeg_data, info)

    # Assign a standard montage.
    # NOTE(review): channel i is labelled with the i-th name of the
    # standard_1005 montage. This is presumably not the dataset's own channel
    # order, so names/positions in the saved file may be wrong — confirm
    # against the SEED channel list before relying on them.
    raw.rename_channels({f'Ch{i+1}': montage.ch_names[i] for i in range(len(ch_names))})
    raw.set_montage(montage)

    # -------------------------------------------------------------------------
    # Preprocessing: Bandpass Filter (0.05–47 Hz) and Downsampling
    # -------------------------------------------------------------------------
    print(f"Applying 0.05–47 Hz bandpass filter for record {record_id}...")
    raw.filter(l_freq=0.05, h_freq=47.0, fir_design='firwin')

    # Downsample to 100 Hz to speed up ICA. The resampling is done in place,
    # so the file saved below is at 100 Hz as well.
    print(f"Downsampling to 100 Hz for record {record_id}...")
    raw.resample(sfreq=100)

    # -------------------------------------------------------------------------
    # Apply ICA for Artifact Removal (Infomax)
    # -------------------------------------------------------------------------
    print(f"Applying Infomax ICA for artifact removal on record {record_id}...")

    ica = mne.preprocessing.ICA(
        n_components=len(ch_names) - 1,  # one fewer component than channels
        method='infomax',                # Infomax algorithm
        max_iter=200,                    # cap on fitting iterations
        random_state=42                  # fixed seed for reproducible fits
    )

    # Fit ICA to the data
    ica.fit(raw, picks='all')

    # -------------------------------------------------------------------------
    # Automatically Detect and Limit Exclusions to 2 Worst Components
    # -------------------------------------------------------------------------
    eog_indices, eog_scores = [], []
    # Only hand MNE the proxy channels that actually exist in this recording.
    present_eog_channels = [ch for ch in eog_virtual_channels if ch in raw.ch_names]
    if present_eog_channels:
        try:
            # Detect EOG artifacts using the virtual EOG channels
            eog_indices, eog_scores = ica.find_bads_eog(raw, ch_name=present_eog_channels)
            print(f"EOG components identified: {eog_indices}")
        except RuntimeError:
            print("No suitable EOG channels found. Skipping EOG artifact detection.")
    else:
        print("No suitable EOG channels found. Skipping EOG artifact detection.")

    # Detect muscle artifacts
    muscle_indices, muscle_scores = ica.find_bads_muscle(raw)
    print(f"Muscle artifact components identified: {muscle_indices}")

    # Keep the 2 flagged components with the highest per-component scores.
    worst_indices = _pick_worst_components(
        eog_indices, eog_scores,
        muscle_indices, muscle_scores,
        n_components=ica.n_components_,
        max_exclude=2,
    )

    # Set components to exclude
    ica.exclude = worst_indices
    print(f"Excluded components for record {record_id}: {ica.exclude}")

    # Apply ICA to remove the selected components (modifies `raw` in place)
    ica.apply(raw)

    # -------------------------------------------------------------------------
    # Save the Cleaned Data
    # -------------------------------------------------------------------------
    save_path = os.path.join(save_dir, f'cleaned_{record_id}.fif')
    raw.save(save_path, overwrite=True)
    print(f"Processed and saved record {record_id} to {save_path}")

print("All records have been processed and saved successfully.")


[2024-12-09 16:38:00] INFO (torcheeg/MainThread) 🔍 | Detected cached processing results, reading cache from C:/Users/tahir/Documents/EEg-based-Emotion-Recognition/.torcheeg/datasets_1733174610032_5iJyS.


Processing record _record_0...
Applying GPU-accelerated 0.05–47 Hz bandpass filter for record _record_0...
Applying Infomax ICA for artifact removal on record _record_0...


RuntimeError: No EOG channel(s) found

In [5]:
import mne
import numpy as np
from torcheeg.datasets import SEEDDataset
import os

# ---------------------------------------------------------------
# Load the SEED Dataset
# ---------------------------------------------------------------
dataset = SEEDDataset(
    io_path='C:/Users/tahir/Documents/EEg-based-Emotion-Recognition/.torcheeg/datasets_1733174610032_5iJyS',
    online_transform=None,
    label_transform=None,
    num_worker=6,
)

# Smoke test on the first sample only.
eeg_data, label = dataset[0]

# Wrap the sample in an MNE Raw object with placeholder channel names.
sfreq = 200  # Sampling frequency (200 Hz)
ch_names = [f'Ch{n}' for n in range(1, eeg_data.shape[0] + 1)]
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types='eeg')
raw = mne.io.RawArray(eeg_data, info)

# Relabel the placeholders with the leading names of the standard_1005
# montage, then attach the montage.
# NOTE(review): the mapping is purely positional — confirm it matches the
# dataset's real channel order.
montage = mne.channels.make_standard_montage('standard_1005')
raw.rename_channels(dict(zip(ch_names, montage.ch_names)))
raw.set_montage(montage)

# Band-pass 0.05–47 Hz, in place.
raw.filter(l_freq=0.05, h_freq=47.0, fir_design='firwin')

# Reduced ICA settings (20 components, 100 iterations) for a quick fit.
ica = mne.preprocessing.ICA(
    n_components=20,
    method='infomax',
    max_iter=100,
    random_state=42,
)

# The crop runs from 0 to the last time point, so `raw_subset` is a copy of
# the whole recording rather than a shorter excerpt.
raw_subset = raw.copy()
raw_subset.crop(tmin=0, tmax=raw.times[-1])
ica.fit(raw_subset, picks='all')

# Frontal electrodes stand in for EOG channels; adjust based on your montage.
eog_virtual_channels = ['Fp1', 'Fp2']

try:
    # Look for ICA components that track the virtual EOG channels.
    eog_indices, eog_scores = ica.find_bads_eog(raw_subset, ch_name=eog_virtual_channels)
    print(f"EOG components identified: {eog_indices}")
except RuntimeError as err:
    # Diagnostic cell: report the failure instead of stopping the notebook.
    print(f"Error: {err}")


[2024-12-09 17:02:39] INFO (torcheeg/MainThread) 🔍 | Detected cached processing results, reading cache from C:/Users/tahir/Documents/EEg-based-Emotion-Recognition/.torcheeg/datasets_1733174610032_5iJyS.


EOG components identified: [0]
