In [1]:
# Switch the kernel's working directory to the OmnEEG checkout. Later cells use
# relative paths/imports ('PPSPrep/preprocessing.py', 'omneeg.py', 'omneeg.transform').
# NOTE(review): absolute, user-specific path - adjust when running elsewhere.
cd /home/dubadr/projects/def-gdumas85/dubadr/deephy/libs/OmnEEG

/lustre06/project/6057506/dubadr/deephy/libs/OmnEEG


In [2]:
#!/usr/bin/env python3
"""
EEG Processing Pipeline with Omneeg Integration

This script can run omneeg on either raw data or preprocessed data.
If preprocessing is requested, it will first run the preprocessing script
and then apply omneeg transformation.

Usage:
    python eeg_pipeline.py database input_folder participant output_folder demo_file [options]

Examples:
    # Extract HBN data from tar.gz, then run omneeg on raw data
    python eeg_pipeline.py HBN /data/hbn/tar_files participant_001 /output /path/to/demo.csv --mode raw --extract_data
    
    # Run full pipeline: extract -> preprocess -> omneeg
    python eeg_pipeline.py HBN /data/hbn/tar_files participant_001 /output /path/to/demo.csv --mode preprocess --extract_data
    
    # Use already extracted/preprocessed data
    python eeg_pipeline.py HBN /data/preprocessed participant_001 /output /path/to/demo.csv --mode preprocessed
    
    # Custom task name for extraction
    python eeg_pipeline.py HBN /data/hbn/tar_files participant_001 /output /path/to/demo.csv --mode raw --extract_data --task MovieWatching
    
    # Use reconstruction mode (looks for labels.fif instead of RestingState_epo.fif)
    python eeg_pipeline.py HBN /data/preprocessed participant_001 /output /path/to/demo.csv --mode preprocessed --reconstruction
"""

import sys
import os
import argparse
import subprocess
import logging
from pathlib import Path
import tarfile
import numpy as np
import pandas as pd
import mne
from mne.io.pick import pick_info

# Channel labels assigned to the rows of the imported CSV data:
# 'E1' ... 'E128' in numeric order, followed by 'Cz'.
chan_names_tobeused = [f'E{number}' for number in range(1, 129)] + ['Cz']


# Logging setup: every record at INFO or above goes both to 'eeg_pipeline.log'
# (created in the current working directory) and to stdout.
logging.basicConfig(
    handlers=[
        logging.FileHandler('eeg_pipeline.log'),
        logging.StreamHandler(sys.stdout),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by the functions and classes defined below.
logger = logging.getLogger(__name__)

def extract_data(database, data_folder, participant_id, raw_folder, output_folder, task='RestingState'):
    """Extract HBN/CMI data from a tar.gz archive and build an MNE Raw object.

    The participant archive is unpacked (once) into a scratch folder, the CSV
    recording and its event table are loaded, channel metadata is filled in
    from an electrode-position template (and from the MFF recording when one
    is found), events are attached as annotations, and the result is saved as
    ``<raw_folder>/<participant_id>/<task>.fif``.

    Args:
        database: ``'HBN'`` selects the HBN file layout and template; any other
            value is handled with the CMI layout and template.
        data_folder: Folder containing ``<participant_id>.tar.gz``.
        participant_id: Participant identifier (archive and folder name).
        raw_folder: Folder under which the ``.fif`` file is written.
        output_folder: Only used to derive the scratch extraction location
            (a sibling of this folder).
        task: Task name used for the HBN CSV file names and the output file.

    Returns:
        The ``mne.io.RawArray`` that was saved, or ``None`` when the event CSV
        for this participant/task does not exist.
    """
    sampling_freq = 500

    if database == 'HBN':
        location_eeg = '/home/dubadr/projects/def-gdumas85/dubadr/deephy/datasets/HBN/GSN_HydroCel_129.sfp.txt'
        path_to_events = f'{task}_event.csv'
        path_to_data = f'{task}_data.csv'
    else:
        location_eeg = '/home/dubadr/projects/def-gdumas85/dubadr/deephy/datasets/CMI/Electrodes_position_cmi.txt'
        path_to_events = f'{participant_id}001_events.csv'
        path_to_data = f'{participant_id}001.csv'

    # Electrode template: column 0 holds the names, columns 1-3 the coordinates.
    template_channel_names = np.loadtxt(location_eeg, dtype=str, usecols=0)
    template_channel_locations = np.loadtxt(location_eeg, dtype=float, usecols=(1, 2, 3))

    # NOTE(review): the folder names below are built by plain string
    # concatenation, so the extraction root is '<parent>/scratchEEG/' and the
    # MFF lookup root is '<data_folder>EEG/...'. This is kept unchanged so
    # already-extracted data is still found - confirm whether a path separator
    # was intended.
    extract_folder = os.path.join(os.path.dirname(output_folder.rstrip('/')), 'scratch')
    extract_root = extract_folder + 'EEG/'
    csvfolder = extract_root + participant_id + '/EEG/raw/csv_format'
    datafolder = data_folder + 'EEG/' + participant_id + '/EEG'

    # Unpack the archive only if this participant has not been extracted yet.
    # The context manager closes the archive even if extraction fails.
    if not os.path.exists(extract_root + participant_id):
        with tarfile.open(data_folder + '/' + participant_id + '.tar.gz') as archive:
            archive.extractall(extract_root)

    events_file = os.path.join(csvfolder, path_to_events)
    if not os.path.exists(events_file):
        print("there is no data for this participant " + participant_id)
        return None

    events = pd.read_csv(events_file)
    if database == 'CMI':
        # Drop rows that repeat the header ('type') and make the numeric
        # columns numeric. The copy avoids assigning into a filtered view.
        events['type'] = events['type'].str.strip()
        events = events[events['type'] != 'type'].copy()
        events['latency'] = pd.to_numeric(events['latency'])
        events['urevent'] = pd.to_numeric(events['urevent'])

    print('Starting data import')
    data = pd.read_csv(os.path.join(csvfolder, path_to_data), header=None).values
    print('Done data import')
    # presumably converts microvolts to volts - TODO confirm the CSV unit
    data = data * 1e-6

    info = mne.create_info(chan_names_tobeused, sfreq=sampling_freq, ch_types='eeg')
    if database == 'HBN':
        # The data is loaded with all named channels, then 'Cz' is removed.
        raw = mne.io.RawArray(data, info)
        raw.drop_channels('Cz')
    else:
        # 'Cz' is removed from the info before the array is wrapped.
        keep_idx = [idx for idx, ch_name in enumerate(info['ch_names']) if ch_name != 'Cz']
        raw = mne.io.RawArray(data, pick_info(info, sel=keep_idx))

    # Keep only the template rows whose name is present in the recording, in
    # template order. The previous code removed at most three rows, picked
    # from an unordered set difference, so the row that survived (and hence
    # the row-to-channel alignment) could vary from run to run.
    in_recording = np.isin(template_channel_names, raw.info['ch_names'])
    template_locs_final = template_channel_locations[in_recording]

    chanlocs_available = os.path.exists(os.path.join(csvfolder, f'{task}_chanlocs.csv'))
    mff_folder = os.path.join(datafolder, 'raw', 'mff_format', participant_id)

    if os.path.exists(mff_folder):
        # The folder is temporarily given a '.mff' suffix for the reader; it
        # is renamed back even if reading fails.
        os.rename(mff_folder, mff_folder + '.mff')
        try:
            raw_mff = mne.io.read_raw_egi(mff_folder + '.mff')
        finally:
            os.rename(mff_folder + '.mff', mff_folder)

        mff_chs = raw_mff.info['chs']
        for i, ch in enumerate(raw.info['chs']):
            # Same-named MFF channel; as before, the last MFF channel is used
            # when no name matches.
            ch_mff = next((c for c in mff_chs if c['ch_name'] == ch['ch_name']), mff_chs[-1])
            ch['cal'] = ch_mff['cal']
            if not chanlocs_available:
                ch['loc'][0:3] = template_locs_final[i]
            # When a '<task>_chanlocs.csv' exists, locations are left untouched:
            # loading that file is not implemented here.
            ch['kind'] = ch_mff['kind']
            ch['unit'] = ch_mff['unit']
            ch['coord_frame'] = ch_mff['coord_frame']
    else:
        for i, ch in enumerate(raw.info['chs']):
            ch['cal'] = 1.0e-06
            if not chanlocs_available:
                ch['loc'][0:3] = template_locs_final[i]
            ch['kind'] = mne.utils._bunch.NamedInt('FIFFV_EEG_CH', 2)
            ch['unit'] = mne.utils._bunch.NamedInt('FIFF_UNIT_V', 107)
            ch['coord_frame'] = mne.utils._bunch.NamedInt('FIFFV_COORD_HEAD', 4)

    if database == 'HBN':
        descriptions = events['type'].values
        onsets = events['sample'].values / raw.info['sfreq']
        # Float copy so the 0.1 below is not truncated if the column is integer.
        durations = events['duration'].values.astype(float)
        durations[descriptions == 'break cnt'] = 0.1
    else:
        descriptions = events['type'].values
        onsets = events['latency'].values / sampling_freq
        durations = [1] * len(descriptions)

    raw.set_annotations(mne.Annotations(onsets, durations, descriptions))

    participant_raw_folder = os.path.join(raw_folder, participant_id)
    os.makedirs(participant_raw_folder, exist_ok=True)
    raw.save(os.path.join(participant_raw_folder, task + '.fif'), overwrite=True)

    return raw

class EEGPipeline:
    """Main pipeline class for EEG processing with omneeg.

    The pipeline optionally extracts HBN/CMI archives, optionally runs an
    external preprocessing script, locates the file to transform, and applies
    the omneeg transformation either in-process (``Interpolate`` class) or
    through the ``omneeg.py`` script.

    Attributes are set in ``__init__`` from the constructor arguments; the
    output folder structure is created on construction.
    """

    def __init__(self, database, input_folder, participant, output_folder,
                 preprocessing_script='PPSPrep/preprocessing.py', omneeg_script='omneeg.py',
                 demo_file=None, transform_module='omneeg.transform', extract_data_first=False):
        """Store the configuration and create the output sub-folders.

        Args:
            database: Dataset name (e.g. ``'HBN'``, ``'CMI'``, ``'BCAN'``).
            input_folder: Folder holding the input data.
            participant: Participant identifier.
            output_folder: Root folder for all pipeline outputs.
            preprocessing_script: Path of the preprocessing script.
            omneeg_script: Path of the omneeg script (script mode only).
            demo_file: Optional path of the demographics CSV.
            transform_module: Importable module providing ``Interpolate``.
            extract_data_first: Whether to extract HBN/CMI archives first.
        """
        self.database = database
        self.input_folder = Path(input_folder)
        self.participant = participant
        self.output_folder = Path(output_folder)
        self.preprocessing_script = Path(preprocessing_script)
        self.omneeg_script = Path(omneeg_script)
        self.demo_file = demo_file
        self.transform_module = transform_module
        self.extract_data_first = extract_data_first

        # Create output folder structure
        self.setup_output_folders()

    def run_data_extraction(self, task='RestingState'):
        """Extract data from tar.gz files for HBN/CMI databases.

        Returns:
            True when extraction succeeded or is not needed for this
            database, False when it failed or produced no data.
        """
        if self.database not in ['HBN', 'CMI']:
            logger.info(f"Data extraction not needed for database: {self.database}")
            return True

        logger.info(f"Starting data extraction for {self.database} participant {self.participant}")

        try:
            raw = extract_data(
                database=self.database,
                data_folder=str(self.input_folder),
                participant_id=self.participant,
                raw_folder=str(self.output_folder / 'raw'),
                output_folder=str(self.output_folder),
                task=task
            )
        except Exception as e:
            logger.error(f"Data extraction failed: {str(e)}")
            import traceback
            logger.error(traceback.format_exc())
            return False

        if raw is None:
            logger.error("Data extraction failed - no data returned")
            return False

        logger.info("Data extraction completed successfully")
        return True

    def setup_output_folders(self):
        """Create necessary output folder structure"""
        for folder in ['raw', 'ica', 'process', 'log', 'omneeg']:
            folder_path = self.output_folder / folder
            folder_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created/verified folder: {folder_path}")

    def run_preprocessing(self, remove_channels=False):
        """Run the preprocessing script as a subprocess.

        Returns:
            True when the script exits successfully, False otherwise
            (script missing, non-zero exit code, or interpreter not found).
        """
        logger.info(f"Starting preprocessing for participant {self.participant}")

        # Check if preprocessing script exists
        if not self.preprocessing_script.exists():
            logger.error(f"Preprocessing script not found: {self.preprocessing_script}")
            logger.info("Please ensure the PPSPrep directory and preprocessing.py file exist")
            return False

        # sys.executable runs the script with the interpreter that runs this
        # code, instead of whichever 'python' happens to be first on PATH.
        cmd = [
            sys.executable, str(self.preprocessing_script),
            self.database,
            str(self.input_folder),
            self.participant,
            str(self.output_folder),
            'output_placeholder',  # This seems to be unused in the original script
            '--remove_channels', str(remove_channels).lower()
        ]

        logger.info(f"Running preprocessing command: {' '.join(cmd)}")

        try:
            result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Preprocessing failed with return code {e.returncode}")
            logger.error(f"Error output: {e.stderr}")
            logger.error(f"Standard output: {e.stdout}")
            return False
        except FileNotFoundError:
            logger.error("Python interpreter or preprocessing script not found")
            logger.error(f"Script path: {self.preprocessing_script}")
            return False

        logger.info("Preprocessing completed successfully")
        logger.info(f"Preprocessing stdout: {result.stdout}")
        if result.stderr:
            logger.warning(f"Preprocessing stderr: {result.stderr}")
        return True

    def find_input_file(self, mode, task='RestingState'):
        """Find the appropriate input file based on processing mode.

        Args:
            mode: ``'raw'``, ``'preprocessed'`` or ``'preprocess'``.
            task: Task name of the extracted recording; only used in ``'raw'``
                mode to locate ``raw/<participant>/<task>.fif``, the file
                written by ``extract_data``.

        Returns:
            A ``Path`` to the selected file, or ``None`` when nothing matches.
        """
        if mode == 'raw':
            # For HBN/CMI with extraction, look for the extracted .fif file first
            if self.database in ['HBN', 'CMI'] and self.extract_data_first:
                extracted_file = self.output_folder / 'raw' / self.participant / f'{task}.fif'
                if extracted_file.exists():
                    return extracted_file

            # Look for raw data files in input folder. Matches are sorted so
            # the chosen file does not depend on directory listing order.
            possible_extensions = ['.fif', '.set', '.edf', '.bdf', '.mat', '.tar.gz']
            for ext in possible_extensions:
                files = sorted(self.input_folder.glob(f"*{self.participant}*{ext}"))
                if files:
                    return files[0]

                # For HBN, also look for tar.gz files with just participant name
                if ext == '.tar.gz':
                    tar_file = self.input_folder / f"{self.participant}.tar.gz"
                    if tar_file.exists():
                        return tar_file

            # If no specific participant file found, look for generic patterns
            for ext in possible_extensions[:-1]:  # Exclude .tar.gz for generic search
                files = sorted(self.input_folder.glob(f"*{ext}"))
                if files:
                    logger.warning(f"No specific participant file found, using: {files[0]}")
                    return files[0]

        elif mode in ('preprocessed', 'preprocess'):
            # Both modes read from the output folder: the processed epochs
            # file is preferred, the ICA file is the fallback.
            candidates = (
                self.output_folder / 'process' / self.participant / 'RestingState_epo.fif',
                self.output_folder / 'ica' / self.participant / 'ica.fif',
            )
            for candidate in candidates:
                if candidate.exists():
                    return candidate

        return None

    def run_omneeg(self, input_file, output_suffix='transformed', reconstruction=False, use_transform_module=True):
        """Run omneeg transformation on the input file.

        Dispatches to the in-process implementation when
        ``use_transform_module`` is true, otherwise to the ``omneeg.py`` script.

        Returns:
            Path of the output ``.h5`` file, or ``None`` on failure.
        """
        logger.info(f"Starting omneeg transformation on {input_file}")

        if use_transform_module:
            # Use the Interpolate class from omneeg/transform.py directly
            return self._run_omneeg_direct(input_file, output_suffix, reconstruction)
        # Use the original omneeg.py script
        return self._run_omneeg_script(input_file, output_suffix, reconstruction)

    def _minimal_demographics(self):
        """Build a one-row demographics frame holding only participant and database."""
        return pd.DataFrame({
            'participant_id': [self.participant],
            'database': [self.database]
        })

    def _run_omneeg_direct(self, input_file, output_suffix='transformed', reconstruction=False):
        """Run omneeg transformation using direct import of the Interpolate class.

        ``output_suffix`` and ``reconstruction`` are accepted for signature
        parity with ``_run_omneeg_script`` but are not used by this method.

        Returns:
            Path of the written ``.h5`` file, or ``None`` if any step fails
            (the error and traceback are logged).
        """
        try:
            import h5py
            import warnings
            from importlib import import_module

            # Import the Interpolate class from omneeg/transform.py
            try:
                transform_module = import_module(self.transform_module)
                Interpolate = transform_module.Interpolate
                logger.info(f"Successfully imported Interpolate from {self.transform_module}")
            except ImportError as e:
                logger.error(f"Failed to import Interpolate from {self.transform_module}: {e}")
                return None

            # Try to import demo function, but handle gracefully if it fails
            try:
                from demo import get_demo
                use_demo = True
                logger.info("Successfully imported get_demo from demo module")
            except Exception as e:
                logger.warning(f"Could not import get_demo from demo module: {e}")
                logger.info("Will create minimal demographics instead")
                use_demo = False

            # Construct output file path
            omneeg_output_folder = self.output_folder / 'omneeg'
            omneeg_output_folder.mkdir(parents=True, exist_ok=True)

            # For epochs created from raw data, use the input_file directly
            participant_file = input_file

            logger.info(f"Loading epochs from: {participant_file}")

            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", message="This filename (.*) does not conform to MNE naming conventions")

                # The path decides how the file is read: anything containing
                # 'epochs' or ending in '_epo.fif' is read as epochs.
                file_name = str(participant_file)
                if 'epochs' in file_name or file_name.endswith('_epo.fif'):
                    epochs = mne.read_epochs(file_name, preload=True)
                else:
                    # This is a raw file, create epochs from it
                    logger.info("Input appears to be raw data, creating fixed-length epochs")
                    raw = mne.io.read_raw_fif(file_name, preload=True)
                    epochs = mne.make_fixed_length_epochs(raw, duration=2.0, preload=True)

            # Resample to 500 Hz
            epochs.resample(500)

            # Get demographics
            if use_demo and self.demo_file:
                try:
                    df_demo = get_demo(self.database, str(self.input_folder), self.participant, self.demo_file)
                    logger.info("Successfully retrieved demographics from demo module")
                except Exception as e:
                    logger.warning(f"Failed to get demographics from demo module: {e}")
                    df_demo = self._minimal_demographics()
            else:
                logger.info("Creating minimal demographics")
                df_demo = self._minimal_demographics()

            logger.info(f"Channel names: {epochs.info['ch_names'][:5]}...")  # Show first 5 channels

            # Apply interpolation transformation
            logger.info("Applying Interpolate transformation (32x32)")
            interpolate = Interpolate((32, 32))
            data_temp = interpolate(epochs)

            # Construct output path
            participant_id = self._get_participant_id()
            h5_path = omneeg_output_folder / f"{participant_id}.h5"

            # Save to HDF5
            logger.info(f"Saving transformed data to: {h5_path}")
            with h5py.File(h5_path, 'w') as f:
                # Save EEG array
                f.create_dataset('power_data', data=data_temp, compression="gzip")

                # Save demographics as a group of attributes (one key per column)
                demo_group = f.create_group('demographics')
                for key, value in df_demo.iloc[0].items():  # assumes 1-row per participant
                    demo_group.attrs[key] = value

            logger.info("Omneeg transformation completed successfully using direct method")
            return h5_path

        except Exception as e:
            logger.error(f"Direct omneeg transformation failed: {str(e)}")
            import traceback
            logger.error(traceback.format_exc())
            return None

    def _find_participant_file(self, data_type, reconstruction=False):
        """Find the participant file based on database type and processing mode.

        Args:
            data_type: File name to look for inside the participant's
                ``process`` sub-folder.
            reconstruction: Accepted but not used by this method.

        Returns:
            ``<output_folder>/process/<participant folder>/<data_type>``; the
            path is built without checking that it exists.
        """
        process_folder = self.output_folder / 'process'

        if self.database == 'BCAN':
            # For BCAN the folder name is '<Record ID>_<Participant code>',
            # looked up in the demographics file when one is configured.
            if self.demo_file:
                demo = pd.read_csv(self.demo_file)
                basename = os.path.basename(os.path.normpath(self.participant))
                participant_id = basename.split('_', 1)[1] if '_' in basename else basename
                index_participant = demo[demo['Participant code'] == participant_id].index
                if len(index_participant) > 0:
                    row = demo.iloc[index_participant[0]]
                    folder_name = f"{row['Record ID']}_{row['Participant code']}"
                    return process_folder / folder_name / data_type
            return process_folder / self.participant / data_type

        if self.database in ['VIP', 'RDB', 'ABCCT', 'XFRAGILE', 'NED', 'HSJ', 'TUEG']:
            # Use the same id rules as _get_participant_id. Previously the VIP
            # id (text before the first '.') was computed and then overwritten
            # by the basename, so the two methods disagreed for VIP.
            return process_folder / self._get_participant_id() / data_type

        # HBN and any other database: the participant string is the folder name
        return process_folder / self.participant / data_type

    def _get_participant_id(self):
        """Get the correct participant ID based on database type"""
        if self.database == 'HBN':
            return self.participant
        if self.database == 'BCAN':
            return os.path.basename(os.path.normpath(self.participant))
        if self.database == 'VIP':
            return self.participant.split('.')[0]
        if self.database == 'RDB':
            # Drops the last seven characters of the participant string
            return self.participant[:-7]
        return os.path.basename(self.participant)

    def _run_omneeg_script(self, input_file, output_suffix='transformed', reconstruction=False):
        """Run omneeg transformation using the original omneeg.py script.

        ``output_suffix`` is accepted but not used by this method.

        Returns:
            ``<output_folder>/omneeg/<participant>.h5`` when the script exits
            successfully (the file's existence is not checked), otherwise
            ``None``.
        """
        # Check if omneeg script exists
        if not self.omneeg_script.exists():
            logger.error(f"Omneeg script not found: {self.omneeg_script}")
            return None

        # Construct output file path - omneeg saves as .h5 files
        omneeg_output_folder = self.output_folder / 'omneeg'
        omneeg_output_folder.mkdir(parents=True, exist_ok=True)

        # The data folder handed to the script is chosen from the input path
        input_path_text = str(input_file)
        if 'process' in input_path_text:
            # Using preprocessed data from the process folder
            data_folder = self.output_folder / 'process'
        elif 'ica' in input_path_text:
            # Using ICA processed data
            data_folder = self.output_folder / 'ica'
        else:
            # Using raw data
            data_folder = self.input_folder

        # sys.executable runs the script with the interpreter that runs this
        # code, instead of whichever 'python' happens to be first on PATH.
        cmd = [
            sys.executable, str(self.omneeg_script),
            self.database,                    # database
            str(data_folder),                 # input_folder (data_folder in omneeg)
            self.participant,                 # input_path (participant in omneeg)
            str(omneeg_output_folder),        # output_folder
            'output_placeholder',             # output_file (seems unused in omneeg)
            str(self.demo_file) if self.demo_file else 'demo_placeholder',  # demo_file
            '--reconstruction', str(reconstruction).lower()  # reconstruction flag
        ]

        logger.info(f"Running omneeg command: {' '.join(cmd)}")

        try:
            result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Omneeg transformation failed with return code {e.returncode}")
            logger.error(f"Error output: {e.stderr}")
            logger.error(f"Standard output: {e.stdout}")
            return None
        except FileNotFoundError:
            logger.error("Python interpreter or omneeg script not found")
            logger.error(f"Script path: {self.omneeg_script}")
            return None

        # Omneeg saves as participant_id.h5 in the output folder
        output_file = omneeg_output_folder / f"{self.participant}.h5"

        logger.info("Omneeg transformation completed successfully")
        logger.info(f"Output saved to: {output_file}")
        logger.info(f"Omneeg stdout: {result.stdout}")
        if result.stderr:
            logger.warning(f"Omneeg stderr: {result.stderr}")

        return output_file

    def run_pipeline(self, mode='preprocess', remove_channels=False, reconstruction=False, use_transform_module=True, task='RestingState'):
        """Run the complete pipeline based on specified mode.

        Args:
            mode: ``'preprocess'`` (preprocess, then transform), ``'raw'``
                (transform raw data) or ``'preprocessed'`` (transform data
                that was preprocessed earlier).
            remove_channels: Forwarded to the preprocessing script.
            reconstruction: Forwarded to the omneeg step.
            use_transform_module: Use the in-process transform instead of the
                ``omneeg.py`` script.
            task: Task name used for extraction and for locating the
                extracted raw file.

        Returns:
            True when an output file was produced, False otherwise.
        """
        logger.info(f"Starting EEG pipeline in '{mode}' mode for participant {self.participant}")

        # Reject an unknown mode before any extraction work is done.
        if mode not in ('preprocess', 'raw', 'preprocessed'):
            logger.error(f"Unknown mode: {mode}")
            return False

        # Step 1: Data extraction (if needed and requested)
        if self.extract_data_first and self.database in ['HBN', 'CMI']:
            if not self.run_data_extraction(task=task):
                logger.error("Data extraction failed, aborting pipeline")
                return False

        if mode == 'preprocess':
            # Run preprocessing first
            if not self.run_preprocessing(remove_channels=remove_channels):
                logger.error("Preprocessing failed, aborting pipeline")
                return False
            input_file = self.find_input_file('preprocess')
            missing_message = "Could not find preprocessed file after preprocessing"
            output_suffix = 'preprocessed_transformed'
        elif mode == 'raw':
            # The extracted file is named after the task, so pass it along.
            input_file = self.find_input_file('raw', task=task)
            missing_message = "Could not find raw input file"
            output_suffix = 'raw_transformed'
        else:
            input_file = self.find_input_file('preprocessed')
            missing_message = "Could not find preprocessed file"
            output_suffix = 'preprocessed_transformed'

        if not input_file:
            logger.error(missing_message)
            return False

        output_file = self.run_omneeg(input_file, output_suffix,
                                      reconstruction=reconstruction,
                                      use_transform_module=use_transform_module)

        if output_file:
            logger.info(f"Pipeline completed successfully. Output: {output_file}")
            return True

        logger.error("Pipeline failed")
        return False

  from mne.io.pick import pick_info


In [29]:
# Script-style entry point.
# NOTE(review): `main` is not defined in any cell visible in this notebook, so
# this cell relies on state from elsewhere. The recorded run of this cell ended
# in SystemExit: 2 with an argparse "arguments are required" error - inside the
# notebook, use EEGPipeline directly (as in the cells below) instead.
if __name__ == '__main__':
    main()

usage: ipykernel_launcher.py [-h] [--mode {raw,preprocess,preprocessed}]
                             [--remove_channels] [--reconstruction]
                             [--preprocessing_script PREPROCESSING_SCRIPT]
                             [--omneeg_script OMNEEG_SCRIPT]
                             [--transform_module TRANSFORM_MODULE]
                             [--use_script] [--extract_data] [--task TASK]
                             database input_folder participant output_folder
                             demo_file
ipykernel_launcher.py: error: the following arguments are required: input_folder, participant, output_folder, demo_file


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [3]:
# Build the pipeline for one HBN participant. Constructing the object already
# creates the raw/ica/process/log/omneeg sub-folders under `output_folder`.
# With extract_data_first=True, run_pipeline() first extracts the participant's
# archive '<input_folder>/<participant>.tar.gz'.
# NOTE(review): absolute, user-specific cluster paths - adjust before re-running elsewhere.
pipeline = EEGPipeline(
    database='HBN',
    input_folder='/home/dubadr/projects/ctb-gdumas85/data/HBN/EEG',  # Note: should be the parent folder
    participant='NDARAA075AMK',  # Just the participant ID
    output_folder='/home/dubadr/scratch/2seconds/HBN',
    demo_file='/home/dubadr/scratch/2seconds/HBN/data.csv',
    extract_data_first=True
)

2025-08-18 11:30:07,621 - __main__ - INFO - Created/verified folder: /home/dubadr/scratch/2seconds/HBN/raw
2025-08-18 11:30:07,624 - __main__ - INFO - Created/verified folder: /home/dubadr/scratch/2seconds/HBN/ica
2025-08-18 11:30:07,626 - __main__ - INFO - Created/verified folder: /home/dubadr/scratch/2seconds/HBN/process
2025-08-18 11:30:07,628 - __main__ - INFO - Created/verified folder: /home/dubadr/scratch/2seconds/HBN/log
2025-08-18 11:30:07,629 - __main__ - INFO - Created/verified folder: /home/dubadr/scratch/2seconds/HBN/omneeg


In [5]:
# Display the pipeline object (the class defines no custom repr, so only the default one is shown).
pipeline

<__main__.EEGPipeline at 0x145f35967310>

In [4]:
# Run the pipeline on raw data: extraction (requested at construction), then the
# omneeg transform on the extracted .fif file. `task` is left at its default
# ('RestingState'). `success` is True when an output file was produced.
# remove_channels is only forwarded to the preprocessing step, which 'raw' mode skips.
success = pipeline.run_pipeline(
    mode='raw',
    remove_channels=False,
    reconstruction=False,
    use_transform_module=True  # Use direct import method
)

2025-08-18 11:30:09,283 - __main__ - INFO - Starting EEG pipeline in 'raw' mode for participant NDARAA075AMK
2025-08-18 11:30:09,284 - __main__ - INFO - Starting data extraction for HBN participant NDARAA075AMK
Starting data import
Done data import
Creating RawArray with float64 data, n_channels=129, n_times=182086
    Range : 0 ... 182085 =      0.000 ...   364.170 secs
Ready.
Overwriting existing file.
Writing /home/dubadr/scratch/2seconds/HBN/raw/NDARAA075AMK/RestingState.fif


  raw.save(raw_folder + '/' + participant_id + '/' + task + '.fif', overwrite=True)


Closing /home/dubadr/scratch/2seconds/HBN/raw/NDARAA075AMK/RestingState.fif
[done]
2025-08-18 11:31:06,979 - __main__ - INFO - Data extraction completed successfully
2025-08-18 11:31:06,982 - __main__ - INFO - Starting omneeg transformation on /home/dubadr/scratch/2seconds/HBN/raw/NDARAA075AMK/RestingState.fif
2025-08-18 11:31:07,076 - __main__ - INFO - Successfully imported Interpolate from omneeg.transform


  from mne.io.pick import pick_types


2025-08-18 11:31:09,162 - __main__ - INFO - Will create minimal demographics instead
2025-08-18 11:31:09,164 - __main__ - INFO - Loading epochs from: /home/dubadr/scratch/2seconds/HBN/raw/NDARAA075AMK/RestingState.fif
2025-08-18 11:31:09,165 - __main__ - INFO - Input appears to be raw data, creating fixed-length epochs
Opening raw data file /home/dubadr/scratch/2seconds/HBN/raw/NDARAA075AMK/RestingState.fif...
Isotrak not found
    Range : 0 ... 182085 =      0.000 ...   364.170 secs
Ready.
Reading 0 ... 182085  =      0.000 ...   364.170 secs...
Not setting metadata
182 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 182 events and 1000 original time points ...
0 bad epochs dropped
Sampling frequency of the instance is already 500.0, returning unmodified.
2025-08-18 11:31:09,415 - __main__ - INFO - Creating minimal demographics
2025-08-18 11:31:09,417 - __main__ - INFO - Channel names: ['E1', 'E2', 'E3', 'E4', 'E5']..

  data = eeg.get_data()


2025-08-18 11:32:21,396 - __main__ - INFO - Saving transformed data to: /home/dubadr/scratch/2seconds/HBN/omneeg/NDARAA075AMK.h5
2025-08-18 11:40:01,748 - __main__ - INFO - Omneeg transformation completed successfully using direct method
2025-08-18 11:40:01,756 - __main__ - INFO - Pipeline completed successfully. Output: /home/dubadr/scratch/2seconds/HBN/omneeg/NDARAA075AMK.h5


In [None]:
# Command-line form of the pipeline (placeholder paths). The leading '!' makes
# IPython run the line as a shell command; without it the cell is not valid
# Python and fails with a SyntaxError.
!python eeg_pipeline.py HBN /data/raw participant_001 /output /path/to/demo.csv --mode raw