In [2]:
import joblib
import numpy as np 
import mne
import numpy as np
from scipy.stats import skew, kurtosis
from mne.preprocessing import ICA
from mne.time_frequency import psd_array_welch
import pandas as pd
from scipy.fftpack import fft
import pywt  # For wavelet transform
import os

In [3]:
def preprocess_eeg_data(vhdr_file_path, l_freq=1.0, h_freq=40.0, notch_freq=50):
    """Load a BrainVision recording, filter it, and remove EOG artifacts with ICA.

    Parameters
    ----------
    vhdr_file_path : str
        Path to the BrainVision ``.vhdr`` header file.
    l_freq : float
        Lower edge of the band-pass filter in Hz (EEG channels only).
    h_freq : float
        Upper edge of the band-pass filter in Hz (EEG channels only).
    notch_freq : float
        Frequency in Hz removed by the notch filter (EEG channels only).

    Returns
    -------
    The preloaded Raw object after filtering, average re-referencing (added as
    a projector) and ICA cleaning.

    Notes
    -----
    If the recording has no EOG-typed channel, no ICA component is excluded
    and a warning is emitted, instead of ``find_bads_eog`` raising.
    """
    import warnings

    raw = mne.io.read_raw_brainvision(vhdr_file_path, preload=True)

    # Re-type whichever of the known ocular channels are actually present so
    # they are skipped by the 'eeg' picks below and usable for EOG detection.
    eog_channels = ['VPVA', 'VNVB', 'HPHL', 'HNHR']
    raw.set_channel_types({ch: 'eog' for ch in eog_channels if ch in raw.ch_names})

    raw.notch_filter(freqs=[notch_freq], picks='eeg')
    raw.filter(l_freq=l_freq, h_freq=h_freq, picks='eeg')
    raw.set_eeg_reference('average', projection=True)

    # ICA for artifact removal (fixed random_state keeps the fit repeatable).
    ica = mne.preprocessing.ICA(n_components=20, random_state=97, max_iter=800)
    ica.fit(raw)

    # find_bads_eog needs at least one EOG channel to correlate against; the
    # channel list above is optional, so guard against recordings without any.
    has_eog = len(mne.pick_types(raw.info, meg=False, eeg=False, eog=True)) > 0
    if has_eog:
        eog_indices, _ = ica.find_bads_eog(raw)
    else:
        warnings.warn(
            f"No EOG channels found in {vhdr_file_path}; "
            "skipping EOG-based ICA component rejection."
        )
        eog_indices = []

    ica.exclude = eog_indices
    raw = ica.apply(raw)
    return raw

In [4]:
def slice_and_save_eeg(vhdr_file_path, slice_duration=60, output_dir="split_fif/mdd"):
    """Preprocess a recording and save it as consecutive fixed-length FIF slices.

    The recording is run through ``preprocess_eeg_data`` and cut into
    back-to-back segments of ``slice_duration`` seconds. Each segment is saved
    as ``<base_name>_<k>.fif`` (k starting at 1) inside ``output_dir``. Any
    samples left after the last full segment are saved as one extra, shorter
    file that takes the next index.

    Parameters
    ----------
    vhdr_file_path : str
        Path to the BrainVision ``.vhdr`` header file.
    slice_duration : float
        Length of each full slice in seconds; must be positive.
    output_dir : str
        Directory that receives the ``.fif`` files; created if missing.

    Raises
    ------
    ValueError
        If ``slice_duration`` is not positive or is shorter than one sample.
    """
    # Fail fast, before the expensive preprocessing step.
    if slice_duration <= 0:
        raise ValueError(f"slice_duration must be positive, got {slice_duration!r}")

    # Extract the base name for saving
    base_name = os.path.splitext(os.path.basename(vhdr_file_path))[0]

    # Perform preprocessing
    raw = preprocess_eeg_data(vhdr_file_path)
    sfreq = raw.info['sfreq']

    # Calculate the number of samples per slice
    samples_per_slice = int(slice_duration * sfreq)
    if samples_per_slice < 1:
        raise ValueError(
            f"slice_duration={slice_duration!r} s is shorter than one sample at {sfreq} Hz"
        )

    # Saving fails if the target directory does not exist yet.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    total_samples = len(raw)
    num_slices = total_samples // samples_per_slice

    # Slice and save
    for i in range(num_slices):
        start_sample = i * samples_per_slice
        end_sample = start_sample + samples_per_slice

        # include_tmax=False makes the window half-open, so adjacent slices
        # never share a sample.
        sliced_raw = raw.copy().crop(
            tmin=start_sample / sfreq,
            tmax=end_sample / sfreq,
            include_tmax=False,
        )

        slice_output_path = os.path.join(output_dir, f"{base_name}_{i + 1}.fif")
        sliced_raw.save(slice_output_path, overwrite=True)
        print(f"Slice {i + 1} saved as: {slice_output_path}")

    # Handle the remaining data (if any)
    remaining_samples = total_samples - num_slices * samples_per_slice
    if remaining_samples > 0:
        start_sample = num_slices * samples_per_slice
        remaining_raw = raw.copy().crop(tmin=start_sample / sfreq)
        # Use the next free index; a fixed "_2" suffix would overwrite the
        # second full slice whenever the recording holds two or more slices.
        remaining_filename = f"{base_name}_{num_slices + 1}.fif"
        remaining_output_path = os.path.join(output_dir, remaining_filename)
        remaining_raw.save(remaining_output_path, overwrite=True)
        print(f"Remaining data saved as: {remaining_output_path}")

In [None]:
# Run the preprocess-and-slice pipeline on a single recording; output_dir is
# left at its default ("split_fif/mdd").
slice_and_save_eeg("dataset_s/mdd/sub-88000489/ses-1/eeg/sub-88000489_ses-1_task-restEC_eeg.vhdr")

In [None]:
import os

def process_dir(source_dir,output_dir):
    """Walk the entries of ``source_dir`` and build each one's EEG sub-path.

    NOTE(review): this looks like an unfinished stub. The path built for each
    entry is never used, it is not joined onto ``source_dir``, and
    ``output_dir`` is never read, so the function has no effect beyond listing
    the directory. Confirm the intended per-subject processing.

    Parameters
    ----------
    source_dir : str
        Directory whose entries are listed.
    output_dir : str
        Currently unused.
    """
    for entry in os.listdir(source_dir):
        # Path relative to source_dir; computed but not consumed yet.
        fif_path = "{}/ses-1/eeg/".format(entry)

# Invoke process_dir on "dataset_s/mdd".
# NOTE(review): process_dir as defined in this cell only builds a path string
# per directory entry and discards it, and never uses its output_dir argument,
# so this call currently writes nothing — confirm the intended behaviour.
process_dir("dataset_s/mdd","dataset_s/mdd1")

['sub-88025641', 'sub-88066325', 'sub-88035677', 'sub-88057769', 'sub-88045353', 'sub-88068665', 'sub-88021321', 'sub-88020557', 'sub-88045713', 'sub-88035501', 'sub-88072265', 'sub-88025685', 'sub-88020873', 'sub-88048325', 'sub-88071949', 'sub-88030641', 'sub-88068885', 'sub-88032973', 'sub-88021277', 'sub-88018937', 'sub-88046437', 'sub-88005937', 'sub-88073797', 'sub-88070285', 'sub-88044681', 'sub-88046841', 'sub-88038069', 'sub-88049813', 'sub-88026233', 'sub-88073433', 'sub-88010709', 'sub-88065197', 'sub-88018989', 'sub-88022001', 'sub-88021101', 'sub-88000533', 'sub-88044501', 'sub-88067225', 'sub-88028253', 'sub-88069737', 'sub-88030281', 'sub-88025061', 'sub-88062997', 'sub-88072081', 'sub-88008681', 'sub-88029833', 'sub-88029789', 'sub-88025597', 'sub-88047517', 'sub-88047245', 'sub-88010753', 'sub-88075769', 'sub-88061961', 'sub-88017821', 'sub-88064565', 'sub-88024833', 'sub-88056785', 'sub-88000489', 'sub-88001661', 'sub-88024697', 'sub-88025281', 'sub-88039773', 'sub-88