In [1]:
import joblib
import numpy as np
import os
import mne
import numpy as np
from scipy.stats import skew, kurtosis
from mne.preprocessing import ICA
from mne.time_frequency import psd_array_welch
import pandas as pd
from scipy.fftpack import fft
import pywt  # For wavelet transform

In [2]:
import numpy as np
from scipy.fftpack import fft, ifft, fftfreq

def morlet_wavelet(t, f, sigma=1.0):
    return np.exp(2j * np.pi * f * t) * np.exp(-t**2 / (2 * sigma**2))

def manual_morlet_transform(data, scales, fs=1.0):
    t = np.arange(len(data)) / fs
    transformed = []
    for scale in scales:
        wavelet = morlet_wavelet(t - np.mean(t), scale)
        wavelet /= np.linalg.norm(wavelet)  # Normalize wavelet
        convolved = ifft(fft(data) * fft(wavelet)).real
        transformed.append(convolved)
    return transformed


In [3]:
def extract_channel_features(raw, condition, fmin=0.5, fmax=50):
    # Select only EEG channels
    raw.pick('eeg')
    data = raw.get_data()
    channel_names = raw.ch_names
    features = {}  # Store condition as 0 for EO, 1 for EC
    scales = np.linspace(2, 30, 10)  # Covers 8-30 Hz range (Alpha & Beta)
    fs = raw.info['sfreq'] # Sampling frequency
    # condition_prefix = 'eo' if condition == 0 else 'ec'
    
    # Time-domain features
    for i, ch in enumerate(channel_names):
        key_prefix = f'{condition}_{ch.lower()}'
        features[f'{key_prefix}_mean'] = np.mean(data[i])
        features[f'{key_prefix}_variance'] = np.var(data[i])
        features[f'{key_prefix}_skewness'] = skew(data[i])
        features[f'{key_prefix}_kurtosis'] = kurtosis(data[i])
        features[f'{key_prefix}_peak_to_peak'] = np.ptp(data[i])

        # Fourier Transform (FFT)
        fft_values = np.abs(fft(data[i]))
        features[f'{key_prefix}_fft_mean'] = np.mean(fft_values)
        features[f'{key_prefix}_fft_std'] = np.std(fft_values)
        features[f'{key_prefix}_fft_max'] = np.max(fft_values)

        # Wavelet Transform (DWT) using Daubechies wavelet (db4)
        coeffs = manual_morlet_transform(data[i], scales, fs)
        
        # Compute wavelet energy
        wavelet_energy = sum(np.sum(np.square(c)) for c in coeffs)
        features[f'{key_prefix}_wavelet_energy'] = wavelet_energy

    # # Wavelet Energy
    #     wavelet_energy = sum(np.sum(np.square(c)) for c in coeffs)
    #     features[f'{key_prefix}_wavelet_energy'] = wavelet_energy
        
        wavelet_entropy = 0
        for c in coeffs:
            c = c[np.isfinite(c)]
            c_norm = c / (np.sum(np.abs(c)) + 1e-10)

            c_norm = c[c > 0]  # Remove zero or negative values to avoid log errors
            if len(c_norm) > 0:
                wavelet_entropy += -np.sum(c_norm * np.log2(c_norm))
            else:
                wavelet_entropy += 0  # If empty, set entropy to 0
        features[f'{key_prefix}_wavelet_entropy'] = wavelet_entropy
    
    # Get the sampling frequency
    sfreq = raw.info['sfreq']

    # Compute the Nyquist frequency (maximum frequency we can analyze)
    nyquist_freq = sfreq / 2

    # Ensure fmax does not exceed Nyquist frequency
    adjusted_fmax = min(fmax, nyquist_freq)
    print(f"Sampling Frequency: {sfreq} Hz, Nyquist Frequency: {nyquist_freq} Hz, Adjusted fmax: {fmax} Hz")
    # Check if fmin is valid
    if fmin >= adjusted_fmax:
        raise ValueError(f"Invalid frequency range: fmin={fmin} is not less than fmax={adjusted_fmax}")



    n_times = raw.n_times  # Get the actual number of time points in the signal
    n_fft = min(2048, n_times)  # Ensure n_fft does not exceed signal length


    # Frequency-domain features using PSD
    psd = raw.compute_psd(method='welch', fmin=fmin, fmax=fmax, n_fft=n_fft)
    psd_data = psd.get_data()
    freqs = psd.freqs
    psd_df = pd.DataFrame(psd_data, columns=freqs, index=channel_names)

    bands = {'delta': (0.5, 4), 'theta': (4, 8), 'slow_alpha': (6, 9), 'alpha': (8, 12),
             'beta': (12, 30), 'gamma': (30, 50)}

    for band, (low, high) in bands.items():
        band_power = psd_df.loc[:, (freqs >= low) & (freqs <= high)].mean(axis=1)
        for ch in channel_names:
            key_prefix = f'{condition}_{ch.lower()}'
            features[f'{key_prefix}_{band}_power'] = band_power[ch]

    # Frontal Alpha Asymmetry (F3-F4)
    if 'F3' in channel_names and 'F4' in channel_names:
        features[f'{condition}f3_f4_alpha_asymmetry'] = features[f'{condition}_f4_alpha_power'] - features[f'{condition}_f3_alpha_power']

    # Convert features to DataFrame
    features_df = pd.DataFrame([features])

    return features_df


In [28]:
def process_and_combine(eo_file_path, ec_file_path, output_file):
    all_features = []
    eo=False
    ec=False
    # Process EO file
    try:
        raw_eo = mne.io.read_raw_brainvision(eo_file_path)
        features_eo = extract_channel_features(raw_eo,"ec")
        #features_eo['condition'] = 'EO'
        all_features.append(features_eo)
        eo=True
    except Exception as e:
        print(f"Error loading file: {e}")
        return None
    

    # Process EC file
    raw_ec = mne.io.read_raw_brainvision(ec_file_path)
    features_ec = extract_channel_features(raw_ec,"eo")
    #features_ec['condition'] = 'EC'
    all_features.append(features_ec)
    ec=True
    # Check if both EO and EC files were processed
    
    # Combine EO and EC features
    if eo and ec:
        combined_features = pd.concat(all_features,axis=1)
    # print("*****************************",combined_features.shape,"***********************************")
    # out_path = (out_dir,output_file)
    # Save combined features to a single CSV file
        combined_features.to_csv(output_file,index=False)
        print(f"Features successfully saved to {output_file}")
    # return combined_features

In [None]:
# ## DEBUG code
# # raw_test = mne.io.read_raw_fif("/home/admincit/Desktop/Team_4/split_fif/mdd/sub-88017137_ses-1_task-restEO_eeg_2.fif", preload=True)
# raw_test = mne.io.read_raw_fif("/home/admincit/Desktop/Team_4/Shap/sub-88000489_ses-1_task-restEC_eeg_1_eeg.fif", preload=True)
# # print(f"Samples: {raw_test.n_times}, Duration: {raw_test.n_times / raw_test.info['sfreq']} sec")
# print(f"_2.fif Duration: {raw_test.n_times / raw_test.info['sfreq']} sec, Samples: {raw_test.n_times}")

Opening raw data file /home/admincit/Desktop/Team_4/Shap/sub-88000489_ses-1_task-restEC_eeg_1_eeg.fif...
Isotrak not found
    Read a total of 1 projection items:
        Average EEG reference (1 x 29)  idle
    Range : 0 ... 29999 =      0.000 ...    59.998 secs
Ready.
Reading 0 ... 29999  =      0.000 ...    59.998 secs...
_2.fif Duration: 60.0 sec, Samples: 30000


# single file usage

In [None]:
process_and_combine("../dataset_s/other/sub-19737061/ses-1/eeg/sub-19737061_ses-1_task-restEO_eeg.vhdr","../dataset_s/other/sub-19737061/ses-1/eeg/sub-19737061_ses-1_task-restEC_eeg.vhdr","preprocessed.csv")

Extracting parameters from ../dataset_s/other/sub-19681349/ses-1/eeg/sub-19681349_ses-1_task-restEO_eeg.vhdr...
Setting channel info structure...
Sampling Frequency: 500.0 Hz, Nyquist Frequency: 250.0 Hz, Adjusted fmax: 50 Hz
Effective window size : 4.096 (s)
Extracting parameters from ../dataset_s/other/sub-19681349/ses-1/eeg/sub-19681349_ses-1_task-restEC_eeg.vhdr...
Setting channel info structure...
Sampling Frequency: 500.0 Hz, Nyquist Frequency: 250.0 Hz, Adjusted fmax: 50 Hz
Effective window size : 4.096 (s)
Features successfully saved to preprocessed.csv


In [None]:
process_and_combine("/home/admincit/Desktop/Team_4/split_fif/mdd/sub-88000489_ses-1_task-restEC_eeg_1_eeg.fif","/home/admincit/Desktop/Team_4/split_fif/mdd/sub-88000489_ses-1_task-restEO_eeg_1_eeg.fif","preprocessed.csv")

Opening raw data file /home/admincit/Desktop/Team_4/split_fif/mdd/sub-88000489_ses-1_task-restEC_eeg_1_eeg.fif...
Isotrak not found
    Read a total of 1 projection items:
        Average EEG reference (1 x 29)  idle
    Range : 0 ... 29999 =      0.000 ...    59.998 secs
Ready.
Sampling Frequency: 500.0 Hz, Nyquist Frequency: 250.0 Hz, Adjusted fmax: 50 Hz
Effective window size : 4.096 (s)
Opening raw data file /home/admincit/Desktop/Team_4/split_fif/mdd/sub-88000489_ses-1_task-restEO_eeg_1_eeg.fif...
Isotrak not found
    Read a total of 1 projection items:
        Average EEG reference (1 x 29)  idle
    Range : 0 ... 29999 =      0.000 ...    59.998 secs
Ready.
Sampling Frequency: 500.0 Hz, Nyquist Frequency: 250.0 Hz, Adjusted fmax: 50 Hz
Effective window size : 4.096 (s)
Features successfully saved to preprocessed.csv


# Function To process Folder

In [5]:
import os

def process_folder(source_folder, destination_folder):
    """
    Processes all pairs of EO and EC files in the source folder and saves the combined features to CSV files in the destination folder.

    Args:
        source_folder (str): Path to the folder containing EEG files.
        destination_folder (str): Path to the folder where CSV files will be saved.
    """
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    files = os.listdir(source_folder)
    
    # Collecting EC and EO files
    ec_files = sorted([f for f in files if "restEC_eeg_1_eeg.fif" in f])
    eo_files = sorted([f for f in files if "restEO_eeg_1_eeg.fif" in f])

    for ec_file in ec_files:
        base_name = ec_file.replace("restEC_eeg_1_eeg.fif", "")
        eo_file = base_name + "restEO_eeg_1_eeg.fif"

        if eo_file in eo_files:
            ec_path = os.path.join(source_folder, ec_file)
            eo_path = os.path.join(source_folder, eo_file)
            output_path = os.path.join(destination_folder, base_name + "restcombined_eeg_1.csv")

            process_and_combine(ec_path, eo_path, output_path)
        else:
            print(f"Warning: No matching EO file found for {ec_file}")



In [29]:

def process_folder(source_folder, destination_folder):
    """
    Processes all pairs of EO and EC files in the source folder and saves the combined features to CSV files in the destination folder.

    Args:
        source_folder (str): Path to the folder containing EEG files.
        destination_folder (str): Path to the folder where CSV files will be saved.
    """
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    # Traverse the folder structure to get all subdirectories
    for subdir, _, files in os.walk(source_folder):
        print(f"Processing directory: {subdir}")
        # Collecting EC and EO files
        ec_files = sorted([f for f in files if "restEC_eeg.vhdr" in f])
        eo_files = sorted([f for f in files if "restEO_eeg.vhdr" in f])

        # Processing the EC and EO file pairs
        for ec_file in ec_files:
            base_name = ec_file.replace("restEC_eeg.vhdr", "")
            eo_file = base_name + "restEO_eeg.vhdr"

            if eo_file in eo_files:
                ec_path = os.path.join(subdir, ec_file)
                eo_path = os.path.join(subdir, eo_file)
                output_path = os.path.join(destination_folder, base_name + "restcombined_eeg.csv")
                # print(f"Processing {ec_file} and {eo_file}............................................")
                # process_and_combine(ec_path, eo_path, output_path)
                print(f"Processing {ec_file} and {eo_file}...")
                process_and_combine(ec_path, eo_path, output_path)
            else:
                print(f"Warning: No matching EO file found for {ec_file}")


In [31]:
def process_folder_from_file(folder_list_file, base_source_folder, destination_folder):
    """
    Processes all pairs of EO and EC files for each subject listed in the folder_list_file.
    Saves the combined features to CSV files in the destination folder.

    Args:
        folder_list_file (str): Path to the text file containing the list of subject IDs.
        base_source_folder (str): The base directory where the subject folders are located.
        destination_folder (str): Path to the folder where CSV files will be saved.
    """
    # Read the list of subject folders from the text file
    with open(folder_list_file, 'r') as f:
        subject_ids = [line.strip() for line in f.readlines()]

    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    # Iterate over each subject
    for subject_id in subject_ids:
        # Build the path to the subject's EEG directory
        subject_folder = os.path.join(base_source_folder, subject_id, 'ses-1', 'eeg')

        if not os.path.exists(subject_folder):
            print(f"Warning: EEG folder not found for {subject_id}")
            continue

        print(f"Processing subject: {subject_id} in directory {subject_folder}")

        # Get the list of files in the subject's EEG folder
        files = os.listdir(subject_folder)

        # Collecting EC and EO files
        ec_files = sorted([f for f in files if "restEC_eeg.vhdr" in f])
        eo_files = sorted([f for f in files if "restEO_eeg.vhdr" in f])

        # Processing the EC and EO file pairs
        for ec_file in ec_files:
            base_name = ec_file.replace("restEC_eeg.vhdr", "")
            eo_file = base_name + "restEO_eeg.vhdr"

            if eo_file in eo_files:
                ec_path = os.path.join(subject_folder, ec_file)
                eo_path = os.path.join(subject_folder, eo_file)
                output_path = os.path.join(destination_folder, base_name + "restcombined_eeg.csv")

                print(f"Processing {ec_file} and {eo_file}...")
                process_and_combine(ec_path, eo_path, output_path)
            else:
                print(f"Warning: No matching EO file found for {ec_file} in subject {subject_id}")


In [9]:
source_folder = "/home/admincit/Desktop/Team_4/split_fif/healthy"
destination_folder = "/home/admincit/Desktop/Team_4/split_fif/healthy_csv"
process_folder(source_folder, destination_folder)

In [None]:
txt_file = "../dataset_s/other/o.txt"
source_folder = "../dataset_s/other"
destination_folder = "../split_fif/other_csv"
process_folder_from_file(txt_file, source_folder, destination_folder)

In [30]:
source_folder = "../dataset_s/other"
destination_folder = "../split_fif/other_csv"
process_folder(source_folder, destination_folder)

Processing directory: ../dataset_s/other
Processing directory: ../dataset_s/other/sub-19704914
Processing directory: ../dataset_s/other/sub-19704914/ses-1
Processing directory: ../dataset_s/other/sub-19704914/ses-1/eeg
Processing sub-19704914_ses-1_task-restEC_eeg.vhdr and sub-19704914_ses-1_task-restEO_eeg.vhdr...
Extracting parameters from ../dataset_s/other/sub-19704914/ses-1/eeg/sub-19704914_ses-1_task-restEC_eeg.vhdr...
Setting channel info structure...
Sampling Frequency: 500.0 Hz, Nyquist Frequency: 250.0 Hz, Adjusted fmax: 50 Hz
Effective window size : 4.096 (s)
Extracting parameters from ../dataset_s/other/sub-19704914/ses-1/eeg/sub-19704914_ses-1_task-restEO_eeg.vhdr...
Setting channel info structure...
Sampling Frequency: 500.0 Hz, Nyquist Frequency: 250.0 Hz, Adjusted fmax: 50 Hz
Effective window size : 4.096 (s)
Features successfully saved to ../split_fif/other_csv/sub-19704914_ses-1_task-restcombined_eeg.csv
Processing directory: ../dataset_s/other/sub-19703068
Processing

LookupError: unknown encoding: UTF-19687321_ses-1_task-restEO_eeg.eeg