This notebook automates the conversion of BrainVision VHDR files to EDF files.

The channels labeled as "bad" in the TSV files are excluded before exporting to EDF.

In [None]:
# Mount Google Drive so that the dataset stored there is reachable from the Colab runtime
from google.colab import drive
drive.mount('/content/gdrive')

# Root folder of the dataset on the mounted drive. Keep the trailing slash: the
# output folder is later derived from this value by plain string concatenation.
main_folder = "/content/gdrive/My Drive/Ext-val/"

In [None]:
!pip install mne
!!pip install EDFlib-Python

import mne

In [None]:
from datetime import datetime

def format_datetime(raw_brainvision_file):
    """
    Normalize the measurement date of a recording to whole-second UTC.

    The date is read from ``info['meas_date']``, converted to UTC, stripped of
    sub-second precision and written back with ``set_meas_date``.

    Parameters:
    - raw_brainvision_file (mne.io.Raw): MNE Raw object whose measurement date is normalized.

    Returns:
    - raw_brainvision_file (mne.io.Raw): The same object that was passed in. Its
      measurement date is left untouched when it was not set (None).
    """
    # Local import: the surrounding cell only imports the `datetime` class
    from datetime import timezone

    # Fetch the start date and time from the raw data's info dictionary
    original_date_time = raw_brainvision_file.info['meas_date']

    # Nothing to normalize when the recording carries no measurement date
    if original_date_time is None:
        print("No measurement date available; leaving it unset")
        return raw_brainvision_file

    # Work on the datetime object itself instead of round-tripping through str():
    # the string form changes shape when microseconds are present, which made
    # the previous fixed-format parsing fail.
    if original_date_time.tzinfo is None:
        # NOTE(review): a naive datetime is assumed to already be in UTC — confirm
        utc_date_time = original_date_time.replace(tzinfo=timezone.utc)
    else:
        utc_date_time = original_date_time.astimezone(timezone.utc)

    # Drop sub-second precision, as the previous "%Y-%m-%dT%H:%M:%S" formatting did
    utc_date_time = utc_date_time.replace(microsecond=0)

    iso8601_date_time = utc_date_time.strftime("%Y-%m-%dT%H:%M:%S")
    print(original_date_time, "converted to", iso8601_date_time)

    # set_meas_date is documented to take a timezone-aware UTC datetime
    # (or float / tuple / None), so pass the datetime rather than its string form
    raw_brainvision_file.set_meas_date(utc_date_time)

    return raw_brainvision_file

In [None]:
import os
import pandas as pd

def find_bad_channels(ieeg_dir):
    """
    Collect the channels marked as bad for every ieeg recording below a directory.

    The directory is searched together with all of its subdirectories, so either a
    single ieeg folder or a whole dataset root can be passed. For each ``.eeg``
    file, the matching ``*_channels.tsv`` file is looked up in the same folder.

    Parameters:
    - ieeg_dir (str): Path to a directory containing ieeg files, directly or in subdirectories.

    Returns:
    - bad_channels_dict (dict): Dictionary with "<sub_id>_<run_id>" strings as keys and
      lists of bad channel names as values. Recordings without a matching
      channels.tsv file get no entry.
    """
    bad_channels_dict = {}

    for current_dir, _, file_names in os.walk(ieeg_dir):
        # Sort for a reproducible result; the listing order is otherwise arbitrary
        file_names = sorted(file_names)
        channels_tsv_names = [name for name in file_names if name.endswith("_channels.tsv")]

        for ieeg_file_name in file_names:
            if not ieeg_file_name.endswith(".eeg"):
                continue

            # Extract sub_id and run_id from the ieeg filename
            name_parts = ieeg_file_name.split('_')
            if len(name_parts) < 2:
                # Name has no '_'-separated fields, so there is no run field to read
                continue
            sub_id = name_parts[0]
            run_id = name_parts[-2]
            print(sub_id, run_id)

            for channels_tsv_name in channels_tsv_names:
                # Compare whole '_'-separated fields: a substring test would let
                # "run-1" match a "run-10" channels file
                tsv_parts = channels_tsv_name.split('_')
                if sub_id not in tsv_parts or run_id not in tsv_parts:
                    continue

                channels_tsv_path = os.path.join(current_dir, channels_tsv_name)

                # Load channel information from channels.tsv
                channels_df = pd.read_csv(channels_tsv_path, delimiter='\t')

                # Extract bad channels; a file without a 'status' column marks none
                if 'status' in channels_df.columns:
                    bad_channels = channels_df[channels_df['status'] == 'bad']['name'].tolist()
                else:
                    bad_channels = []

                # Store bad channels in the dictionary
                bad_channels_dict[f"{sub_id}_{run_id}"] = bad_channels
                break  # Stop at the first matching channels.tsv file

    return bad_channels_dict

def exclude_bad_channels(raw, bad_channels):
    """
    Drop the given channels from a recording.

    Thin wrapper that forwards to ``raw.drop_channels``.

    Parameters:
    - raw (mne.io.Raw): MNE Raw object to remove channels from.
    - bad_channels (list): Names of the channels to remove.

    Returns:
    - raw (mne.io.Raw): Whatever ``raw.drop_channels`` returns.
      NOTE(review): MNE documents this as the same, modified instance — confirm
      if callers rely on the input being left intact.
    """
    return raw.drop_channels(bad_channels)

In [None]:
def find_annotations(sub_id, run_id, annotations_path="/content/gdrive/My Drive/Ext-val/onset_annotations.xlsx"):
    """
    Find the seizure onset and offset annotations from the Excel file.

    Rows with a value of 1 in the 'UNCLEAR' column are ignored. If several rows
    match, the first one is used.

    Parameters:
    - sub_id (str): Subject ID, compared against the 'SUB_ID' column.
    - run_id (str): Run ID, compared against the 'RUN_ID' column.
    - annotations_path (str): Path to the Excel table. Defaults to the
      annotation table stored in the notebook's Google Drive folder.

    Returns:
    - onset: Value of 'ONSET_TIME' for the matching row, or None if no row matches.
    - offset: Value of 'OFFSET_TIME' for the matching row, or None if no row matches.
      NOTE(review): both are presumably times in seconds — confirm against the table.
    """
    # Load the DataFrame from the Excel file
    df = pd.read_excel(annotations_path)

    # Discard rows with a value of 1 in the 'UNCLEAR' column
    df = df[df['UNCLEAR'] != 1]

    # Find the row with the specified SUB_ID and RUN_ID
    selected_row = df[(df['SUB_ID'] == sub_id) & (df['RUN_ID'] == run_id)]

    # No usable annotation for this recording
    if selected_row.empty:
        return None, None

    # Extract 'ONSET_TIME' and 'OFFSET_TIME' from the first matching row
    onset = selected_row['ONSET_TIME'].values[0]
    offset = selected_row['OFFSET_TIME'].values[0]

    return onset, offset
    

In [None]:
def convert_vhdr_to_edf(vhdr_file, output_dir, bad_channels_dict):
    """
    Converts a BrainVision VHDR file to EDF format using MNE library,
    excluding bad channels found by find_bad_channels function.

    Annotations stored in the recording are replaced by 'Start' and 'End'
    markers at the seizure onset and offset returned by find_annotations. When
    no onset/offset is available, the file is exported without annotations.

    The output file is named "<subject_id>_<run_number>.edf", where subject_id is
    the first and run_number the second-to-last '_'-separated field of the VHDR
    file name. An existing file with that name is overwritten.

    Parameters:
    - vhdr_file (str): Path to the input VHDR file.
    - output_dir (str): Directory to save the output EDF file.
    - bad_channels_dict (dict): Dictionary with "<subject_id>_<run_number>" strings as
      keys and lists of bad channels as values. Files without an entry keep all channels.
    """
    # Load data using MNE. The data is preloaded into memory and this object is
    # used only here, so no extra copy of it is made.
    raw = mne.io.read_raw_brainvision(vhdr_file, preload=True)

    # Extract subject ID and run number from the filename
    file_name = os.path.basename(vhdr_file)
    name_parts = file_name.split('_')
    subject_id = name_parts[0]
    run_number = name_parts[-2]
    print(run_number)

    # Find seizure onset and offset times in the manually created Excel table
    seizure_onset, seizure_offset = find_annotations(subject_id, run_number)

    # Remove existing annotations
    raw.set_annotations(None)
    assert len(raw.annotations) == 0  # sanity check only

    if seizure_onset is None or seizure_offset is None:
        # find_annotations returns (None, None) for recordings that have no
        # usable row; building annotations from None would raise
        print(f"No seizure annotation found for {subject_id} {run_number}; exporting without Start/End markers")
    else:
        # Create and set zero-duration markers at seizure start and end
        annotations = mne.Annotations(
            onset=[seizure_onset, seizure_offset],
            duration=[0, 0],
            description=['Start', 'End'],
        )
        raw.set_annotations(annotations)

    # Extract bad channels for the current file
    bad_channels = bad_channels_dict.get(f"{subject_id}_{run_number}", [])

    # Exclude bad channels
    raw = exclude_bad_channels(raw, bad_channels)

    raw = format_datetime(raw)

    # Export to EDF
    edf_file = os.path.join(output_dir, f"{subject_id}_{run_number}.edf")
    mne.export.export_raw(edf_file, raw, fmt='edf', overwrite=True)

    print(f"Converted {vhdr_file} to {edf_file}")


def parse_and_convert_data(root_dir, output_dir, bad_channels_dict):
    """
    Convert every BrainVision VHDR file found in the presurgery ieeg folders.

    Each entry of root_dir is printed. For entries that are directories named
    "sub-***", the folder "ses-presurgery/ieeg" inside them is listed (when it
    exists) and every ".vhdr" file in it is handed to convert_vhdr_to_edf.
    Only that fixed folder is inspected; deeper levels are not searched.

    Parameters:
    - root_dir (str): Path to the main directory containing "sub-***" subdirectories.
    - output_dir (str): Directory to save the output EDF files.
    - bad_channels_dict (dict): Bad-channel lookup passed on unchanged to convert_vhdr_to_edf.
    """
    for entry_name in os.listdir(root_dir):
        print(entry_name)
        entry_path = os.path.join(root_dir, entry_name)

        # Only "sub-***" directories are subjects
        if not (entry_name.startswith("sub-") and os.path.isdir(entry_path)):
            continue

        # Subjects without a "ses-presurgery/ieeg" folder are skipped
        ieeg_path = os.path.join(entry_path, "ses-presurgery", "ieeg")
        if not os.path.isdir(ieeg_path):
            continue

        header_names = [name for name in os.listdir(ieeg_path) if name.endswith(".vhdr")]
        for header_name in header_names:
            convert_vhdr_to_edf(os.path.join(ieeg_path, header_name), output_dir, bad_channels_dict)

# The dataset root (main_folder) is the input; EDF files are written to an
# 'Ext-val-edf/' folder created inside it
input_directory = main_folder
output_directory = main_folder + 'Ext-val-edf/'

# Create output directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Find bad channels
# NOTE(review): the dataset root is passed here, while the VHDR files are read from
# sub-***/ses-presurgery/ieeg below — confirm that find_bad_channels locates the
# channels.tsv files from this starting point
bad_channels_result = find_bad_channels(input_directory)

# Iterate through ieeg files and convert to EDF
parse_and_convert_data(input_directory, output_directory, bad_channels_result)
