In [None]:
import pyxdf
import soundfile as sf
import numpy as np

In [None]:
def segment_modalities_optimized(marker_timestamps, modality_timestamps, modality_data):
    """
    Split a modality's samples into consecutive segments delimited by marker timestamps.

    Each marker is mapped to the index of the first modality sample whose timestamp is
    greater than or equal to the marker (``np.searchsorted`` with its default
    ``side='left'``), so the marker stream and the modality stream may have different
    sampling rates. Segment ``i`` holds the samples from marker ``i`` up to, but not
    including, marker ``i + 1``; the final segment runs from the last marker to the end
    of the data. Samples located before the first marker are not part of any segment.

    :param marker_timestamps: Timestamps of markers.
    :param modality_timestamps: Timestamps of the modality data. ``np.searchsorted``
        requires these to be sorted in ascending order.
    :param modality_data: Data of the modality to be segmented. Sliced along its first
        axis; assumed to be index-aligned with ``modality_timestamps`` (not checked).
    :return: A list with one segment per marker, each a slice of ``modality_data``.
        A marker at or beyond the end of the data yields an empty slice. An empty
        list is returned when there are no markers.
    """
    # Convert to numpy arrays for efficient computation
    marker_timestamps = np.asarray(marker_timestamps)
    modality_timestamps = np.asarray(modality_timestamps)

    # No markers means no segment boundaries; avoid indexing an empty array below.
    if marker_timestamps.size == 0:
        return []

    # Index of the first modality sample at or after each marker
    boundaries = np.searchsorted(modality_timestamps, marker_timestamps)

    # One segment between every pair of consecutive markers
    segments = [
        modality_data[start:stop]
        for start, stop in zip(boundaries[:-1], boundaries[1:])
    ]

    # Last segment: from the last marker to the end of the data stream. Slicing past
    # the end naturally gives an empty segment of the same type as the others.
    segments.append(modality_data[boundaries[-1]:])

    return segments

In [None]:
# Read the XDF recording: `data` is the collection of streams, `header` the
# second value returned by pyxdf.load_xdf.
data, header = pyxdf.load_xdf(
    'sub-12_task-words_run-001.xdf'
)

In [None]:
# Stream types the recording must contain, each exactly once (in any order).
data_types = ['EEG', 'Markers', 'Audio']
stream_types = [d['info']['type'][0] for d in data]

assert len(stream_types) == len(data_types), (
    f"expected {len(data_types)} streams, found {len(stream_types)}: {stream_types}"
)
# A plain membership test would accept e.g. three 'EEG' streams; require every
# expected type to appear exactly once so later per-type lookups cannot fail.
bad_types = [t for t in data_types if stream_types.count(t) != 1]
assert not bad_types, (
    f"stream types {bad_types} must each appear exactly once, found: {stream_types}"
)

In [None]:
# Pick the first stream of each type; a missing type raises IndexError.
marker_stream, eeg_stream, audio_stream = (
    [stream for stream in data if stream['info']['type'][0] == wanted_type][0]
    for wanted_type in ('Markers', 'EEG', 'Audio')
)

In [None]:
# Segment the audio samples at the marker timestamps.
# .squeeze() is applied to the audio time series before it is sliced.
s = segment_modalities_optimized(
    marker_timestamps=marker_stream['time_stamps'],
    modality_timestamps=audio_stream['time_stamps'],
    modality_data=audio_stream['time_series'].squeeze(),
)

In [1]:
def write_audio_data(audio_data_buffer, out_path='audio.wav', audio_fs=44100):
    """
    Write an audio buffer to a sound file via ``soundfile.write``.

    :param audio_data_buffer: Audio samples, passed unchanged to ``sf.write``.
    :param out_path: Path of the file to write.
    :param audio_fs: Sampling rate handed to ``sf.write``. Defaults to 44100, the
        value that was previously hard-coded.
        NOTE(review): confirm this matches the actual rate of the recorded audio.
    """
    sf.write(out_path, audio_data_buffer, audio_fs)