In [1]:
import pyxdf
import numpy as np
import subprocess
import os
from tqdm import tqdm

In [2]:
def segment_modalities_optimized(marker_timestamps, modality_timestamps, modality_data):
    """
    Split a modality's samples into one segment per marker, accommodating different sampling rates.

    Segment ``i`` starts at the first sample whose timestamp is at or after marker ``i`` and ends
    just before the first sample at or after marker ``i + 1``. The last segment runs from the last
    marker to the end of the data. Samples recorded before the first marker are not returned.

    :param marker_timestamps: Timestamps of markers.
    :param modality_timestamps: Timestamps of the modality data. Must be sorted in ascending order
        (a requirement of ``np.searchsorted``) and is expected to line up index-for-index with
        ``modality_data``.
    :param modality_data: Data of the modality to be segmented (any sliceable sequence).
    :return: A list with one segment per marker; an empty list when there are no markers. A segment
        is an empty slice of ``modality_data`` when no samples fall inside it.
    """
    # Convert to numpy arrays for efficient computation
    marker_timestamps = np.asarray(marker_timestamps)
    modality_timestamps = np.asarray(modality_timestamps)

    # No markers means no segments; without this guard the "last marker" lookup would fail.
    if marker_timestamps.size == 0:
        return []

    # Index of the first modality sample at or after each marker
    segment_starts = np.searchsorted(modality_timestamps, marker_timestamps)

    # Every segment stops where the next one starts; ``None`` lets the final slice run to the
    # end of the data (and yields an empty slice when the last marker is past the last sample).
    segment_stops = list(segment_starts[1:]) + [None]

    return [modality_data[start:stop] for start, stop in zip(segment_starts, segment_stops)]

def split_video(input_file, time_segments, output_folder, overwrite=True):
    """
    Split a video into multiple clips using FFmpeg, one clip per named time segment.

    Each clip is re-encoded with libx264 and written to
    ``<output_folder>/<input file name without extension>/<segment_name>.mp4``.

    Args:
    input_file (str): Path to the input video file.
    time_segments (list of tuples): List of (start_time, end_time, segment_name) tuples, with
        both times in seconds. ``segment_name`` becomes the clip's file name.
    output_folder (str): Folder to store the output segments.
    overwrite (bool): When True (default) existing clips are replaced; when False a segment
        whose output file already exists is skipped.

    Raises:
    RuntimeError: If FFmpeg exits with a non-zero status for any segment. Every segment is
        attempted before the error is raised.
    """
    # Create the output folder if it doesn't exist
    file_name = os.path.splitext(os.path.basename(input_file))[0]
    output_folder = os.path.join(output_folder, file_name)
    os.makedirs(output_folder, exist_ok=True)

    failures = []

    # Process each segment
    for (start_time, end_time, segment_name) in tqdm(time_segments):
        output_path = os.path.join(output_folder, f'{segment_name}.mp4')
        if not overwrite and os.path.exists(output_path):
            continue

        command = [
            'ffmpeg',
            '-y',                         # Never prompt about an existing output file
            '-i', input_file,             # Input file path
            '-ss', f'{start_time:.2f}',   # Start time of the segment in seconds
            '-to', f'{end_time:.2f}',     # End time of the segment in seconds
            '-c:v', 'libx264',            # Use H.264 codec
            '-preset', 'fast',            # Use fast encoding
            '-crf', '18',                 # Constant Rate Factor (0-51, 0 - lossless, 23 - default, 51 - worst)
            output_path                   # Output file path
        ]
        # stdin is detached so FFmpeg can never block waiting for keyboard input.
        result = subprocess.run(
            command,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            # Keep only the last stderr line: FFmpeg's full log is long, the tail names the error.
            stderr_lines = result.stderr.decode(errors='replace').strip().splitlines()
            reason = stderr_lines[-1] if stderr_lines else 'no error output'
            failures.append(f'{segment_name} (exit code {result.returncode}): {reason}')

    if failures:
        raise RuntimeError(
            f'FFmpeg failed for {len(failures)} segment(s) of {input_file}:\n' + '\n'.join(failures)
        )


In [11]:
# Folder layout: recordings are read from EXP_ROOT, raw videos from INPUT_ROOT,
# and the generated clips are written below OUTPUT_ROOT.
EXP_ROOT = "exp_data"
INPUT_ROOT = "input"
OUTPUT_ROOT = "output"

# Files processed by this notebook run.
DATA_FILE = os.path.join(EXP_ROOT, "sub-1_task-passive.xdf")
RAW_VIDEO = os.path.join(INPUT_ROOT, "2024-05-18_01-08.mp4")

# Divisor used to convert microsecond values to seconds.
TIME_FACTOR = 1_000_000

# Load every stream of the XDF recording together with the file header.
data, header = pyxdf.load_xdf(DATA_FILE)

Stream 1: Calculated effective sampling rate 26.4344 Hz is different from specified rate 30.0000 Hz.


In [4]:
# Pick the first stream of each required type; indexing [0] raises IndexError
# when the recording contains no stream of that type.
marker_candidates = [stream for stream in data if stream['info']['type'][0] == 'Markers']
marker_stream = marker_candidates[0]

video_candidates = [stream for stream in data if stream['info']['type'][0] == 'videostream']
video_stream = video_candidates[0]

# EEG is currently not used:
# eeg_stream = [d for d in data if d['info']['type'][0] == 'EEG'][0]

In [13]:
# Cut the video stream's per-frame values into one segment per marker.
# NOTE(review): the values are presumably frame timestamps in microseconds
# (they are divided by TIME_FACTOR below) - confirm against the recorder.
segmented_video_frames = segment_modalities_optimized(
    marker_stream['time_stamps'],
    video_stream['time_stamps'],
    video_stream['time_series'].squeeze(),
)
markers = marker_stream['time_series']
print(f"Length of video segments: {len(segmented_video_frames)}, Length of Markers: {len(markers)}")
print(markers)

# Build (start_seconds, end_seconds, marker_name) from the first and last value of each segment.
# A segment is empty when its marker has no video frames after it; such a segment has no
# first/last value, so it is skipped instead of raising IndexError.
segment_arr = []
for frames, marker in zip(segmented_video_frames, markers):
    if len(frames) == 0:
        print(f"Skipping marker {marker[0]!r}: no video frames fall inside its segment")
        continue
    segment_arr.append((frames[0] / TIME_FACTOR, frames[-1] / TIME_FACTOR, marker[0]))
print(segment_arr)

Length of video segments: 16, Length of Markers: 16
[['range-gray'], ['pre-stim-chewing'], ['stim-chewing'], ['poststim-chewing'], ['pre-stim-seawave'], ['stim-seawave'], ['poststim-seawave'], ['pre-stim-engine'], ['stim-engine'], ['poststim-engine'], ['pre-stim-babycry'], ['stim-babycry'], ['poststim-babycry'], ['pre-stim-typing'], ['stim-typing'], ['poststim-typing']]
[(18.821308, 21.752772, 'range-gray'), (21.786064, 24.750838, 'pre-stim-chewing'), (24.784136, 29.447832, 'stim-chewing'), (29.481152, 32.445896, 'poststim-chewing'), (32.479224, 35.410668, 'pre-stim-seawave'), (35.443988, 41.406844, 'stim-seawave'), (41.44016, 44.404912, 'poststim-seawave'), (44.43822, 47.403, 'pre-stim-engine'), (47.436304, 51.400436, 'stim-engine'), (51.43374, 54.365224, 'poststim-engine'), (54.398512, 57.363288, 'pre-stim-babycry'), (57.3966, 63.426112, 'stim-babycry'), (63.459384, 66.424152, 'poststim-babycry'), (66.457472, 69.422232, 'pre-stim-typing'), (69.455544, 73.652864, 'stim-typing'), (73.6

In [14]:
# Write one clip per marker segment below OUTPUT_ROOT.
split_video(input_file=RAW_VIDEO, time_segments=segment_arr, output_folder=OUTPUT_ROOT)

  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [00:42<00:00,  2.65s/it]


In [15]:
# One summary line per segment: boundaries, marker name and clip length in seconds.
for start, end, marker in segment_arr:
    duration = end - start
    print(f"Start: {start:.2f}, End: {end:.2f}, Marker: {marker}, Duration: {duration:.2f}")

Start: 18.82, End: 21.75, Marker: range-gray, Duration: 2.93
Start: 21.79, End: 24.75, Marker: pre-stim-chewing, Duration: 2.96
Start: 24.78, End: 29.45, Marker: stim-chewing, Duration: 4.66
Start: 29.48, End: 32.45, Marker: poststim-chewing, Duration: 2.96
Start: 32.48, End: 35.41, Marker: pre-stim-seawave, Duration: 2.93
Start: 35.44, End: 41.41, Marker: stim-seawave, Duration: 5.96
Start: 41.44, End: 44.40, Marker: poststim-seawave, Duration: 2.96
Start: 44.44, End: 47.40, Marker: pre-stim-engine, Duration: 2.96
Start: 47.44, End: 51.40, Marker: stim-engine, Duration: 3.96
Start: 51.43, End: 54.37, Marker: poststim-engine, Duration: 2.93
Start: 54.40, End: 57.36, Marker: pre-stim-babycry, Duration: 2.96
Start: 57.40, End: 63.43, Marker: stim-babycry, Duration: 6.03
Start: 63.46, End: 66.42, Marker: poststim-babycry, Duration: 2.96
Start: 66.46, End: 69.42, Marker: pre-stim-typing, Duration: 2.96
Start: 69.46, End: 73.65, Marker: stim-typing, Duration: 4.20
Start: 73.69, End: 76.52, 