In [1]:
import pyxdf
import numpy as np
import subprocess
import os
from tqdm import tqdm

In [33]:
def segment_modalities_optimized(marker_timestamps, modality_timestamps, modality_data):
    """
    Split one modality's samples into consecutive segments delimited by markers.

    Segment ``i`` holds the samples whose timestamps lie in
    ``[marker_timestamps[i], marker_timestamps[i + 1])``; the final segment runs
    from the last marker to the end of the stream. Samples recorded before the
    first marker do not belong to any segment. Because only timestamps are
    compared, the marker stream and the modality may have different sampling rates.

    :param marker_timestamps: Timestamps of markers, in ascending order.
    :param modality_timestamps: Timestamps of the modality data, in ascending
        order (required by ``np.searchsorted``).
    :param modality_data: Data of the modality to be segmented. Must support
        ``len()`` and slicing; assumed to be aligned one-to-one with
        ``modality_timestamps``.
    :return: A list with one data segment per marker (slices of
        ``modality_data``). A trailing ``[]`` is used when the last marker lies
        at or beyond the end of the data. An empty list is returned when there
        are no markers.
    """
    # asarray avoids copying inputs that are already numpy arrays
    marker_timestamps = np.asarray(marker_timestamps)
    modality_timestamps = np.asarray(modality_timestamps)

    # Without markers there is nothing to delimit; returning early also avoids
    # indexing insert_points[-1] on an empty array below.
    if marker_timestamps.size == 0:
        return []

    # For each marker, the index of the first sample whose timestamp is >= the marker
    insert_points = np.searchsorted(modality_timestamps, marker_timestamps)

    # Every marker except the last: samples from this marker up to (excluding) the next one
    segments = [
        modality_data[start_index:end_index]
        for start_index, end_index in zip(insert_points[:-1], insert_points[1:])
    ]

    # Handle the last segment, from the last marker to the end of the data stream
    last_start = insert_points[-1]
    if last_start < len(modality_data):
        segments.append(modality_data[last_start:])
    else:
        # If the last marker is exactly at or beyond the end of the data, append an empty segment
        segments.append([])

    return segments

def split_video(input_file, time_segments, output_folder, overwrite=True):
    """
    Split a video into multiple re-encoded clips based on a list of time ranges using FFmpeg.

    One clip per entry of ``time_segments`` is written to
    ``<output_folder>/<input file name without extension>/<segment_name>.mp4``.

    Args:
    input_file (str): Path to the input video file.
    time_segments (list of tuples): List of (start_time, end_time, segment_name)
        tuples. Times are in seconds; segment_name is used as the clip's file name.
    output_folder (str): Parent folder for the output; a subfolder named after
        the input file is created inside it.
    overwrite (bool): When True (default) an existing clip is replaced. When
        False, segments whose clip already exists are skipped.

    Raises:
    RuntimeError: If FFmpeg exits with a non-zero status for at least one
        segment. All segments are attempted before the error is raised.
    """
    # Create the output folder if it doesn't exist
    file_name = os.path.splitext(os.path.basename(input_file))[0]
    output_folder = os.path.join(output_folder, file_name)
    os.makedirs(output_folder, exist_ok=True)

    failures = []

    # Process each segment
    for (start_time, end_time, segment_name) in tqdm(time_segments):
        output_path = os.path.join(output_folder, f'{segment_name}.mp4')
        if not overwrite and os.path.exists(output_path):
            continue
        command = [
            'ffmpeg',
            '-y',                         # Never prompt about overwriting; a prompt cannot be answered here
            '-i', input_file,             # Input file path
            '-ss', f'{start_time:.2f}',       # Start time of the segment in seconds
            '-to', f'{end_time:.2f}',         # End time of the segment in seconds
            '-c:v', 'libx264',        # Use H.264 codec
            '-preset', 'fast',            # Use fast encoding
            '-crf', '18',              # Constant Rate Factor (0-51, 0 - lossless, 23 - default, 51 - worst)
            output_path                   # Output file path
        ]
        # stdin is detached so FFmpeg can neither block on nor consume the caller's input.
        result = subprocess.run(
            command,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            # Keep only the tail of stderr: FFmpeg prints the actual error last.
            error_tail = result.stderr.decode(errors='replace').strip().splitlines()[-3:]
            failures.append(
                f"{segment_name} (exit code {result.returncode}): " + ' | '.join(error_tail)
            )

    # Report failures only after every segment was attempted, so one bad
    # segment does not prevent the remaining clips from being written.
    if failures:
        raise RuntimeError(
            f"FFmpeg failed for {len(failures)} segment(s) of {input_file}:\n" + '\n'.join(failures)
        )


In [31]:
# Folder layout: the XDF recording is read from EXP_ROOT, the source video from
# INPUT_ROOT, and the generated clips are written under OUTPUT_ROOT.
EXP_ROOT = "exp_data"
INPUT_ROOT = "input"
OUTPUT_ROOT = "output"
DATA_FILE = os.path.join(EXP_ROOT,"short-passive.xdf")
RAW_VIDEO = os.path.join(INPUT_ROOT,"one-repeat.mp4")
# Divisor that turns the video stream's sample values into seconds
# (presumably frame numbers at 30 fps — TODO confirm against the recording setup).
FRAME_RATE = 30.0
# `data` is iterated below as a list of per-stream dicts; `header` is not used in this notebook.
data, header = pyxdf.load_xdf(DATA_FILE)

In [4]:
# Pick the first stream of each required type; indexing with [0] raises
# IndexError when the recording contains no stream of that type.
marker_stream = [
    stream for stream in data
    if stream['info']['type'][0] == 'Markers'
][0]
video_stream = [
    stream for stream in data
    if stream['info']['type'][0] == 'videostream'
][0]
# eeg_stream = [d for d in data if d['info']['type'][0] == 'EEG'][0]

In [5]:
# Cut the video stream's samples into one chunk per marker.
segmented_video_frames = segment_modalities_optimized(
    marker_stream['time_stamps'],
    video_stream['time_stamps'],
    video_stream['time_series'].squeeze(),
)
markers = marker_stream['time_series']
print(f"Length of video segments: {len(segmented_video_frames)}, Length of Markers: {len(markers)}")
print(markers)

# One (start, end, marker label) tuple per segment; start and end are the
# segment's first and last sample values divided by FRAME_RATE.
# NOTE(review): an empty segment raises IndexError on frames[0] here.
segment_arr = [
    (frames[0] / FRAME_RATE, frames[-1] / FRAME_RATE, markers[idx][0])
    for idx, frames in enumerate(segmented_video_frames)
]
print(segment_arr)

Length of video segments: 8, Length of Markers: 8
[['range-black'], ['range-gray'], ['pre-stim-babycry'], ['stim-babycry'], ['poststim-babycry'], ['pre-stim-chewing'], ['stim-chewing'], ['poststim-chewing']]
[(40.8, 45.8, 'range-black'), (45.833333333333336, 50.766666666666666, 'range-gray'), (50.8, 53.8, 'pre-stim-babycry'), (53.833333333333336, 59.8, 'stim-babycry'), (59.833333333333336, 64.83333333333333, 'poststim-babycry'), (64.86666666666666, 67.83333333333333, 'pre-stim-chewing'), (67.86666666666666, 72.56666666666666, 'stim-chewing'), (72.6, 77.26666666666667, 'poststim-chewing')]


In [34]:
# Write one clip per marker segment below OUTPUT_ROOT.
split_video(
    input_file=RAW_VIDEO,
    time_segments=segment_arr,
    output_folder=OUTPUT_ROOT,
)

100%|██████████| 8/8 [00:20<00:00,  2.61s/it]


In [21]:
# Summarise every segment: boundaries and length, all in seconds.
for seg_start, seg_end, seg_label in segment_arr:
    seg_duration = seg_end - seg_start
    print(f"Start: {seg_start:.2f}, End: {seg_end:.2f}, Marker: {seg_label}, Duration: {seg_duration:.2f}")

Start: 40.80, End: 45.80, Marker: range-black, Duration: 5.00
Start: 45.83, End: 50.77, Marker: range-gray, Duration: 4.93
Start: 50.80, End: 53.80, Marker: pre-stim-babycry, Duration: 3.00
Start: 53.83, End: 59.80, Marker: stim-babycry, Duration: 5.97
Start: 59.83, End: 64.83, Marker: poststim-babycry, Duration: 5.00
Start: 64.87, End: 67.83, Marker: pre-stim-chewing, Duration: 2.97
Start: 67.87, End: 72.57, Marker: stim-chewing, Duration: 4.70
Start: 72.60, End: 77.27, Marker: poststim-chewing, Duration: 4.67
