In [8]:
import pyxdf
import numpy as np
import subprocess
import os
from tqdm import tqdm

In [9]:
def segment_modalities_optimized(marker_timestamps, modality_timestamps, modality_data):
    """
    Split a modality's samples into one segment per marker, accommodating different sampling rates.

    Segment ``i`` holds the samples starting at the first modality timestamp
    that is >= marker ``i`` and ending just before the first modality
    timestamp that is >= marker ``i + 1``. The final segment runs from the
    last marker to the end of ``modality_data``. Samples located before the
    first marker are not part of any segment.

    :param marker_timestamps: Timestamps of markers.
    :param modality_timestamps: Timestamps of the modality data. They are
        searched with ``np.searchsorted``, which expects ascending order.
    :param modality_data: Data of the modality to be segmented; any object
        supporting ``len()`` and slicing (list, numpy array, ...).
    :return: A list with one segment per marker. Every segment is a slice of
        ``modality_data`` (possibly empty, e.g. when a marker lies at or
        beyond the end of the data). An empty list is returned when there are
        no markers.
    """
    # Convert to numpy arrays for efficient computation
    marker_timestamps = np.asarray(marker_timestamps)
    modality_timestamps = np.asarray(modality_timestamps)

    # Without markers there is nothing to segment (and no "last marker" to index).
    if marker_timestamps.size == 0:
        return []

    # Index of the first modality sample at or after each marker
    insert_points = np.searchsorted(modality_timestamps, marker_timestamps)

    # Closing the boundary list with the data length lets the last segment be
    # produced by the same slicing as all the others; a marker at or past the
    # end of the data naturally yields an empty slice of the same type.
    boundaries = np.append(insert_points, len(modality_data))
    return [modality_data[start:end] for start, end in zip(boundaries[:-1], boundaries[1:])]

def split_video(input_file, time_segments, output_folder, overwrite=False):
    """
    Split a video into multiple parts based on a list of named time segments using FFmpeg.

    Each segment is re-encoded with libx264 and written to
    ``<output_folder>/<input file name without extension>/<segment_name>.mp4``.
    ``segment_name`` is used verbatim as the file name, so segments sharing a
    name map to the same output path.

    A failing FFmpeg invocation does not abort the loop; the failure is
    reported and the remaining segments are still processed.

    Args:
    input_file (str): Path to the input video file.
    time_segments (list of tuples): List of (start_time, end_time, segment_name)
        tuples. Times are in seconds and are passed to FFmpeg rounded to two
        decimals.
    output_folder (str): Folder to store the output segments.
    overwrite (bool): If True, existing output files are replaced (``-y``).
        If False (default), FFmpeg refuses to overwrite (``-n``) and the
        segment is reported as failed.
    """
    # Create the output folder if it doesn't exist
    file_name = os.path.splitext(os.path.basename(input_file))[0]
    output_folder = os.path.join(output_folder, file_name)
    os.makedirs(output_folder, exist_ok=True)

    overwrite_flag = '-y' if overwrite else '-n'

    # Process each segment
    for (start_time, end_time, segment_name) in tqdm(time_segments):
        output_path = os.path.join(output_folder, f'{segment_name}.mp4')
        command = [
            'ffmpeg',
            '-nostdin',                   # Never wait for interactive input (e.g. an overwrite prompt)
            overwrite_flag,               # Explicit overwrite policy instead of a prompt
            '-i', input_file,             # Input file path
            '-ss', f'{start_time:.2f}',       # Start time of the segment in seconds
            '-to',f'{end_time:.2f}',         # End time of the segment in seconds
            '-c:v', 'libx264',        # Use H.264 codec
            '-preset', 'fast',            # Use fast encoding
            '-crf', '18',              # Constant Rate Factor (0-51, 0 - lossless, 23 - default, 51 - worst)
            output_path                   # Output file path
        ]
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Output is captured, so surface failures explicitly instead of dropping them.
        if result.returncode != 0:
            error_tail = result.stderr.decode(errors='replace').strip().splitlines()[-3:]
            # tqdm.write keeps the message from garbling the progress bar.
            tqdm.write(
                f"ffmpeg failed for segment '{segment_name}' "
                f"(exit code {result.returncode}): " + ' | '.join(error_tail)
            )


In [10]:
# Directory layout (relative to the notebook's working directory)
EXP_ROOT = "exp_data"
INPUT_ROOT = "input"
OUTPUT_ROOT = "output"

# Recording to analyse and the raw video that is cut into per-marker clips
DATA_FILE = os.path.join(EXP_ROOT,"sub-441364_task-hearing_run-001.xdf")
# RAW_VIDEO must be defined here: the split_video call later in the notebook uses it.
RAW_VIDEO = os.path.join(INPUT_ROOT,"2024-05-18_01-08.mp4")

TIME_FACTOR = 1000000 # microseconds per second; values are divided by this to convert microseconds to seconds

# Load every stream of the recording together with the file header
data, header = pyxdf.load_xdf(DATA_FILE)

In [13]:
# Pick the marker stream by its declared type instead of by list position,
# so the selection does not depend on the order in which streams were loaded.
marker_stream = next((s for s in data if s['info']['type'] == ['Markers']), None)
if marker_stream is None:
    raise ValueError("No stream of type 'Markers' found in the loaded XDF data")

# `video_stream` is needed by the segmentation cells below.
# NOTE(review): assumes the recording contains exactly one non-marker stream
# and that it is the video stream — confirm against the actual recording.
other_streams = [s for s in data if s is not marker_stream]
if len(other_streams) != 1:
    other_names = [s['info']['name'] for s in other_streams]
    raise ValueError(
        f"Expected exactly one non-marker stream to use as video_stream, found: {other_names}"
    )
video_stream = other_streams[0]

marker_stream

{'info': defaultdict(list,
             {'name': ['HearingMarkerStream'],
              'type': ['Markers'],
              'channel_count': ['1'],
              'channel_format': ['string'],
              'source_id': ['hearingid2023'],
              'nominal_srate': ['0.000000000000000'],
              'version': ['1.100000000000000'],
              'created_at': ['54276.30560512500'],
              'uid': ['c25f7fe7-d293-4734-9605-d594718041b9'],
              'session_id': ['default'],
              'hostname': ['Anarghyas-MacBook-Pro.local'],
              'v4address': [None],
              'v4data_port': ['16572'],
              'v4service_port': ['16572'],
              'v6address': [None],
              'v6data_port': ['0'],
              'v6service_port': ['0'],
              'desc': [None],
              'stream_id': 2,
              'effective_srate': 0}),
 'footer': {'info': defaultdict(list,
              {'first_timestamp': ['54295.246030833'],
               'last_timesta

In [None]:
# Cut the video stream's samples into one segment per marker and compare the counts.
segmented_video_frames = segment_modalities_optimized(
    marker_timestamps=marker_stream['time_stamps'],
    modality_timestamps=video_stream['time_stamps'],
    modality_data=np.array(video_stream['time_series']).squeeze(),
)
markers = marker_stream['time_series']
print(
    f"Length of video segments: {len(segmented_video_frames)}, "
    f"Length of Markers: {len(markers)}"
)
print(markers)

In [None]:
# Display the marker labels (the marker stream's 'time_series') for inspection.
markers

In [None]:
# Spot check: show the video-stream samples of the segment at index 6
# (the segment that starts at the seventh marker).
segmented_video_frames[6]

In [None]:
# Cut the video stream's samples into one segment per marker.
# time_series is wrapped in np.array so .squeeze() works regardless of the container type.
segmented_video_frames = segment_modalities_optimized(marker_stream['time_stamps'], video_stream['time_stamps'], np.array(video_stream['time_series']).squeeze())
markers = marker_stream['time_series']
print(f"Length of video segments: {len(segmented_video_frames)}, Length of Markers: {len(markers)}")
print(markers)

# Build one (start_seconds, end_seconds, marker_label) tuple per segment from
# the first and last sample of the segment, scaled by TIME_FACTOR.
# A segment can be empty (e.g. a marker at or after the last video sample);
# it has no first/last sample, so it is skipped instead of raising IndexError.
segment_arr = []
for segment, marker in zip(segmented_video_frames, markers):
    if len(segment) == 0:
        print(f"Skipping marker {marker[0]!r}: no video samples fall in its segment")
        continue
    segment_arr.append((segment[0]/TIME_FACTOR, segment[-1]/TIME_FACTOR, marker[0]))
print(segment_arr)

In [None]:
# Cut the raw video into one clip per entry of segment_arr; clips are written to
# OUTPUT_ROOT/<video file name without extension>/<marker label>.mp4.
split_video(RAW_VIDEO, segment_arr, OUTPUT_ROOT)

In [None]:
# Print start, end, marker label and duration of every segment.
for start_s, end_s, marker_label in segment_arr:
    duration_s = end_s - start_s
    print(f"Start: {start_s:.2f}, End: {end_s:.2f}, Marker: {marker_label}, Duration: {duration_s:.2f}")