In [1]:
import cv2
import numpy as np

In [2]:
def crop_and_stack_videos(
    video1_path, video2_path, output_path,
    crop1_top, crop1_bottom, crop2_top, crop2_bottom,
    stack_mode='vertical',  # 'vertical' or 'horizontal'
    repeat_frame_index=0, repeat_count=5
):
    cap1 = cv2.VideoCapture(video1_path)
    cap2 = cv2.VideoCapture(video2_path)

    fps = int(cap1.get(cv2.CAP_PROP_FPS))
    frame_count = int(min(cap1.get(cv2.CAP_PROP_FRAME_COUNT),
                          cap2.get(cv2.CAP_PROP_FRAME_COUNT)))

    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not ret1 or not ret2:
        raise ValueError("Could not read frames from one or both videos.")

    # Sample cropped frames for size determination
    frame1_crop = frame1[crop1_top:frame1.shape[0]-crop1_bottom, :, :]
    frame2_crop = frame2[crop2_top:frame2.shape[0]-crop2_bottom, :, :]

    # Resize to match dimensions for stacking
    if stack_mode == 'vertical' and frame1_crop.shape[1] != frame2_crop.shape[1]:
        width = min(frame1_crop.shape[1], frame2_crop.shape[1])
        frame1_crop = cv2.resize(frame1_crop, (width, frame1_crop.shape[0]))
        frame2_crop = cv2.resize(frame2_crop, (width, frame2_crop.shape[0]))
    elif stack_mode == 'horizontal' and frame1_crop.shape[0] != frame2_crop.shape[0]:
        height = min(frame1_crop.shape[0], frame2_crop.shape[0])
        frame1_crop = cv2.resize(frame1_crop, (frame1_crop.shape[1], height))
        frame2_crop = cv2.resize(frame2_crop, (frame2_crop.shape[1], height))

    # Determine output video size
    if stack_mode == 'vertical':
        stacked_shape = (frame1_crop.shape[1], frame1_crop.shape[0] + frame2_crop.shape[0])
    else:
        stacked_shape = (frame1_crop.shape[1] + frame2_crop.shape[1], frame1_crop.shape[0])

    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, stacked_shape)

    cap1.set(cv2.CAP_PROP_POS_FRAMES, 0)
    cap2.set(cv2.CAP_PROP_POS_FRAMES, 0)

    for i in range(frame_count):
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()
        if not ret1 or not ret2:
            break

        frame1_crop = frame1[crop1_top:frame1.shape[0]-crop1_bottom, :, :]
        frame2_crop = frame2[crop2_top:frame2.shape[0]-crop2_bottom, :, :]

        if stack_mode == 'vertical':
            target_width = min(frame1_crop.shape[1], frame2_crop.shape[1])
            frame1_crop = cv2.resize(frame1_crop, (target_width, frame1_crop.shape[0]))
            frame2_crop = cv2.resize(frame2_crop, (target_width, frame2_crop.shape[0]))
            stacked = np.vstack((frame1_crop, frame2_crop))
        else:
            target_height = min(frame1_crop.shape[0], frame2_crop.shape[0])
            frame1_crop = cv2.resize(frame1_crop, (frame1_crop.shape[1], target_height))
            frame2_crop = cv2.resize(frame2_crop, (frame2_crop.shape[1], target_height))
            stacked = np.hstack((frame1_crop, frame2_crop))

        out.write(stacked)

        # Immediately insert repeated frame(s) after the specified index
        if i == repeat_frame_index:
            for _ in range(repeat_count):
                out.write(stacked)

    cap1.release()
    cap2.release()
    out.release()
    print(f"✅ Output saved to: {output_path}")


In [3]:
def crop_and_stack_videos(
    video1_path, video2_path, output_path,
    crop1_top, crop1_bottom, crop2_top, crop2_bottom,
    stack_mode='vertical',  # 'vertical' or 'horizontal'
    repeat_frame_index=0, repeat_count=5,
    output_fps=None,
    text1='', text2=''
):
    cap1 = cv2.VideoCapture(video1_path)
    cap2 = cv2.VideoCapture(video2_path)

    input_fps1 = cap1.get(cv2.CAP_PROP_FPS)
    input_fps2 = cap2.get(cv2.CAP_PROP_FPS)

    if abs(input_fps1 - input_fps2) > 0.1:
        print(f"⚠️ Warning: Input video FPS mismatch: {input_fps1:.2f} vs {input_fps2:.2f}")

    fps = output_fps if output_fps else int(min(input_fps1, input_fps2))
    frame_count = int(min(cap1.get(cv2.CAP_PROP_FRAME_COUNT),
                          cap2.get(cv2.CAP_PROP_FRAME_COUNT)))

    # Read one frame to determine size
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not ret1 or not ret2:
        raise ValueError("Could not read frames from one or both videos.")

    # Crop and resize
    frame1_crop = frame1[crop1_top:frame1.shape[0]-crop1_bottom, :, :]
    frame2_crop = frame2[crop2_top:frame2.shape[0]-crop2_bottom, :, :]

    if stack_mode == 'vertical':
        width = min(frame1_crop.shape[1], frame2_crop.shape[1])
        frame1_crop = cv2.resize(frame1_crop, (width, frame1_crop.shape[0]))
        frame2_crop = cv2.resize(frame2_crop, (width, frame2_crop.shape[0]))
        stacked_shape = (width, frame1_crop.shape[0] + frame2_crop.shape[0])
    else:
        height = min(frame1_crop.shape[0], frame2_crop.shape[0])
        frame1_crop = cv2.resize(frame1_crop, (frame1_crop.shape[1], height))
        frame2_crop = cv2.resize(frame2_crop, (frame2_crop.shape[1], height))
        stacked_shape = (frame1_crop.shape[1] + frame2_crop.shape[1], height)

    # Output writer
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, stacked_shape)

    cap1.set(cv2.CAP_PROP_POS_FRAMES, 0)
    cap2.set(cv2.CAP_PROP_POS_FRAMES, 0)

    def add_text(img, text, font_scale=1.5, thickness=5):
        if text == '':
            return img
        font = cv2.FONT_HERSHEY_SIMPLEX
        (text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, thickness)
        x = img.shape[1] - text_w - 10
        y = text_h + 10
        cv2.putText(img, text, (x, y), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
        return img

    for i in range(frame_count):
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()
        if not ret1 or not ret2:
            break

        frame1_crop = frame1[crop1_top:frame1.shape[0]-crop1_bottom, :, :]
        frame2_crop = frame2[crop2_top:frame2.shape[0]-crop2_bottom, :, :]

        # Resize
        if stack_mode == 'vertical':
            target_width = min(frame1_crop.shape[1], frame2_crop.shape[1])
            frame1_crop = cv2.resize(frame1_crop, (target_width, frame1_crop.shape[0]))
            frame2_crop = cv2.resize(frame2_crop, (target_width, frame2_crop.shape[0]))
        else:
            target_height = min(frame1_crop.shape[0], frame2_crop.shape[0])
            frame1_crop = cv2.resize(frame1_crop, (frame1_crop.shape[1], target_height))
            frame2_crop = cv2.resize(frame2_crop, (frame2_crop.shape[1], target_height))

        # Add text
        frame1_crop = add_text(frame1_crop, text1)
        frame2_crop = add_text(frame2_crop, text2)

        # Stack
        stacked = np.vstack((frame1_crop, frame2_crop)) if stack_mode == 'vertical' else np.hstack((frame1_crop, frame2_crop))

        out.write(stacked)

        if i == repeat_frame_index:
            for _ in range(repeat_count):
                out.write(stacked)

    cap1.release()
    cap2.release()
    out.release()
    print(f"✅ Output saved to: {output_path} at {fps} FPS")

In [23]:
crop_and_stack_videos(
    video1_path=r"C:\Users\dgaytanj\Documents\automate_boring_stuff\2d_pose_estimation\data\good_sprint_start_v2.mp4",
    video2_path=r"C:\Users\dgaytanj\Documents\automate_boring_stuff\2d_pose_estimation\data\poor_sprint_start_v2.mp4",
    output_path='stacked_output.mp4',
    crop1_top=250, crop1_bottom=100,
    crop2_top=250, crop2_bottom=100,
    stack_mode='vertical',      # or 'horizontal'
    repeat_frame_index=228,      # repeat frame 40
    repeat_count=20,            # repeat it 15 times
    # text1='10yd Split - 1.53s',
    # text2='10yd Split - 1.59s'
)

✅ Output saved to: stacked_output.mp4 at 29 FPS
