In [1]:
from datetime import datetime
import cv2
import numpy as np
import os
import pytesseract
import re

In [2]:
# Load timestamps from files
def load_timestamps(file_path):
    """Read one floating-point timestamp per line from a text file.

    Blank or whitespace-only lines (for example an empty line at the end of
    the file) are skipped instead of aborting the whole load.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[float]: The parsed timestamps, in file order. Empty if the file
        contains no non-blank lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line cannot be converted with ``float``.
    """
    with open(file_path, "r") as f:
        # Iterate the file object directly; readlines() would only build a
        # throwaway list of every line first.
        stripped_lines = (line.strip() for line in f)
        timestamps = [float(text) for text in stripped_lines if text]
    return timestamps


def find_closest_frame(main_timestamp, secondary_timestamps):
    """Locate the secondary timestamp nearest to ``main_timestamp``.

    Args:
        main_timestamp: Reference timestamp to match against.
        secondary_timestamps: Sequence of candidate timestamps.

    Returns:
        tuple: ``(closest_timestamp, closest_index)`` where
        ``closest_timestamp`` is the element of ``secondary_timestamps`` with
        the smallest absolute difference to ``main_timestamp`` and
        ``closest_index`` is its position as returned by NumPy's argmin.
    """
    # Signed offset of every candidate from the reference timestamp.
    offsets = np.array(secondary_timestamps) - main_timestamp
    # Position of the candidate with the smallest magnitude offset.
    nearest = np.absolute(offsets).argmin()
    return secondary_timestamps[nearest], nearest


# Show two frames side by side
def show_frames(frame1, frame2):
    """Display ``frame1`` and ``frame2`` next to each other in one window.

    The two frames are joined horizontally with ``np.hstack`` and handed to
    ``cv2.imshow`` under the window title "Video Sync Viewer".

    NOTE(review): no ``cv2.waitKey`` call is made here; presumably the caller
    is expected to pump GUI events itself - confirm before relying on this.
    """
    cv2.imshow("Video Sync Viewer", np.hstack([frame1, frame2]))

In [3]:
def extract_timestamp(frame, x, y, w, h):
    """OCR a timestamp from a rectangular region of ``frame``.

    The frame is converted to grayscale, binarised (inverted, Otsu), cropped
    to the requested rectangle and passed to Tesseract with a whitelist of
    digits and the colon. The recognised text is parsed with the
    ``"%M:%S:%f"`` format.

    Args:
        frame: Image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        x: Left edge of the region of interest, in pixels.
        y: Top edge of the region of interest, in pixels.
        w: Width of the region of interest, in pixels.
        h: Height of the region of interest, in pixels.

    Returns:
        datetime | None: The parsed value (only minute, second and fractional
        second come from the text; the remaining fields are ``strptime``
        defaults), or ``None`` when the region does not overlap the frame or
        the recognised text does not match the expected format.
    """
    CONFIG = r"--psm 6 -c tessedit_char_whitelist=0123456789:"
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU OpenCV chooses the threshold itself, so the 150 passed
    # here is not the value actually applied.
    thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    roi = thresh[y : y + h, x : x + w]

    # Slicing beyond the frame edge silently yields an empty array; there is
    # nothing for Tesseract to read, so report it and use the same "no
    # timestamp" result as an unparsable reading.
    if roi.size == 0:
        print(f"Timestamp region x={x}, y={y}, w={w}, h={h} is outside the frame")
        return None

    text = pytesseract.image_to_string(roi, config=CONFIG)
    # Drop every character that is not a digit, colon, hyphen or whitespace,
    # then trim whitespace from both ends.
    text = re.sub(r"[^0-9:\-\s]", "", text).strip()

    # Parse the cleaned text; anything not shaped like minutes:seconds:fraction
    # is reported and treated as "no timestamp".
    try:
        return datetime.strptime(text, "%M:%S:%f")
    except ValueError:
        print(f"Invalid timestamp: {text}")
        return None

In [4]:
# Step through the main video frame by frame, logging RTCP and OCR time deltas
def main_loop(
    main_video,
    sub_video,
    main_timestamps,
    sub_timestamps,
    ocr_out,
    rtcp_out,
    start_frame_idx=0,
    main_roi=(1910, 1080, 1020, 250),
    sub_roi=(910, 1080, 1100, 250),
):
    """Walk the main video frame by frame and log time deltas to the secondary video.

    For every main frame, the secondary frame whose timestamp is closest is
    looked up with ``find_closest_frame``. Two deltas are produced:

    * the difference between the two file-provided timestamps, written to
      ``rtcp_out`` for every frame, and
    * the difference between the timestamps OCR-ed from both frames with
      ``extract_timestamp``, written to ``ocr_out`` only when both readings
      succeeded.

    Each output line has the form ``index: main: secondary: delta``.

    Args:
        main_video: Opened ``cv2.VideoCapture`` for the main video.
        sub_video: Opened ``cv2.VideoCapture`` for the secondary video.
        main_timestamps: Per-frame timestamps of the main video, indexed by
            frame number.
        sub_timestamps: Per-frame timestamps of the secondary video.
        ocr_out: Object with a ``write`` method receiving the OCR delta lines.
        rtcp_out: Object with a ``write`` method receiving the RTCP delta lines.
        start_frame_idx: Index of the first main frame to process.
        main_roi: ``(x, y, w, h)`` rectangle of the timestamp in the main frame.
        sub_roi: ``(x, y, w, h)`` rectangle of the timestamp in the secondary
            frame.

    Returns:
        list[float]: The OCR time deltas, in seconds, for the frames where both
        OCR readings succeeded.

    Side effects:
        Both captures are released and all OpenCV windows are destroyed before
        returning, also when an exception interrupts the loop.
    """
    main_frame_idx = start_frame_idx
    time_deltas = []

    try:
        # Stop when the capture closes or when there is no timestamp left for
        # the next frame (a video longer than its timestamp list must not
        # raise IndexError).
        while main_video.isOpened() and main_frame_idx < len(main_timestamps):
            # Set the video frame index based on the current position
            main_video.set(cv2.CAP_PROP_POS_FRAMES, main_frame_idx)

            # Read the main frame
            ret_main, main_frame = main_video.read()
            if not ret_main:
                break

            # Get the timestamp of the current main frame
            main_timestamp = main_timestamps[main_frame_idx]

            # Find the corresponding frame in the secondary video
            sub_timestamp, sub_timestamp_idx = find_closest_frame(
                main_timestamp, sub_timestamps
            )
            # The index comes back as a NumPy integer; hand OpenCV a plain int.
            sub_video.set(cv2.CAP_PROP_POS_FRAMES, int(sub_timestamp_idx))
            ret_sub, sub_frame = sub_video.read()
            if not ret_sub:
                break

            # Optional visual check: show_frames(main_frame, sub_frame)

            # Calculate the time delta between the file-provided timestamps
            rtcp_time_delta = main_timestamp - sub_timestamp
            print(
                f"Main frame timestamp: {main_timestamp}, Secondary frame timestamp: {sub_timestamp}"
            )
            print(f"RTCP time delta: {rtcp_time_delta}")
            # write index, main frame timestamp, secondary frame timestamp, time delta
            rtcp_out.write(
                f"{main_frame_idx}: {main_timestamp}: {sub_timestamp}: {rtcp_time_delta}\n"
            )

            # Extract the on-screen timestamps from both frames
            main_ocr_timestamp = extract_timestamp(main_frame, *main_roi)
            secondary_ocr_timestamp = extract_timestamp(sub_frame, *sub_roi)

            # Print the timestamps
            print("Main frame timestamp:", main_ocr_timestamp)
            print("Secondary frame timestamp:", secondary_ocr_timestamp)

            if main_ocr_timestamp is not None and secondary_ocr_timestamp is not None:
                # Subtract the datetimes directly: calling .timestamp() on the
                # naive, far-in-the-past values produced by strptime depends on
                # the platform's mktime and needlessly loses precision.
                ocr_time_delta = (
                    main_ocr_timestamp - secondary_ocr_timestamp
                ).total_seconds()
                print("OCR Time delta:", ocr_time_delta)
                time_deltas.append(ocr_time_delta)

                # write the frame index, main frame timestamp, secondary frame timestamp, time delta
                ocr_out.write(
                    f"{main_frame_idx}: {main_ocr_timestamp}: {secondary_ocr_timestamp}: {ocr_time_delta}\n"
                )

            main_frame_idx += 1
    finally:
        # Always give the captures back, even if OCR or file writing failed.
        main_video.release()
        sub_video.release()
        cv2.destroyAllWindows()

    return time_deltas

In [None]:
# Load the videos
main_video_path = "./build/macosx/arm64/debug/e101.mp4"
secondary_video_path = "./build/macosx/arm64/debug/e201.mp4"

main_video = cv2.VideoCapture(main_video_path)
sub_video = cv2.VideoCapture(secondary_video_path)

# A capture that failed to open would make the run end silently with empty or
# truncated output, so stop with an explicit error instead.
for _video_path, _capture in (
    (main_video_path, main_video),
    (secondary_video_path, sub_video),
):
    if not _capture.isOpened():
        main_video.release()
        sub_video.release()
        raise OSError(f"Could not open video: {_video_path}")

# Load the timestamp files (same base name as each video, ".txt" extension)
main_timestamps_path = os.path.splitext(main_video_path)[0] + ".txt"
secondary_timestamps_path = os.path.splitext(secondary_video_path)[0] + ".txt"

main_timestamps = load_timestamps(main_timestamps_path)
sub_timestamps = load_timestamps(secondary_timestamps_path)

# Start. Both result files are opened in append mode, so re-running this cell
# adds new lines after the results of earlier runs.
with open("time_deltas_ocr.txt", "a") as ocr_file, open(
    "time_deltas_rtcp.txt", "a"
) as rtcp_file:
    main_loop(
        main_video,
        sub_video,
        main_timestamps,
        sub_timestamps,
        ocr_file,
        rtcp_file,
        0,
    )