In [None]:
import imageio
import numpy as np
import pandas as pd
import cv2

In [None]:
def merge_dark_segments(segments, max_gap) -> list:
    """Merge neighbouring dark segments separated by at most ``max_gap`` frames.

    Args:
        segments: List of segment dicts, expected in ascending frame order.
            Each dict must provide ``start_frame``, ``end_frame`` and
            ``end_time_ms``; any other keys are carried over unchanged from
            the first segment of a merged run.
        max_gap: Largest number of frames allowed between the end of one
            segment and the start of the next for the two to be merged.

    Returns:
        A new list of new dicts. Neither the input list nor the dicts inside
        it are modified.
    """
    if not segments:
        return []

    # Work on copies so merging never rewrites the caller's dicts.
    merged = [dict(segments[0])]

    for seg in segments[1:]:
        prev = merged[-1]

        # Number of frames lying strictly between the two segments.
        gap = seg["start_frame"] - prev["end_frame"] - 1

        if gap > max_gap:
            merged.append(dict(seg))
            continue

        # Only ever extend the merged segment; a segment that ends before the
        # current end (fully contained) must not shrink it.
        if seg["end_frame"] > prev["end_frame"]:
            prev["end_frame"] = seg["end_frame"]
            prev["end_time_ms"] = seg["end_time_ms"]
        prev["num_frames"] = prev["end_frame"] - prev["start_frame"] + 1

    return merged

In [None]:
def convert_ms_to_mmss(miliseconds: int) -> str:
    """Format a duration given in milliseconds as ``MM:SS``.

    The sub-second remainder is discarded (not rounded). Minutes are not
    wrapped into hours, so durations of 100 minutes or more simply use more
    than two minute digits.

    Args:
        miliseconds: Duration in milliseconds (int or float).

    Returns:
        The duration as a zero-padded ``MM:SS`` string, e.g. ``"01:05"``.
    """
    whole_seconds = miliseconds // 1000
    minutes, seconds = divmod(whole_seconds, 60)
    # Width 2 for both fields; the former width of 5 on the seconds field
    # produced strings such as "01:00005".
    return f"{minutes:02.0f}:{seconds:02.0f}"

In [None]:
def _is_low_contrast_frame(frame, contrast_threshold=5.0):
    """Return True when the central region of ``frame`` is nearly uniform."""
    # Crop 10% on every side to ignore borders introduced by digitalization.
    h, w = frame.shape[:2]
    frame = frame[h//10:9*h//10, w//10:9*w//10]

    # Weighted channel sum (Rec. 709 luma weights); assumes channels 0/1/2
    # are R/G/B as delivered by the reader -- TODO confirm for other sources.
    luma = (
        0.2126 * frame[..., 0] +
        0.7152 * frame[..., 1] +
        0.0722 * frame[..., 2]
    )
    # Smooth first so isolated noisy pixels do not widen the percentile range.
    luma = cv2.GaussianBlur(luma, (5, 5), 0)

    low, high = np.percentile(luma, [5, 95])
    return (high - low) < contrast_threshold


def _build_dark_segment(start_frame, end_frame, fps):
    """Describe an inclusive run of dark frames as a segment dict."""
    return {
        "start_frame": start_frame,
        "end_frame": end_frame,
        "start_time_ms": start_frame / fps * 1000 if fps else None,
        "end_time_ms": end_frame / fps * 1000 if fps else None,
        "num_frames": end_frame - start_frame + 1,
    }


def detect_blackout_frame(
    video_path,
    output_csv,
    min_dark_frames=5,
    merge_gap_frames = 5,
    contrast_threshold=5.0,
):
    """Detect segments of blackout frames between the news segments.

    A frame counts as "dark" when, after cropping the borders and blurring,
    the spread between the 5th and 95th luma percentile is below
    ``contrast_threshold``. Note that this tests for a uniform frame, not for
    low brightness as such. Consecutive dark frames form a segment; segments
    shorter than ``min_dark_frames`` are dropped, and the remaining ones are
    merged when they lie at most ``merge_gap_frames`` apart.

    Args:
        video_path: Path of the video, opened through imageio's ffmpeg plugin.
        output_csv: Destination of the CSV with one row per dark segment.
            The file is only written when at least one segment was found.
        min_dark_frames: Minimum length (in frames) of a dark run to be kept.
            Also applied to a run that lasts until the end of the video.
        merge_gap_frames: Maximum gap (in frames) between two kept segments
            for them to be merged into one.
        contrast_threshold: Maximum 5th-to-95th percentile luma spread for a
            frame to be treated as dark.

    Returns:
        None. Results go to ``output_csv`` and a summary line is printed.
        Frame numbers in the output are 1-based. ``start_time`` / ``end_time``
        are left empty when the reader reports no frame rate.
    """
    dark_events = []
    segment_start = None  # first frame of the currently open dark run, if any
    frame_idx = 0

    # The context manager closes the reader (and its ffmpeg process) even if
    # frame decoding or analysis raises.
    with imageio.get_reader(video_path, "ffmpeg") as reader:
        fps = reader.get_meta_data().get("fps", None)

        for frame_idx, frame in enumerate(reader, start=1):
            is_dark = _is_low_contrast_frame(frame, contrast_threshold)

            if is_dark:
                if segment_start is None:
                    segment_start = frame_idx
                continue

            if segment_start is None:
                continue

            # First non-dark frame after a dark run: the run ended one earlier.
            segment_end = frame_idx - 1
            if segment_end - segment_start + 1 >= min_dark_frames:
                dark_events.append(
                    _build_dark_segment(segment_start, segment_end, fps)
                )
            segment_start = None

    # Handle case where video ends during darkness; the same minimum length
    # applies as for runs in the middle of the video.
    if segment_start is not None:
        if frame_idx - segment_start + 1 >= min_dark_frames:
            dark_events.append(
                _build_dark_segment(segment_start, frame_idx, fps)
            )

    if not dark_events:
        print("No dark frames detected.")
        return

    # merge dark events that are very close together
    dark_events = merge_dark_segments(dark_events, merge_gap_frames)
    for event in dark_events:
        start_ms = event.pop("start_time_ms")
        end_ms = event.pop("end_time_ms")
        # Without a known frame rate there is no timestamp to format.
        event["start_time"] = (
            convert_ms_to_mmss(start_ms) if start_ms is not None else None
        )
        event["end_time"] = (
            convert_ms_to_mmss(end_ms) if end_ms is not None else None
        )

    df = pd.DataFrame(dark_events)
    df.to_csv(output_csv, index=False)
    print(f"Detected {len(df)} dark segments.")