In [1]:
from scenedetect import detect, AdaptiveDetector, ContentDetector, ThresholdDetector
import os
import cv2
import numpy as np
import glob
import json

In [2]:
def is_solid_color(frame, threshold=0.98, color_variance_threshold=100, color_tolerance=20):
    """
    Detects if a frame is mostly a solid color (includes black/white/any uniform color).

    The check runs in two stages. A per-channel variance test first rejects
    frames that are clearly not uniform. Frames that pass are then measured
    pixel-wise: the share of pixels whose every channel lies within
    ``color_tolerance`` of the center pixel's color is compared to ``threshold``.

    Args:
        frame: Input frame as a NumPy array, either 2-D grayscale or 3-D with
               any number of channels (BGR, BGRA, ...)
        threshold: Fraction of the frame that must be similar color (0.0-1.0)
        color_variance_threshold: Maximum variance in each channel to consider colors similar
        color_tolerance: Maximum per-channel distance from the dominant color
                         for a pixel to count as matching (inclusive)

    Returns:
        bool: True if the frame is mostly solid color, False otherwise.
              A missing or empty frame is reported as True.
    """
    # Check if the frame is empty or invalid
    if frame is None or frame.size == 0:
        return True

    # Give grayscale frames an explicit channel axis so a single code path
    # handles every channel count
    if frame.ndim == 2:
        frame = frame[:, :, np.newaxis]

    h, w = frame.shape[:2]

    # Stage 1: one channel with high variance is enough to rule out uniformity
    channel_variances = np.var(frame, axis=(0, 1))
    if np.any(channel_variances > color_variance_threshold):
        return False

    # Stage 2: use the center pixel as the dominant color (approximation for speed).
    # Bounds are computed in float so they cannot wrap around for unsigned
    # pixel types near 0 or 255.
    dominant_color = frame[h // 2, w // 2].astype(np.float64)
    lower_bound = dominant_color - color_tolerance
    upper_bound = dominant_color + color_tolerance

    # A pixel matches only when all of its channels are inside the range
    in_range = (frame >= lower_bound) & (frame <= upper_bound)
    matching_pixels = np.count_nonzero(np.all(in_range, axis=2))
    percentage = matching_pixels / (h * w)

    return bool(percentage >= threshold)

def is_blurry(frame, threshold=150, roi_crop=None):
    """
    Detects if a frame is blurry using Laplacian variance.

    Args:
        frame: Input frame (BGR format, or an already-grayscale 2-D array)
        threshold: Sharpness cutoff. A frame whose normalized Laplacian
                   variance is below this value is reported as blurry, so
                   raising the threshold flags more frames and lowering it
                   flags fewer.
                   Typical values: 100-150 for 720p/1080p images
        roi_crop: Optional tuple (top_percent, bottom_percent, left_percent, right_percent)
                  giving how much to trim from each edge (0-100) so that only
                  the central region is analyzed. An invalid crop is ignored.

    Returns:
        bool: True if the frame is blurry, False otherwise.
              A missing or empty frame is reported as True; a very dark or
              very bright frame is reported as False.
    """
    # Check if the frame is empty or invalid
    if frame is None or frame.size == 0:
        return True

    # Nothing below writes into the frame (slicing yields a view and the
    # OpenCV calls return new arrays), so no defensive copy is needed
    working_frame = frame

    # Apply optional ROI cropping to focus on the central part of the image
    if roi_crop is not None:
        top, bottom, left, right = roi_crop
        h, w = working_frame.shape[:2]

        # Calculate crop coordinates
        top_px = int(h * top / 100)
        bottom_px = int(h * (100 - bottom) / 100)
        left_px = int(w * left / 100)
        right_px = int(w * (100 - right) / 100)

        # Ensure valid crop region
        if bottom_px > top_px and right_px > left_px:
            working_frame = working_frame[top_px:bottom_px, left_px:right_px]

    # Convert to grayscale unless the input already is
    if working_frame.ndim == 2:
        gray = working_frame
    else:
        gray = cv2.cvtColor(working_frame, cv2.COLOR_BGR2GRAY)

    # Skip very dark or very bright frames as they can give false positives
    brightness = np.mean(gray)
    if brightness < 20 or brightness > 235:
        return False  # Exclude very dark/bright frames from blur detection

    # Variance of the Laplacian is the sharpness measure
    score = cv2.Laplacian(gray, cv2.CV_64F).var()

    # Scale the score by the ratio of a 1920x1080 frame to the analyzed
    # region's pixel count, intended to keep one threshold usable across
    # resolutions.
    # NOTE(review): the analyzed region is the cropped one, so roi_crop also
    # raises the normalized score - confirm this is intended.
    h, w = gray.shape
    normalized_score = score * (1920 * 1080) / (h * w)

    return bool(normalized_score < threshold)

def is_overexposed(frame, highlight_threshold=230, overexposed_percentage=0.5):
    """
    Detects if a frame is overexposed (too many bright/blown-out highlights).

    Args:
        frame: Input frame (BGR format, or an already-grayscale 2-D array)
        highlight_threshold: Pixel value threshold for considering a pixel "blown out" (0-255);
                             only pixels strictly brighter than this are counted
        overexposed_percentage: Fraction of the frame (0.0-1.0, despite the name)
                                that must be blown out; the frame is overexposed
                                only when the share is strictly greater

    Returns:
        bool: True if the frame is overexposed, False otherwise.
              A missing or empty frame is reported as True, matching how
              is_solid_color and is_blurry treat invalid frames.
    """
    # Check if the frame is empty or invalid
    if frame is None or frame.size == 0:
        return True

    # Convert to grayscale unless the input already is
    if frame.ndim == 2:
        gray = frame
    else:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Count pixels that are very bright (blown out highlights)
    num_highlight_pixels = np.count_nonzero(gray > highlight_threshold)

    # Share of the frame that is blown out
    highlight_fraction = num_highlight_pixels / gray.size

    return bool(highlight_fraction > overexposed_percentage)

In [3]:
def get_video_information(video_path):
    """
    Return the video's name and frame rate.

    Args:
        video_path: Path to the video file

    Returns:
        tuple: (file name without directory or extension,
                value OpenCV reports for CAP_PROP_FPS)
    """
    base_name = os.path.basename(video_path)
    video_name, _extension = os.path.splitext(base_name)

    # Open the file only long enough to query the frame rate
    capture = cv2.VideoCapture(video_path)
    frames_per_second = capture.get(cv2.CAP_PROP_FPS)
    capture.release()

    return video_name, frames_per_second

In [4]:
def save_frames(video_name, output_dir, frame, frame_count):
    """
    Write one frame as ``<output_dir>/<video_name>/<frame_count>.jpg``.

    Args:
        video_name: Name of the per-video subdirectory
        output_dir: Root directory for all extracted frames
        frame: Image array handed to cv2.imwrite
        frame_count: Number used as the file name

    Returns:
        str: Path the frame was written to
    """
    video_dir = os.path.join(output_dir, video_name)

    # Create the root and the per-video directory if they are missing
    for directory in (output_dir, video_dir):
        os.makedirs(directory, exist_ok=True)

    frame_path = os.path.join(video_dir, f"{frame_count}.jpg")
    # NOTE(review): the result of cv2.imwrite is not checked here
    cv2.imwrite(frame_path, frame)
    return frame_path

In [5]:
def extract_middle_frames(video_path, output_dir, verbose=True):
    """
    Save the middle frame of every detected scene that passes the quality checks.

    Scenes are detected with ContentDetector at a threshold of 15.0. If fewer
    than 16 scenes are found, detection is rerun once with the threshold
    scaled down in proportion to the number of scenes found (never below 1.0).
    For each scene the middle frame is read and is skipped when it cannot be
    read or when it is a solid color, blurry, or overexposed. Accepted frames
    are saved under ``<output_dir>/<video name>/`` and numbered from 1 in the
    order they are accepted.

    Args:
        video_path: Path to the video file
        output_dir: Root directory for the extracted frames
        verbose: When True, print scene counts and threshold changes

    Returns:
        list: Paths of the saved frames, in scene order
    """
    video_name, _fps = get_video_information(video_path)
    min_num_scenes = 16

    threshold = 15.0
    min_threshold = 1.0
    scene_list = detect(video_path, ContentDetector(threshold=threshold))
    if verbose:
        print(f"Detected {len(scene_list)} scenes in {video_path}")

    # Too few scenes: retry once with a proportionally more sensitive threshold
    if len(scene_list) < min_num_scenes:
        threshold = threshold * len(scene_list) / min_num_scenes
        threshold = max(threshold, min_threshold)
        if verbose:
            print(f"Decreasing content threshold to {threshold}")
        scene_list = detect(video_path, ContentDetector(threshold=threshold))
        if verbose:
            print(f"Detected {len(scene_list)} scenes in {video_path}")

    cap = cv2.VideoCapture(video_path)
    frame_paths = []

    # try/finally so the capture handle is released even if a check or a
    # save raises part-way through the video
    try:
        # Extract middle frame from each scene
        for scene in scene_list:
            start_frame, end_frame = scene[0].frame_num, scene[1].frame_num
            scene_length = end_frame - start_frame

            # Seek to the middle frame position and read it
            middle_frame = start_frame + scene_length // 2
            cap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame)
            ret, frame = cap.read()

            if not ret:
                continue

            if is_solid_color(frame) or is_blurry(frame) or is_overexposed(frame):
                continue

            # Number saved frames consecutively, independent of scene index
            frame_path = save_frames(video_name, output_dir, frame, len(frame_paths) + 1)
            frame_paths.append(frame_path)
    finally:
        cap.release()

    return frame_paths

In [6]:
def get_all_files(video_dir='E:/queue'):
    """
    Return the sorted paths of the ``.mp4`` files directly inside a directory.

    Args:
        video_dir: Directory to scan (not recursive). Defaults to the queue
                   directory this notebook processes.

    Returns:
        list: Matching paths in ascending string order; empty when the
              directory has no ``.mp4`` files or does not exist
    """
    # Escape the directory so characters such as "[" in its name are taken
    # literally instead of being read as glob wildcards
    pattern = os.path.join(glob.escape(video_dir), '*.mp4')
    return sorted(glob.glob(pattern))

def update_progress(prog, path='frame-extraction-progress.json'):
    """
    Persist the progress data as JSON, replacing any previous checkpoint.

    The data is written to a temporary file next to the target and then moved
    into place, so a failed or interrupted dump leaves the previous checkpoint
    intact instead of a truncated file.

    Args:
        prog: JSON-serializable progress data
        path: Destination file. Defaults to the notebook's checkpoint file.

    Raises:
        TypeError: If ``prog`` is not JSON-serializable
        OSError: If the file cannot be written or replaced
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w') as file:
            json.dump(prog, file)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is gone; it only remains
        # here when the dump or the replace failed
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def get_progress(path='frame-extraction-progress.json'):
    """
    Load the saved progress data.

    Args:
        path: Checkpoint file to read. Defaults to the notebook's checkpoint file.

    Returns:
        The decoded JSON content, or an empty list when the file does not exist
    """
    # Open directly and handle the missing-file case, rather than checking
    # for existence first and opening afterwards
    try:
        with open(path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        return []

def get_item_ids(path='../../data/item_ids.json'):
    """
    Load the item IDs from a JSON file.

    Args:
        path: JSON file to read. Defaults to the project's item ID file,
              relative to the current working directory.

    Returns:
        The decoded JSON content

    Raises:
        FileNotFoundError: If the file does not exist
    """
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [7]:
# Select the videos that still need processing: not yet recorded in the
# progress checkpoint and whose numeric file name is a known item ID.
all_files = get_all_files()
progress = get_progress()
item_ids = get_item_ids()

# Build the lookup sets once so each membership test in the loop is O(1)
# instead of a scan over the whole collection.
processed_files = set(progress)
known_item_ids = set(item_ids)

remain_files = []
for file in all_files:
    not_processed = file not in processed_files

    # The file name (without extension) is expected to be the integer item
    # ID; a name that is not an integer cannot be in the metadata
    try:
        in_metadata = int(os.path.splitext(os.path.basename(file))[0]) in known_item_ids
    except ValueError:
        in_metadata = False

    if not_processed and in_metadata:
        remain_files.append(file)
    else:
        print(f"Skipping {file}")

Skipping E:/queue\1061880.mp4
Skipping E:/queue\1061900.mp4
Skipping E:/queue\1061910.mp4
Skipping E:/queue\1061920.mp4
Skipping E:/queue\1061930.mp4
Skipping E:/queue\1061970.mp4
Skipping E:/queue\1062000.mp4
Skipping E:/queue\1062020.mp4
Skipping E:/queue\1062040.mp4
Skipping E:/queue\1062050.mp4
Skipping E:/queue\1062060.mp4
Skipping E:/queue\1062070.mp4
Skipping E:/queue\1062080.mp4
Skipping E:/queue\1062090.mp4
Skipping E:/queue\1062100.mp4
Skipping E:/queue\1062110.mp4
Skipping E:/queue\1062120.mp4
Skipping E:/queue\1062130.mp4
Skipping E:/queue\1062140.mp4
Skipping E:/queue\1062160.mp4
Skipping E:/queue\1062200.mp4
Skipping E:/queue\1062210.mp4
Skipping E:/queue\1062220.mp4
Skipping E:/queue\1062260.mp4
Skipping E:/queue\1062340.mp4
Skipping E:/queue\1062390.mp4
Skipping E:/queue\1062470.mp4
Skipping E:/queue\1062480.mp4
Skipping E:/queue\1062530.mp4
Skipping E:/queue\1062540.mp4
Skipping E:/queue\1062550.mp4
Skipping E:/queue\1062640.mp4
Skipping E:/queue\1062660.mp4
Skipping E

In [None]:
# Process every remaining video in order. The progress list is written to
# disk after each video, so a later run can skip the ones already finished.
for i, file in enumerate(remain_files):
    print(f"Processing {i+1}/{len(remain_files)}: {file}")
    # Frames are saved under D:/frames/<video name>/
    frames = extract_middle_frames(file, 'D:/frames')
    # Record the video as done and persist the whole list immediately
    progress.append(file)
    update_progress(progress)

Processing 1/1782: E:/queue\1071030.mp4
