# Scene similarity experiments

In [None]:
import os
import sys
from pathlib import Path

import cv2
import numpy as np
from cv2 import VideoCapture
from scenedetect import ContentDetector, SceneManager, open_video

sys.path.append(os.path.join(os.getcwd(), ".."))
from scene_detector.fingerprint import fingerprint_distance

# Locate the sample clip three directory levels above the notebook's working
# directory, under movies/.
video_path = Path.cwd().parent.parent.parent / "movies" / "pizza-conversation.mp4"

# Pass the path as a string, matching the other call sites in this notebook;
# not every OpenCV build accepts path-like objects for the filename argument.
cap = VideoCapture(str(video_path))

# Content-based cut detection: ContentDetector is configured with a threshold
# of 30.0 and a minimum scene length of 2.
scene_manager = SceneManager()
scene_manager.add_detector(ContentDetector(threshold=30.0, min_scene_len=2))

# Perform scene detection
scene_manager.detect_scenes(open_video(str(video_path)))

# Get list of scene boundaries (list of (start_time, end_time))
scene_list = scene_manager.get_scene_list()

In [None]:
# Open a capture used for random-access seeking into the video.
cap = cv2.VideoCapture(str(video_path))
if not cap.isOpened():
    # Fail early with a clear message instead of collecting unreadable frames.
    raise OSError(f"Could not open video: {video_path}")

# One representative frame (as returned by cap.read()) per successfully read scene.
scene_keypoints = []
for start, end in scene_list:
    start_sec = start.get_seconds()
    end_sec = end.get_seconds()

    # Seek to midpoint frame of the scene to extract keyframe
    mid_sec = (start_sec + end_sec) / 2
    cap.set(cv2.CAP_PROP_POS_MSEC, mid_sec * 1000)
    success, frame = cap.read()
    if not success:
        # A failed read yields no usable frame; storing it would crash the
        # colour conversion when the keyframes are plotted.
        print(f"Could not read keyframe at {mid_sec:.2f}s; skipping scene")
        continue
    scene_keypoints.append(frame)

In [None]:
import matplotlib.pyplot as plt

scene_count = len(scene_list)

scenes_per_row = 4
cols = scenes_per_row
# Ceiling division so a partially filled last row is still allocated, and at
# least one row because plt.subplots rejects an empty grid.
rows = max(1, (len(scene_keypoints) + cols - 1) // cols)

# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(rows, cols, figsize=(10, 2 * rows), squeeze=False)
for i, ax in enumerate(axes.flat):
    if i < len(scene_keypoints):
        # Frames are converted from BGR to RGB before being displayed.
        ax.imshow(cv2.cvtColor(scene_keypoints[i], cv2.COLOR_BGR2RGB))
    # Hide ticks and frame on every cell, including unused trailing ones.
    ax.axis("off")
plt.tight_layout()
plt.show()

In [None]:
def calculate_image_hash(image_array, hash_func):
    """Hash one image array with ``hash_func``.

    The array is wrapped with ``Image.fromarray`` and the result is passed to
    ``hash_func``. Any exception raised along the way is reported on stdout
    and ``None`` is returned instead of a hash.
    """
    digest = None
    try:
        pil_image = Image.fromarray(image_array)
        digest = hash_func(pil_image)
    except Exception as e:
        # Best-effort helper: report the problem and fall through to None.
        print(f"Error processing: {e}")
    return digest


def compare_two_images(img_path1, img_path2, hash_func):
    """Return the hash distance between two images, or ``None`` on failure.

    Despite their names, ``img_path1`` and ``img_path2`` are forwarded
    unchanged to ``calculate_image_hash``, which wraps them with
    ``Image.fromarray``; they are therefore expected to be image arrays
    rather than filesystem paths.

    The distance is whatever ``hash1 - hash2`` evaluates to for the objects
    produced by ``hash_func``. ``None`` is returned when either image could
    not be hashed.
    """
    hash1 = calculate_image_hash(img_path1, hash_func)
    hash2 = calculate_image_hash(img_path2, hash_func)

    # Test for None explicitly: a truthiness check would discard valid hashes
    # that happen to be falsy and raises for array-like hash values.
    if hash1 is None or hash2 is None:
        return None
    return hash1 - hash2

In [None]:
import imagehash
from PIL import Image

# Hash functions to compare; each one gets its own row in the per-scene figure.
# Uncomment an entry to include it in the comparison.
hash_algorithms = {
    "average_hash": imagehash.average_hash,
    # 'perceptual_hash': imagehash.phash,
    # 'difference_hash': imagehash.dhash,
    "wavelet_hash": imagehash.whash,
}


def _frame_fingerprint(frame, hash_func):
    """Return the string form of ``hash_func`` applied to ``frame`` with ``hash_size=32``.

    The frame is wrapped with ``Image.fromarray`` exactly as read from the
    capture (no colour conversion), so reference and sample frames are
    treated identically.
    """
    return str(hash_func(Image.fromarray(frame), hash_size=32))


# For every detected scene: sample 9 evenly spaced frames, hash each one and
# show its distance to the hash of the frame at the scene's midpoint.
for start, end in scene_list:
    start_sec = start.get_seconds()
    end_sec = end.get_seconds()
    mid_sec = (start_sec + end_sec) / 2

    frames_seconds = np.linspace(start_sec, end_sec, 9)
    # With an odd number of evenly spaced samples, the middle one sits at the
    # scene midpoint; using its index avoids starring several neighbouring
    # samples in scenes so short that they all lie within a fixed tolerance.
    mid_index = len(frames_seconds) // 2

    frames_in_scene = []
    for frame_sec in frames_seconds:
        cap.set(cv2.CAP_PROP_POS_MSEC, frame_sec * 1000)
        success, frame = cap.read()
        if not success:
            # Stop at the first unreadable position; later samples are dropped.
            break
        frames_in_scene.append(frame)

    cap.set(cv2.CAP_PROP_POS_MSEC, mid_sec * 1000)
    success, reference_frame = cap.read()
    if not success or not frames_in_scene:
        # Without a reference frame or any samples there is nothing to hash or
        # plot, and plt.subplots cannot build a grid with zero columns.
        print(f"Skipping scene {start_sec:.2f}s-{end_sec:.2f}s: could not read frames")
        continue

    # squeeze=False keeps `axes` two-dimensional even with one sample or one
    # algorithm; tight_layout=True lets the figure handle its own layout.
    fig, axes = plt.subplots(
        len(hash_algorithms),
        len(frames_in_scene),
        figsize=(20, 10),
        squeeze=False,
        tight_layout=True,
    )

    for row, (hash_name, hash_func) in enumerate(hash_algorithms.items()):
        reference_fingerprint = _frame_fingerprint(reference_frame, hash_func)

        # frames_in_scene may be shorter than frames_seconds after an early
        # break, hence the non-strict zip.
        for i, (second, frame) in enumerate(zip(frames_seconds, frames_in_scene, strict=False)):
            fingerprint = _frame_fingerprint(frame, hash_func)
            distance = fingerprint_distance(reference_fingerprint, fingerprint)

            axes[row][i].imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            axes[row][i].axis("off")
            reference_point = i == mid_index
            axes[row][i].set_title(f"{second:.2f}s" + ("*" if reference_point else "") + f" {hash_name}\n{distance}")

    plt.show()