# Comparing Shot Boundary Detection with SigLip2 Embeddings

<a target="_blank" href="https://colab.research.google.com/github/everettVT/daft-video-embeddings/blob/main/workload/sbd_image_embeddings_siglip.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
!pip install -q "daft[huggingface]"

In [None]:

# Clip length in frames: frame_index is integer-divided by T to get clip_index.
T = 16
# Frame height and width requested from the video reader.
H = 288
W = 288
# NOTE(review): C is not referenced elsewhere in this notebook; presumably the
# channel count of the decoded frames -- confirm or remove.
C = 3

# Upper bound on the number of frames materialized for the experiment.
ROW_LIMIT = 2048
# Number of bins per color channel in the histogram baseline.
HISTOGRAM_BINS = 32

# Source videos to read frames from.
PATHS = [
    # GPU Pipeline Optimization Explained
    "https://www.youtube.com/watch?v=WAsmZJ2kff0",
    # Wrangle PDFs with Custom UDFs
    "https://www.youtube.com/watch?v=BLcKDQRTFKY",
    # Data and AI Processing at Scale
    "https://www.youtube.com/watch?v=Qnw6059ddgE",
    # Life after Apache Spark
    "https://www.youtube.com/watch?v=eYXDSuNpKTk",
    # Scaling Data Processing and ML Training with Daft + Ray
    "https://www.youtube.com/watch?v=3JWrg1DitaA",
]

In [2]:
import daft
from daft.functions import embed_image
from daft import DataType as dt

import numpy as np

In [None]:
# Lazily describe the frame scan: every frame is resized to H x W.
frames_scan = daft.read_video_frames(PATHS, image_height=H, image_width=W)

# Materialize a bounded number of frames once so later cells reuse them
# instead of re-reading the videos from YouTube.
df_frames = frames_scan.limit(ROW_LIMIT).collect()
df_frames.show(3)

### Calculate Histogram

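We compute a per-channel color histogram for every frame as a traditional baseline, so that the embedding-based results can be benchmarked against Chi-Squared histogram distance.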

In [None]:
@daft.func()
def histogram(
    data: daft.Image,
    bins: int,
    range: tuple[float, float] = (0.0, 256.0),
) -> dt.tensor(dt.int64(), shape=(3, HISTOGRAM_BINS)):
    """Compute a per-channel intensity histogram for one image.

    Args:
        data: Image pixels; reshaped to ``(-1, 3)``, so three interleaved
            channels are expected.
        bins: Number of histogram bins per channel. Must equal
            ``HISTOGRAM_BINS`` to match the declared return shape.
        range: ``(lower, upper)`` value range covered by the bins. The default
            ``(0.0, 256.0)`` assumes 8-bit pixel values -- confirm for other
            image dtypes.

    Returns:
        An ``int64`` array of shape ``(3, bins)``; row ``i`` holds the counts
        for channel ``i``.
    """
    # The `range` parameter shadows the builtin `range`, so the channels are
    # iterated directly rather than via `for i in range(3)`.
    channels = np.asarray(data).reshape(-1, 3).T
    hist = np.zeros((3, bins), dtype=np.int64)
    for i, channel in enumerate(channels):
        counts, _ = np.histogram(channel, bins=bins, range=range)
        hist[i] = counts.astype(np.int64, copy=False)
    return hist


# Pass the value range explicitly so every frame is binned over the same
# interval (assumes 8-bit pixel values -- confirm for other image dtypes).
df_hist = df_frames.with_column(
    "histogram",
    histogram(df_frames["data"], bins=HISTOGRAM_BINS, range=(0.0, 256.0)),
)

### Generate SigLip2 Embeddings

In [None]:
# Build on df_hist rather than df_frames so the "histogram" column is still
# present when the frames are aggregated into clips.
df_emb = df_hist.with_column(
    "emb_siglip2_base_patch_16_512",
    embed_image(
        df_hist["data"],
        model_name="google/siglip2-base-patch16-512",
        provider="transformers",
    ),
)

In [None]:
# Bucket frames into fixed-length clips of T consecutive frame indices.
clip_index_expr = df_emb["frame_index"] // T

# Per-frame columns collected into lists for each clip (image data is omitted).
clip_list_columns = ("frame_index", "histogram", "emb_siglip2_base_patch_16_512")

frames_by_index = df_emb.with_column("clip_index", clip_index_expr).sort("frame_index")
df_clips = frames_by_index.groupby("path", "clip_index").agg_list(*clip_list_columns)
df_clips.show(3)


In [None]:
@daft.func()
def detect_sbd_traditional(
    hist_list: list[np.ndarray],
    index_list: list[int],
    threshold: float = 0.3,
    num_frames: int = 6,
) -> list[int]:
    """Detect shot boundaries in one clip from its per-frame histograms.

    NOTE(review): the original cell ended after the signature, which is a
    SyntaxError. This stub keeps the notebook parseable and fails loudly if
    the function is ever invoked.

    Args:
        hist_list: Per-frame histograms for the clip.
        index_list: Frame indices for the clip; presumably aligned
            one-to-one with ``hist_list`` -- confirm.
        threshold: Decision threshold; presumably applied to a Chi-Squared
            distance between histograms -- confirm.
        num_frames: NOTE(review): semantics are not defined anywhere in the
            notebook (window size? minimum shot length?) -- confirm.

    Returns:
        Frame indices flagged as shot boundaries.
    """
    # TODO: implement the histogram-distance baseline once the meaning of
    # `num_frames` is settled.
    raise NotImplementedError("detect_sbd_traditional is not implemented yet")


In [None]:
# NOTE(review): `df_sbd` is not assigned in any of the cells shown above, so
# this cell raises NameError as written. Presumably it should hold the result
# of applying the shot-boundary detector to `df_clips` -- confirm.
df_sbd