# Shot Boundary Detection with Window Functions and SigLIP 2

In [None]:
!pip install -q "daft[huggingface]" transformers numpy

In [None]:
# General Parameters

# Hugging Face model id used for the image embeddings, and the name of the
# DataFrame column that holds those embeddings.
MODEL_ID = "google/siglip2-base-patch16-512"
model_col = "img_emb_" + MODEL_ID

# Clip geometry. Only H and W are referenced by the cells visible in this notebook.
B = 2    # batch size
T = 16   # clip size (number of frames)
H = 288  # frame height
W = 288  # frame width
C = 3    # colour channels (RGB)

# Upper bound on the number of frame rows kept after reading.
ROW_LIMIT = 50_000

# Video sources to read frames from.
PATHS = [
    "https://www.youtube.com/watch?v=eYXDSuNpKTk",  # Life after Apache Spark
]

# Minimum shot length, in seconds.
MIN_SHOT_DURATION = 1.0

In [None]:
import daft
from daft.functions import embed_image
from daft import col, lit, Window, DataType as dt

import numpy as np

In [None]:
# Read frames from every entry in PATHS, requesting H x W images.
df_frames = daft.read_video_frames(PATHS, image_height=H, image_width=W)
# Keep at most ROW_LIMIT rows and materialize them once, so the cells below
# reuse the collected frames instead of re-reading from YouTube.
df_frames = df_frames.limit(ROW_LIMIT).collect()
df_frames.show(3)

### Generate SigLIP 2 Embeddings

In [None]:
# Add one embedding per frame: the "data" column is embedded with MODEL_ID via
# the "transformers" provider and stored under the `model_col` column name.
df_emb = df_frames.with_column(
    model_col,
    embed_image(df_frames["data"], provider="transformers", model_name=MODEL_ID),
)

In [None]:
# Materialize the embeddings now and keep the collected result, so later cells
# build on computed embeddings rather than on the pending embedding step.
df_emb = df_emb.collect()

### Cosine Distance between Consecutive Frames using Window Functions

In [None]:
# Base window: one partition per video path, rows ordered by frame timestamp.
w = Window().partition_by("path").order_by("frame_time")
# Trailing ranges over frame_time that end at the current row. The offsets are in
# frame_time units (presumably seconds - confirm against read_video_frames output).
w_dissolve = w.range_between(-1.0, Window.current_row)
w_cut = w.range_between(-0.3, Window.current_row)

# Cosine distance between each frame's embedding and the previous frame's
# embedding (lag of 1 row) within the same video.
df_shots = df_emb.with_column(
    "cos_dist",
    col(model_col).embedding.cosine_distance(col(model_col).lag(1).over(w)),
)
# Mean of that distance over the short ("cut") and long ("dissolve") trailing windows.
df_shots = df_shots.with_column("cos_dist_cut", col("cos_dist").mean().over(w_cut))
df_shots = df_shots.with_column("cos_dist_dissolve", col("cos_dist").mean().over(w_dissolve))


In [None]:
# Execute the windowed distance computation. Left as a bare expression so the
# notebook renders the returned DataFrame as the cell output.
df_shots.collect()

In [None]:
import matplotlib.pyplot as plt

# Pull only the columns needed for the chart into a pandas DataFrame.
df_plot_multiple = df_shots.select(
    "frame_time",
    "cos_dist",
    "cos_dist_cut",
    "cos_dist_dissolve",
).to_pandas()

# One line per series: the raw distance is drawn semi-transparent, the two
# windowed means are drawn thicker so they stand out.
plt.figure(figsize=(12, 6))
for column, label, line_kwargs in (
    ("cos_dist", "Cosine Distance", {"alpha": 0.7}),
    ("cos_dist_cut", "Cosine Distance (Cut Window)", {"linewidth": 2}),
    ("cos_dist_dissolve", "Cosine Distance (Dissolve Window)", {"linewidth": 2}),
):
    plt.plot(df_plot_multiple["frame_time"], df_plot_multiple[column], label=label, **line_kwargs)

plt.xlabel("Frame Time (seconds)")
plt.ylabel("Cosine Distance")
plt.title("Cosine Distance and Smoothed Cosine Distances over Time")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# A frame is flagged as a cut boundary when its cosine distance to the previous
# frame is at least THRESHOLD.
THRESHOLD = 0.1
df_sbd = df_shots.with_column(
    "is_cut_boundary",
    col("cos_dist") >= THRESHOLD,
)

In [None]:
# Display the frame images of the rows flagged as cut boundaries.
(
    df_sbd
    .where(df_sbd["is_cut_boundary"])
    .select("data")
    .show()
)