### Installing YOLO package

In [None]:
# Install the Ultralytics package (source of the YOLO class imported below); -q keeps pip's output quiet.
!pip install -q ultralytics

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m69.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m87.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

### Run Inference and Collect Highlights

In [None]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T
from tqdm import tqdm
from collections import deque
from ultralytics import YOLO

# --- Configuration ---
# File paths: input video, detector weights, classifier weights, output video.
VIDEO_PATH     = "nuggets.mp4"
YOLO_PATH      = "best_patrick_cp_1000.pt"   # weights passed to YOLO(...)
MODEL_WEIGHTS  = "action_classifier_f.pth"   # state dict loaded into the r3d_18 classifier
OUTPUT_PATH    = "nuggets_highlight.mp4"     # re-assigned to the same value in the export cell

# Number of most-recent crops kept in the frame buffer; the classifier only
# runs once the buffer holds this many frames.
CLIP_LENGTH    = 15
# A frame is marked as a highlight when the mean of the last SMOOTH_WINDOW
# "any action" scores (1 - P(none)) exceeds this value.
CONF_THRESHOLD = 0.4
# Number of consecutive classifier scores averaged before thresholding.
SMOOTH_WINDOW  = 6
# Size each crop is resized to before being converted to a tensor.
RESIZE_DIMS    = (128, 128)
# Used to derive the frame stride of the main loop (FPS // TARGET_FPS).
TARGET_FPS     = 30
# Pixels added on every side of the detected box (clamped to the frame bounds).
PADDING        = 20
# Seconds added before and after each segment; timestamps closer together
# than 2 * MARGIN_SECS are merged into the same segment.
MARGIN_SECS    = 1
# Minimum span (seconds) between the first and last timestamp of a run for
# the run to be kept as a segment.
MIN_DURATION   = 0.6  # seconds

# --- Setup ---
# Open the input video and fail fast if it cannot be read: an unopened capture
# reports 0 for FPS / size / frame count, which would otherwise only surface
# later as an unrelated error (or as a silently empty run).
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise IOError(f"Could not open video file: {VIDEO_PATH}")

# Basic stream properties. FPS is rounded to an integer (e.g. 29.97 -> 30).
FPS = round(cap.get(cv2.CAP_PROP_FPS))
W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
TOTAL_FRAMES = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load YOLO model
yolo = YOLO(YOLO_PATH)

# Load Action Recognizer
# Class order matters: index 0 ("none") is treated as "no action" downstream.
actions = ["none", "shoot", "layup", "dunk"]
# Per-crop preprocessing: ndarray -> PIL image -> fixed size -> float tensor.
transform = T.Compose([
    T.ToPILImage(),
    T.Resize(RESIZE_DIMS),
    T.ToTensor()
])

# r3d_18 video backbone with its final layer replaced by a len(actions)-way
# head, then populated from the saved state dict.
model = torchvision.models.video.r3d_18()
model.fc = nn.Linear(model.fc.in_features, len(actions))
model.load_state_dict(torch.load(MODEL_WEIGHTS, map_location=device))
model.to(device).eval()

# Buffers
raw_buffer = deque(maxlen=CLIP_LENGTH)       # most recent preprocessed crops
detect_buffer = deque(maxlen=CLIP_LENGTH)    # whether each recent frame had a detection
score_buffer = deque(maxlen=SMOOTH_WINDOW)   # recent "any action" scores for smoothing
timestamps = []                              # frame ids flagged as highlights

# --- Detection Helper ---
def detect_bounding_box(frame):
    """Return the padded box of the single YOLO detection in `frame`.

    Runs the detector with at most one detection allowed. When nothing is
    detected, returns None. Otherwise returns an (x1, y1, x2, y2) tuple of
    integer pixel coordinates, grown by PADDING on every side and clamped
    to the frame's width and height.
    """
    predictions = yolo.predict(frame, imgsz=1280, conf=0.43, max_det=1, verbose=False)
    boxes = predictions[0].boxes
    if len(boxes) == 0:
        return None

    left, top, right, bottom = boxes.xyxy[0].cpu().numpy().astype(int)
    frame_h, frame_w = frame.shape[:2]

    # Expand the box by PADDING without leaving the image.
    padded_left = max(0, left - PADDING)
    padded_top = max(0, top - PADDING)
    padded_right = min(frame_w, right + PADDING)
    padded_bottom = min(frame_h, bottom + PADDING)
    return (padded_left, padded_top, padded_right, padded_bottom)

def classify_action(buffer):
    """Return the classifier's softmax probabilities for a buffer of frames.

    Every second frame of `buffer` (indices 0, 2, 4, ...) is stacked along
    dimension 1, a leading batch dimension is added, and the result is run
    through `model` on `device`. The output is a NumPy array with one
    probability per class for that single clip.
    """
    # Temporal subsampling: keep only the even-indexed frames.
    sampled = [tensor for idx, tensor in enumerate(buffer) if idx % 2 == 0]
    clip = torch.stack(sampled, dim=1)
    clip = clip.unsqueeze(0).to(device)
    scores = model(clip)
    return torch.softmax(scores, dim=1)[0].cpu().numpy()

# --- Main Loop ---
# Stride between sampled frames. Clamped to at least 1: when the source FPS is
# lower than TARGET_FPS, FPS // TARGET_FPS evaluates to 0 and range() raises
# ValueError for a zero step.
frame_step = max(1, FPS // TARGET_FPS)

try:
    # Inference only, so gradient tracking is disabled for the whole loop.
    with torch.no_grad():
        for frame_id in tqdm(range(0, TOTAL_FRAMES, frame_step)):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
            ret, frame = cap.read()
            if not ret:
                break

            box = detect_bounding_box(frame)
            detect_buffer.append(box is not None)

            if box is not None:
                x1, y1, x2, y2 = box
                cropped = transform(frame[y1:y2, x1:x2])
                raw_buffer.append(cropped)
            elif raw_buffer:
                # No detection on this frame: repeat the last crop so the clip
                # keeps its length while the target is briefly lost.
                raw_buffer.append(raw_buffer[-1].clone())

            if len(raw_buffer) == CLIP_LENGTH and any(detect_buffer):
                probs = classify_action(raw_buffer)
                # probs[0] is the "none" class, so 1 - probs[0] is the
                # combined confidence of any action.
                score_buffer.append(1 - probs[0])
                if len(score_buffer) == SMOOTH_WINDOW and np.mean(score_buffer) > CONF_THRESHOLD:
                    timestamps.append(frame_id)

            elif not any(detect_buffer):
                # Nothing detected in the whole recent window: start over.
                raw_buffer.clear()
                score_buffer.clear()
finally:
    # Release the capture even if detection or classification raises.
    cap.release()

100%|█████████▉| 17658/17659 [16:21<00:00, 17.99it/s]


### Group and Merge Highlight Segments

In [None]:
# --- Merge Clips ---
# Convert flagged frame ids to seconds, group timestamps that lie close
# together into runs, and keep each sufficiently long run as a padded
# (start, end) segment clamped to the video's duration.
duration = TOTAL_FRAMES / FPS if FPS > 0 else 0
timestamps_sec = [frame / FPS for frame in timestamps]
segments = []


def _emit_segment(first, last):
    """Append the padded span of a run to `segments` if it is long enough."""
    if last - first > MIN_DURATION:
        segments.append((max(0, first - MARGIN_SECS), min(last + MARGIN_SECS, duration)))


if timestamps_sec:
    run_start = run_end = timestamps_sec[0]
    for t in timestamps_sec[1:]:
        # A gap of at least 2 * MARGIN_SECS closes the current run.
        if t - run_end >= 2 * MARGIN_SECS:
            _emit_segment(run_start, run_end)
            run_start = t
        run_end = t
    # Flush the final run.
    _emit_segment(run_start, run_end)

In [None]:
# Display the merged highlight segments as (start, end) pairs in seconds.
segments

[(97.16666666666667, 99.8),
 (469.2, 472.23333333333335),
 (474.03333333333336, 478.06666666666666)]

### Export Selected Highlight Clips

In [None]:
from moviepy import VideoFileClip, concatenate_videoclips

OUTPUT_PATH = "nuggets_highlight.mp4"

# --- Clip and Merge ---
# Cut every (start, end) segment out of the source video and write them back
# to back into a single highlight file.
clips = []
if segments:
    # Open the source once and close it when done. Creating a separate
    # VideoFileClip per segment spawns a new reader each time, and none of
    # them were ever closed.
    with VideoFileClip(VIDEO_PATH) as source:
        clips = [source.subclipped(start, end) for start, end in segments]
        final_clip = concatenate_videoclips(clips)
        final_clip.write_videofile(OUTPUT_PATH, codec="libx264")
else:
    print("No highlight clips found.")

MoviePy - Building video nuggets_highlight.mp4.
MoviePy - Writing audio in nuggets_highlightTEMP_MPY_wvf_snd.mp3


                                                                   

MoviePy - Done.
MoviePy - Writing video nuggets_highlight.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready nuggets_highlight.mp4
