In [2]:
!pip -q install ultralytics decord tqdm

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m43.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/13.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m8.0/13.6 MB[0m [31m193.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m13.6/13.6 MB[0m [31m223.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m13.6/13.6 MB[0m [31m223.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m94.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from ultralytics import YOLO
import numpy as np, cv2, math, csv, os
from decord import VideoReader, cpu
from tqdm import tqdm
import librosa

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
POSE_MODEL = "yolo11n-pose.pt"  # small & fast
model = YOLO(POSE_MODEL)

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt to 'yolo11n-pose.pt': 100% ━━━━━━━━━━━━ 6.0MB 85.3MB/s 0.1s


In [6]:
def smooth(x, k=5):
    if len(x)==0: return x
    k = max(1,k)
    return np.convolve(x, np.ones(k)/k, mode='same')

def wrist_velocity_series(kpts):
    """
    kpts: T x J x 2   (xy only; use conf if you want)
    J: COCO-style 17 (Ultralytics uses COCO keypoints)
    wristL, wristR indices (COCO): 9 and 10; elbow 7/8; shoulder 5/6
    """
    Wl, Wr = 9, 10
    # choose per-frame faster wrist as proxy for racket hand
    v = []
    for t in range(1, len(kpts)):
        dl = np.linalg.norm(kpts[t, Wl]-kpts[t-1, Wl])
        dr = np.linalg.norm(kpts[t, Wr]-kpts[t-1, Wr])
        v.append(max(dl, dr))
    v = np.array([v[0]] + v)  # pad
    return smooth(v, k=7)

def elbow_angle_rate(kpts):
    # angle at elbow: shoulder-elbow-wrist
    def angle(p, q, r):
        v1 = p-q; v2 = r-q
        a = np.arctan2(v1[1], v1[0]) - np.arctan2(v2[1], v2[0])
        return np.abs((a+np.pi)%(2*np.pi)-np.pi)
    Ls, Le, Lw = 5,7,9
    Rs, Re, Rw = 6,8,10
    ang=[]
    for t in range(len(kpts)):
        aL = angle(kpts[t,Ls], kpts[t,Le], kpts[t,Lw])
        aR = angle(kpts[t,Rs], kpts[t,Re], kpts[t,Rw])
        ang.append(max(aL,aR))
    ang = np.array(ang)
    d_ang = np.abs(np.diff(ang,prepend=ang[0]))
    return smooth(d_ang, k=7)

def nms_peaks(sig, min_dist=12, thr=0.8):
    idx = []
    last = -999
    mx = sig.max() if len(sig)>0 else 1.0
    for i in range(2, len(sig)-2):
        if sig[i] > sig[i-1] and sig[i] > sig[i+1] and sig[i] > thr*mx and i-last>=min_dist:
            idx.append(i); last = i
    return idx

In [11]:
def propose_contacts_for_video(proxy_path, audio_boost=True):
    vr = VideoReader(proxy_path, ctx=cpu(0))
    fps = float(vr.get_avg_fps()) if hasattr(vr, 'get_avg_fps') else 30.0
    T = len(vr)
    print('reached here')

    # run pose in batches
    boxes = []
    keyseq = []  # T x players x J x 2
    for i in tqdm(range(T), desc="pose"):
        frame = vr[i].asnumpy()
        res = model.predict(frame, verbose=False)[0]

        # pick top 2 person detections by conf/area
        dets=[]
        for b, k in zip(res.boxes, res.keypoints):
            if int(b.cls.item())!=0:  # 0=person for COCO
                continue
            x1,y1,x2,y2 = map(float, b.xyxy[0].tolist())
            conf = float(b.conf[0])
            area = (x2-x1)*(y2-y1)
            kxy = k.xy[0].cpu().numpy()[:,:2]  # J x 2
            dets.append((conf*area, kxy))

        dets = sorted(dets, key=lambda x:-x[0])[:4]

        # Ensure the array always has a consistent shape (2 players)
        frame_kpts = [d[1] for d in dets]
        # Pad with a zero array for a dummy player if less than 2 players are detected
        while len(frame_kpts) < 4:
            frame_kpts.append(np.zeros((17, 4)))

        keyseq.append(np.stack(frame_kpts, axis=0))

    keyseq = np.array(keyseq, dtype=np.float32)  # T x P x J x 2

    # aggregate per player (or max over players per frame)
    v_max=[]; a_max=[]
    for p in range(min(2, keyseq.shape[1])):
        v = wrist_velocity_series(keyseq[:,p])
        a = elbow_angle_rate(keyseq[:,p])
        v_max.append(v); a_max.append(a)

    # Handle the case where no players were detected at all
    if not v_max:
        return [], fps

    v_max = np.max(np.stack(v_max,axis=0), axis=0)
    a_max = np.max(np.stack(a_max,axis=0), axis=0)

    sig = smooth((v_max * a_max), k=5)
    peaks = nms_peaks(sig, min_dist=int(0.4*fps), thr=0.55)  # ~≥0.4s apart

    # optional audio peaks
    if audio_boost:
        # decode audio and find transients
        import tempfile
        tmp_wav = "/content/tmp_audio.wav"
        os.system(f'ffmpeg -y -i "{proxy_path}" -vn -ac 1 -ar 16000 -acodec pcm_s16le "{tmp_wav}"')
        y, sr = librosa.load(tmp_wav, sr=16000)
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        onsets = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='time')
        audio_frames = set([int(t*fps) for t in onsets])
        # keep pose peaks that are near audio peaks
        peaks = [p for p in peaks if any(abs(p - af) <= int(0.06*fps) for af in audio_frames)]

    return peaks, fps

In [8]:
def save_proposals_csv(channel, yt_id, peaks, fps):
    outdir = f"{PROPOSALS}/{channel}/{yt_id}"
    os.makedirs(outdir, exist_ok=True)
    with open(f"{outdir}/proposals.csv","w",newline="") as f:
        w=csv.writer(f); w.writerow(["frame_idx","t_ms"])
        for p in peaks: w.writerow([p, int(1000*p/fps)])

def render_overlay(proxy_path, peaks, out_mp4):
    vr = VideoReader(proxy_path, ctx=cpu(0))
    fps = 25  # proxies are CFR=30
    H,W,_ = vr[0].asnumpy().shape
    tmp = "/content/ov.avi"
    vw = cv2.VideoWriter(tmp, cv2.VideoWriter_fourcc(*"XVID"), fps, (W,H))
    s=set(peaks)
    for i in range(len(vr)):
        f = vr[i].asnumpy()
        if i in s:
            cv2.putText(f, "PROPOSED CONTACT", (40,60), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,0,255), 3, cv2.LINE_AA)
            cv2.circle(f, (60,90), 12, (0,0,255), -1)
        vw.write(cv2.cvtColor(f, cv2.COLOR_RGB2BGR))
    vw.release()
    os.system(f'ffmpeg -y -i {tmp} -c:v libx264 -preset veryfast -crf 23 "{out_mp4}"')

### **Batch through all proxies → save proposals & an overlay for quick QC**

In [9]:
import glob

ROOT = "/content/drive/MyDrive/FIT3163,3164/SlowFast"
PROX = f"{ROOT}/02_proxy_25fps"
PROPOSALS = f"{ROOT}/03_pose_proposals"
os.makedirs(PROPOSALS, exist_ok=True)

In [12]:
glob_pattern = os.path.join(PROX, "*", "*", "proxy.mp4")
proxy_files = glob.glob(glob_pattern)
print(f"Found {len(proxy_files)} proxy files to process.")

proxy_files = ['/content/drive/MyDrive/FIT3163,3164/SlowFast/02_proxy_25fps/lcw_ld_2016_short/1/proxy.mp4']

for proxy in proxy_files:
    print(f"\n--- Processing file: {proxy} ---")

    try:
        # Extract channel and YouTube ID from the file path
        path_parts = proxy.split(os.sep)
        channel, yt_id = path_parts[-3], path_parts[-2]

        # Propose contacts based on pose and audio
        print("Running pose and audio analysis...")
        peaks, fps = propose_contacts_for_video(proxy, audio_boost=False)
        print(f"Successfully identified {len(peaks)} contact proposals.")

        # Save proposals to CSV
        save_proposals_csv(channel, yt_id, peaks, fps)
        print("CSV file successfully saved.")

        # Render overlay video
        out_mp4 = f"{PROPOSALS}/{channel}/{yt_id}/overlay.mp4"
        if not os.path.exists(out_mp4):
            print(f"Rendering overlay video to {out_mp4}...")
            render_overlay(proxy, peaks, out_mp4)
            if os.path.exists(out_mp4):
                print("Overlay video successfully created.")
            else:
                print("ERROR: Overlay video was not created. Check for errors in the render_overlay function.")
        else:
            print(f"Overlay video already exists at {out_mp4}. Skipping render.")

    except Exception as e:
        print(f"An error occurred while processing {proxy}: {e}")

Found 2 proxy files to process.

--- Processing file: /content/drive/MyDrive/FIT3163,3164/SlowFast/02_proxy_25fps/lcw_ld_2016_short/1/proxy.mp4 ---
Running pose and audio analysis...
reached here


pose:   0%|          | 0/2752 [00:00<?, ?it/s]

An error occurred while processing /content/drive/MyDrive/FIT3163,3164/SlowFast/02_proxy_25fps/lcw_ld_2016_short/1/proxy.mp4: all input arrays must have the same shape



