In [1]:
!pip install pytorchvideo torch torchvision opencv-python

Collecting pytorchvideo
  Downloading pytorchvideo-0.1.5.tar.gz (132 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting torch
  Downloading torch-2.7.1-cp310-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting torchvision
  Downloading torchvision-0.22.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Collecting fvcore (from pytorchvideo)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting av (from pytorchvideo)
  Downloading av-14.4.0-cp310-cp310-macosx_12_0_arm64.whl.metadata (4.6 kB)
Collecting parameterized (from pytorchvideo)
  Downloading parameteri

In [4]:
import torch
import cv2
import numpy as np
from torch import nn
from pytorchvideo.models.hub import slowfast_r50
from torchvision.transforms import Compose
import torchvision.transforms._transforms_video as transforms

# Load pretrained SlowFast model and strip classifier head
def load_slowfast_model():
    model = slowfast_r50(pretrained=True)
    model.blocks[-1].proj = nn.Identity()  # remove classification head
    model.eval()
    return model

# Video transform: normalize pixel values
def get_transform():
    return Compose([
        transforms.NormalizeVideo(
            mean=[0.45, 0.45, 0.45],
            std=[0.225, 0.225, 0.225]
        )
    ])

# Load video frames and format for SlowFast
def load_video_frames(path, num_frames=32, slowfast_alpha=4):
    cap = cv2.VideoCapture(path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample_rate = max(total_frames // num_frames, 1)

    frames = []
    for i in range(num_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * sample_rate)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (224, 224))
        frames.append(frame)

    cap.release()

    if len(frames) < num_frames:
        raise ValueError(f"Only {len(frames)} frames could be read. Expected {num_frames}.")

    video = np.stack(frames)  # (T, H, W, C)
    video = torch.from_numpy(video).float() / 255.0  # Normalize to [0, 1]
    video = video.permute(3, 0, 1, 2)  # (C, T, H, W)
    video = video.unsqueeze(0)  # (B, C, T, H, W)
    return video

# Prepare video for SlowFast input (two pathways)
def pack_pathway(video, alpha=4):
    fast_pathway = video
    slow_pathway = video[:, :, ::alpha, :, :]
    return [slow_pathway, fast_pathway]

# Main function: extract embedding from video path
def extract_slowfast_embedding(video_path):
    model = load_slowfast_model()
    transform = get_transform()
    video = load_video_frames(video_path)
    video = transform(video.squeeze(0)).unsqueeze(0)
    inputs = pack_pathway(video)

    with torch.no_grad():
        embedding = model(inputs)
    return embedding.squeeze(0)  # shape: (2304,)

# Test path
video_path = "/Users/j/code/bjj_classifier/data/training/pulling_guard/pulling_guard1.mp4"

# Extract and print
embedding = extract_slowfast_embedding(video_path)
print("Embedding shape:", embedding.shape)  # should be torch.Size([2304])
print("Embedding sample:", embedding[:5])    # first 5 values

Embedding shape: torch.Size([2304])
Embedding sample: tensor([0.3879, 0.2860, 0.1396, 0.0334, 0.0007])


In [9]:
name_dict = {"name" : "Jacob"}
name_dict["name"]

'Jacob'

In [27]:
import os
data_dict = {}
data_dict["training"] = {}
data_dict["training"]["pulling_guard"] = {}
for file_name in os.listdir("../data/training/pulling_guard"):
    if file_name != ".DS_Store":
        video_path = f"/Users/j/code/bjj_classifier/data/training/pulling_guard/{file_name}"
        embedding = extract_slowfast_embedding(video_path)
        data_dict["training"]["pulling_guard"][file_name] = embedding

data_dict["training"]["passing_guard"] = {}
for file_name in os.listdir("../data/training/passing_guard"):
    if file_name != ".DS_Store":
        video_path = f"/Users/j/code/bjj_classifier/data/training/passing_guard/{file_name}"
        embedding = extract_slowfast_embedding(video_path)
        data_dict["training"]["passing_guard"][file_name] = embedding
    
data_dict["test"] = {}
data_dict["test"]["pulling_guard"] = {}
for file_name in os.listdir("../data/test/pulling_guard"):
    if file_name != ".DS_Store":
        video_path = f"/Users/j/code/bjj_classifier/data/test/pulling_guard/{file_name}"
        embedding = extract_slowfast_embedding(video_path)
        data_dict["test"]["pulling_guard"][file_name] = embedding

data_dict["test"]["passing_guard"] = {}
for file_name in os.listdir("../data/test/passing_guard"):
    if file_name != ".DS_Store":
        video_path = f"/Users/j/code/bjj_classifier/data/test/passing_guard/{file_name}"
        embedding = extract_slowfast_embedding(video_path)
        data_dict["test"]["passing_guard"][file_name] = embedding        


        
print(data_dict["test"]["passing_guard"])

{'passing_guard06.mp4': tensor([0.6926, 0.2768, 0.3470,  ..., 0.1357, 1.0479, 0.0214]), 'passing_guard05.mp4': tensor([0.2913, 0.0769, 0.1399,  ..., 0.4303, 0.7424, 0.0228])}


In [80]:
import os

def build_dataset(base_path, categories):
    dataset = {}
    for category in categories:
        dataset[category] = {}
        for label in ["pulling_guard", "passing_guard"]:
            label_path = os.path.join(base_path, category, label)
            dataset[category][label] = {}
            for file_name in os.listdir(label_path):
                if not file_name.startswith("."):
                    video_path = os.path.abspath(label_path + "/" + file_name)
                    embedding = extract_slowfast_embedding(video_path)
                    dataset[category][label][file_name] = embedding
    return dataset

data_dict = build_dataset("../data", ["training", "test"])

In [81]:
print(data_dict)

{'training': {'pulling_guard': {'pulling_guard02.mp4': tensor([0.1107, 0.0744, 0.0139,  ..., 0.2746, 0.2973, 0.0362]), 'pulling_guard03.mp4': tensor([0.1043, 0.4781, 0.0077,  ..., 0.0987, 0.4139, 0.1856]), 'pulling_guard01.mp4': tensor([0.3879, 0.2860, 0.1396,  ..., 0.2655, 0.2372, 0.0746]), 'pulling_guard04.mp4': tensor([0.3736, 0.0861, 0.0758,  ..., 0.3169, 0.5630, 0.0889])}, 'passing_guard': {'passing_gaurd03.mp4': tensor([0.1200, 1.0608, 0.0633,  ..., 0.0183, 0.2326, 0.0077]), 'passing_guard01.mp4': tensor([0.3247, 1.5934, 0.3402,  ..., 0.0160, 0.0654, 0.0239]), 'passing_gaurd02.mp4': tensor([0.3031, 1.2158, 0.6485,  ..., 1.2844, 0.2659, 0.0244]), 'passing_gaurd04.mp4': tensor([0.0475, 1.3844, 0.0196,  ..., 0.0000, 0.0993, 0.0014])}}, 'test': {'pulling_guard': {'pulling_guard05.mp4': tensor([0.1640, 0.1205, 0.0652,  ..., 0.0228, 0.4845, 0.0738]), 'pulling_guard06.mp4': tensor([0.0325, 0.1148, 0.0159,  ..., 0.0290, 0.1209, 0.0611])}, 'passing_guard': {'passing_guard06.mp4': tensor([

In [82]:
!pip install scikit-learn



In [83]:
import torch
import torch.nn.functional as F
import sys

embedding_a = data_dict["training"]["pulling_guard"]["pulling_guard02.mp4"]
embedding_b = data_dict["test"]["pulling_guard"]["pulling_guard05.mp4"]
##embedding_b = data_dict["test"]["passing_guard"]["passing_guard05.mp4"]

# Compute cosine similarity between [1, D] vectors
similarity = F.cosine_similarity(embedding_a.unsqueeze(0), embedding_b.unsqueeze(0), dim=1).item()
similarity_percentage = (similarity + 1) / 2 * 100

print(f"Similarity: {similarity_percentage:.2f}%")

Similarity: 87.21%


In [214]:
def perdiction(file_name):

    index1 = 0

    
    while file_name[0].isalpha():
        index1 += 1

    print(index1)

    
    embedding_a = data_dict["test"][file_name[0:len(file_name)-index1]][file_name]
    
    embedding_list_pulling = [] #make embedding list for all pulling guard videos
    embedding_list_passing = [] #make embedding list for all passing guard videos

    
    for files in os.listdir("../data/training/pulling_guard"): 
        if files != ".DS_Store":
            embedding_list_pulling.append(data_dict["training"]["pulling_guard"][files])

    for files in os.listdir("../data/training/passing_guard"): 
        if files != ".DS_Store":
            embedding_list_passing.append(data_dict["training"]["passing_guard"][files])

    
    sum1 = 0
    index2 = 0
    for embeddings in embedding_list_pulling:
        similarity = F.cosine_similarity(embedding_a.unsqueeze(0), embedding_list_pulling[index2].unsqueeze(0), dim=1).item()
        similarity_percentage = (similarity + 1) / 2 * 100
        sum1 += similarity_percentage
        index2 += 1

    average_pulling = sum1/len(embedding_list_pulling)
    print(average_pulling)

    sum2 = 0
    index3 = 0
    for embeddings in embedding_list_passing:
        similarity = F.cosine_similarity(embedding_a.unsqueeze(0), embedding_list_passing[index3].unsqueeze(0), dim=1).item()
        similarity_percentage = (similarity + 1) / 2 * 100
        sum2 += similarity_percentage
        index3 += 1
        
    average_passing = sum2/len(embedding_list_passing)
    print(average_passing)
    
    if average_passing < average_pulling:
        return "This is pulling guard"
    elif average_passing > average_pulling:
        return "This is passing guard"
    



In [216]:
perdiction("pulling_guard05.mp4")

KeyboardInterrupt: 

In [None]:
dir("hello")