In [2]:
# ------------------------------------------------
# FITNESS ACTION RECOGNITION USING PRETRAINED 3D CNN
# (R(2+1)D model trained on Kinetics-400)
# ------------------------------------------------

import torch
import torchvision
import torchvision.transforms as transforms
import cv2
import numpy as np

# ------------------------------------------------
# 1. Load Pretrained R(2+1)D Model
# ------------------------------------------------
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build R(2+1)D-18 with the "KINETICS400_V1" weights, move it to `device`,
# and put it in evaluation mode since this script only runs inference.
model = torchvision.models.video.r2plus1d_18(weights="KINETICS400_V1")
model = model.to(device)
model.eval()

# ------------------------------------------------
# 2. Define Transform for Video Frames
# ------------------------------------------------
# Per-frame preprocessing, applied in order: convert to a tensor, resize to
# 112x112, then normalize each channel with the constants below.
# NOTE(review): the mean/std values are presumably the statistics the
# Kinetics-400 checkpoint was trained with, and the direct 112x112 resize may
# differ from the checkpoint's reference preprocessing — confirm against the
# weights' documented transforms.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(112, 112)),
        transforms.Normalize(
            [0.43216, 0.394666, 0.37645],
            [0.22803, 0.22145, 0.216989],
        ),
    ]
)

def load_video_clip(video_path, num_frames=16):
    """Sample frames from a video and pack them into one model-ready clip.

    Frames are taken at indices ``0, step, 2 * step, ...`` where
    ``step = max(1, total_frames // num_frames)``. Each sampled frame is
    converted from BGR to RGB, resized to 112x112, and passed through the
    module-level ``transform``. If fewer than ``num_frames`` frames can be
    read, the last decoded frame is repeated to pad the clip.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        num_frames: Number of frames in the returned clip. Must be >= 1.

    Returns:
        A tensor of shape ``(1, 3, num_frames, 112, 112)`` on the
        module-level ``device``.

    Raises:
        ValueError: If ``num_frames`` is less than 1, or if no frame could be
            decoded from the video.
        OSError: If the video could not be opened.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open video: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = max(1, total_frames // num_frames)

        for i in range(0, total_frames, step):
            # Seek explicitly so that only the sampled frames are decoded.
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                # Stop at the first unreadable frame; padding below covers
                # whatever is missing.
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): `transform` also resizes to 112x112, so this
            # resize looks redundant; it is kept so the pixel values fed to
            # the model stay exactly as before.
            frame = cv2.resize(frame, (112, 112))
            frames.append(transform(frame))
            if len(frames) == num_frames:
                break
    finally:
        # Release the capture handle even if decoding or the transform fails.
        cap.release()

    if not frames:
        # Without this check the padding step below would fail with an
        # unhelpful IndexError on an empty list.
        raise ValueError(f"No frames could be decoded from video: {video_path}")

    # Pad short clips by repeating the last decoded frame (no-op when full).
    frames.extend([frames[-1]] * (num_frames - len(frames)))

    clip = torch.stack(frames, dim=1).unsqueeze(0).to(device)  # (1,3,T,H,W)
    return clip

# ------------------------------------------------
# 3. Load and Predict
# ------------------------------------------------
# Video to classify; the commented-out paths below are alternative inputs.
video_path =  r"C:\Users\DELL\Downloads\videoplayback (4).mp4" 
#video_path =  r"C:\Users\DELL\Downloads\pushup.mp4"# your pushup video
#video_path =  r"C:\Users\DELL\Downloads\jogging.mp4"
clip = load_video_clip(video_path)

# Forward pass with gradient tracking disabled (inference only).
with torch.no_grad():
    outputs = model(clip)

# Index of the highest-scoring class for the single clip in the batch.
predicted_class = outputs.argmax(dim=1).item()

# ------------------------------------------------
# 4. Decode Class Name
# ------------------------------------------------
# These imports are no longer needed by this step; they are kept because
# other cells outside this view may still rely on the names.
import json
from urllib.request import urlopen

# Read the class names from the metadata bundled with the same weights the
# model was loaded with. This avoids a network download at prediction time
# and guarantees the label order matches the model's output indices.
labels = torchvision.models.video.R2Plus1D_18_Weights.KINETICS400_V1.meta["categories"]
action_name = labels[predicted_class]

# NOTE(review): the characters before "Predicted Action" look like a
# mis-encoded emoji (UTF-8 bytes decoded with the wrong codec) — confirm and
# restore the intended character in the source file.
print(f"üèãÔ∏è Predicted Action: {action_name}")
# NOTE(review): nothing in this cell writes any figures; this message looks
# left over from another script — confirm before relying on it.
print("All figures saved in folder: figures")

üèãÔ∏è Predicted Action: jogging
All figures saved in folder: figures
