In [1]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import pickle

# Short aliases for the MediaPipe modules used throughout the notebook:
# `mp_pose` provides the Pose estimator and the PoseLandmark enum,
# `mp_drawing` provides draw_landmarks for overlaying the skeleton.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

In [6]:

# Locations of the trained MLP weights and of the pickled feature scaler.
MODEL_PATH = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\plank_model\model\plank_mlp_keypoints.pt"
SCALER_PATH = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\plank_model\model\scaler_keypoints_plank.pkl"


# Names of the MediaPipe pose landmarks fed to the classifier: the nose,
# followed by the LEFT/RIGHT pair of each listed joint. The order of this
# list defines the feature layout produced by extract_keypoints.
# NOTE(review): presumably this order must match the one used when the scaler
# and model were trained - confirm.
IMPORTANT_LMS = ["NOSE"] + [
    f"{side}_{joint}"
    for joint in (
        "SHOULDER", "ELBOW", "WRIST", "HIP",
        "KNEE", "ANKLE", "HEEL", "FOOT_INDEX",
    )
    for side in ("LEFT", "RIGHT")
]

In [7]:
# Restore the feature scaler from its pickle file; it is applied to every
# keypoint row before the row is passed to the model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so SCALER_PATH must only ever point at a trusted file.
with open(SCALER_PATH, "rb") as scaler_file:
    scaler = pickle.load(scaler_file)


class MLP_Full(nn.Module):
    """Fully connected classifier: input_dim -> 256 -> 128 -> 64 -> 1.

    Every hidden Linear layer is followed by ReLU and Dropout(0.3). The final
    Linear layer has a single output and no activation, so ``forward`` returns
    a raw score; the inference cells apply ``torch.sigmoid`` to it.

    The layers live in ``self.net`` (an ``nn.Sequential``) at the same indices
    as before, so ``state_dict`` keys remain ``net.0.*``, ``net.3.*``,
    ``net.6.*`` and ``net.9.*``.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 256, 128, 64)

        layers = []
        # One Linear/ReLU/Dropout triple per consecutive pair of widths.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(0.3)]
        # Output head: a single unit, no activation.
        layers.append(nn.Linear(widths[-1], 1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stack and return one score per input row."""
        scores = self.net(x)
        return scores

# Each selected landmark contributes four features: x, y, z and visibility.
input_dim = len(IMPORTANT_LMS) * 4  # x,y,z,v
#input_dim = 33 * 4
model = MLP_Full(input_dim)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute code, and it avoids the FutureWarning that
# recent PyTorch releases emit for a bare torch.load(path).
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(
    torch.load(MODEL_PATH, map_location="cpu", weights_only=True)
)
# Switch to inference mode so the Dropout layers are disabled.
model.eval()

  model.load_state_dict(torch.load(MODEL_PATH))


MLP_Full(
  (net): Sequential(
    (0): Linear(in_features=68, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.3, inplace=False)
    (9): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [8]:
def extract_keypoints(results):
    """Build the classifier's feature row from a MediaPipe pose result.

    For every name in IMPORTANT_LMS, in list order, the matching landmark's
    x, y, z and visibility values are appended to one flat vector.

    Args:
        results: object exposing ``pose_landmarks.landmark``, indexable by the
            integer values of ``mp_pose.PoseLandmark``. ``pose_landmarks``
            must not be None; the calling cells check this before calling.

    Returns:
        NumPy array of shape (1, 4 * len(IMPORTANT_LMS)).
    """
    landmarks = results.pose_landmarks.landmark
    selected = (
        landmarks[mp_pose.PoseLandmark[name].value] for name in IMPORTANT_LMS
    )
    features = [
        value
        for point in selected
        for value in (point.x, point.y, point.z, point.visibility)
    ]
    return np.array(features).reshape(1, -1)

# def extract_full_landmarks(results):
#    lm = results.pose_landmarks.landmark
#    row = []
#    for p in lm:
#        row.extend([p.x, p.y, p.z, p.visibility])
#    return np.array(row).reshape(1, -1)


In [9]:

# Live preview: classify plank form on every frame of a video and show the
# annotated frames in an OpenCV window. Press "q" in the window to stop early.
from collections import deque

video_path = r"C:\Users\caovi\Downloads\demo_plank.MOV"
# Alternative input clips:
#r"C:\Users\caovi\Downloads\IMG_5630.mp4"
#r"C:\Users\caovi\Downloads\IMG_5634.MOV"

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently showing nothing.
    cap.release()
    raise IOError(f"Could not open video: {video_path}")

# --- SMOOTHING ---
# Rolling window over the last 20 per-frame probabilities; the label is
# decided on the window mean rather than on a single frame.
prob_smooth = deque(maxlen=20)

threshold = 0.6  # confidence threshold applied to the smoothed probability

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of the video (or a read error).
                break

            # Pose estimation runs on an RGB copy of the BGR frame; the copy
            # is marked read-only while MediaPipe processes it.
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img.flags.writeable = False
            results = pose.process(img)

            # Back to a writeable BGR image for drawing and display.
            img.flags.writeable = True
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            if results.pose_landmarks:

                mp_drawing.draw_landmarks(
                    img, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                row = extract_keypoints(results)
                #row = extract_full_landmarks(results)

                row_scaled = scaler.transform(row)
                row_tensor = torch.tensor(row_scaled, dtype=torch.float32)

                with torch.no_grad():
                    prob = torch.sigmoid(model(row_tensor)).item()
                # Debug trace of the raw (unsmoothed) per-frame probability.
                print("PROB =", prob)

                prob_smooth.append(prob)
                avg_prob = np.mean(prob_smooth)

                # classification
                # NOTE(review): a smoothed probability below the threshold is
                # labelled GOOD FORM, which assumes the model's positive class
                # is bad form - confirm against the training labels.
                pred = "GOOD FORM" if avg_prob < threshold else "BAD FORM"
                # BGR colours: green for good form, red for bad form.
                color = (0,255,0) if pred=="GOOD FORM" else (0,0,255)

                # Display the smoothed probability, i.e. the value the label
                # is actually based on (same as the export cell).
                cv2.putText(img, f"{pred} ({avg_prob:.2f})",
                            (10,40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, color, 2)

            cv2.imshow("Plank Detection - MLP", img)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Always free the capture and close the window, even if a frame failed.
    cap.release()
    cv2.destroyAllWindows()

    # Pump the GUI event loop a few more times after destroyAllWindows.
    # NOTE(review): presumably needed so the window really closes when run
    # from a notebook - confirm.
    for i in range(5):
        cv2.waitKey(1)

PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 4.390860935180306e-25
PROB = 1.2211154769368581e-14
PROB = 1.0
PROB = 1.0
PROB = 1.0
PROB = 1.0
PROB = 1.0
PROB = 1.0
PROB = 1.0
PROB = 1.0
PROB = 0.00017290427058469504
PROB = 1.0
PROB = 6.5514570801329364e-24
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.0
PROB = 0.9950057864189148
PROB = 0.9999998807907104
PROB = 0.20156504213809967
PROB = 0.6117478013038635
PROB = 0.9084233045578003
PROB = 0.0071006156504154205
PROB = 1.0
PROB = 1.0
PROB =

In [None]:
# Offline export: run the plank classifier on every frame of INPUT_VIDEO and
# write the annotated frames to OUTPUT_VIDEO.
from collections import deque

INPUT_VIDEO  = r"C:\Users\caovi\Downloads\demo_plank.MOV"
OUTPUT_VIDEO = r"C:\Users\caovi\Downloads\demo_plank_annotated.mp4"

cap = cv2.VideoCapture(INPUT_VIDEO)
if not cap.isOpened():
    # Without this check the cell would report success without writing a frame.
    cap.release()
    raise IOError(f"Could not open input video: {INPUT_VIDEO}")

width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps    = cap.get(cv2.CAP_PROP_FPS)
# The reported frame rate can be 0 or NaN when it is unknown; fall back to a
# usable default rather than handing an invalid rate to the writer.
if not np.isfinite(fps) or fps <= 0:
    fps = 30.0

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(OUTPUT_VIDEO, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    out.release()
    raise IOError(f"Could not open output video for writing: {OUTPUT_VIDEO}")

# Rolling window over the last 20 per-frame probabilities; the label is
# decided on the window mean rather than on a single frame.
prob_smooth = deque(maxlen=20)
threshold = 0.6

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of the video (or a read error).
                break

            # Pose estimation runs on an RGB copy of the BGR frame; the copy
            # is marked read-only while MediaPipe processes it.
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img.flags.writeable = False
            results = pose.process(img)

            # Back to a writeable BGR image for drawing and encoding.
            img.flags.writeable = True
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            if results.pose_landmarks:

                mp_drawing.draw_landmarks(img, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                row = extract_keypoints(results)
                row_scaled = scaler.transform(row)
                row_tensor = torch.tensor(row_scaled, dtype=torch.float32)

                with torch.no_grad():
                    prob = torch.sigmoid(model(row_tensor)).item()

                prob_smooth.append(prob)
                avg_prob = np.mean(prob_smooth)

                # NOTE(review): a smoothed probability below the threshold is
                # labelled GOOD FORM, which assumes the model's positive class
                # is bad form - confirm against the training labels.
                pred = "GOOD FORM" if avg_prob < threshold else "BAD FORM"
                # BGR colours: green for good form, red for bad form.
                color = (0,255,0) if pred=="GOOD FORM" else (0,0,255)

                cv2.putText(img, f"{pred} ({avg_prob:.2f})",
                            (20,60), cv2.FONT_HERSHEY_SIMPLEX,
                            1.4, color, 3)

            # Frames without a detected pose are written unannotated.
            out.write(img)
finally:
    # Always release both handles so the output file is finalised and closed,
    # even if processing a frame raised.
    cap.release()
    out.release()

# Only reached when the loop finished without an exception.
print("Video saved to:", OUTPUT_VIDEO)


🎉 Video saved to: C:\Users\caovi\Downloads\demo_plank_annotated.mp4
