In [10]:
%pip install keras


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os

mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_drawing = mp.solutions.drawing_utils

video_path = r"DL_Vidoes" # Update this path
keypoints_data = []

for file in os.listdir(video_path):
    if file.endswith(".mp4"):
        cap = cv2.VideoCapture(os.path.join(video_path, file))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            if results.pose_landmarks:
                keypoints = [ 
                    (lm.x, lm.y, lm.z) for lm in results.pose_landmarks.landmark
                ]
                keypoints_data.append(keypoints)

        cap.release()

keypoints_data = np.array(keypoints_data)
np.save("pose_data.npy", keypoints_data)  # Save extracted features


I0000 00:00:1743113282.797173 17053495 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1743113282.904090 17054536 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743113282.921927 17054533 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743113282.940534 17054529 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [18]:
def label_squat(landmarks):
    hip_y = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value][1]  # Y-coordinate of the left hip
    return "DOWN" if hip_y > 0.6 else "UP"  # Adjust threshold based on video


In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

X_train = np.load("pose_data.npy")  # Load extracted features
y_train = np.array([label_squat(frame) for frame in X_train])  # Convert labels to array

# Convert labels to numeric values (0 = DOWN, 1 = UP)
y_train = np.array([0 if label == "DOWN" else 1 for label in y_train])

# Build LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(33, 3)),  # 33 keypoints with (x, y, z)
    LSTM(32),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid")  # Binary classification (UP or DOWN)
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Save the model
model.save("squat_classifier2.h5")


Epoch 1/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.6796 - loss: 0.6120
Epoch 2/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.8681 - loss: 0.3280
Epoch 3/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.8756 - loss: 0.3198
Epoch 4/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8621 - loss: 0.3111
Epoch 5/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8881 - loss: 0.2701
Epoch 6/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.8778 - loss: 0.2665
Epoch 7/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8825 - loss: 0.2588
Epoch 8/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.8958 - loss: 0.2615
Epoch 9/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━



In [24]:
import cv2
import mediapipe as mp
import os
import numpy as np
import tensorflow as tf

class poseDetector():
    def __init__(self, mode=False, upBody=False, smooth=True, detectionCon=True, trackCon=0.5):
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(mode, upBody, smooth, detectionCon, trackCon)

    def findPose(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)  # Use the pose object to process the frame
        if self.results.pose_landmarks and draw:
            self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
        return img

    def findPosition(self, img):
        lmList = []
        if self.results.pose_landmarks:
            for id, lm in enumerate(self.results.pose_landmarks.landmark):
                h, w, _ = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmList.append([id, cx, cy, lm.x, lm.y, lm.z])
        return lmList

def classify_frame(frame, model, pose, squat_threshold=0.5):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.pose.process(frame_rgb)  # Fix here: Access `pose.process`

    if results.pose_landmarks:
        # Extract all 33 keypoints (x, y, z)
        keypoints = np.array([[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark])
        keypoints = np.expand_dims(keypoints, axis=0)  # Reshape to (1, 33, 3)

        # Make prediction
        prediction = model.predict(keypoints)[0][0]
        print(f"Model prediction: {prediction}, Threshold: {squat_threshold}")

        label = "UP" if prediction > squat_threshold else "DOWN"
    else:
        label = "UNKNOWN"

    return label, results.pose_landmarks

# Load model
model = tf.keras.models.load_model("squat_classifier2.h5")

# Open the video
video_path = r"Ryans Videos/Squat1.MOV"  # Change file name if needed
cap = cv2.VideoCapture(video_path)

# Initialize pose detector
pose = poseDetector()

# Initialize frame predictions for smoothing (e.g., last 5 frames)
frame_predictions = []

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Get classification label and pose landmarks
    label, landmarks = classify_frame(frame, model, pose)

    # Append the current prediction (1 for "UP", 0 for "DOWN") to the list
    frame_predictions.append(1 if label == "UP" else 0)

    # Keep only the last 5 predictions (e.g., smoothing over 5 frames)
    if len(frame_predictions) > 5:
        frame_predictions.pop(0)

    # Apply majority voting or moving average for smoothing
    if np.mean(frame_predictions) > 0.5:
        final_label = "UP"
    else:
        final_label = "DOWN"

    # If landmarks exist, draw them
    if landmarks:
        mp.solutions.drawing_utils.draw_landmarks(frame, landmarks, mp.solutions.pose.POSE_CONNECTIONS)

    # Overlay final label on frame
    cv2.putText(frame, final_label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame with keypoints and label
    cv2.imshow("Pose Estimation with Label", frame)

    # Exit loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()


I0000 00:00:1743096696.000386 16767264 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
W0000 00:00:1743096696.155702 16830428 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743096696.173981 16830430 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246ms/step
Model prediction: 0.984533965587616, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Model prediction: 0.9857223629951477, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Model prediction: 0.985848069190979, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Model prediction: 0.9870951175689697, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Model prediction: 0.9870699644088745, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Model prediction: 0.9873103499412537, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Model prediction: 0.9868662357330322, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Model prediction: 0.9872825741767883, Threshold: 0.5


KeyboardInterrupt: 