In [17]:
import cv2
import mediapipe as mp
import time
import os
import csv
import numpy as np

class poseDetector():
    def __init__(self, mode=False, upBody=False, smooth=True, detectionCon=True, trackCon=0.5):
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(mode, upBody, smooth, detectionCon, trackCon)

    def findPose(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)
        if self.results.pose_landmarks and draw:
            self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
        return img

    def findPosition(self, img):
        lmList = []
        if self.results.pose_landmarks:
            for id, lm in enumerate(self.results.pose_landmarks.landmark):
                h, w, _ = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmList.append([id, cx, cy, lm.x, lm.y, lm.z])
        return lmList

def process_squat_videos(video_folder: str):
    output_folder = os.path.join(video_folder, "Squat_PoseCSVs")
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(video_folder):
        if filename.lower().endswith(".mp4"):  
            video_path = os.path.join(video_folder, filename)
            save_squat_csv(video_path, output_folder)
            print(f"Processed {filename} ✅")

def save_squat_csv(video_path: str, output_folder: str):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Couldn't read {video_path}")
        return
    
    detector = poseDetector()
    base_filename = os.path.splitext(os.path.basename(video_path))[0]
    csv_file = os.path.join(output_folder, f"squat_data_{base_filename}.csv")

    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Time", "Hip_Y", "Squat_State"])

        start_time = time.time()
        hip_y_values = []

        while True:
            success, img = cap.read()
            if not success:
                break
            
            img = detector.findPose(img)
            lmList = detector.findPosition(img)

            timestamp = time.time() - start_time  

            if lmList and len(lmList) > 24:  
                hip_y = (lmList[23][2] + lmList[24][2]) / 2  
                hip_y_values.append(hip_y)

                if len(hip_y_values) > 30:
                    min_hip = np.min(hip_y_values)
                    max_hip = np.max(hip_y_values)
                    squat_threshold = (max_hip + min_hip) / 2  

                    squat_state = "down" if hip_y >= squat_threshold else "up"
                else:
                    squat_state = "unknown"  

                writer.writerow([timestamp, hip_y, squat_state])
                file.flush()  

                cv2.putText(img, f"Squat: {squat_state.upper()}", (50, 100), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
                cv2.imshow("Squat Detection", img)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()
    print(f"CSV saved: {csv_file} ✅")

if __name__ == "__main__":
    video_folder = r"C:\Users\dogat\Desktop\DL_Vidoes"
    process_squat_videos(video_folder)
    print("✅ All videos processed!")


Downloading model to C:\Users\dogat\anaconda3\Lib\site-packages\mediapipe/modules/pose_landmark/pose_landmark_lite.tflite
CSV saved: C:\Users\dogat\Desktop\DL_Vidoes\Squat_PoseCSVs\squat_data_Recording 2025-03-26 145136.csv ✅
Processed Recording 2025-03-26 145136.mp4 ✅
CSV saved: C:\Users\dogat\Desktop\DL_Vidoes\Squat_PoseCSVs\squat_data_Recording 2025-03-26 145435.csv ✅
Processed Recording 2025-03-26 145435.mp4 ✅
CSV saved: C:\Users\dogat\Desktop\DL_Vidoes\Squat_PoseCSVs\squat_data_Recording 2025-03-26 145629.csv ✅
Processed Recording 2025-03-26 145629.mp4 ✅
CSV saved: C:\Users\dogat\Desktop\DL_Vidoes\Squat_PoseCSVs\squat_data_Recording 2025-03-26 145706.csv ✅
Processed Recording 2025-03-26 145706.mp4 ✅
CSV saved: C:\Users\dogat\Desktop\DL_Vidoes\Squat_PoseCSVs\squat_data_Recording 2025-03-26 145757.csv ✅
Processed Recording 2025-03-26 145757.mp4 ✅
CSV saved: C:\Users\dogat\Desktop\DL_Vidoes\Squat_PoseCSVs\squat_data_Recording 2025-03-26 145921.csv ✅
Processed Recording 2025-03-26 145

In [7]:
import cv2
import mediapipe as mp
import time
import os
import csv
import numpy as np
import cv2
import mediapipe as mp
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [9]:
import cv2
import mediapipe as mp
import numpy as np
import os

mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_drawing = mp.solutions.drawing_utils

video_path = r"C:\Users\dogat\Desktop\DL_Vidoes" # Update this path
keypoints_data = []

for file in os.listdir(video_path):
    if file.endswith(".mp4"):
        cap = cv2.VideoCapture(os.path.join(video_path, file))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            if results.pose_landmarks:
                keypoints = [ 
                    (lm.x, lm.y, lm.z) for lm in results.pose_landmarks.landmark
                ]
                keypoints_data.append(keypoints)

        cap.release()

keypoints_data = np.array(keypoints_data)
np.save("pose_data.npy", keypoints_data)  # Save extracted features


In [10]:
def label_squat(landmarks):
    hip_y = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value][1]  # Y-coordinate of the left hip
    return "DOWN" if hip_y > 0.6 else "UP"  # Adjust threshold based on video


In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

X_train = np.load("pose_data.npy")  # Load extracted features
y_train = np.array([label_squat(frame) for frame in X_train])  # Convert labels to array

# Convert labels to numeric values (0 = DOWN, 1 = UP)
y_train = np.array([0 if label == "DOWN" else 1 for label in y_train])

# Build LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(33, 3)),  # 33 keypoints with (x, y, z)
    LSTM(32),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid")  # Binary classification (UP or DOWN)
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Save the model
model.save("squat_classifier.h5")


Epoch 1/10


  super().__init__(**kwargs)


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.7106 - loss: 0.5948
Epoch 2/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.8613 - loss: 0.3657
Epoch 3/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - accuracy: 0.8508 - loss: 0.3462
Epoch 4/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.8779 - loss: 0.2851
Epoch 5/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.8913 - loss: 0.2658
Epoch 6/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.8813 - loss: 0.2662
Epoch 7/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.9171 - loss: 0.2207
Epoch 8/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.8978 - loss: 0.2475
Epoch 9/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



In [5]:
def classify_frame(frame, model, pose, squat_threshold=0.5):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.process(frame_rgb)

    if results.pose_landmarks:
        # Extract all 33 keypoints (x, y, z)
        keypoints = np.array([[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark])
        keypoints = np.expand_dims(keypoints, axis=0)  # Reshape to (1, 33, 3)

        # Make prediction
        prediction = model.predict(keypoints)[0][0]
        print(f"Model prediction: {prediction}, Threshold: {squat_threshold}")

        label = "UP" if prediction > squat_threshold else "DOWN"
    else:
        label = "UNKNOWN"

    return label, results.pose_landmarks


# Run on live video
cap = cv2.VideoCapture(0)  # Webcam
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    label = classify_frame(frame)
    cv2.putText(frame, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("Squat Detection", frame)

    if cv2.waitKey(10) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()


In [12]:
import cv2
import mediapipe as mp

# Initialize MediaPipe Pose model
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_drawing = mp.solutions.drawing_utils

# Path to your video
video_path = r"C:\\Users\\dogat\\Desktop\\DL_Vidoes"  # Update with your path

# Open a sample video to extract frames
cap = cv2.VideoCapture(os.path.join(video_path, r"C:\Users\dogat\Downloads\Squat1.MOV"))  # Change file name

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB (MediaPipe works with RGB)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process the frame to get pose landmarks
    results = pose.process(frame_rgb)

    # If landmarks are found, draw them on the frame
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    # Display the frame with pose landmarks
    cv2.imshow("Pose Estimation", frame)

    # Break the loop if the user presses 'q'
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()


In [13]:
def classify_frame(frame, model, pose, squat_threshold=0.5):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.process(frame_rgb)

    if results.pose_landmarks:
        # Extract keypoints (hip y-coordinate for squat detection)
        hip_y = results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP].y
        
        # Print debug information
        print(f"Hip Y-coordinate: {hip_y}")
        
        keypoints = np.array([[hip_y]])  # Model expects single feature input
        
        # Make prediction
        prediction = model.predict(keypoints)[0][0]
        print(f"Model prediction: {prediction}, Threshold: {squat_threshold}")

        label = "UP" if prediction > squat_threshold else "DOWN"
    else:
        label = "UNKNOWN"

    return label, results.pose_landmarks


# Load model
model = tf.keras.models.load_model("squat_classifier.h5")

# Open the video
cap = cv2.VideoCapture(os.path.join(video_path, r"C:\Users\dogat\Desktop\DL_Vidoes\Squat1.MOV"))  # Change file name

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Get classification label and pose landmarks
    label, landmarks = classify_frame(frame, model, pose)

    # If landmarks exist, draw them
    if landmarks:
        mp_drawing.draw_landmarks(frame, landmarks, mp_pose.POSE_CONNECTIONS)

    # Overlay label on frame
    cv2.putText(frame, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame with keypoints and label
    cv2.imshow("Pose Estimation with Label", frame)

    # Exit loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()




Hip Y-coordinate: 0.5467966198921204


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(1, 1), dtype=float32). Expected shape (None, 33, 3), but input has incompatible shape (1, 1)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 1), dtype=float32)
  • training=False
  • mask=None

In [13]:
import cv2
import mediapipe as mp
import os
import numpy as np
import tensorflow as tf

class poseDetector():
    def __init__(self, mode=False, upBody=False, smooth=True, detectionCon=True, trackCon=0.5):
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(mode, upBody, smooth, detectionCon, trackCon)

    def findPose(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)  # Use the pose object to process the frame
        if self.results.pose_landmarks and draw:
            self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
        return img

    def findPosition(self, img):
        lmList = []
        if self.results.pose_landmarks:
            for id, lm in enumerate(self.results.pose_landmarks.landmark):
                h, w, _ = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmList.append([id, cx, cy, lm.x, lm.y, lm.z])
        return lmList

def classify_frame(frame, model, pose, squat_threshold=0.5):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.pose.process(frame_rgb)  # Fix here: Access `pose.process`

    if results.pose_landmarks:
        # Extract all 33 keypoints (x, y, z)
        keypoints = np.array([[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark])
        keypoints = np.expand_dims(keypoints, axis=0)  # Reshape to (1, 33, 3)

        # Make prediction
        prediction = model.predict(keypoints)[0][0]
        print(f"Model prediction: {prediction}, Threshold: {squat_threshold}")

        label = "UP" if prediction > squat_threshold else "DOWN"
    else:
        label = "UNKNOWN"

    return label, results.pose_landmarks

# Load model
model = tf.keras.models.load_model("squat_classifier.h5")

# Open the video
video_path = r"C:\Users\dogat\Desktop\DL_Vidoes\fortSquat1.mov"  # Change file name if needed
cap = cv2.VideoCapture(video_path)

# Initialize pose detector
pose = poseDetector()

# Initialize frame predictions for smoothing (e.g., last 5 frames)
frame_predictions = []

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Get classification label and pose landmarks
    label, landmarks = classify_frame(frame, model, pose)

    # Append the current prediction (1 for "UP", 0 for "DOWN") to the list
    frame_predictions.append(1 if label == "UP" else 0)

    # Keep only the last 5 predictions (e.g., smoothing over 5 frames)
    if len(frame_predictions) > 5:
        frame_predictions.pop(0)

    # Apply majority voting or moving average for smoothing
    if np.mean(frame_predictions) > 0.5:
        final_label = "UP"
    else:
        final_label = "DOWN"

    # If landmarks exist, draw them
    if landmarks:
        mp.solutions.drawing_utils.draw_landmarks(frame, landmarks, mp.solutions.pose.POSE_CONNECTIONS)

    # Overlay final label on frame
    cv2.putText(frame, final_label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame with keypoints and label
    cv2.imshow("Pose Estimation with Label", frame)

    # Exit loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 427ms/step
Model prediction: 0.9948984980583191, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Model prediction: 0.9931038022041321, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Model prediction: 0.9922636151313782, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Model prediction: 0.9914385676383972, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Model prediction: 0.9926180839538574, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Model prediction: 0.9924228191375732, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Model prediction: 0.9923264980316162, Threshold: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Model prediction: 0.9926060438156128, Threshold: 0.5


KeyboardInterrupt

