In [21]:
import os
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
from tensorflow.keras import layers, models

In [22]:
# initialize mediapipe pose estimation model
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

In [24]:
# get keypoints from image with mediapipe
def extract_keypoints(image):
    # convert image to RGB
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(rgb_image)

    # if no landmarks (joints) return zeros
    if not results.pose_landmarks:
        return np.zeros(33*3)
    
    # get keypoints in (x,y,z) coordinates format
    keypoints = []
    for landmark in results.pose_landmarks.landmark:
        keypoints.append([landmark.x, landmark.y, landmark.z])
    return np.array(keypoints).flatten()


In [26]:
# load images and their labels
def load_images(folder):
    data = []
    labels = []

    # go through each folder
    for exercise in os.listdir(folder):
        exercise_folder = os.path.join(folder,exercise)
        if os.path.isdir(exercise_folder):
            # go through each image in folder
            for img_file in os.listdir(exercise_folder):
                img_path = os.path.join(exercise_folder, img_file)

                # read image
                image = cv2.imread(img_path)
                if image is not None:
                    keypoints = extract_keypoints(image)
                    data.append(keypoints)
                    labels.append(exercise)
    return np.array(data), np.array(labels)

In [28]:
# load images from workout data folder
base_folder = "./workout_data"
data,labels = load_images(base_folder)



In [31]:
# encode exercise names into numerical format
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

In [32]:
# split data into training and test sets
from sklearn.model_selection import train_test_split
trainData, testData, trainLabel, testLabel = train_test_split(data, labels_encoded, test_size=0.2, random_state=42)

In [33]:
# build neural network to classify exercises based on keypoints
model = models.Sequential([
    layers.Dense(128, activation="relu", input_shape=(trainData.shape[1],)),
    layers.Dense(64, activation="relu"),
    layers.Dense(len(np.unique(labels_encoded)), activation="softmax")
])
# compile model
model.compile(optimizer='adam', loss="sparse_categorical_crossentropy", metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [34]:
model.fit(trainData, trainLabel, epochs=10, validation_data=(testData, testLabel))

Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6733 - loss: 0.7885 - val_accuracy: 0.8174 - val_loss: 0.4430
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8494 - loss: 0.4036 - val_accuracy: 0.8447 - val_loss: 0.3634
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8928 - loss: 0.3180 - val_accuracy: 0.8653 - val_loss: 0.3353
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8925 - loss: 0.2976 - val_accuracy: 0.8950 - val_loss: 0.2621
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9064 - loss: 0.2363 - val_accuracy: 0.9178 - val_loss: 0.2280
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9311 - loss: 0.2179 - val_accuracy: 0.9132 - val_loss: 0.2019
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x23000465870>

In [35]:
loss, accuracy = model.evaluate(testData, testLabel)
print(f"test accuracy: {accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9526 - loss: 0.1662
test accuracy: 0.9429223537445068


In [36]:
# use trained model to predict on new images
def classify_image(image_path):
    image = cv2.imread(image_path)
    keypoints = extract_keypoints(image)
    keypoints = np.expand_dims(keypoints, axis=0)
    prediction = model.predict(keypoints)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_class[0]


In [None]:
def classify_video(video_path):
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        print("Error opening video file")
        return
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            break
        keypoints = extract_keypoints(frame)
        keypoints = np.expand_dims(keypoints, axis=0)
        prediction = model.predict(keypoints)
        predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
        cv2.putText(frame, f'Predicted: {predicted_class[0]}', (10, 30), 
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        
        cv2.imshow("Video classification", frame)

        # break loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    vid.release()
    cv2.destroyAllWindows()


In [88]:
path_test_img = "./eric_pushup.mp4"
predicted_exercise = classify_video(path_test_img)
print(f"predicted exercise: {predicted_exercise}")

error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
