In [87]:
import os
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
# import tensorflow as tf
from tensorflow.keras import layers, models

In [88]:
# initialize mediapipe pose estimation model
# `mp_pose` is the MediaPipe pose solution module; `pose` is the single
# estimator instance that extract_keypoints() calls for every image and frame.
mp_pose = mp.solutions.pose
# NOTE(review): Pose() is created with library defaults. Per the MediaPipe
# docs the default static_image_mode=False treats successive inputs as one
# video stream, while load_images() pushes unrelated still images through this
# same instance — consider a separate static_image_mode=True estimator for
# that path. TODO confirm against the installed MediaPipe version.
pose = mp_pose.Pose()
# alternative configuration kept for reference:
# pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)

In [89]:
# get keypoints from image with mediapipe
def extract_keypoints(image):
    """Run the shared `pose` estimator on one BGR image and flatten the result.

    Args:
        image: an image array in BGR channel order (it is converted with
            cv2.COLOR_BGR2RGB before being handed to the estimator).

    Returns:
        A 1-D numpy array holding x, y, z for each detected landmark, in
        landmark order. When the estimator reports no pose landmarks, a zero
        vector of length 33*3 is returned instead.
    """
    # the estimator is fed RGB, so swap the channel order first
    detection = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    found = detection.pose_landmarks

    # nothing detected: fall back to an all-zero feature vector
    if not found:
        return np.zeros(33*3)

    # one (x, y, z) triple per landmark, then collapse to a flat vector
    triples = [(point.x, point.y, point.z) for point in found.landmark]
    return np.array(triples).flatten()


In [90]:
# load images and their labels
def load_images(folder, exercises=("push up", "barbell biceps curl", "squat")):
    """Build a keypoint dataset from a folder of per-exercise image sub-folders.

    Each sub-folder of `folder` whose name is listed in `exercises` is treated
    as one class; every readable image inside it is converted to a keypoint
    vector with extract_keypoints() and labelled with the sub-folder name.

    Args:
        folder: path of the directory that contains one sub-folder per exercise.
        exercises: iterable of sub-folder names to include. Defaults to the
            three classes this notebook was trained on.

    Returns:
        (data, labels): two numpy arrays of equal length — the keypoint
        vectors and the matching exercise names. Both are empty when no
        matching image was found.

    Raises:
        OSError: propagated from os.listdir when `folder` cannot be listed.
    """
    wanted = set(exercises)
    data = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order stable so the seeded train/test split downstream is reproducible.
    for exercise in sorted(os.listdir(folder)):
        exercise_folder = os.path.join(folder, exercise)
        if exercise not in wanted or not os.path.isdir(exercise_folder):
            continue
        # go through each image in folder
        for img_file in sorted(os.listdir(exercise_folder)):
            image = cv2.imread(os.path.join(exercise_folder, img_file))
            if image is None:
                # cv2.imread returns None for files it cannot decode; skip them
                continue
            data.append(extract_keypoints(image))
            labels.append(exercise)
    return np.array(data), np.array(labels)

In [91]:
# load images from workout data folder
# `data` receives one keypoint vector per readable image and `labels` the name
# of the class sub-folder that image came from (see load_images).
base_folder = "./workout_data"
data,labels = load_images(base_folder)

In [92]:
# encode exercise names into numerical format
# The fitted `label_encoder` is kept around because the classify_* helpers call
# its inverse_transform to turn a predicted class index back into a name.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

In [93]:
# split data into training and test sets
# test_size=0.2 holds out 20% of the samples; random_state=42 fixes the shuffle
# so the split is repeatable for the same input order.
# NOTE(review): no `stratify=` is passed, so the class proportions of the two
# subsets are left to chance — consider stratify=labels_encoded.
from sklearn.model_selection import train_test_split
trainData, testData, trainLabel, testLabel = train_test_split(data, labels_encoded, test_size=0.2, random_state=42)

In [104]:
# build neural network to classify exercises based on keypoints
# One softmax output unit per distinct encoded exercise label.
num_classes = len(np.unique(labels_encoded))

# The input width is taken from the training matrix (one column per keypoint
# coordinate). It is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first Dense layer, which Keras 3 warns
# against for Sequential models.
model = models.Sequential([
    layers.Input(shape=(trainData.shape[1],)),
    layers.Dense(128, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(num_classes, activation="softmax"),
])

# compile model
# sparse_categorical_crossentropy takes the integer class ids produced by the
# label encoder directly, so no one-hot encoding step is needed.
model.compile(optimizer='adam', loss="sparse_categorical_crossentropy", metrics=['accuracy'])

In [105]:
# Train for 10 epochs; no batch_size is passed, so the Keras default applies.
# NOTE(review): the held-out test split is also used as validation_data, so the
# val_* numbers printed per epoch and the later model.evaluate() score are
# computed on the same samples.
model.fit(trainData, trainLabel, epochs=10, validation_data=(testData, testLabel))
# earlier variant kept for reference:
# model.fit(trainData, trainLabel, epochs=5, validation_data=(testData, testLabel), batch_size=32)


Epoch 1/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6520 - loss: 0.8383 - val_accuracy: 0.7976 - val_loss: 0.4953
Epoch 2/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8133 - loss: 0.4640 - val_accuracy: 0.8268 - val_loss: 0.4091
Epoch 3/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8432 - loss: 0.3776 - val_accuracy: 0.8756 - val_loss: 0.3252
Epoch 4/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8841 - loss: 0.3114 - val_accuracy: 0.9049 - val_loss: 0.2744
Epoch 5/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9083 - loss: 0.2749 - val_accuracy: 0.8634 - val_loss: 0.2943
Epoch 6/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9028 - loss: 0.2564 - val_accuracy: 0.8878 - val_loss: 0.2588
Epoch 7/10
[1m52/52[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x26201a8c350>

In [106]:
# Score the trained model on the held-out split. The model was compiled with
# metrics=['accuracy'], so evaluate() yields the loss followed by the accuracy.
loss, accuracy = model.evaluate(testData, testLabel)
print(f"test accuracy: {accuracy}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9202 - loss: 0.2446 
test accuracy: 0.9243902564048767


In [97]:
# use trained model to predict on new images
def classify_image(image_path):
    """Predict the exercise shown in a single image file.

    Args:
        image_path: path of the image to classify.

    Returns:
        The exercise name (as known to `label_encoder`) with the highest
        predicted probability.

    Raises:
        ValueError: if the file is missing or cannot be decoded as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside cvtColor.
        raise ValueError(f"could not read image (missing or not decodable): {image_path}")
    # the model expects a batch dimension, so wrap the single vector
    keypoints = np.expand_dims(extract_keypoints(image), axis=0)
    prediction = model.predict(keypoints)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_class[0]


In [111]:
def classify_video(video_path):
    """Classify every frame of a video and return the most frequent prediction.

    Each frame is run through extract_keypoints() and the trained model. The
    running majority label is drawn on the frame, which is shown in an OpenCV
    window; pressing 'q' stops playback early. The per-frame prediction is
    also printed.

    Args:
        video_path: source handed to cv2.VideoCapture.

    Returns:
        The label predicted for the largest number of frames (the label seen
        first wins a tie), or None when the source cannot be opened or yields
        no frames.
    """
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        print("Error opening video file")
        return None

    label_counts = {}  # exercise name -> number of frames predicted as it
    try:
        while vid.isOpened():
            ret, frame = vid.read()
            if not ret:
                break
            # the model expects a batch dimension, so wrap the single vector
            keypoints = np.expand_dims(extract_keypoints(frame), axis=0)
            prediction = model.predict(keypoints)
            predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
            label = predicted_class[0]
            label_counts[label] = label_counts.get(label, 0) + 1
            # overlay the majority vote so far, not the (noisier) per-frame guess
            majority_label = max(label_counts, key=label_counts.get)

            print(predicted_class)
            cv2.putText(frame, f'Predicted: {majority_label}', (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.imshow("Video classification", frame)

            # break loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # always free the capture and close the window, even if a frame fails
        vid.release()
        cv2.destroyAllWindows()

    if not label_counts:
        return None
    return max(label_counts, key=label_counts.get)


In [115]:
# Live classification from the default webcam (device index 0): every frame is
# classified, and the majority label so far is drawn on the preview window.
# CAP_DSHOW (DirectShow) is a Windows-only capture backend, so let OpenCV pick
# the backend itself on other platforms.
camera_backend = cv2.CAP_DSHOW if os.name == "nt" else cv2.CAP_ANY
vid = cv2.VideoCapture(0, camera_backend)
if not vid.isOpened():
    print("Error opening webcam")

label_counts = {}  # exercise name -> number of frames predicted as it
try:
    # the loop is skipped entirely when the camera failed to open
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            break
        # the model expects a batch dimension, so wrap the single vector
        keypoints = np.expand_dims(extract_keypoints(frame), axis=0)
        prediction = model.predict(keypoints)
        predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
        label = predicted_class[0]
        label_counts[label] = label_counts.get(label, 0) + 1
        majority_label = max(label_counts, key=label_counts.get)

        print(predicted_class)
        cv2.putText(frame, f'Predicted: {majority_label}', (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Video classification", frame)

        # break loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # always free the camera and close the window, even if a frame fails
    vid.release()
    cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [112]:
import warnings

# Suppress specific UserWarning from protobuf
# `message` is a regular expression matched against the start of the warning
# text, hence the escaped parentheses.
# NOTE(review): this cell sits below the webcam cell, so on a top-to-bottom run
# the filter only takes effect from this point on.
warnings.filterwarnings("ignore", category=UserWarning, message=r'SymbolDatabase.GetPrototype\(\) is deprecated')


In [113]:
# Despite its name, `path_test_img` points at a .mov clip, so it is passed to
# classify_video() (majority vote over frames) rather than classify_image().
path_test_img = "./test_data/jessefront_curl.mov"
# predicted_exercise = classify_image(path_test_img)
predicted_exercise = classify_video(path_test_img)
print(f"predicted exercise: {predicted_exercise}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
['barbell biceps curl']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [117]:
# Persist the trained model; 'RepModel.keras' is read back by the TFLite
# conversion cell that follows.
# NOTE(review): `save_format` is assigned but never passed to model.save() or
# referenced anywhere else in the notebook as shown.
save_format = 'keras'
model.save('RepModel.keras')

In [120]:
# The notebook's import cell only has `import tensorflow as tf` commented out,
# so bring `tf` into scope here; without it this cell raises NameError on a
# fresh kernel.
import tensorflow as tf

# Reload the model from the file written by the previous cell, so the
# conversion runs on exactly what was saved to disk.
model = tf.keras.models.load_model('RepModel.keras')
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Convert the model
tflite_model = converter.convert()

# Save the TFLite model (convert() returns the serialized model as bytes)
with open('RepModelLite.tflite', 'wb') as f:
    f.write(tflite_model)

  saveable.load_own_variables(weights_store.get(inner_path))


INFO:tensorflow:Assets written to: C:\Users\Jesse\AppData\Local\Temp\tmp472urh4e\assets


INFO:tensorflow:Assets written to: C:\Users\Jesse\AppData\Local\Temp\tmp472urh4e\assets


Saved artifact at 'C:\Users\Jesse\AppData\Local\Temp\tmp472urh4e'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 99), dtype=tf.float32, name='input_layer_16')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  2618035692816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2618035692624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2618035685328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2618035685712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2618035694928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2618035686288: TensorSpec(shape=(), dtype=tf.resource, name=None)
