In [1]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import os

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Determine important landmarks for plank
landmarks = [
    "NOSE",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "LEFT_ELBOW",
    "RIGHT_ELBOW",
    "LEFT_WRIST",
    "RIGHT_WRIST",
    "LEFT_HIP",
    "RIGHT_HIP",
    "LEFT_KNEE",
    "RIGHT_KNEE",
    "LEFT_ANKLE",
    "RIGHT_ANKLE",
    "LEFT_HEEL",
    "RIGHT_HEEL",
    "LEFT_FOOT_INDEX",
    "RIGHT_FOOT_INDEX",
]

# Generate all columns of the data frame

headers = ["label"] # Label column

for landmark in landmarks:
    headers += [f"{landmark.lower()}_x", f"{landmark.lower()}_y", f"{landmark.lower()}_z", f"{landmark.lower()}_v"]

# Create a data frame
empty_df = pd.DataFrame(columns=headers)

# Print the data frame
print(empty_df)

Empty DataFrame
Columns: [label, nose_x, nose_y, nose_z, nose_v, left_shoulder_x, left_shoulder_y, left_shoulder_z, left_shoulder_v, right_shoulder_x, right_shoulder_y, right_shoulder_z, right_shoulder_v, left_elbow_x, left_elbow_y, left_elbow_z, left_elbow_v, right_elbow_x, right_elbow_y, right_elbow_z, right_elbow_v, left_wrist_x, left_wrist_y, left_wrist_z, left_wrist_v, right_wrist_x, right_wrist_y, right_wrist_z, right_wrist_v, left_hip_x, left_hip_y, left_hip_z, left_hip_v, right_hip_x, right_hip_y, right_hip_z, right_hip_v, left_knee_x, left_knee_y, left_knee_z, left_knee_v, right_knee_x, right_knee_y, right_knee_z, right_knee_v, left_ankle_x, left_ankle_y, left_ankle_z, left_ankle_v, right_ankle_x, right_ankle_y, right_ankle_z, right_ankle_v, left_heel_x, left_heel_y, left_heel_z, left_heel_v, right_heel_x, right_heel_y, right_heel_z, right_heel_v, left_foot_index_x, left_foot_index_y, left_foot_index_z, left_foot_index_v, right_foot_index_x, right_foot_index_y, right_foot_inde

In [3]:
base_options = python.BaseOptions(model_asset_path='pose_landmarker.task')
options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)

I0000 00:00:1718081419.898682 9861974 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1718081420.002760 9862346 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1718081420.114946 9862346 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [4]:
def create_image_data(files, features, label, df):
    df = df.copy()
    
    for file in files:
        image = mp.Image.create_from_file(file)

        pose_landmarks = detector.detect(image).pose_landmarks[0]
        # print('Pose landmarks:', pose_landmarks)
        # print('Number of Pose landmarks:', len(pose_landmarks)) # should be 33

        landmarks = []
        for feature in features:
            landmark = pose_landmarks[mp.solutions.pose.PoseLandmark[feature].value]
            landmarks.append([landmark.x, landmark.y, landmark.z, landmark.visibility])
        landmarks = list(np.array(landmarks).flatten())
        # print('Number of landmarks:', len(landmarks))

        # Add the label to the landmarks
        sample = [label] + landmarks
        sample = pd.Series(sample, index=df.columns)

        # Add the sample to the data frame
        df = pd.concat([df, sample.to_frame().T], ignore_index=True)
    
    return df

In [11]:
def create_video_data(files, frame_skip_interval, features, label, df):
    df = df.copy()

    for file in files:
        frame_number = 0
        
        with mp.solutions.pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            video = cv2.VideoCapture(file)
            fps = video.get(cv2.CAP_PROP_FPS)
            frame_number_capture_rate = int( fps / frame_skip_interval)

            while video.isOpened():
                ret, frame = video.read()
                
                if not ret:
                    break

                if frame_number == frame_number_capture_rate:
                    landmarks = []
                    pose_landmarks = pose.process(frame).pose_landmarks.landmark

                    for feature in features:
                        landmark = pose_landmarks[mp.solutions.pose.PoseLandmark[feature].value]
                        landmarks.append([landmark.x, landmark.y, landmark.z, landmark.visibility])

                    landmarks = list(np.array(landmarks).flatten())
                    sample = [label] + landmarks
                    print(sample)

                    # if len(sample) == len(df.columns):
                    #     sample = pd.Series(sample, index=df.columns)
                    #     df = pd.concat([df, sample.to_frame().T], ignore_index=True)
            
            video.release()
       
    return df

In [None]:
# frame_number = 0
# video_df = pd.DataFrame(columns=headers)

# with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
#     while video.isOpened():
#         ret, frame = video.read()
        
#         if not ret:
#             break
        
#         # keypoints = []

#         if frame_number % 10 == 0:
#             # pose_landmarks = detector.detect(frame).pose_landmarks[0]
#             results = pose.process(frame)
#             # print(results.pose_landmarks)
#             print(frame_number)

#             keypoints = []
            
#             for landmark in landmarks:
#                 keypoint = results.pose_landmarks.landmark[mp_pose.PoseLandmark[landmark].value]
#                 keypoints.append([keypoint.x, keypoint.y, keypoint.z, keypoint.visibility])

#             # Insert action as the label (first column)
#             keypoints = list(np.array(keypoints).flatten())
#             keypoints.insert(0, 'c')
#             keypoints = pd.Series(keypoints, index=video_df.columns)

#             video_df = pd.concat([video_df, keypoints.to_frame().T], ignore_index=True)

#         frame_number += 1
#         # print(f'Frame number: {frame_number}')

# video.release()

# # keypoints = list(np.array(keypoints).flatten())
# video_df


In [6]:
image_data_path = "data/plank/image"
video_data_path = "data/plank/video"

In [12]:
correct_files = [os.path.join(image_data_path, "correct", file) for file in os.listdir(os.path.join(image_data_path, "correct"))]
print("Number of correct samples:", len(correct_files))

correct_df = create_image_data(correct_files, landmarks, 'c', empty_df)
print('Correct data frame shape:', correct_df.shape)
correct_df.head()

Number of correct samples: 12
Correct data frame shape: (12, 69)


Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,c,0.106999,0.474287,0.041421,0.999992,0.204851,0.488668,-0.18609,0.999937,0.229784,...,0.165992,0.500833,0.893337,0.800755,-0.156181,0.965273,0.883106,0.752967,0.10702,0.515329
1,c,0.932826,0.475001,-5.4e-05,0.999962,0.799452,0.323116,0.251574,0.999886,0.813881,...,-0.084245,0.989843,0.120464,0.835478,0.194059,0.81017,0.086306,0.890535,-0.209508,0.990062
2,c,0.119034,0.49746,-0.114617,0.999986,0.243312,0.473431,-0.390952,0.999932,0.257345,...,0.427192,0.434707,0.928693,0.914889,-0.077621,0.873237,0.910039,0.877281,0.310827,0.421052
3,c,0.883873,0.498216,0.004287,0.999995,0.75265,0.383505,0.290709,0.99999,0.789969,...,-0.057218,0.997657,0.150336,0.819928,0.173781,0.956127,0.144818,0.86357,-0.159592,0.996758
4,c,0.916077,0.564887,0.018603,0.999997,0.781779,0.428076,0.255041,0.999986,0.810024,...,-0.055593,0.988392,0.137897,0.852291,0.140435,0.761842,0.123406,0.889846,-0.133799,0.988035


In [13]:
low_files = [os.path.join(image_data_path, "low", file) for file in os.listdir(os.path.join(image_data_path, "low"))]
print("Number of low samples:", len(low_files))
print(low_files)

low_df = create_image_data(low_files, landmarks, 'l', empty_df)
print('Low data frame shape:', low_df.shape)
low_df.head()

Number of low samples: 13
['data/plank/image/low/Screen Shot 2024-06-10 at 1.55.00 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.50.05 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 2.00.00 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.57.48 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.55.49 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.54.36 PM.png', 'data/plank/image/low/rawImage.jpg', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.41.08 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.55.35 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.48.12 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.57.30 PM.png', 'data/plank/image/low/Screen Shot 2024-06-10 at 1.57.16 PM.png', 'data/plank/image/low/plank-back-pain-form-mistake-1.jpg']
Low data frame shape: (13, 69)


Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,l,0.101902,0.561759,-0.044028,0.999938,0.207304,0.377525,-0.207559,0.999781,0.22457,...,0.159017,0.306151,0.889678,0.822372,-0.158673,0.917595,0.855868,0.764802,0.103131,0.276856
1,l,0.880689,0.509248,-0.024418,0.999994,0.772448,0.237865,0.200576,0.999983,0.78795,...,-0.057278,0.997923,0.126143,0.771611,0.254396,0.965288,0.097072,0.862351,-0.1261,0.998315
2,l,0.89645,0.609938,-0.094008,0.999992,0.775156,0.448662,0.214646,0.999879,0.807127,...,-0.030357,0.950442,0.128417,0.896496,0.23014,0.555288,0.109461,0.925922,-0.06684,0.938204
3,l,0.906445,0.350456,-0.048803,0.999864,0.814106,0.119444,0.207929,0.999981,0.813866,...,-0.084347,0.992579,0.092681,0.66186,0.2286,0.903576,0.056413,0.804102,-0.232467,0.993057
4,l,0.065302,0.490127,-0.015589,0.999968,0.141306,0.273406,-0.271097,0.999978,0.204585,...,0.226206,0.474378,0.956583,0.86284,-0.222333,0.969115,0.951063,0.73628,0.147898,0.444942


In [14]:
high_files = [os.path.join(image_data_path, "high", file) for file in os.listdir(os.path.join(image_data_path, "high"))]
print("Number of high samples:", len(high_files))

high_df = create_image_data(high_files, landmarks, 'h', empty_df)
print('High data frame shape:', high_df.shape)
high_df.head()

Number of high samples: 13
High data frame shape: (13, 69)


Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,h,0.879158,0.572394,-0.001965,0.999998,0.761738,0.473951,0.191882,0.999982,0.799332,...,-0.017896,0.994403,0.1704,0.7477,0.210187,0.778022,0.166921,0.784381,-0.087047,0.991719
1,h,0.163141,0.541749,-0.085495,0.999855,0.278051,0.442553,-0.23253,0.999546,0.273764,...,0.40498,0.035663,0.873996,0.69597,0.271223,0.288011,0.857829,0.689818,0.391697,0.029006
2,h,0.871076,0.666401,-0.006063,0.999997,0.789327,0.44269,0.237454,0.999969,0.808275,...,-0.138777,0.999456,0.196323,0.805368,0.214246,0.947438,0.12813,0.891945,-0.246104,0.999249
3,h,0.88381,0.660109,-0.001744,0.999996,0.789116,0.428211,0.242215,0.999969,0.811261,...,-0.097662,0.998134,0.193621,0.811241,0.236521,0.887194,0.140698,0.890875,-0.199125,0.997404
4,h,0.165611,0.608966,-0.060661,0.998918,0.246222,0.502463,-0.225676,0.999137,0.266306,...,0.205451,0.020024,0.894425,0.894556,-0.070551,0.284873,0.880512,0.84386,0.133038,0.011646


In [24]:
correct_video_files = [os.path.join(video_data_path, "correct", file) for file in os.listdir(os.path.join(video_data_path, "correct"))]
print("Number of correct samples:", len(correct_video_files))
print(correct_video_files[0])

correct_video_df = create_video_data(correct_video_files, 10, landmarks, 'c', empty_df)
correct_video_df.head()


Number of correct samples: 1
data/plank/video/correct/correct_plank.mp4


I0000 00:00:1718081591.107095 9861974 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
W0000 00:00:1718081591.200270 9864817 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1718081591.209217 9864817 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v


In [149]:
# Concatenate the data frames
final_df = pd.concat([correct_df, low_df, high_df], ignore_index=True)
final_df

# Save the data frame
final_df.to_csv("plank_data.csv", index=False)

In [18]:
# Drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

In [22]:
video = cv2.VideoCapture("data/plank/video/high/high_plank.mp4")

fps = video.get(cv2.CAP_PROP_FPS)
print(f'FPS: {fps}')

FPS: 60.0


In [23]:
frame_number = 0
video_df = pd.DataFrame(columns=headers)

with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while video.isOpened():
        ret, frame = video.read()
        
        if not ret:
            break
        
        # keypoints = []

        if frame_number % 10 == 0:
            # pose_landmarks = detector.detect(frame).pose_landmarks[0]
            results = pose.process(frame)
            # print(results.pose_landmarks)
            print(frame_number)

            keypoints = []
            
            for landmark in landmarks:
                keypoint = results.pose_landmarks.landmark[mp_pose.PoseLandmark[landmark].value]
                keypoints.append([keypoint.x, keypoint.y, keypoint.z, keypoint.visibility])

            # Insert action as the label (first column)
            keypoints = list(np.array(keypoints).flatten())
            keypoints.insert(0, 'c')
            keypoints = pd.Series(keypoints, index=video_df.columns)

            video_df = pd.concat([video_df, keypoints.to_frame().T], ignore_index=True)

        frame_number += 1
        # print(f'Frame number: {frame_number}')

video.release()

# keypoints = list(np.array(keypoints).flatten())
video_df


I0000 00:00:1718081560.979212 9861974 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
W0000 00:00:1718081561.082529 9864219 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1718081561.095570 9864219 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380


Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,c,0.895068,0.690565,0.018974,0.999848,0.800204,0.465185,0.30256,0.996815,0.818665,...,-0.18362,0.986964,0.154245,0.832613,0.298837,0.717325,0.107849,0.909471,-0.300584,0.987417
1,c,0.891157,0.689075,0.042846,0.999858,0.800201,0.461336,0.318944,0.99704,0.818647,...,-0.315155,0.987002,0.154239,0.83539,0.421889,0.721194,0.098807,0.913059,-0.426246,0.987578
2,c,0.889084,0.687469,0.042962,0.999867,0.800204,0.457981,0.323685,0.997231,0.818464,...,-0.316248,0.986951,0.154253,0.836239,0.41652,0.723713,0.098612,0.915067,-0.431316,0.987656
3,c,0.888776,0.685011,0.040019,0.999876,0.800517,0.454637,0.322045,0.997412,0.817682,...,-0.31275,0.987046,0.154351,0.838987,0.433947,0.728291,0.099154,0.91908,-0.42943,0.987894
4,c,0.888021,0.684972,0.038382,0.999885,0.80072,0.452388,0.320428,0.997574,0.817417,...,-0.315066,0.987273,0.153857,0.841624,0.427457,0.735746,0.098809,0.918944,-0.433057,0.988256
5,c,0.887698,0.684984,0.045758,0.999892,0.800777,0.452238,0.325076,0.997717,0.817436,...,-0.302119,0.987346,0.152781,0.844687,0.418092,0.739366,0.099043,0.918789,-0.415396,0.988304
6,c,0.887586,0.684852,0.050585,0.999898,0.800833,0.452022,0.326724,0.99784,0.817696,...,-0.28823,0.987371,0.152424,0.845385,0.393699,0.741917,0.099311,0.919717,-0.401829,0.988314
7,c,0.887619,0.685008,0.045727,0.999905,0.80097,0.451993,0.320389,0.997933,0.817822,...,-0.298665,0.987481,0.153166,0.845282,0.408549,0.745231,0.099051,0.921244,-0.414281,0.988461
8,c,0.888014,0.685812,0.028057,0.999911,0.801356,0.451954,0.302459,0.998012,0.817917,...,-0.329686,0.987621,0.155373,0.845423,0.440084,0.748322,0.09675,0.921282,-0.451604,0.988647
9,c,0.888221,0.686792,0.025742,0.999917,0.801591,0.451914,0.29793,0.998086,0.818033,...,-0.34396,0.98773,0.154113,0.846904,0.43126,0.753578,0.093704,0.921443,-0.465384,0.988899


In [15]:
# save_counts = 0

# # init_csv(DATASET_PATH)

# with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
#     while cap.isOpened():
#         ret, image = cap.read()

#         if not ret:
#             break

#         # Reduce size of a frame
#         # image = rescale_frame(image, 60)
#         image = cv2.flip(image, 1)

#         # Recolor image from BGR to RGB for mediapipe
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         image.flags.writeable = False

#         results = pose.process(image)

#         if not results.pose_landmarks: continue

#         # Recolor image from BGR to RGB for mediapipe
#         image.flags.writeable = True
#         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

#         # Draw landmarks and connections
#         mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

#         # Display the saved count
#         cv2.putText(image, f"Saved: {save_counts}", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 0), 2, cv2.LINE_AA)

#         cv2.imshow("CV2", image)

#         # Pressed key for action
#         k = cv2.waitKey(1) & 0xFF

#         # Press C to save as correct form
#         if k == ord('c'): 
#             # export_landmark_to_csv(DATASET_PATH, results, "C")
#             save_counts += 1
#         # Press L to save as low back
#         elif k == ord("l"):
#             # export_landmark_to_csv(DATASET_PATH, results, "L")
#             save_counts += 1
#         # Press L to save as high back
#         elif k == ord("h"):
#             # export_landmark_to_csv(DATASET_PATH, results, "H")
#             save_counts += 1

#         # Press q to stop
#         elif k == ord("q"):
#             break
#         else: continue

#     cap.release()
#     cv2.destroyAllWindows()

#     # (Optional)Fix bugs cannot close windows in MacOS (https://stackoverflow.com/questions/6116564/destroywindow-does-not-close-window-on-mac-using-python-and-opencv)
#     for i in range (1, 5):
#         cv2.waitKey(1)