Our dataset is composed of 2 classes (squats and deadlifts).
Currently we have extracted 500+ .npy files for each class,
separated into 2 folders, squats and deadlifts, under the data/keypoints folder.
The data is extracted from the videos using the extract_keypoints.py script.
Our goal in this script is to load the data and create a dataset that can be used for training and testing.
We will combine the data from both classes into a single dataset and create labels for each class.
The labels will be 0 for squats and 1 for deadlifts.
Note: the loading cell further down also maps a third class, shoulder_press, to label 2.

In [1]:
import os
import numpy as np

In [2]:
def load_npy(path: str) -> np.ndarray:
    """Load a ``.npy`` file from disk and return its contents.

    Args:
        path: Location of the ``.npy`` file to read.

    Returns:
        The object produced by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
    """
    if os.path.exists(path):
        # NOTE(review): allow_pickle=True permits loading pickled object
        # arrays; unpickling can run arbitrary code, so only point this at
        # trusted files.
        return np.load(path, allow_pickle=True)
    raise FileNotFoundError(f"File not found: {path}")

In [3]:
# Load a single deadlift recording so its array layout can be inspected.
sample = load_npy(path="data/keypoints/deadlifts/deadlift_10.npy")

In [4]:
# We want to verify the shape of the data we have extracted.
# We expect a shape of (F, J, 4), where F is the number of frames, J is the
# number of joints, and 4 represents the x, y, z, and visibility values.
sample.shape

(78, 33, 4)

In [5]:
# Shape of the first frame alone: expected to be (joints, values per joint).
sample[0].shape

(33, 4)

In [6]:
# Inspect joint 0 of frame index 10 (the eleventh frame, not the first) to
# eyeball the four per-joint values: x, y, z and the visibility/confidence score.
sample[10][0]

array([0.07330841, 1.67114449, 0.15921673, 0.99966794])

In [4]:
# Build the padded dataset from every keypoint file on disk.
# Each sub-directory of data/keypoints holds one exercise class; the directory
# name selects the numeric label below. Clips have different frame counts, so
# every clip is zero-padded at the end up to the longest clip found.

# Assign numerical labels to each class
labels = { "squats": 0, "deadlifts": 1, "shoulder_press":2}

temp_samples = []  # (keypoint array, label) pairs, in load order

MAX_FRAMES = 0  # Frame count of the longest clip seen so far

frame_shape = None  # Per-frame shape (joints, values), taken from the first file

for exercise, label in labels.items():
    folder_path = os.path.join("data/keypoints", exercise)
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # samples in X / y identical across runs and machines.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".npy"):
            continue

        path = os.path.join(folder_path, file)
        sample = np.load(path)

        # Fail early and name the offending file; otherwise a malformed array
        # only surfaces later as an obscure error while padding or stacking.
        if sample.ndim != 3:
            raise ValueError(
                f"{path}: expected a 3-D array (frames, joints, values), "
                f"got shape {sample.shape}"
            )
        if frame_shape is None:
            frame_shape = sample.shape[1:]
        elif sample.shape[1:] != frame_shape:
            raise ValueError(
                f"{path}: per-frame shape {sample.shape[1:]} does not match "
                f"{frame_shape} used by the previously loaded files"
            )

        temp_samples.append((sample, label))

        print (f"Loaded {file} from {exercise} with shape {sample.shape}")
        MAX_FRAMES = max(MAX_FRAMES, sample.shape[0])

# Pass through the samples and pad each sample to the maximum number of frames
X = []  # Feature data
y = []  # Labels

for sample, label in temp_samples:
    pad_len = MAX_FRAMES - sample.shape[0]

    if pad_len > 0:
        # All-zero frames appended after the real ones; the trailing
        # dimensions are copied from the sample so the concatenation lines up.
        pad = np.zeros((pad_len, sample.shape[1], sample.shape[2]))
        padded_sample = np.concatenate((sample, pad), axis=0)
    else:
        # Already the longest clip: nothing to add.
        padded_sample = sample

    X.append(padded_sample)
    y.append(label)

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

print(f"Processed {len(X)} samples with {MAX_FRAMES} max frames")
print(f"X shape: {X.shape}, y shape: {y.shape}")


Loaded squat_0.npy from squats with shape (85, 33, 4)
Loaded squat_1.npy from squats with shape (85, 33, 4)
Loaded squat_10.npy from squats with shape (149, 33, 4)
Loaded squat_100.npy from squats with shape (95, 33, 4)
Loaded squat_101.npy from squats with shape (95, 33, 4)
Loaded squat_102.npy from squats with shape (80, 33, 4)
Loaded squat_103.npy from squats with shape (80, 33, 4)
Loaded squat_104.npy from squats with shape (80, 33, 4)
Loaded squat_105.npy from squats with shape (80, 33, 4)
Loaded squat_106.npy from squats with shape (80, 33, 4)
Loaded squat_107.npy from squats with shape (80, 33, 4)
Loaded squat_108.npy from squats with shape (102, 33, 4)
Loaded squat_109.npy from squats with shape (102, 33, 4)
Loaded squat_11.npy from squats with shape (149, 33, 4)
Loaded squat_110.npy from squats with shape (102, 33, 4)
Loaded squat_111.npy from squats with shape (102, 33, 4)
Loaded squat_112.npy from squats with shape (102, 33, 4)
Loaded squat_113.npy from squats with shape (10

In [5]:
# Write the features X and labels y to one compressed .npz archive, stored
# under the keys "X" and "y".
# NOTE(review): np.savez_compressed is documented to return None, so `final`
# presumably holds None rather than the saved data; confirm and consider
# dropping the assignment.
final = np.savez_compressed("data/keypoints/deadlifts_squats_shoulder_press_2025-06-01.npz", X=X, y=y)

In [6]:
# Test the saved file: reload the archive and compare the stored shapes with
# what was printed when the dataset was built.
# NOTE(review): the object returned by np.load for an .npz file appears to
# keep the file open until it is closed; confirm whether an explicit close or
# a `with` block is wanted here.
loaded = np.load("data/keypoints/deadlifts_squats_shoulder_press_2025-06-01.npz")

loaded["X"].shape, loaded["y"].shape  # Check the shapes of the loaded data

((1962, 331, 33, 4), (1962,))