In [1]:
# Convert to input format for MMAction2
import glob
import numpy as np
import re
import pickle
from collections import defaultdict

# Movement code (as matched in the .npy file names) -> human-readable
# exercise name. The name becomes the prefix of each sample's frame_dir.
exercise_dict = dict(
    m01='deep squat',
    m02='hurdle step',
    m03='inline lunge',
    m04='side lunge',
    m05='sit to stand',
    m06='standing active leg raise',
    m07='standing shoulder abduction',
    m08='standing shoulder extension',
    m09='standing shoulder internal-external rotation',
    m10='standing shoulder scaption',
)

# Movement code -> zero-based index: 'm01' -> 0, 'm02' -> 1, ..., 'm10' -> 9.
# NOTE(review): not referenced by the cells visible here; presumably a
# per-exercise class index - confirm before relying on it.
exercise_number_dict = {
    f"m{number:02d}": number - 1
    for number in range(1, 11)
}

# Movement code -> even index: 'm01' -> 0, 'm02' -> 2, ..., 'm10' -> 18.
# NOTE(review): not referenced by the cells visible here; presumably the base
# index of a 20-class scheme (two classes per movement) - confirm.
exercise_number_dict_20 = {
    f"m{number:02d}": 2 * (number - 1)
    for number in range(1, 11)
}

# Samples grouped by subject ID (e.g. 's01'), so that leave-one-subject-out
# splits can be assembled from this mapping afterwards.
subject_data = defaultdict(list)

# glob returns paths in arbitrary, OS-dependent order; sort them so the order
# of the generated annotations is reproducible from run to run.
exercises_npy = sorted(glob.glob("../../Scale Normalized Filtered Skeletal Data/*.npy"))
assert (len(exercises_npy) == 2000), f"length is: {len(exercises_npy)}"

for npy_path in exercises_npy:
    # Movement code ('m01'..'m10') -> readable exercise name.
    # Files whose path carries no movement code are skipped silently.
    exercise_name_match = re.search(r"m0[1-9]|m10", npy_path)
    if not exercise_name_match:
        continue
    exercise_label_key = exercise_name_match.group(0)
    exercise_name = exercise_dict.get(exercise_label_key, "Unknown exercise")

    # Identifier such as '_s01_e01'; the captured group is the subject ID.
    # Without it the sample cannot be assigned to any subject, so skip the
    # file before doing any loading work.
    identifier_match = re.search(r"_(s\d{2})_e\d{2}", npy_path)
    if not identifier_match:
        print("Subject ID not found in:", npy_path)
        continue
    identifier_name = identifier_match.group(0)
    subject_id = identifier_match.group(1)

    # Label as assigned here: 1 = incorrect ('inc' appears in the path),
    # 0 = correct.
    # NOTE(review): an earlier comment at this spot stated the opposite
    # mapping (0 = incorrect, 1 = correct). The assignment itself is kept
    # unchanged - confirm which convention the training config expects.
    if re.search(r"inc", npy_path):
        suffix, label = "_inc", 1
    else:
        suffix, label = "_corr", 0
    frame_dir = exercise_name + identifier_name + suffix

    # The conversion below treats the array as [V x C x T]
    # (joints x coordinates x frames); fail early with a clear message if
    # the file is not 3-D.
    exercise_npy = np.load(npy_path)
    if exercise_npy.ndim != 3:
        raise ValueError(
            f"Expected a 3-D [V x C x T] array in {npy_path}, got shape {exercise_npy.shape}"
        )
    total_frames = exercise_npy.shape[2]

    # Keypoint format requires a 4-D array [M x T x V x C]:
    # [V x C x T] -> [T x V x C], then prepend the person axis M (size 1).
    # The coordinate values must be left untouched: assigning into
    # keypoint[0] would overwrite every joint position (and, because
    # transpose/expand_dims return views, the loaded array as well).
    keypoint = np.expand_dims(exercise_npy.transpose(2, 0, 1), axis=0)

    print("frame_dir: ", frame_dir)
    exercise_json = {
        'frame_dir': frame_dir,
        'label': int(label),
        'total_frames': total_frames,
        'keypoint': keypoint,
        'exercise_name': exercise_name  # Extra key-value for identifying type of exercise
    }

    subject_data[subject_id].append(exercise_json)


frame_dir:  deep squat_s01_e01_inc
frame_dir:  deep squat_s01_e01_corr
frame_dir:  deep squat_s01_e02_inc
frame_dir:  deep squat_s01_e02_corr
frame_dir:  deep squat_s01_e03_inc
frame_dir:  deep squat_s01_e03_corr
frame_dir:  deep squat_s01_e04_inc
frame_dir:  deep squat_s01_e04_corr
frame_dir:  deep squat_s01_e05_inc
frame_dir:  deep squat_s01_e05_corr
frame_dir:  deep squat_s01_e06_inc
frame_dir:  deep squat_s01_e06_corr
frame_dir:  deep squat_s01_e07_inc
frame_dir:  deep squat_s01_e07_corr
frame_dir:  deep squat_s01_e08_inc
frame_dir:  deep squat_s01_e08_corr
frame_dir:  deep squat_s01_e09_inc
frame_dir:  deep squat_s01_e09_corr
frame_dir:  deep squat_s01_e10_inc
frame_dir:  deep squat_s01_e10_corr
frame_dir:  deep squat_s02_e01_inc
frame_dir:  deep squat_s02_e01_corr
frame_dir:  deep squat_s02_e02_inc
frame_dir:  deep squat_s02_e02_corr
frame_dir:  deep squat_s02_e03_inc
frame_dir:  deep squat_s02_e03_corr
frame_dir:  deep squat_s02_e04_inc
frame_dir:  deep squat_s02_e04_corr
frame_

In [2]:
# Write one leave-one-subject-out (LOSO) annotation file per subject.
# Each subject in turn is held out as the validation set.
for subject_id, val_data in subject_data.items():

    # Samples of every subject except the held-out one form the training set
    train_data = [
        sample
        for other_id, samples in subject_data.items()
        if other_id != subject_id
        for sample in samples
    ]
    print(len(train_data))

    # Split name -> list of sample identifiers (frame_dir) belonging to it
    split_ids = {
        split_name: [sample['frame_dir'] for sample in samples]
        for split_name, samples in (("xsub_train", train_data), ("xsub_val", val_data))
    }

    # Input structure for MMAction2: the split lists plus every annotation
    # (training samples first, then the held-out subject's samples)
    loso_data = {
        "split": split_ids,
        "annotations": [*train_data, *val_data],
    }

    # One pickle per held-out subject, written to the working directory
    with open(f'loso_split_{subject_id}.pkl', 'wb') as f:
        pickle.dump(loso_data, f)

    print(f"Saving pkl file LOSO split for leaving out subject {subject_id}")

print("LOSO split files with annotations saved successfully.")

1800
Saving pkl file LOSO split for leaving out subject s01
1800
Saving pkl file LOSO split for leaving out subject s02
1800
Saving pkl file LOSO split for leaving out subject s03
1800
Saving pkl file LOSO split for leaving out subject s04
1800
Saving pkl file LOSO split for leaving out subject s05
1800
Saving pkl file LOSO split for leaving out subject s06
1800
Saving pkl file LOSO split for leaving out subject s07
1800
Saving pkl file LOSO split for leaving out subject s08
1800
Saving pkl file LOSO split for leaving out subject s09
1800
Saving pkl file LOSO split for leaving out subject s10
LOSO split files with annotations saved successfully.
