## Convert NTU RGB+D `.skeleton` files to NumPy `.npy` files

In [1]:
from pathlib import Path
import os
import numpy as np
import json

def parse_skeleton_file_to_dict(file_path, max_body=4, njoints=25):
    """Parse one ``.skeleton`` text file into a dict of per-body NumPy arrays.

    The file is read line by line with this layout:

    * line 0: number of frames;
    * per frame: one line with the number of bodies in that frame;
    * per body: one info line (ignored), one line with the joint count,
      then one line of whitespace-separated floats per joint.

    Only the first seven floats of a joint line are used: values 0-2 go to
    ``skel_body{i}``, values 3-4 to ``depth_body{i}`` and values 5-6 to
    ``rgb_body{i}``.

    Args:
        file_path: Path of the skeleton file (``str`` or path-like).
        max_body: Maximum number of bodies to keep per frame. Bodies with an
            index >= ``max_body`` are read past but not stored.
        njoints: Number of joint slots allocated per body. Joints with an
            index >= ``njoints`` are read past but not stored; bodies with
            fewer joints leave the remaining slots at zero.

    Returns:
        dict with keys:

        * ``'file_name'``: characters ``[-29:-9]`` of the base name, i.e. the
          20-character sample id for names such as
          ``S001C002P005R002A008.skeleton``;
        * ``'nbodys'``: body count as written in the file for every frame
          that contains at least one body (may exceed ``max_body``);
        * ``'njoints'``: the ``njoints`` argument;
        * ``'skel_body{i}'`` of shape ``(nframe, njoints, 3)``,
          ``'depth_body{i}'`` and ``'rgb_body{i}'`` of shape
          ``(nframe, njoints, 2)`` for every body index ``i`` below the
          largest per-frame body count (and below ``max_body``).

    Raises:
        IndexError: If the file is empty or ends before all announced
            frames/bodies/joints were read.
        ValueError: If a count line or joint value is not numeric, or a
            stored joint line has fewer than seven values.
    """
    with open(file_path, 'r') as f:
        datas = f.readlines()

    nframe = int(datas[0])
    bodymat = {
        'file_name': os.path.basename(file_path)[-29:-9],
        'nbodys': [],
        'njoints': njoints
    }

    # Pre-allocate zero-filled arrays for every possible body slot; slots
    # that never receive data are pruned at the end.
    for body in range(max_body):
        bodymat[f'skel_body{body}'] = np.zeros((nframe, njoints, 3))
        bodymat[f'depth_body{body}'] = np.zeros((nframe, njoints, 2))
        bodymat[f'rgb_body{body}'] = np.zeros((nframe, njoints, 2))

    cursor = 0  # index of the line consumed most recently
    for frame in range(nframe):
        cursor += 1
        bodycount = int(datas[cursor])
        if bodycount == 0:
            continue

        bodymat['nbodys'].append(bodycount)
        for body in range(bodycount):
            cursor += 1  # body info line (not used)
            cursor += 1  # joint-count line
            joint_count = int(datas[cursor])
            # Bodies beyond the allocated slots have no destination array,
            # but their lines must still be consumed to keep the cursor
            # aligned with the next body/frame.
            keep_body = body < max_body
            for joint in range(joint_count):
                cursor += 1
                if not keep_body or joint >= njoints:
                    continue
                jointinfo = list(map(float, datas[cursor].split()))
                bodymat[f'skel_body{body}'][frame, joint] = jointinfo[0:3]
                bodymat[f'depth_body{body}'][frame, joint] = jointinfo[3:5]
                bodymat[f'rgb_body{body}'][frame, joint] = jointinfo[5:7]

    # Prune body slots that never appear in any frame.
    max_seen = max(bodymat['nbodys'], default=0)
    for body in range(max_seen, max_body):
        del bodymat[f'skel_body{body}']
        del bodymat[f'depth_body{body}']
        del bodymat[f'rgb_body{body}']

    return bodymat

def convert_skeletons_and_create_metadata(input_folder, output_folder, metadata_path, missing_file_list=None, max_files=None):
    """Convert every ``.skeleton`` file in a folder to ``.npy`` and write labels.

    Files are processed in sorted name order. Each one is parsed with
    ``parse_skeleton_file_to_dict`` and the resulting dict is stored with
    ``np.save`` as ``<original name>.npy``. Because the payload is a dict,
    NumPy pickles it; load it back with ``np.load(path, allow_pickle=True)``.

    The label is the three digits following the first ``'A'`` in the file
    name, minus one (``...A008.skeleton`` -> ``7``).

    Args:
        input_folder: Directory containing the ``.skeleton`` files.
        output_folder: Directory for the ``.npy`` files; created if needed.
        metadata_path: Path of the JSON file mapping output file name to
            label. Its parent directory is created if needed and the file is
            overwritten.
        missing_file_list: Optional text file with one sample id per line
            (the first 20 characters of a file name); matching files are
            skipped.
        max_files: Optional cap on the number of files converted in this
            call. Scanning stops once the cap is reached.

    Returns:
        A summary string ``"<n> files converted and metadata saved to
        <metadata_path>"`` where ``n`` counts only files converted by this
        call.

    Note:
        Files whose output already exists are not re-converted, but their
        label is still written to the metadata so that re-running the
        conversion does not drop previously converted samples from the JSON.
    """
    input_path = Path(input_folder)
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    missing = set()
    if missing_file_list:
        with open(missing_file_list, 'r') as f:
            missing = {line.strip() for line in f}

    metadata = {}
    file_count = 0

    def record_label(file, out_file):
        # Label = zero-based action id taken from the "Axxx" part of the name.
        try:
            metadata[out_file] = int(file.split('A')[1][:3]) - 1
        except (IndexError, ValueError) as e:
            print(f"Could not parse label for {file}: {e}")

    for file in sorted(os.listdir(input_path)):
        if not file.endswith('.skeleton'):
            continue

        file_id = file[:20]
        if file_id in missing:
            print(f"Skipping missing: {file}")
            continue

        if max_files is not None and file_count >= max_files:
            break

        out_file = f"{file}.npy"
        full_path = input_path / file
        save_path = output_path / out_file

        if save_path.exists():
            print(f"Skipping existing: {file}")
            # Keep the already-converted sample in the metadata; otherwise
            # a re-run would overwrite the JSON without it.
            record_label(file, out_file)
            continue

        data = parse_skeleton_file_to_dict(full_path)
        np.save(save_path, data)
        record_label(file, out_file)

        file_count += 1

    Path(metadata_path).parent.mkdir(parents=True, exist_ok=True)
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)

    return f"{file_count} files converted and metadata saved to {metadata_path}"


## Now parse all files and assign labels (based on action ID in filename):

In [2]:
# Source and destination folders for the conversion run.
input_folder = "../data/"  # e.g., "../data/raw"
output_folder = "../parsed_data/"

# Kept as the last expression of the cell so the returned summary string
# is displayed as the cell output.
convert_skeletons_and_create_metadata(
    input_folder=input_folder,
    output_folder=output_folder,
    metadata_path="../parsed_data/metadata.json",
    missing_file_list="./NTU_RGBD120_samples_with_missing_skeletons.txt",
)



Skipping missing: S001C002P005R002A008.skeleton
Skipping missing: S001C002P006R001A008.skeleton
Skipping missing: S001C003P002R001A055.skeleton
Skipping missing: S001C003P002R002A012.skeleton
Skipping missing: S001C003P005R002A004.skeleton
Skipping missing: S001C003P005R002A005.skeleton
Skipping missing: S001C003P005R002A006.skeleton
Skipping missing: S001C003P006R002A008.skeleton
Skipping missing: S002C002P011R002A030.skeleton
Skipping missing: S002C003P008R001A020.skeleton
Skipping missing: S002C003P010R002A010.skeleton
Skipping missing: S002C003P011R002A007.skeleton
Skipping missing: S002C003P011R002A011.skeleton
Skipping missing: S002C003P014R002A007.skeleton
Skipping missing: S003C001P019R001A055.skeleton
Skipping missing: S003C002P002R002A055.skeleton
Skipping missing: S003C002P018R002A055.skeleton
Skipping missing: S003C003P002R001A055.skeleton
Skipping missing: S003C003P016R001A055.skeleton
Skipping missing: S003C003P018R002A024.skeleton
Skipping missing: S004C002P003R001A013.skeleton
[... output truncated ...]

'56578 files converted and metadata saved to ../parsed_data/metadata.json'