In [22]:
import numpy as np
import sys
import os
from os.path import join as pjoin

def mean_variance(data_dir, save_dir, joints_num):
    """Compute the per-column mean and group-averaged std of all clips in ``data_dir``.

    Every regular file in ``data_dir`` is loaded with ``np.load``. A file is
    kept only if it contains no NaN, is 2-D, and its second axis has exactly
    ``8 + (joints_num - 1) * 9 + joints_num * 3`` columns; every other file is
    reported on stdout and skipped. The kept arrays are concatenated along
    axis 0, the mean and std are taken over that axis, and the std is then
    replaced, within each of seven contiguous column groups, by that group's
    average std.

    Side effects: writes ``Mean.npy`` and ``Std.npy`` into ``save_dir``
    (overwriting existing files of that name) and prints progress plus a
    summary.

    Args:
        data_dir: Directory containing the ``.npy`` arrays to aggregate.
        save_dir: Existing directory that receives ``Mean.npy`` and ``Std.npy``.
        joints_num: Number of joints; it fixes the column layout and therefore
            the expected width of every array.

    Returns:
        Tuple ``(Mean, Std)`` of 1-D arrays with one entry per column.

    Raises:
        ValueError: If no file passes validation (this includes the case where
            ``joints_num`` does not match the width of the stored arrays).
    """
    # The column layout is fully determined by joints_num, so the expected
    # width is derived here instead of being inferred from whichever file
    # os.listdir() happens to return first (that order is arbitrary, and a
    # malformed first file would otherwise cause every good file to be skipped).
    tail_start = 4 + (joints_num - 1) * 9 + joints_num * 3
    feature_dim = tail_start + 4
    # Consecutive pairs delimit the column groups that share one std value.
    group_bounds = (
        0,
        1,
        3,
        4,
        4 + (joints_num - 1) * 3,
        4 + (joints_num - 1) * 9,
        tail_start,
        feature_dim,
    )

    # Ignore sub-directories: np.load() cannot open them.
    file_list = [
        name for name in os.listdir(data_dir)
        if os.path.isfile(pjoin(data_dir, name))
    ]
    total_files = len(file_list)
    skipped_files = 0  # Files rejected because of NaN or an unexpected shape
    data_list = []

    print(f"Total files in {data_dir}: {total_files}")
    for file in file_list:
        data = np.load(pjoin(data_dir, file))
        if np.isnan(data).any():
            print(f"Skipping {file} due to NaN values")
            skipped_files += 1
            continue
        # Anything that is not 2-D cannot be concatenated with the valid clips.
        if data.ndim != 2:
            print(f"Skipping {file} due to unexpected shape: {data.shape}")
            skipped_files += 1
            continue
        # Skip arrays whose column count does not match the layout.
        if data.shape[1] != feature_dim:
            print(f"Skipping {file} due to shape mismatch: {data.shape[1]} vs {feature_dim}")
            skipped_files += 1
            continue
        data_list.append(data)

    if not data_list:
        raise ValueError("No valid files to concatenate")

    data = np.concatenate(data_list, axis=0)
    print(f"Concatenated data shape: {data.shape}")
    Mean = data.mean(axis=0)
    Std = data.std(axis=0)

    # Replace the std inside each column group by the group's average. The
    # groups are disjoint, so updating Std in place is safe. Empty groups
    # (possible for very small joints_num) are skipped.
    for lo, hi in zip(group_bounds[:-1], group_bounds[1:]):
        if hi > lo:
            Std[lo:hi] = Std[lo:hi].mean()

    # Guaranteed by the per-file width check above; kept as a cheap invariant.
    assert Std.shape[-1] == feature_dim

    np.save(pjoin(save_dir, 'Mean.npy'), Mean)
    np.save(pjoin(save_dir, 'Std.npy'), Std)

    # Calculate and print statistics
    total_readable_files = total_files - skipped_files
    readable_percentage = (total_readable_files / total_files) * 100 if total_files > 0 else 0

    print("\nSummary:")
    print(f"Total files: {total_files}")
    print(f"Total readable files: {total_readable_files}")
    print(f"Total skipped files: {skipped_files}")
    print(f"Percentage of readable files: {readable_percentage:.2f}%")

    return Mean, Std

In [23]:
# Load the given reference statistics; the verification cells compare the
# recomputed mean/std against them to confirm you are on the right track.
# NOTE(review): mean_variance() is called below with save_dir='./HumanML3D/',
# which writes Mean.npy / Std.npy to these same paths. If this cell is re-run
# after that call, the "reference" is the freshly computed data and the check
# is trivially 0.0 — keep a pristine copy of the reference files.
reference1 = np.load('./HumanML3D/Mean.npy')
reference2 = np.load('./HumanML3D/Std.npy')

In [27]:
if __name__ == '__main__':
    # Recompute the statistics over the 22-joint feature vectors. The result is
    # returned here and also written to save_dir as Mean.npy / Std.npy.
    # NOTE(review): save_dir is the folder the reference Mean.npy / Std.npy were
    # loaded from, so this call overwrites those files on disk.
    data_dir = './HumanML3D/new_joint_vecs/'
    save_dir = './HumanML3D/'
    mean, std = mean_variance(
        data_dir=data_dir,
        save_dir=save_dir,
        joints_num=22,
    )

Total files in ./HumanML3D/new_joint_vecs/: 37040
Skipping M00234.npy due to shape mismatch: 251 vs 263
Skipping M02728.npy due to shape mismatch: 251 vs 263
Skipping M01385.npy due to shape mismatch: 251 vs 263
Skipping M03119.npy due to shape mismatch: 251 vs 263
Skipping M00893.npy due to shape mismatch: 251 vs 263
Skipping M01736.npy due to shape mismatch: 251 vs 263
Skipping 00488.npy due to shape mismatch: 251 vs 263
Skipping 02428.npy due to shape mismatch: 251 vs 263
Skipping 00191.npy due to shape mismatch: 251 vs 263
Skipping 01109.npy due to shape mismatch: 251 vs 263
Skipping 02247.npy due to shape mismatch: 251 vs 263
Skipping M03388.npy due to shape mismatch: 251 vs 263
Skipping M01179.npy due to shape mismatch: 251 vs 263
Skipping M00285.npy due to shape mismatch: 251 vs 263
Skipping M01563.npy due to shape mismatch: 251 vs 263
Skipping 00408.npy due to shape mismatch: 251 vs 263
Skipping M01012.npy due to shape mismatch: 251 vs 263
Skipping M03866.npy due to shape misma

### Check that your data is correct: if it matches the given reference (both differences below are 0), it is right

In [25]:
# Total absolute deviation between the recomputed mean and the reference;
# 0.0 means every entry matches exactly.
np.abs(mean - reference1).sum()

0.0

In [26]:
# Total absolute deviation between the recomputed std and the reference;
# 0.0 means every entry matches exactly.
np.abs(std - reference2).sum()

0.0