In [6]:
import os
from collections import Counter

# Directory that holds the exported .npy arrays
root_dir = "/Users/alexeidelgado/Desktop/mpgcn-playground-scenes/data/npy"

# Every directory entry whose name ends in ".npy", in listing order
all_files = [name for name in os.listdir(root_dir) if name.endswith(".npy")]


def _suffix_category(name):
    """Return the naming-pattern label for a single .npy filename.

    Labels: "object_data" for "*_object_data.npy", "pose_data" for any other
    "*_data.npy", "object_old_style" for "*_object.npy", otherwise "other".
    """
    # "_object_data.npy" also ends with "_data.npy", so the longer suffix
    # has to be ruled out before the shorter one is accepted as pose data.
    if name.endswith("_object_data.npy"):
        return "object_data"
    if name.endswith("_data.npy"):
        return "pose_data"
    if name.endswith("_object.npy"):
        return "object_old_style"
    return "other"


# One label per file, aligned with all_files
categories = [_suffix_category(name) for name in all_files]

# Tally how many files carry each label
counts = Counter(categories)

print("=== FILE NAMING SUMMARY ===")
for label in counts:
    print(f"{label:20}: {counts[label]}")
print(f"\nTotal .npy files: {len(all_files)}")

# Identify name outliers: files whose name matches none of the recognised
# suffixes. str.endswith accepts a tuple of suffixes, so each filename is
# checked once instead of rebuilding (and linearly searching) a list of all
# recognised names for every file.
# Note: "_object_data.npy" is already covered by "_data.npy"; it is listed
# explicitly only to mirror the three documented naming patterns.
known_suffixes = ("_data.npy", "_object_data.npy", "_object.npy")
outliers = [f for f in all_files if not f.endswith(known_suffixes)]

if outliers:
    print("\n=== NAME OUTLIERS (unrecognized patterns) ===")
    for f in outliers[:30]:  # print only first 30
        print(" -", f)
else:
    print("\nNo outliers found — all filenames follow expected patterns.")

# Optional: show example pairs — for the first few pose files, report whether
# the object file with the matching base name exists on disk.
print("\n=== EXAMPLE PAIRS (first 5) ===")
POSE_SUFFIX = "_data.npy"
OBJECT_SUFFIX = "_object_data.npy"
pose_examples = [f for f in all_files if f.endswith(POSE_SUFFIX) and not f.endswith(OBJECT_SUFFIX)]
for p in pose_examples[:5]:
    # Strip only the trailing suffix. str.replace would remove every
    # occurrence of "_data.npy", corrupting the base name if the substring
    # also appeared earlier in the filename.
    base = p[:-len(POSE_SUFFIX)]
    obj = base + OBJECT_SUFFIX
    exists = os.path.exists(os.path.join(root_dir, obj))
    print(f"{p}  -->  object file exists? {'✅' if exists else '❌'}")

=== FILE NAMING SUMMARY ===
object_data         : 1146
pose_data           : 1146

Total .npy files: 2292

No outliers found — all filenames follow expected patterns.

=== EXAMPLE PAIRS (first 5) ===
trimmed_columpioscam1-2024-11-14_19_10_18_00-00-24_to_00-00-28_840_data.npy  -->  object file exists? ✅
trimmed_columpios_cam4-2024-12-16_14-04-00_00-00-09_to_00-00-13_1583_data.npy  -->  object file exists? ✅
trimmed_columpioscam3-2024-11-13_07_28_07_00-00-08_to_00-00-12_1202_data.npy  -->  object file exists? ✅
trimmed_columpioscam1-2024-11-17_17_04_22_00-00-06_to_00-00-10_1159_data.npy  -->  object file exists? ✅
trimmed_columpios_cam4-2024-11-12_19_46_43_00-00-04_to_00-00-08_1444_data.npy  -->  object file exists? ✅


In [7]:
import os

root_dir = "/Users/alexeidelgado/Desktop/mpgcn-playground-scenes/data/npy"
POSE_SUFFIX = "_data.npy"
OBJECT_SUFFIX = "_object_data.npy"

# List the directory once so both file lists come from the same snapshot.
# Sorting before filtering yields the same order as sorting each list after.
dir_entries = sorted(os.listdir(root_dir))
pose_files = [f for f in dir_entries if f.endswith(POSE_SUFFIX) and not f.endswith(OBJECT_SUFFIX)]
object_files = [f for f in dir_entries if f.endswith(OBJECT_SUFFIX)]

# Set for constant-time membership tests; the sorted list is kept for the
# ordered substring search below.
object_names = set(object_files)

# Each entry: (pose filename, expected object filename, object files containing the base name)
missing_pairs = []
for pose_file in pose_files:
    # Strip only the trailing suffix; str.replace would also remove any
    # earlier "_data.npy" occurrence inside the name.
    base = pose_file[:-len(POSE_SUFFIX)]
    expected_object = base + OBJECT_SUFFIX
    if expected_object not in object_names:
        # Try fuzzy match for debugging: any object file that contains the base name
        fuzzy = [o for o in object_files if base in o]
        missing_pairs.append((pose_file, expected_object, fuzzy))

print(f"Total pose files checked: {len(pose_files)}")
print(f"Unmatched pose files: {len(missing_pairs)}")

# Show details for at most the first 10 unmatched pose files
for p, exp, fuzz in missing_pairs[:10]:
    print("\nPose:", p)
    print("Expected:", exp)
    print("Closest matches found:", fuzz)

Total pose files checked: 1146
Unmatched pose files: 0
