In [1]:
import numpy as np
from pathlib import Path

# Sanity-check the saved sample-ID arrays of every dataset split: print a
# per-split summary (shape, dtype, examples, duplicates, blank entries) and
# then report how many IDs are shared between splits.

# Split names and the directory holding the ID files (edit before use).
splits = ['train', 'valid', 'test']
base = Path(r"D:\golfDataset\dataset\crop_pkl")  # directory containing the .npy files

# Arrays already read from disk, keyed by split name, so the per-split report
# and the cross-check below share a single read of each file.
_ids_cache = {}


def _load_ids(split):
    """Return the ID array stored for *split*, reading its file at most once.

    Raises whatever ``np.load`` raises (e.g. ``FileNotFoundError``) when the
    file for the split is missing or unreadable.
    """
    if split not in _ids_cache:
        # NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
        # and unpickling can execute arbitrary code -- only use this on files
        # you trust. If the files are plain string arrays, allow_pickle=False
        # should be enough; confirm before tightening.
        _ids_cache[split] = np.load(
            base / f'skeleton_dataset_{split}_ids.npy', allow_pickle=True
        )
    return _ids_cache[split]


for split in splits:
    npy_path = base / f'skeleton_dataset_{split}_ids.npy'
    # Print the header before loading so a failed load is easy to attribute.
    print(f"\n[+] {npy_path.name}")
    ids = _load_ids(split)
    print(f"- shape: {ids.shape}")
    print(f"- dtype: {ids.dtype}")
    print(f"- example (앞/뒤 3개): {ids[:3].tolist()} ... {ids[-3:].tolist()}")
    # Duplicate check: distinct IDs vs. total entries.
    print(f"- unique IDs: {len(set(ids))} / {len(ids)}")
    # Collect the positions of entries that are None or blank after stripping.
    nulls = [i for i, v in enumerate(ids) if v is None or str(v).strip() == ""]
    if nulls:
        print(f"  ⚠️ None/empty ID found at idx: {nulls[:10]} (총 {len(nulls)}개)")
    else:
        print("  ✅ 모든 ID가 정상")

# Intersection / union analysis across train, valid and test. The loader
# returns the arrays cached by the loop above, or reads a file on demand when
# `splits` was edited to leave one of these three out.
ids_train = _load_ids('train')
ids_valid = _load_ids('valid')
ids_test = _load_ids('test')

set_train = set(ids_train)
set_valid = set(ids_valid)
set_test = set(ids_test)
print("\n--- Cross check ---")
# A non-zero count means the same ID appears in both splits.
print(f"train ∩ valid : {len(set_train & set_valid)}")
print(f"train ∩ test  : {len(set_train & set_test)}")
print(f"valid ∩ test  : {len(set_valid & set_test)}")

all_ids = set_train | set_valid | set_test
print(f"전체 합집합(중복 제거): {len(all_ids)}")



[+] skeleton_dataset_train_ids.npy
- shape: (1026,)
- dtype: <U42
- example (앞/뒤 3개): ['20201201_General_061_DOS_A_M40_MM_052_crop', '20201203_General_071_DOS_A_M40_MM_032_crop', '20201202_General_068_DOS_A_M40_MM_050_crop'] ... ['20201117_General_012_NOC_A_M40_MM_056_crop', '20201120_General_028_NOC_A_M40_MM_072_crop', '20201123_General_029_DOS_A_M40_MM_057_crop']
- unique IDs: 1026 / 1026
  ✅ 모든 ID가 정상

[+] skeleton_dataset_valid_ids.npy
- shape: (127,)
- dtype: <U42
- example (앞/뒤 3개): ['20201203_General_069_DOS_A_M40_MM_016_crop', '20201202_General_065_DOC_A_M40_MM_067_crop', '20201117_General_005_DOC_A_M40_MM_021_crop'] ... ['20201211_General_120_DOC_A_M30_MM_003_crop', '20201202_General_066_DOC_A_M40_MM_066_crop', '20201126_General_048_DOS_A_M30_MM_041_crop']
- unique IDs: 127 / 127
  ✅ 모든 ID가 정상

[+] skeleton_dataset_test_ids.npy
- shape: (130,)
- dtype: <U42
- example (앞/뒤 3개): ['20201118_General_017_DOR_A_M40_MM_053_crop', '20201130_General_058_DOS_A_M30_MM_039_crop', '202011