In [2]:
import pickle
from pathlib import Path

import os
# 작업 디렉토리를 crop_pkl로 변경
ROOT = r"D:\golfDataset\dataset\crop_pkl"
os.chdir(ROOT)

def check_pkl_annotations(pkl_files):
    if isinstance(pkl_files, (str, Path)):
        pkl_files = [pkl_files]
    for pkl_path in pkl_files:
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)
        print(f"{Path(pkl_path).name}: annotations 개수 = {len(data['annotations'])}")

# 사용 예시 (파일명만 전달해도 동작)
check_pkl_annotations(['skeleton_dataset_train.pkl'])
check_pkl_annotations(['skeleton_dataset_valid.pkl'])

skeleton_dataset_train.pkl: annotations 개수 = 1457
skeleton_dataset_valid.pkl: annotations 개수 = 164


In [10]:
import pickle
import numpy as np

def check_mmaction2_pkl(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    # 1. 최상위 키 체크
    print(f"최상위 키: {list(data.keys())}")
    assert 'annotations' in data, "annotations 키가 없습니다."
    assert 'split' in data, "split 키가 없습니다."

    # 2. split 구조 체크
    print(f"split: {data['split']}")
    assert isinstance(data['split'], dict), "split은 dict여야 합니다."
    for k, v in data['split'].items():
        assert isinstance(v, list), f"split[{k}]은 list여야 합니다."

    # 3. annotations 구조 및 필드 체크
    annos = data['annotations']
    print(f"샘플 개수: {len(annos)}")
    required_keys = {'frame_dir', 'label', 'img_shape', 'original_shape', 'total_frames', 'keypoint', 'keypoint_score'}
    for i, a in enumerate(annos):
        keys = set(a.keys())
        if not required_keys.issubset(keys):
            print(f"[{i}] 필드 누락: {required_keys - keys}")
        # label 타입
        if not isinstance(a['label'], int):
            print(f"[{i}] label 타입 오류: {type(a['label'])}")
        # keypoint shape
        kp = np.array(a['keypoint'])
        if kp.ndim != 4 or kp.shape[2] != 17 or kp.shape[3] != 2:
            print(f"[{i}] keypoint shape 오류: {kp.shape}")
        # 프레임 개수
        if kp.shape[1] == 0:
            print(f"[{i}] 프레임 없음: {a['frame_dir']}")
        # NaN/Inf 체크
        if np.isnan(kp).any() or np.isinf(kp).any():
            print(f"[{i}] NaN/Inf 포함: {a['frame_dir']}")
        # 0-only 체크
        if (kp == 0).all():
            print(f"[{i}] 0-only keypoint: {a['frame_dir']}")

    print("✅ MMACTION2 예제 포맷 체크 완료")

# 사용 예시
if __name__ == '__main__':
    check_mmaction2_pkl('D:\\golfDataset\\dataset\\crop_pkl\\train_unnorm.pkl')

최상위 키: ['split', 'annotations']
split: {'xsub_train': ['20201117_General_007_DOC_A_M40_MM_071_crop', '20201120_General_024_DOC_A_M40_MM_001_crop', '20201123_General_032_DOS_A_F30_MM_004_crop', '20201123_General_032_DOS_A_F30_MM_064_crop', '20201118_General_016_DOC_A_F30_SM_009_crop', '20201118_General_019_DOC_A_M30_MM_027_crop', '20201118_General_020_DOC_A_F50_MM_059_crop', '20201116_General_002_DOS_A_F40_MM_049_crop', '20201202_General_068_DOS_A_M40_MM_025_crop', '20201127_General_054_DOC_A_F30_SM_060_crop', '20201130_General_058_DOS_A_M30_MM_059_crop', '20201116_General_004_DOS_A_F30_MM_015_crop', '20201117_General_009_DOC_A_M40_SM_029_crop', '20201120_General_027_DOC_A_M40_MM_049_crop', '20201117_General_005_DOC_A_M40_MM_007_crop', '20201116_General_003_DOS_A_F30_MM_046_crop', '20201124_General_033_DOS_A_M40_MM_018_crop', '20201120_General_022_DOC_A_M40_MS_067_crop', '20201123_General_030_DOS_A_M40_MM_029_crop', '20201118_General_020_DOC_A_F50_MM_063_crop', '20201117_General_005_DOC

In [9]:
import pickle
with open(r"D:\golfDataset\dataset\crop_pkl\train.pkl", "rb") as f:
    data = pickle.load(f)
print(data['annotations'][0])  # 첫 샘플 출력

import pickle
import numpy as np

def check_nan_in_pkl(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    annos = data['annotations']
    nan_count = 0
    inf_count = 0
    for i, anno in enumerate(annos):
        kp = np.array(anno['keypoint'])
        if np.isnan(kp).any():
            print(f"[{i}] NaN 포함: {anno['frame_dir']}")
            nan_count += 1
        if np.isinf(kp).any():
            print(f"[{i}] Inf 포함: {anno['frame_dir']}")
            inf_count += 1
    print(f"총 NaN 샘플: {nan_count}, 총 Inf 샘플: {inf_count}")

# 사용 예시
check_nan_in_pkl(r"D:\golfDataset\dataset\crop_pkl\train.pkl")
check_nan_in_pkl(r"D:\golfDataset\dataset\crop_pkl\test.pkl")
check_nan_in_pkl(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl")

zero_score_count = 0
for ann in data['annotations']:
    score = np.array(ann['keypoint_score'])
    if (score == 0).all():
        print(f"[!] All-zero keypoint_score: {ann['frame_dir']}")
        zero_score_count += 1
print(f"총 score=0 샘플: {zero_score_count}")

for ann in data['annotations']:
    if not isinstance(ann['label'], int):
        print(f"[!] label 타입 이상: {ann['frame_dir']} - {type(ann['label'])}")


{'total_frames': 192, 'img_shape': (1080, 1920), 'original_shape': (1080, 1920), 'keypoint': array([[[[0.11664688, 0.19081852],
         [0.        , 0.        ],
         [0.11664844, 0.17751203],
         ...,
         [0.03006073, 0.38776112],
         [0.03283948, 0.4819861 ],
         [0.01702531, 0.51017225]],

        [[0.11664636, 0.19242778],
         [0.        , 0.        ],
         [0.11664896, 0.1775787 ],
         ...,
         [0.03005922, 0.3877583 ],
         [0.03283969, 0.4804352 ],
         [0.01701505, 0.5101546 ]],

        [[0.11664584, 0.19412036],
         [0.        , 0.        ],
         [0.11665104, 0.17922036],
         ...,
         [0.03006151, 0.38774166],
         [0.03284   , 0.4804361 ],
         [0.01701489, 0.51014996]],

        ...,

        [[0.        , 0.        ],
         [0.        , 0.        ],
         [0.        , 0.        ],
         ...,
         [0.05616979, 0.37787408],
         [0.02634276, 0.48376945],
         [0.05427083, 0.47

In [6]:
import pickle

def check_label_range(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    labels = [a['label'] for a in data['annotations']]
    print('라벨 값 분포:', set(labels))
    if any(l not in [0, 1] for l in labels):
        print('[!] 0/1 이외의 라벨이 존재합니다!')
    else:
        print('모든 라벨이 0/1 입니다.')

check_label_range(r"D:\golfDataset\dataset\crop_pkl\train.pkl")
check_label_range(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl")

라벨 값 분포: {0, 1}
모든 라벨이 0/1 입니다.
라벨 값 분포: {0, 1}
모든 라벨이 0/1 입니다.


In [7]:
import pickle

def check_min_frames(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    frames = [a['total_frames'] for a in data['annotations']]
    print('최소 프레임 수:', min(frames))
    print('100 미만 샘플 수:', sum(f < 50 for f in frames))

check_min_frames(r"D:\golfDataset\dataset\crop_pkl\train.pkl")
check_min_frames(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl")

최소 프레임 수: 50
100 미만 샘플 수: 0
최소 프레임 수: 50
100 미만 샘플 수: 0


In [None]:
import pickle
with open(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl", "rb") as f:
    data = pickle.load(f)
annos = data['annotations']  # ← 여기서 샘플 리스트 추출
for i, sample in enumerate(annos):
    kp = sample['keypoint']  # (T, V, C)
    import numpy as np
    kp = np.array(kp)
    if np.isnan(kp).any():
        print(f"NaN in keypoint at sample {i} ({sample['frame_dir']})")
    # score도 마찬가지로 확인
    score = np.array(sample['keypoint_score'])
    if np.isnan(score).any():
        print(f"NaN in keypoint_score at sample {i} ({sample['frame_dir']})")