In [7]:
import pickle
from pathlib import Path


def check_pkl_annotations(pkl_files):
    if isinstance(pkl_files, (str, Path)):
        pkl_files = [pkl_files]
    for pkl_path in pkl_files:
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)
        print(f"{Path(pkl_path).name}: annotations 개수 = {len(data['annotations'])}")

# 사용 예시 (파일명만 전달해도 동작)
check_pkl_annotations(['combined_5class.pkl'])

combined_5class.pkl: annotations 개수 = 3945


In [8]:
import pickle
import numpy as np

def check_mmaction2_pkl(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    # 1. 최상위 키 체크
    print(f"최상위 키: {list(data.keys())}")
    assert 'annotations' in data, "annotations 키가 없습니다."
    assert 'split' in data, "split 키가 없습니다."

    # 2. split 구조 체크
    print(f"split: {data['split']}")
    assert isinstance(data['split'], dict), "split은 dict여야 합니다."
    for k, v in data['split'].items():
        assert isinstance(v, list), f"split[{k}]은 list여야 합니다."

    # 3. annotations 구조 및 필드 체크
    annos = data['annotations']
    print(f"샘플 개수: {len(annos)}")
    required_keys = {'frame_dir', 'label', 'img_shape', 'original_shape', 'total_frames', 'keypoint', 'keypoint_score'}
    for i, a in enumerate(annos):
        keys = set(a.keys())
        if not required_keys.issubset(keys):
            print(f"[{i}] 필드 누락: {required_keys - keys}")
        # label 타입
        if not isinstance(a['label'], int):
            print(f"[{i}] label 타입 오류: {type(a['label'])}")
        # keypoint shape
        kp = np.array(a['keypoint'])
        if kp.ndim != 4 or kp.shape[2] != 17 or kp.shape[3] != 2:
            print(f"[{i}] keypoint shape 오류: {kp.shape}")
        # 프레임 개수
        if kp.shape[1] == 0:
            print(f"[{i}] 프레임 없음: {a['frame_dir']}")
        # NaN/Inf 체크
        if np.isnan(kp).any() or np.isinf(kp).any():
            print(f"[{i}] NaN/Inf 포함: {a['frame_dir']}")
        # 0-only 체크
        if (kp == 0).all():
            print(f"[{i}] 0-only keypoint: {a['frame_dir']}")

    print("✅ MMACTION2 예제 포맷 체크 완료")

# 사용 예시
if __name__ == '__main__':
    check_mmaction2_pkl('combined_5class.pkl')

최상위 키: ['split', 'annotations']
split: {'xsub_train': ['true/good/20201123_General_011_DIS_S_F20_MM_001_part04_person01_crop', 'true/best/20201123_General_004_DIS_S_M20_SS_001_part41_person01_crop', 'true/good/20201123_General_008_DIS_S_M20_MS_001_part22_person01_crop', 'true/normal/20201117_General_008_DOC_A_M40_MM_033_crop', 'false/bad/20201123_General_030_DOS_A_M40_MM_050_person01_crop', 'true/good/20201123_General_011_DIS_S_F20_MM_002_part12_person01_crop', 'false/bad/20201202_General_065_DOC_A_M40_MM_034_person02_crop', 'true/normal/20201118_General_015_DOC_A_M40_SM_031_crop', 'false/bad/20201202_General_065_DOC_A_M40_MM_014_person02_crop', 'true/good/20201123_General_008_DIS_S_M20_MS_001_129_person01_crop', 'true/normal/20201118_General_020_NOC_A_F50_MM_073_crop', 'false/bad/20201208_General_095_DOS_A_M30_MM_003_01_person01_crop', 'true/good/20201123_General_008_DIS_S_M20_MS_001_part05_person01_crop', 'true/normal/20201127_General_054_DOC_A_F30_SM_064_crop', 'false/bad/20201117_G

In [9]:
import pickle
with open(r"combined_5class.pkl", "rb") as f:
    data = pickle.load(f)
print(data['annotations'][0])  # 첫 샘플 출력

import pickle
import numpy as np

def check_nan_in_pkl(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    annos = data['annotations']
    nan_count = 0
    inf_count = 0
    for i, anno in enumerate(annos):
        kp = np.array(anno['keypoint'])
        if np.isnan(kp).any():
            print(f"[{i}] NaN 포함: {anno['frame_dir']}")
            nan_count += 1
        if np.isinf(kp).any():
            print(f"[{i}] Inf 포함: {anno['frame_dir']}")
            inf_count += 1
    print(f"총 NaN 샘플: {nan_count}, 총 Inf 샘플: {inf_count}")

# 사용 예시
check_nan_in_pkl(r"combined_5class.pkl")

zero_score_count = 0
for ann in data['annotations']:
    score = np.array(ann['keypoint_score'])
    if (score == 0).all():
        print(f"[!] All-zero keypoint_score: {ann['frame_dir']}")
        zero_score_count += 1
print(f"총 score=0 샘플: {zero_score_count}")

for ann in data['annotations']:
    if not isinstance(ann['label'], int):
        print(f"[!] label 타입 이상: {ann['frame_dir']} - {type(ann['label'])}")


{'frame_dir': 'false/bad/20201116_General_001_DOS_A_M40_MM_043_crop', 'label': 1, 'keypoint': array([[[[193.86 , 204.367],
         [202.309, 187.575],
         [177.126, 191.865],
         ...,
         [172.874, 509.207],
         [317.971, 645.789],
         [156.066, 645.851]],

        [[187.616, 208.647],
         [195.995, 193.826],
         [174.894, 198.08 ],
         ...,
         [170.733, 509.256],
         [317.947, 645.813],
         [155.974, 645.906]],

        [[183.415, 202.323],
         [193.851, 187.571],
         [172.82 , 191.854],
         ...,
         [164.496, 509.253],
         [317.948, 645.82 ],
         [151.878, 647.917]],

        ...,

        [[311.706, 118.194],
         [309.442, 111.94 ],
         [307.343, 107.741],
         ...,
         [263.325, 511.306],
         [326.229, 641.642],
         [174.913, 631.145]],

        [[313.714, 116.143],
         [309.442, 111.94 ],
         [307.358, 107.736],
         ...,
         [263.326, 511.305],
  

In [6]:
import pickle

def check_label_range(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    labels = [a['label'] for a in data['annotations']]
    labels_count = {"0":0, "1":0, "2":0, "3":0, "4":0}
    for label in labels:
        if label == 0:
            labels_count["0"] += 1
        elif label == 1:
            labels_count["1"] += 1
        elif label == 2:
            labels_count["2"] += 1
        elif label == 3:
            labels_count["3"] += 1
        elif label == 4:
            labels_count["4"] += 1
    print('라벨 값 분포:', labels_count)

check_label_range(r"combined_5class.pkl")

라벨 값 분포: {'0': 125, '1': 1255, '2': 1202, '3': 879, '4': 484}


In [11]:
import pickle

def check_min_frames(pkl_path):
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    frames = [a['total_frames'] for a in data['annotations']]
    print('최소 프레임 수:', min(frames))
    print('100 미만 샘플 수:', sum(f < 50 for f in frames))

check_min_frames(r"combined_5class.pkl")

최소 프레임 수: 6
100 미만 샘플 수: 20


In [12]:
import pickle
with open(r"combined_5class.pkl", "rb") as f:
    data = pickle.load(f)
annos = data['annotations']  # ← 여기서 샘플 리스트 추출
for i, sample in enumerate(annos):
    kp = sample['keypoint']  # (T, V, C)
    import numpy as np
    kp = np.array(kp)
    if np.isnan(kp).any():
        print(f"NaN in keypoint at sample {i} ({sample['frame_dir']})")
    # score도 마찬가지로 확인
    score = np.array(sample['keypoint_score'])
    if np.isnan(score).any():
        print(f"NaN in keypoint_score at sample {i} ({sample['frame_dir']})")