In [15]:
import pickle
from pathlib import Path


def check_pkl_annotations(pkl_files):
    """Print the number of annotation entries stored in each pickle file.

    Args:
        pkl_files: A single path (``str`` or ``Path``) or an iterable of
            paths. Each file must unpickle to a mapping with an
            ``'annotations'`` entry that supports ``len()``.

    Returns:
        None. One line per file is written to stdout.
    """
    # A lone path is wrapped so the loop below always sees an iterable.
    paths = [pkl_files] if isinstance(pkl_files, (str, Path)) else pkl_files
    for path in paths:
        with open(path, 'rb') as handle:
            payload = pickle.load(handle)
        annotation_total = len(payload['annotations'])
        # Only the file name (not the full path) is shown in the report.
        print(f"{Path(path).name}: annotations 개수 = {annotation_total}")

# Usage example (passing just a file name works as well)

# Pickle file inspected by the checks in this notebook.
PKL = "combined_5class_test.pkl"
check_pkl_annotations([PKL])

combined_5class_test.pkl: annotations 개수 = 1115


In [16]:
import pickle
import numpy as np

def check_mmaction2_pkl(pkl_path):
    """Validate a pickle file against the MMAction2-style skeleton annotation layout.

    The file must unpickle to a dict with a ``'split'`` dict (each value a
    list) and an ``'annotations'`` list of per-sample dicts. Structural
    problems at the top level fail an ``assert``; per-sample problems are
    printed and the scan continues, so a single run reports every bad sample.

    Args:
        pkl_path: Path to the pickle file to validate.

    Returns:
        None. Findings are written to stdout.

    Raises:
        AssertionError: If ``'annotations'`` or ``'split'`` is missing, or
            ``'split'`` is not a dict of lists.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this on
    # files from a trusted source.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    # 1. Top-level keys
    print(f"최상위 키: {list(data.keys())}")
    assert 'annotations' in data, "annotations 키가 없습니다."
    assert 'split' in data, "split 키가 없습니다."

    # 2. Structure of the split table
    print(f"split: {data['split']}")
    assert isinstance(data['split'], dict), "split은 dict여야 합니다."
    for k, v in data['split'].items():
        assert isinstance(v, list), f"split[{k}]은 list여야 합니다."

    # 3. Per-sample structure and field checks
    annos = data['annotations']
    print(f"샘플 개수: {len(annos)}")
    required_keys = {'frame_dir', 'label', 'img_shape', 'original_shape', 'total_frames', 'keypoint', 'keypoint_score'}
    for i, a in enumerate(annos):
        keys = set(a.keys())
        if not required_keys.issubset(keys):
            print(f"[{i}] 필드 누락: {required_keys - keys}")

        # A missing field was already reported above; the guards below keep
        # the scan going instead of raising KeyError on that same sample.
        sample_name = a.get('frame_dir', '<unknown>')

        # Label type (only built-in int instances pass this check)
        if 'label' in a and not isinstance(a['label'], int):
            print(f"[{i}] label 타입 오류: {type(a['label'])}")

        if 'keypoint' not in a:
            continue

        # Keypoint shape: 4-D with 17 keypoints and 2 coordinates per keypoint
        kp = np.array(a['keypoint'])
        if kp.ndim != 4 or kp.shape[2] != 17 or kp.shape[3] != 2:
            print(f"[{i}] keypoint shape 오류: {kp.shape}")
        # Frame count lives on axis 1; skip when the array has no such axis
        # (the shape error above has already been reported in that case).
        if kp.ndim >= 2 and kp.shape[1] == 0:
            print(f"[{i}] 프레임 없음: {sample_name}")
        # NaN/Inf check
        if np.isnan(kp).any() or np.isinf(kp).any():
            print(f"[{i}] NaN/Inf 포함: {sample_name}")
        # All-zero check (also true for an empty array)
        if (kp == 0).all():
            print(f"[{i}] 0-only keypoint: {sample_name}")

    print("✅ MMACTION2 예제 포맷 체크 완료")

# Usage example: validate the file named by PKL (assigned outside this block).
if __name__ == '__main__':
    check_mmaction2_pkl(PKL)

최상위 키: ['split', 'annotations']
split: {'xsub_val': ['false/bad/20201210_General_112_DOC_A_M20_MM_001_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_002_person03_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_005_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_008_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_009_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_010_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_011_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_012_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_014_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_015_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_018_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_020_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_021_person01_crop', 'false/bad/20201210_General_112_DOC_A_M20_MM_022_person01_crop', 'false/bad/20201210_General_112_DOC_A

In [17]:
import pickle
# Load the pickle named by PKL; `data` is reused by the loops later in this cell.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(PKL, "rb") as f:
    data = pickle.load(f)
print(data['annotations'][0])  # print the first sample

import pickle
import numpy as np

def check_nan_in_pkl(pkl_path):
    """Report which samples in a pickle file have NaN or Inf keypoint values.

    Args:
        pkl_path: Path to a pickle file that unpickles to a mapping with an
            ``'annotations'`` sequence; every entry needs a ``'keypoint'``
            value, and ``'frame_dir'`` is read when an entry is reported.

    Returns:
        None. One line is printed per affected sample, followed by a summary
        line with the number of samples containing NaN and Inf.
    """
    with open(pkl_path, 'rb') as handle:
        annotations = pickle.load(handle)['annotations']

    samples_with_nan = 0
    samples_with_inf = 0
    for idx, entry in enumerate(annotations):
        points = np.array(entry['keypoint'])
        # A sample is counted once per category, however many bad values it has.
        if np.any(np.isnan(points)):
            print(f"[{idx}] NaN 포함: {entry['frame_dir']}")
            samples_with_nan += 1
        if np.any(np.isinf(points)):
            print(f"[{idx}] Inf 포함: {entry['frame_dir']}")
            samples_with_inf += 1

    print(f"총 NaN 샘플: {samples_with_nan}, 총 Inf 샘플: {samples_with_inf}")

# Usage example
check_nan_in_pkl(PKL)

# Count samples whose keypoint_score values are all zero.
# `data` is the dict loaded from PKL earlier in this cell.
zero_score_count = 0
for ann in data['annotations']:
    score = np.array(ann['keypoint_score'])
    if (score == 0).all():
        print(f"[!] All-zero keypoint_score: {ann['frame_dir']}")
        zero_score_count += 1
print(f"총 score=0 샘플: {zero_score_count}")

# Report every sample whose label is not a built-in int instance.
for ann in data['annotations']:
    if not isinstance(ann['label'], int):
        print(f"[!] label 타입 이상: {ann['frame_dir']} - {type(ann['label'])}")


{'frame_dir': 'false/bad/20201210_General_112_DOC_A_M20_MM_001_person01_crop', 'label': 1, 'keypoint': array([[[[0.10651094, 0.15757778],
         [0.11213385, 0.15036297],
         [0.10164948, 0.14894167],
         ...,
         [0.07810208, 0.3507028 ],
         [0.13404219, 0.4314963 ],
         [0.06840677, 0.4300324 ]],

        [[0.10651042, 0.15757407],
         [0.1121349 , 0.15035833],
         [0.10164896, 0.14894074],
         ...,
         [0.07810156, 0.35069352],
         [0.1340401 , 0.43149537],
         [0.06840521, 0.43003705]],

        [[0.10403646, 0.15760371],
         [0.10810625, 0.15039723],
         [0.10000157, 0.15180278],
         ...,
         [0.07809531, 0.34930834],
         [0.13404115, 0.43151295],
         [0.0683875 , 0.43140554]],

        ...,

        [[0.11212032, 0.09703889],
         [0.10651042, 0.10998426],
         [0.10729688, 0.09128305],
         ...,
         [0.08217604, 0.36376852],
         [0.12675208, 0.4401278 ],
         [0.0489

In [10]:
import pickle

def check_label_range(pkl_path, num_classes=5):
    """Print the per-class label histogram and report out-of-range labels.

    Args:
        pkl_path: Path to a pickle file that unpickles to a mapping with an
            ``'annotations'`` sequence; every entry needs a ``'label'`` value.
        num_classes: Number of valid classes. Labels equal to
            ``0 .. num_classes - 1`` are counted; anything else is reported
            as out of range. Defaults to 5.

    Returns:
        None. The histogram (and, if any, the out-of-range report) is printed.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this on
    # files from a trusted source.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    labels = [a['label'] for a in data['annotations']]

    # String keys keep the printed histogram in the same format as before.
    labels_count = {str(cls): 0 for cls in range(num_classes)}
    out_of_range = []
    for label in labels:
        # Match by equality (not hashing) so any label type that compares
        # equal to the class index is counted, exactly like an if/elif chain.
        for cls in range(num_classes):
            if label == cls:
                labels_count[str(cls)] += 1
                break
        else:
            # Previously such labels were dropped without any message.
            out_of_range.append(label)
    print('라벨 값 분포:', labels_count)

    if out_of_range:
        unexpected = sorted({repr(label) for label in out_of_range})
        print(f'[!] 범위(0~{num_classes - 1}) 밖 라벨 {len(out_of_range)}개: {unexpected}')

# Print the label distribution of the file named by PKL (assigned outside this block).
check_label_range(PKL)

라벨 값 분포: {'0': 48, '1': 51, '2': 491, '3': 51, '4': 474}


In [12]:
import pickle

def check_min_frames(pkl_path, min_frames=100):
    """Print the shortest clip length and how many clips fall below a threshold.

    Args:
        pkl_path: Path to a pickle file that unpickles to a mapping with an
            ``'annotations'`` sequence; every entry needs a ``'total_frames'``
            value.
        min_frames: Threshold used for the "shorter than" count. Defaults to
            100.

    Returns:
        None. Results are printed.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this on
    # files from a trusted source.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    frames = [a['total_frames'] for a in data['annotations']]

    # min() raises ValueError on an empty sequence, so report and stop instead.
    if not frames:
        print('annotations가 비어 있어 프레임 수를 확인할 수 없습니다.')
        return

    print('최소 프레임 수:', min(frames))
    print(f'{min_frames} 미만 샘플 수:', sum(n < min_frames for n in frames))

# Print frame-count statistics for the file named by PKL (assigned outside this block).
check_min_frames(PKL)

최소 프레임 수: 51
100 미만 샘플 수: 271


In [14]:
import pickle

import numpy as np  # imported once here rather than on every loop iteration

# Scan every sample of the file named by PKL for NaN values in both the
# keypoint coordinates and the keypoint confidence scores.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open(PKL, "rb") as f:
    data = pickle.load(f)
annos = data['annotations']  # list of per-sample annotation dicts
for i, sample in enumerate(annos):
    # Expected to be 4-D with frames on axis 1, 17 keypoints on axis 2 and
    # 2 coordinates on axis 3; axis 0 is presumably the person index in the
    # MMAction2 skeleton format — TODO confirm.
    kp = np.array(sample['keypoint'])
    if np.isnan(kp).any():
        print(f"NaN in keypoint at sample {i} ({sample['frame_dir']})")
    # Apply the same NaN check to the confidence scores.
    score = np.array(sample['keypoint_score'])
    if np.isnan(score).any():
        print(f"NaN in keypoint_score at sample {i} ({sample['frame_dir']})")