In [97]:
import pickle
import pprint
from pathlib import Path
from collections import defaultdict
import random
import numpy as np

# 1) Locate the pickle files: every *.pkl directly inside ./crop_pkl,
#    resolved against the current working directory.
CUR_DIR = Path.cwd()
CHOICE_DIR = CUR_DIR / "crop_pkl"
pkl_files = list(CHOICE_DIR.glob("*.pkl"))

# random.choice() on an empty list raises a bare IndexError that says nothing
# about the real problem, so fail early with an actionable message instead.
if not pkl_files:
    raise FileNotFoundError(f"No .pkl files found in {CHOICE_DIR}")

# --- (A) Pick one file at random and print detailed information about it ---
pkl_path = random.choice(pkl_files)
print(f"Selected pkl file: {pkl_path}")

# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open(pkl_path, 'rb') as f:
    data = pickle.load(f)

print("Top-level keys:")
pprint.pprint(list(data.keys()))

print("\nsplit keys and lengths:")
for split_name, vids in data['split'].items():
    print(f"  {split_name}: {len(vids)} videos")

print("\nannotation[0] keys and types/shapes:")
annotations = data['annotations']
if len(annotations) > 0:
    sample = annotations[0]
else:
    # No annotation to inspect: report it and fall back to an empty mapping so
    # the per-key checks below are skipped instead of raising IndexError.
    print("  (no annotations in this file)")
    sample = {}

for k, v in sample.items():
    # Anything exposing .shape is reported with its shape; everything else by type.
    if hasattr(v, 'shape'):
        print(f"  {k}: numpy array, shape={v.shape}")
    else:
        print(f"  {k}: {type(v)}")

# --- Class (label) information ---
if 'label' in sample:
    print(f"\nlabel type: {type(sample['label'])}, value: {sample['label']}")

# Value ranges of the keypoint coordinates and their scores.
if 'keypoint' in sample:
    print("\nkeypoint dtype, min/max:", sample['keypoint'].dtype,
          sample['keypoint'].min(), sample['keypoint'].max())
if 'keypoint_score' in sample:
    print("keypoint_score dtype, min/max:", sample['keypoint_score'].dtype,
          sample['keypoint_score'].min(), sample['keypoint_score'].max())

# --- (B) Summary over every PKL file found in the directory ---
print("\n=== 전체 PKL 데이터 요약 ===")
total_pkls = len(pkl_files)
print(f"총 PKL 파일 수: {total_pkls}")

total_samples = 0
split_counts = defaultdict(int)   # split name -> number of entries listed in that split
all_frame_counts = []             # 'total_frames' of every annotation, across all files
label_counts = defaultdict(int)   # label -> number of annotations carrying it

for path in pkl_files:
    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    with open(path, 'rb') as f:
        d = pickle.load(f)
    # Accumulate the number of annotations (samples).
    n_ann = len(d['annotations'])
    total_samples += n_ann
    # Accumulate the size of each split.
    for split_name, vids in d['split'].items():
        split_counts[split_name] += len(vids)
    # Collect frame lengths and count labels.
    for ann in d['annotations']:
        all_frame_counts.append(ann['total_frames'])
        # Annotations without a 'label' key (or with label None) are left out of the tally.
        label = ann.get('label', None)
        if label is not None:
            label_counts[label] += 1

print(f"총 샘플 수 (annotations): {total_samples}")
for split_name, count in split_counts.items():
    print(f"  {split_name}: {count} samples")

# Class distribution
print("\n클래스(label) 분포:")
for lbl, cnt in sorted(label_counts.items()):
    print(f"  Label {lbl}: {cnt} samples")

unique_labels = sorted(label_counts.keys())
print(f"\n총 클래스 수: {len(unique_labels)}")
print(f"클래스 레이블 목록: {unique_labels}")

# Frame-length statistics
all_frame_counts = np.array(all_frame_counts)
print("\n프레임 길이 통계:")
if all_frame_counts.size:
    print(f"  최소: {all_frame_counts.min()} 프레임")
    print(f"  최대: {all_frame_counts.max()} 프레임")
    print(f"  평균: {all_frame_counts.mean():.1f} 프레임")
    print(f"  중앙값: {np.median(all_frame_counts)} 프레임")
else:
    # min()/max() raise ValueError on an empty array and mean()/median() yield
    # NaN with warnings, so report the absence of data instead.
    print("  (샘플이 없어 통계를 계산할 수 없습니다)")


Selected pkl file: d:\golfDataset\스포츠 사람 동작 영상(골프)\Training\Public\male\train\crop_pkl\skeleton_dataset_90_10.pkl
Top-level keys:
['split', 'annotations']

split keys and lengths:
  xsub_train: 392 videos
  xsub_val: 44 videos

annotation[0] keys and types/shapes:
  total_frames: <class 'int'>
  img_shape: <class 'tuple'>
  original_shape: <class 'tuple'>
  keypoint: numpy array, shape=(1, 474, 17, 2)
  keypoint_score: numpy array, shape=(1, 474, 17)
  frame_dir: <class 'str'>
  label: <class 'int'>

label type: <class 'int'>, value: 1

keypoint dtype, min/max: float32 0.0 0.7360296
keypoint_score dtype, min/max: float32 0.0 0.944335

=== 전체 PKL 데이터 요약 ===
총 PKL 파일 수: 1
총 샘플 수 (annotations): 436
  xsub_train: 392 samples
  xsub_val: 44 samples

클래스(label) 분포:
  Label 0: 218 samples
  Label 1: 218 samples

총 클래스 수: 2
클래스 레이블 목록: [0, 1]

프레임 길이 통계:
  최소: 19 프레임
  최대: 702 프레임
  평균: 198.6 프레임
  중앙값: 188.0 프레임


In [6]:
import pickle, numpy as np
from pathlib import Path

# Scan every annotation of one dataset file for non-finite keypoints or scores.
pkl = 'crop_pkl/skeleton_dataset_90_10.pkl'
# Use a context manager so the file handle is closed deterministically; the
# previous bare open() left it to the garbage collector.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open(pkl, 'rb') as f:
    data = pickle.load(f)

for ann in data['annotations']:
    kp = ann['keypoint']
    scores = ann['keypoint_score']
    # np.isfinite is False for NaN and for +/-Inf, so a single pass covers both checks.
    if not np.isfinite(kp).all():
        print(f"NaN/Inf in keypoint: {ann['frame_dir']}")
    if not np.isfinite(scores).all():
        print(f"NaN/Inf in score: {ann['frame_dir']}")
