In [None]:
# Compare the structure of two pkl files: top-level keys, split membership, and per-annotation metadata/shapes
import pickle
from pathlib import Path
import numpy as np

def load_pkl(pkl_path):
    """Deserialize and return the object stored in the pickle file ``pkl_path``.

    NOTE: unpickling can run arbitrary code, so only use this on files from
    a trusted source.
    """
    with open(pkl_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

def summarize_annotation(ann):
    """Return a compact summary of one annotation dict.

    The summary carries ``frame_dir``, ``label`` and ``total_frames``
    (``None`` when missing) plus the array shapes of ``keypoint`` and
    ``keypoint_score`` (``None`` when the key is absent).
    """
    def shape_of(field):
        # Only inspect the shape when the key is actually present.
        if field not in ann:
            return None
        return np.array(ann.get(field)).shape

    summary = {}
    summary['frame_dir'] = ann.get('frame_dir')
    summary['label'] = ann.get('label')
    summary['keypoint_shape'] = shape_of('keypoint')
    summary['keypoint_score_shape'] = shape_of('keypoint_score')
    summary['total_frames'] = ann.get('total_frames')
    return summary

def compare_annotations(ann1, ann2, idx):
    """Print every summary field on which two annotations disagree.

    Both annotations are reduced with ``summarize_annotation``; nothing is
    printed when the summaries match. ``idx`` is only used in the message.
    """
    left = summarize_annotation(ann1)
    right = summarize_annotation(ann2)

    mismatches = {
        field: (left[field], right[field])
        for field in left
        if left[field] != right[field]
    }
    if not mismatches:
        return

    print(f"[!] Difference in annotation {idx} (frame_dir: {ann1.get('frame_dir')}):")
    for field, pair in mismatches.items():
        first, second = pair
        print(f"    ▸ {field}: {first} ≠ {second}")

def compare_pkls(pkl1_path, pkl2_path, split_names=('xsub_train', 'xsub_val')):
    """Print a structural comparison of two annotation pkl files.

    Three things are compared: the top-level keys, the membership of the
    selected splits, and the per-annotation summaries (position by position).

    Args:
        pkl1_path: path of the first pickle file.
        pkl2_path: path of the second pickle file.
        split_names: names of the splits to compare. Defaults to the two
            cross-subject splits; pass ``None`` to compare every split name
            that occurs in either file (e.g. ``xview_train``/``xview_val``).

    Raises:
        KeyError: if either file has no ``'annotations'`` entry.
    """
    pkl1 = load_pkl(pkl1_path)
    pkl2 = load_pkl(pkl2_path)

    # Check top-level keys
    print("🔍 Top-level keys comparison:")
    keys1 = set(pkl1.keys())
    keys2 = set(pkl2.keys())
    print(" - Only in pkl1:", keys1 - keys2)
    print(" - Only in pkl2:", keys2 - keys1)
    print()

    # Check split differences (only possible when both files carry splits)
    if 'split' in pkl1 and 'split' in pkl2:
        print("📎 Split key comparison:")
        if split_names is None:
            # Sorted so the report order is deterministic.
            split_names = sorted(set(pkl1['split']) | set(pkl2['split']))
        for split_name in split_names:
            s1 = set(pkl1['split'].get(split_name, []))
            s2 = set(pkl2['split'].get(split_name, []))
            diff = s1.symmetric_difference(s2)
            if diff:
                print(f" - Difference in split '{split_name}': {diff}")
        print()

    # Compare annotations
    ann1_list = pkl1['annotations']
    ann2_list = pkl2['annotations']

    print(f"📊 Number of annotations: pkl1={len(ann1_list)}, pkl2={len(ann2_list)}")

    # zip stops at the shorter list, i.e. only the common prefix is compared.
    for i, (ann1, ann2) in enumerate(zip(ann1_list, ann2_list)):
        compare_annotations(ann1, ann2, i)

    # Report the entries that exist in only one of the files.
    if len(ann1_list) != len(ann2_list):
        print("\n⚠️ Different number of annotations:")
        if len(ann1_list) > len(ann2_list):
            print(" - Extra annotations in pkl1:")
            extras = ann1_list[len(ann2_list):]
        else:
            print(" - Extra annotations in pkl2:")
            extras = ann2_list[len(ann1_list):]
        for ann in extras:
            print(f"    {ann.get('frame_dir')}")

# ──────────────────────────────
# Usage example (edit the file paths for your machine)
# ──────────────────────────────
PKL_PATH_1 = Path(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl")     # MMAction2 example file
PKL_PATH_2 = Path(r"D:\golfDataset\dataset\crop_pkl\train.pkl")  # the file you built yourself

compare_pkls(PKL_PATH_1, PKL_PATH_2)


🔍 Top-level keys comparison:
 - Only in pkl1: set()
 - Only in pkl2: set()

📎 Split key comparison:
 - Difference in split 'xsub_train': {'S014C001P017R002A009', '20201117_General_006_DOC_A_M40_BS_021_crop', 'S008C001P034R002A033', 'S012C001P018R002A015', 'S007C002P015R002A032', 'S007C001P018R001A013', 'S008C002P035R001A039', 'S006C003P015R002A024', 'S007C003P027R002A050', 'S013C001P017R002A052', 'S006C003P001R001A006', 'S003C002P016R002A003', 'S010C002P015R001A030', 'S014C003P008R002A010', 'S012C001P018R001A052', 'S007C002P016R002A037', 'S008C002P025R002A041', 'S002C001P008R002A052', 'S016C002P025R001A014', 'S005C002P018R002A060', 'S012C003P016R002A003', 'S012C003P025R001A051', 'S007C001P028R002A024', 'S008C001P035R002A008', 'S011C003P027R001A057', 'S002C003P008R002A012', 'S011C003P019R002A060', 'S012C001P019R001A055', 'S015C001P008R001A014', 'S007C001P001R002A051', 'S007C001P008R001A023', 'S007C003P017R002A060', 'S016C001P025R002A020', 'S001C003P004R002A022', 'S010C002P025R001A043', 

In [16]:
import pickle
import numpy as np
from pathlib import Path

def load_pkl(pkl_path):
    """Deserialize and return the object stored in the pickle file ``pkl_path``.

    NOTE: unpickling can run arbitrary code, so only use this on files from
    a trusted source.
    """
    with open(pkl_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

def keypoint_stats(annotations):
    """Compute summary statistics over every keypoint coordinate pair.

    Each annotation's ``keypoint`` array is flattened to rows of two values
    and all rows are stacked, so the per-column statistics are taken over
    the whole collection (the original author's note describes the input as
    (N, T, V, 2) data).

    Args:
        annotations: iterable of dicts, each with a ``'keypoint'`` entry
            whose total size is a multiple of 2.

    Returns:
        dict with the per-column arrays ``min``, ``max``, ``mean``, ``std``
        and ``zero_ratio``, the totals ``nan_count`` and ``inf_count``, and
        ``total``, the number of scalar values examined.

    Raises:
        ValueError: if ``annotations`` is empty, or a keypoint array cannot
            be reshaped to rows of two values.
    """
    kp_all = []
    nan_count = 0
    inf_count = 0
    for ann in annotations:
        # asarray avoids copying keypoints that are already ndarrays.
        kp = np.asarray(ann['keypoint'])
        # Count NaN/Inf per annotation so no dataset-sized mask is needed.
        nan_count += np.isnan(kp).sum()
        inf_count += np.isinf(kp).sum()
        kp_all.append(kp.reshape(-1, 2))
    if not kp_all:
        # Fail with a clear message instead of NumPy's concatenate error.
        raise ValueError("keypoint_stats() needs at least one annotation")
    kp_all = np.concatenate(kp_all, axis=0)
    stats = {
        'min': np.min(kp_all, axis=0),
        'max': np.max(kp_all, axis=0),
        'mean': np.mean(kp_all, axis=0),
        'std': np.std(kp_all, axis=0),
        # Fraction of entries in each column that are exactly zero.
        'zero_ratio': np.mean(kp_all == 0, axis=0),
        'nan_count': nan_count,
        'inf_count': inf_count,
        'total': kp_all.shape[0] * kp_all.shape[1]
    }
    return stats

def print_stats(stats, name):
    """Print the statistics dict from ``keypoint_stats`` under the heading ``name``.

    One line is written per statistic, followed by a blank line.
    """
    def report_lines():
        # Lazily yield each line so it is formatted right before printing.
        yield f"📊 {name} keypoint 분포:"
        yield f"  min:  {stats['min']}"
        yield f"  max:  {stats['max']}"
        yield f"  mean: {stats['mean']}"
        yield f"  std:  {stats['std']}"
        yield f"  zero_ratio: {stats['zero_ratio']}"
        yield f"  NaN 개수: {stats['nan_count']} / {stats['total']}"
        yield f"  Inf 개수: {stats['inf_count']} / {stats['total']}"
        yield ""

    for line in report_lines():
        print(line)

def compare_keypoint_distribution(pkl1_path, pkl2_path, pkl3_path=None):
    """Print keypoint statistics for two pkl files, and optionally a third.

    The first two files are both loaded and summarized before anything is
    printed; the optional third file is handled afterwards.
    """
    required_paths = (pkl1_path, pkl2_path)
    datasets = [load_pkl(path) for path in required_paths]
    summaries = [keypoint_stats(data['annotations']) for data in datasets]
    for path, summary in zip(required_paths, summaries):
        print_stats(summary, f"{Path(path).name}")

    if pkl3_path is None:
        return
    extra = load_pkl(pkl3_path)
    extra_summary = keypoint_stats(extra['annotations'])
    print_stats(extra_summary, f"{Path(pkl3_path).name}")

# Usage example: print keypoint statistics for the three files below
PKL_PATH_1 = Path(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl")     # MMAction2 example file
PKL_PATH_2 = Path(r"D:\golfDataset\dataset\crop_pkl\train.pkl")        # the file you built yourself
PKL_PATH_3 = Path(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl")

compare_keypoint_distribution(PKL_PATH_1, PKL_PATH_2, PKL_PATH_3)

📊 ntu60_2d.pkl keypoint 분포:
  min:  [-389.25 -149.75]
  max:  [2274. 1674.]
  mean: [367.29764 188.13676]
  std:  [441.61224 314.72678]
  zero_ratio: [0.00179435 0.00179435]
  NaN 개수: 0 / 187095982
  Inf 개수: 0 / 187095982

📊 train.pkl keypoint 분포:
  min:  [0. 0.]
  max:  [0.40165937 0.8916185 ]
  mean: [0.06073872 0.19516627]
  std:  [0.04201406 0.15176444]
  zero_ratio: [0.1601775 0.1601775]
  NaN 개수: 0 / 10663046
  Inf 개수: 0 / 10663046

📊 train_unnorm.pkl keypoint 분포:
  min:  [-443.0643  -389.38937]
  max:  [1033.6134 1522.6797]
  mean: [123.17865 362.41635]
  std:  [131.25415 357.8559 ]
  zero_ratio: [0. 0.]
  NaN 개수: 0 / 10663046
  Inf 개수: 0 / 10663046



In [None]:
import pickle
from pathlib import Path

# Files whose split sizes are reported by this cell
PKL_PATH_1 = Path(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl")     # MMAction2 example file
PKL_PATH_2 = Path(r"D:\golfDataset\dataset\crop_pkl\test.pkl")        # the file you built yourself
PKL_PATH_3 = Path(r"D:\golfDataset\dataset\crop_pkl\test_unnorm.pkl")

def print_split_info(pkl_path):
    """Print the size of every split and the annotation count of a pkl file.

    Args:
        pkl_path: path of the pickle file, as ``str`` or ``pathlib.Path``.
    """
    # Normalize first: ``.name`` is used below and plain strings lack it.
    pkl_path = Path(pkl_path)
    with open(pkl_path, 'rb') as f:
        # NOTE: unpickling can run arbitrary code; only open trusted files.
        data = pickle.load(f)
    print(f"\n📂 {pkl_path.name}")
    if 'split' in data:
        for k, v in data['split'].items():
            print(f"  {k}: {len(v)} samples")
    else:
        print("  No 'split' key found.")
    # A file without annotations is reported as having zero of them.
    print(f"  Total annotations: {len(data.get('annotations', []))}")

# Report split sizes and annotation counts for each of the three files
print_split_info(PKL_PATH_1)
print_split_info(PKL_PATH_2)
print_split_info(PKL_PATH_3)


📂 ntu60_2d.pkl
  xsub_train: 40091 samples
  xsub_val: 16487 samples
  xview_train: 37646 samples
  xview_val: 18932 samples
  Total annotations: 56578

📂 train.pkl
  xsub_train: 1457 samples
  xsub_val: 164 samples
  Total annotations: 1621

📂 train_unnorm.pkl
  xsub_train: 1457 samples
  xsub_val: 164 samples
  Total annotations: 1621


In [17]:
import pickle
import numpy as np
from pathlib import Path

def load_pkl(pkl_path):
    """Deserialize and return the object stored in the pickle file ``pkl_path``.

    NOTE: unpickling can run arbitrary code, so only use this on files from
    a trusted source.
    """
    with open(pkl_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

def label_stats(annotations):
    """Print the min, max, sorted unique values and count of the ``label`` fields.

    Every annotation must contain a ``'label'`` key; a blank line is printed
    after the report.
    """
    collected = []
    for ann in annotations:
        collected.append(ann['label'])

    print("📋 label 통계:")
    lowest, highest = min(collected), max(collected)
    print(f"  min: {lowest}, max: {highest}")
    distinct = sorted(set(collected))
    print(f"  unique: {distinct}")
    how_many = len(collected)
    print(f"  count: {how_many}")
    print()

def keypoint_stats(annotations):
    """Compute summary statistics over every keypoint coordinate pair.

    Each annotation's ``keypoint`` array is flattened to rows of two values
    and all rows are stacked, so the per-column statistics are taken over
    the whole collection.

    Args:
        annotations: iterable of dicts, each with a ``'keypoint'`` entry
            whose total size is a multiple of 2.

    Returns:
        dict with the per-column arrays ``min``, ``max``, ``mean``, ``std``
        and ``zero_ratio``, the totals ``nan_count`` and ``inf_count``, and
        ``total``, the number of scalar values examined.

    Raises:
        ValueError: if ``annotations`` is empty, or a keypoint array cannot
            be reshaped to rows of two values.
    """
    kp_all = []
    nan_count = 0
    inf_count = 0
    for ann in annotations:
        # asarray avoids copying keypoints that are already ndarrays.
        kp = np.asarray(ann['keypoint'])
        # Count NaN/Inf per annotation so no dataset-sized mask is needed.
        nan_count += np.isnan(kp).sum()
        inf_count += np.isinf(kp).sum()
        kp_all.append(kp.reshape(-1, 2))
    if not kp_all:
        # Fail with a clear message instead of NumPy's concatenate error.
        raise ValueError("keypoint_stats() needs at least one annotation")
    kp_all = np.concatenate(kp_all, axis=0)
    stats = {
        'min': np.min(kp_all, axis=0),
        'max': np.max(kp_all, axis=0),
        'mean': np.mean(kp_all, axis=0),
        'std': np.std(kp_all, axis=0),
        # Fraction of entries in each column that are exactly zero.
        'zero_ratio': np.mean(kp_all == 0, axis=0),
        'nan_count': nan_count,
        'inf_count': inf_count,
        'total': kp_all.shape[0] * kp_all.shape[1]
    }
    return stats

def print_stats(stats, name):
    """Print the statistics dict from ``keypoint_stats`` under the heading ``name``.

    One line is written per statistic, followed by a blank line.
    """
    def report_lines():
        # Lazily yield each line so it is formatted right before printing.
        yield f"📊 {name} keypoint 분포:"
        yield f"  min:  {stats['min']}"
        yield f"  max:  {stats['max']}"
        yield f"  mean: {stats['mean']}"
        yield f"  std:  {stats['std']}"
        yield f"  zero_ratio: {stats['zero_ratio']}"
        yield f"  NaN 개수: {stats['nan_count']} / {stats['total']}"
        yield f"  Inf 개수: {stats['inf_count']} / {stats['total']}"
        yield ""

    for line in report_lines():
        print(line)

def analyze_pkl(pkl_path):
    """Load one pkl file and print its label statistics and keypoint statistics.

    A ``==== <file name> ====`` banner is printed first.
    """
    dataset = load_pkl(pkl_path)
    print(f"\n==== {Path(pkl_path).name} ====")
    annotations = dataset['annotations']
    label_stats(annotations)
    print_stats(keypoint_stats(annotations), Path(pkl_path).name)

# Usage example: analyze each listed file in turn
PKL_PATHS = [
    Path(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl"),
    Path(r"D:\golfDataset\dataset\crop_pkl\train.pkl"),
    Path(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl"),
]

for pkl_path in PKL_PATHS:
    analyze_pkl(pkl_path)


==== ntu60_2d.pkl ====
📋 label 통계:
  min: 0, max: 59
  unique: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
  count: 56578

📊 ntu60_2d.pkl keypoint 분포:
  min:  [-389.25 -149.75]
  max:  [2274. 1674.]
  mean: [367.29764 188.13676]
  std:  [441.61224 314.72678]
  zero_ratio: [0.00179435 0.00179435]
  NaN 개수: 0 / 187095982
  Inf 개수: 0 / 187095982


==== train.pkl ====
📋 label 통계:
  min: 0, max: 1
  unique: [0, 1]
  count: 1621

📊 train.pkl keypoint 분포:
  min:  [0. 0.]
  max:  [0.40165937 0.8916185 ]
  mean: [0.06073872 0.19516627]
  std:  [0.04201406 0.15176444]
  zero_ratio: [0.1601775 0.1601775]
  NaN 개수: 0 / 10663046
  Inf 개수: 0 / 10663046


==== train_unnorm.pkl ====
📋 label 통계:
  min: 0, max: 1
  unique: [0, 1]
  count: 1621

📊 train_unnorm.pkl keypoint 분포:
  min:  [-443.0643  -389.38937]
  max:  [10

In [19]:
import pickle
import numpy as np

# Load the NTU60 2D reference file (unpickling is only safe for trusted files).
with open(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl", "rb") as f:
    data = pickle.load(f)

# Check keypoint_score: collect the shape and min/max of every annotation,
# then print the first three of each as a sample.
scores = [ann['keypoint_score'] for ann in data['annotations']]
shapes = [np.array(s).shape for s in scores]
mins = [np.min(s) for s in scores]
maxs = [np.max(s) for s in scores]
print("keypoint_score shape 예시:", shapes[:3])
print("keypoint_score min/max 예시:", list(zip(mins, maxs))[:3])

# Check split: list the split names and how many samples each one holds.
print("split keys:", data['split'].keys())
for k, v in data['split'].items():
    print(f"{k}: {len(v)} samples")

keypoint_score shape 예시: [(1, 103, 17), (1, 158, 17), (1, 104, 17)]
keypoint_score min/max 예시: [(0.704, 0.9966), (0.731, 0.9976), (0.74, 0.998)]
split keys: dict_keys(['xsub_train', 'xsub_val', 'xview_train', 'xview_val'])
xsub_train: 40091 samples
xsub_val: 16487 samples
xview_train: 37646 samples
xview_val: 18932 samples


In [15]:
import pickle
import numpy as np
from pathlib import Path

NTU_PATH = Path(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl")
SRC_PATH = Path(r"D:\golfDataset\dataset\crop_pkl\train.pkl")
DST_PATH = Path(r"D:\golfDataset\dataset\crop_pkl\train_unnorm.pkl")

# 1. Read the structure and field values of ntu60_2d.pkl; its first
#    annotation serves as the template for field names and default values.
with open(NTU_PATH, 'rb') as f:
    ntu = pickle.load(f)
ntu_ann_template = ntu['annotations'][0]
ntu_fields = list(ntu_ann_template.keys())
ntu_field_defaults = {k: ntu_ann_template[k] for k in ntu_fields}

# 2. Load my own pkl
with open(SRC_PATH, 'rb') as f:
    mine = pickle.load(f)

# 3. annotation 구조 완전 매칭
def convert_ann(mine_ann, ntu_field_defaults, target_mean=None, target_std=None):
    """Convert one custom annotation into the layout of the NTU template.

    The keypoints are standardized with their own scalar mean/std and then
    rescaled to ``target_mean``/``target_std``.

    Args:
        mine_ann: source annotation dict; must contain ``'keypoint'`` with
            shape (M, T, V, C) or (T, V, C).
        ntu_field_defaults: template annotation dict supplying defaults for
            ``frame_dir``, ``total_frames``, ``label`` and the values of
            ``img_shape`` / ``original_shape``.
        target_mean: scalar mean the keypoints are rescaled to.
        target_std: scalar standard deviation the keypoints are rescaled to.
            When either target is ``None`` it is taken from a module-level
            global of the same name if one exists, otherwise from the
            template's ``'keypoint'`` array.

    Returns:
        dict with ``frame_dir``, ``total_frames``, ``label``, ``img_shape``,
        ``original_shape``, ``keypoint`` (float32, (M, T, V, C)) and
        ``keypoint_score`` (float32, (M, T, V)).

    NOTE(review): every coordinate is shifted, including exact zeros —
    confirm zeros are not used as missing-joint markers downstream.
    """
    # Resolve the rescaling targets: explicit argument > module-level global
    # (what the function body relied on before) > template keypoint stats.
    module_globals = globals()
    if target_mean is None:
        target_mean = module_globals.get('target_mean')
    if target_std is None:
        target_std = module_globals.get('target_std')
    if target_mean is None or target_std is None:
        template_kp = np.asarray(ntu_field_defaults['keypoint'], dtype=np.float32)
        if target_mean is None:
            target_mean = float(np.mean(template_kp))
        if target_std is None:
            target_std = float(np.std(template_kp))

    out = {}
    # frame_dir, total_frames, label come from my data, else from the template
    out['frame_dir'] = mine_ann.get('frame_dir', ntu_field_defaults['frame_dir'])
    out['total_frames'] = int(mine_ann.get('total_frames', ntu_field_defaults['total_frames']))
    out['label'] = int(mine_ann.get('label', ntu_field_defaults['label']))
    # img_shape, original_shape always come from the template
    out['img_shape'] = tuple(ntu_field_defaults['img_shape'])
    out['original_shape'] = tuple(ntu_field_defaults['original_shape'])

    # keypoint: (M, T, V, C) or (T, V, C) -> (1, T, V, C)
    kp = np.array(mine_ann['keypoint'], dtype=np.float32)
    if kp.ndim == 3:
        kp = kp[None, ...]
    if not np.all(np.isfinite(kp)):
        print("비정상 keypoint:", mine_ann.get('frame_dir'))
        kp = np.nan_to_num(kp, nan=0.0, posinf=0.0, neginf=0.0)
    cur_mean = np.mean(kp)
    cur_std = np.std(kp)
    if cur_std < 1e-6:
        # Constant keypoints cannot be standardized; emit zeros instead.
        print("경고: std=0, 변환 skip", mine_ann.get('frame_dir'))
        kp = np.zeros_like(kp)
    else:
        kp = (kp - cur_mean) / (cur_std + 1e-8)
        kp = kp * target_std + target_mean
    if not np.all(np.isfinite(kp)):
        print("변환 후 비정상 keypoint:", mine_ann.get('frame_dir'))
        kp = np.nan_to_num(kp, nan=0.0, posinf=0.0, neginf=0.0)
    out['keypoint'] = kp.astype(np.float32)

    # keypoint_score: (M, T, V)
    if 'keypoint_score' in mine_ann:
        score = np.array(mine_ann['keypoint_score'], dtype=np.float32)
        if score.ndim == 2:
            score = score[None, ...]  # (1, T, V)
        out['keypoint_score'] = score
    else:
        # Fill with ones matching THIS annotation's (M, T, V); the template's
        # shape carries the template's own frame count, which would not match.
        out['keypoint_score'] = np.ones(kp.shape[:-1], dtype=np.float32)
    return out

# 4. Convert every annotation
new_annotations = [convert_ann(ann, ntu_field_defaults) for ann in mine['annotations']]

# 5. Copy the split info (keep my pkl's split as-is when present; otherwise
#    copy only the NTU split names with empty lists)
if 'split' in mine:
    new_split = mine['split']
else:
    new_split = {k: [] for k in ntu['split'].keys()}

# 6. Save the new pkl (same top-level structure as ntu60_2d.pkl)
final = dict(split=new_split, annotations=new_annotations)
with open(DST_PATH, 'wb') as f:
    pickle.dump(final, f)