In [None]:
import pickle
from pathlib import Path
import numpy as np

def load_pkl(pkl_path):
    """Deserialize and return the object stored in the pickle file at ``pkl_path``.

    The file is opened in binary mode and closed again before returning.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file, so
    this should only be pointed at files from a trusted source.
    """
    with open(pkl_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

def summarize_annotation(ann):
    """Reduce one annotation mapping to a small, printable summary dict.

    The result always has the keys ``frame_dir``, ``label``,
    ``keypoint_shape``, ``keypoint_score_shape`` and ``total_frames`` (in that
    order). Plain fields are copied with ``ann.get`` (``None`` when absent);
    the two ``*_shape`` entries hold the shape of the corresponding value after
    conversion with ``np.array``, or ``None`` when the key is not in ``ann``.
    """
    def _shape_or_none(field):
        # Only a missing key yields None; a key that is present is always
        # converted, whatever its value is.
        if field not in ann:
            return None
        return np.array(ann.get(field)).shape

    summary = {}
    for plain_field in ('frame_dir', 'label'):
        summary[plain_field] = ann.get(plain_field)
    summary['keypoint_shape'] = _shape_or_none('keypoint')
    summary['keypoint_score_shape'] = _shape_or_none('keypoint_score')
    summary['total_frames'] = ann.get('total_frames')
    return summary

def compare_annotations(ann1, ann2, idx):
    """Print the summary fields on which two annotations disagree.

    Both annotations are reduced with ``summarize_annotation``. Every field
    whose summarized values compare unequal is printed beneath a header line
    that names ``idx`` and the ``frame_dir`` of ``ann1``. Nothing is printed
    when all fields match. Returns ``None``.
    """
    left = summarize_annotation(ann1)
    right = summarize_annotation(ann2)

    # field -> (value from ann1, value from ann2), in summary key order.
    mismatches = {
        field: (left_value, right[field])
        for field, left_value in left.items()
        if left_value != right[field]
    }
    if not mismatches:
        return

    print(f"[!] Difference in annotation {idx} (frame_dir: {ann1.get('frame_dir')}):")
    for field, (v1, v2) in mismatches.items():
        print(f"    ▸ {field}: {v1} ≠ {v2}")

def compare_pkls(pkl1_path, pkl2_path, *, split_names=None):
    """Print a human-readable comparison of two annotation pickle files.

    Both files are loaded with ``load_pkl``. Each loaded object is used as a
    mapping that has an ``'annotations'`` sequence and, optionally, a
    ``'split'`` mapping of split name -> collection of identifiers.

    Three reports are printed:

    1. Top-level keys that exist in only one of the two files.
    2. For each compared split name, the identifiers that appear in only one
       file (only when both files contain ``'split'``).
    3. Per-index annotation differences via ``compare_annotations``, followed
       by the ``frame_dir`` of any surplus annotations in the longer file.

    Args:
        pkl1_path: Path of the first pickle file.
        pkl2_path: Path of the second pickle file.
        split_names: Optional iterable of split names to compare. When omitted,
            every split name found in either file is compared, so splits other
            than ``xsub_train`` / ``xsub_val`` are no longer silently skipped.

    Raises:
        KeyError: If either file has no ``'annotations'`` entry.
    """
    pkl1 = load_pkl(pkl1_path)
    pkl2 = load_pkl(pkl2_path)

    # Check top-level keys
    print("🔍 Top-level keys comparison:")
    keys1 = set(pkl1.keys())
    keys2 = set(pkl2.keys())
    print(" - Only in pkl1:", keys1 - keys2)
    print(" - Only in pkl2:", keys2 - keys1)
    print()

    # Check split differences
    if 'split' in pkl1 and 'split' in pkl2:
        print("📎 Split key comparison:")
        split1 = pkl1['split']
        split2 = pkl2['split']
        if split_names is None:
            # Union of both files' split names; sorted for a stable report order.
            names_to_check = sorted(set(split1) | set(split2), key=str)
        else:
            names_to_check = list(split_names)
        for split_name in names_to_check:
            s1 = set(split1.get(split_name, []))
            s2 = set(split2.get(split_name, []))
            diff = s1.symmetric_difference(s2)
            if diff:
                print(f" - Difference in split '{split_name}': {diff}")
        print()

    # Compare annotations
    ann1_list = pkl1['annotations']
    ann2_list = pkl2['annotations']
    len1 = len(ann1_list)
    len2 = len(ann2_list)

    print(f"📊 Number of annotations: pkl1={len1}, pkl2={len2}")

    # zip stops at the shorter list, so only the common index range is compared.
    for i, (ann1, ann2) in enumerate(zip(ann1_list, ann2_list)):
        compare_annotations(ann1, ann2, i)

    # List the entries that have no counterpart in the shorter file.
    if len1 != len2:
        print("\n⚠️ Different number of annotations:")
        if len1 > len2:
            print(" - Extra annotations in pkl1:")
            extras = ann1_list[len2:]
        else:
            print(" - Extra annotations in pkl2:")
            extras = ann2_list[len1:]
        for extra in extras:
            print(f"    {extra.get('frame_dir')}")

# ──────────────────────────────
# Usage example (edit the file paths below)
# ──────────────────────────────
# Raw strings are required for these Windows paths: in a normal literal the
# "\n" of "\ntu60_2d.pkl" is a newline and the "\t" of "\train.pkl" is a tab,
# which silently corrupts the path.
PKL_PATH_1 = Path(r"D:\golfDataset\dataset\crop_pkl\ntu60_2d.pkl")  # MMAction2 example file
PKL_PATH_2 = Path(r"D:\golfDataset\dataset\crop_pkl\train.pkl")  # the file you generated

# Run the comparison only when executed as a script / notebook cell, not on import.
if __name__ == "__main__":
    compare_pkls(PKL_PATH_1, PKL_PATH_2)
