In [1]:
tf = True   # True -> work in the "balanced_true" directory; False -> work in the "false" directory

In [None]:
# %% -------------------------------------------------
# 🏌️‍♂️ Cell 2 — OpenPose ➜ 1-Person Median-Box Crop & CSV Keypoints
# ---------------------------------------------------------
"""
1. OpenPose (`--number_people_max 1`) → _tmp_json/raw_<name>
   • only people[0] of each frame is used; frames whose MidHip confidence is below 0.10 are skipped
2. DBSCAN over the per-frame MidHip centers keeps the frames of the dominant person
   • crop box = union of those frames' keypoint boxes
   • padded on every side by PAD_RATIO
3. FFmpeg fixed crop → crop_video/
4. Re-run OpenPose on the cropped video → _tmp_json/crop_<name>
5. Per-frame keypoints → CSV (25×3 cols) → crop_keypoint/
"""

import os, subprocess
from pathlib import Path
import subprocess, json, shutil, numpy as np, pandas as pd
from tqdm import tqdm
from sklearn.cluster import DBSCAN

# ✏️ Path settings
CUR_DIR = r"D:\golfDataset\dataset\test"  # dataset root for this run (kept as a plain string)

# Choose the working directory from the `tf` flag: "balanced_true" when truthy, "false" otherwise.
# CUR_DIR is a str, so it has to be wrapped in Path before `/` can join path components
# (str / str raises TypeError).
ROOT_DIR = Path(CUR_DIR) / ("balanced_true" if tf else "false")
VIDEO_DIR = ROOT_DIR / "video"         # MP4 video folder (created below if missing)
FPS = 30                               # output video FPS
VIDEO_DIR.mkdir(parents=True, exist_ok=True)

# ✏️ Paths / parameters --------------------------------------------------------
OPENPOSE_EXE = Path(r"C:/openpose/openpose/bin/OpenPoseDemo.exe")
OPENPOSE_ROOT = OPENPOSE_EXE.parents[1]          # two levels above the executable: …/openpose
CROP_VIDEO_DIR = ROOT_DIR / "crop_video"         # cropped videos
CROP_KP_DIR = ROOT_DIR / "crop_keypoint"         # per-video keypoint CSV files
TMP_JSON_DIR = ROOT_DIR / "_tmp_json"            # intermediate OpenPose JSON output
PAD_RATIO = 0.10                                 # padding added per side, as a fraction of box size

# Make sure every output folder exists before the pipeline writes into it.
for d in (CROP_VIDEO_DIR, CROP_KP_DIR, TMP_JSON_DIR):
    d.mkdir(parents=True, exist_ok=True)

# ---------- OpenPose ---------------------------------------------------

def run_openpose(video: Path, out_dir: Path) -> None:
    """Run the OpenPose CLI on one video and write per-frame JSON files.

    Args:
        video: Input video file.
        out_dir: Directory that receives the JSON output; created if missing.

    Raises:
        subprocess.CalledProcessError: If OpenPose exits with a non-zero status.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    # The child process runs with cwd=OPENPOSE_ROOT, so a relative path would be
    # interpreted relative to the OpenPose folder. Make both paths absolute
    # against the caller's working directory first.
    video_path = Path(video).absolute()
    json_path = Path(out_dir).absolute()
    cmd = [
        str(OPENPOSE_EXE),
        "--video", str(video_path),
        "--write_json", str(json_path),
        "--display", "0", "--render_pose", "0",   # no GUI window, no rendering
        "--number_people_max", "1",
        "--model_folder", str(OPENPOSE_ROOT / "models"),
    ]
    subprocess.run(cmd, check=True, cwd=OPENPOSE_ROOT)

# ---------- 주요 인물 프레임 추출 --------------------------------------
# NOTE(review): not referenced by any code visible in this cell. According to the KP name
# table defined in this file, indices 11, 14 and 17 are RAnkle, LAnkle and REar rather than
# shoulders/hips — confirm the intended indices before relying on this list.
TORSO_IDXS = [1, 8, 11, 14, 17]   # labeled by the author as: Neck, MidHip, Shoulders, RootHip

def main_person_boxes(json_dir: Path):
    """Return the per-frame keypoint boxes that belong to the dominant person.

    For every ``*.json`` file in ``json_dir`` (sorted by name) the first entry
    of ``people`` is read. Frames without detections, or whose MidHip
    (keypoint 8) confidence is below 0.10, are skipped. The MidHip positions of
    the remaining frames are clustered with DBSCAN and only the frames of the
    largest cluster are kept.

    Args:
        json_dir: Directory holding the per-frame OpenPose JSON files.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max]`` boxes, one per kept frame.
        Empty when no frame has a usable detection.
    """
    centers, boxes = [], []
    for jf in sorted(json_dir.glob("*.json")):
        # Context manager so the file handle is closed right after parsing.
        with open(jf, encoding="utf-8") as fh:
            data = json.load(fh)
        people = data.get("people")
        if not people:                          # no detections in this frame
            continue
        kps = np.array(people[0]["pose_keypoints_2d"]).reshape(-1, 3)
        if kps.shape[0] <= 8:                   # keypoint list too short to contain MidHip
            continue
        if kps[8, 2] < 0.10:                    # MidHip confidence too low
            continue
        # Bounding box over every keypoint with non-negligible confidence.
        # MidHip itself passes this filter, so the selection is never empty.
        valid = kps[:, 2] > 0.05
        xs, ys = kps[valid, 0], kps[valid, 1]
        centers.append(kps[8, :2])
        boxes.append([xs.min(), ys.min(), xs.max(), ys.max()])

    if not centers:
        return []

    # Cluster the MidHip centers in pixel space; label -1 marks noise.
    labels = DBSCAN(eps=100, min_samples=5).fit_predict(np.array(centers))
    clustered = labels[labels != -1]
    if clustered.size == 0:
        # Every frame was labeled noise (e.g. fewer than min_samples usable
        # frames): there is no cluster to prefer, so keep all frames.
        return boxes
    main_label = np.bincount(clustered).argmax()
    return [box for box, lb in zip(boxes, labels) if lb == main_label]

# ---------- Union‑box ---------------------------------------------------

def union_box(box_list, pad_ratio=None):
    """Return one padded crop box that encloses every box in ``box_list``.

    Args:
        box_list: Non-empty sequence of ``[x_min, y_min, x_max, y_max]`` boxes.
        pad_ratio: Padding added on each side as a fraction of the union's
            width/height. Defaults to the module-level ``PAD_RATIO``.

    Returns:
        ``(x, y, w, h)`` as Python ints. ``x`` and ``y`` are never negative:
        when padding pushes the origin past the top/left edge, the origin is
        clamped to 0 and the size is reduced by the same amount so the far
        edge stays where it was.

    Raises:
        ValueError: If ``box_list`` is empty.
    """
    if pad_ratio is None:
        pad_ratio = PAD_RATIO
    arr = np.asarray(box_list, dtype=float)
    if arr.size == 0:
        raise ValueError("union_box() requires at least one box")

    x1, y1 = arr[:, :2].min(axis=0)
    x2, y2 = arr[:, 2:].max(axis=0)
    w, h = x2 - x1, y2 - y1
    pad_w, pad_h = w * pad_ratio, h * pad_ratio

    x, y = int(x1 - pad_w), int(y1 - pad_h)
    out_w, out_h = int(w + 2 * pad_w), int(h + 2 * pad_h)
    # Pixel coordinates cannot be negative; trim the part that falls outside.
    if x < 0:
        out_w += x
        x = 0
    if y < 0:
        out_h += y
        y = 0
    return x, y, out_w, out_h

# ---------- Crop helper -------------------------------------------------

def crop_video(src: Path, dst: Path, bbox) -> None:
    """Crop ``src`` to a fixed rectangle with FFmpeg and write it to ``dst``.

    Args:
        src: Input video file.
        dst: Output video file; overwritten if it exists (``-y``).
        bbox: ``(x, y, w, h)`` crop rectangle in pixels.

    Raises:
        ValueError: If the crop size is not positive after rounding to even.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    x, y, w, h = (int(v) for v in bbox)
    # yuv420p subsamples chroma 2x2, so encoders such as libx264 reject odd
    # frame sizes. Round both dimensions down to the nearest even number.
    w -= w % 2
    h -= h % 2
    if w <= 0 or h <= 0:
        raise ValueError(f"crop size must be positive, got {w}x{h}")
    cmd = ["ffmpeg", "-y", "-i", str(src),
           "-filter:v", f"crop={w}:{h}:{x}:{y}",
           "-pix_fmt", "yuv420p", str(dst)]
    subprocess.run(cmd, check=True)

# ---------- JSON → CSV --------------------------------------------------
# Keypoint names in the order they appear in `pose_keypoints_2d` (25 entries).
KP = ["Nose", "Neck", "RShoulder", "RElbow", "RWrist", "LShoulder", "LElbow", "LWrist",
      "MidHip", "RHip", "RKnee", "RAnkle", "LHip", "LKnee", "LAnkle", "REye", "LEye",
      "REar", "LEar", "LBigToe", "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel"]
# One x / y / confidence column per keypoint -> 75 columns.
COLS = [f"{n}_{a}" for n in KP for a in ("x", "y", "c")]


def json_dir_to_csv(json_dir: Path, csv_path: Path) -> None:
    """Collect the per-frame keypoints in ``json_dir`` into a single CSV file.

    Each ``*.json`` file (sorted by name) becomes one row holding the
    ``pose_keypoints_2d`` values of the first detected person. Frames with no
    detection become a row of NaN so the row index still matches the frame
    index.

    Args:
        json_dir: Directory holding the per-frame OpenPose JSON files.
        csv_path: Destination CSV file; written without an index column.
    """
    rows = []
    for jf in sorted(json_dir.glob("*.json")):
        # Context manager so the file handle is closed right after parsing.
        with open(jf, encoding="utf-8") as fh:
            data = json.load(fh)
        people = data.get("people")
        if people:
            rows.append(people[0]["pose_keypoints_2d"])
        else:
            rows.append([np.nan] * len(COLS))
    pd.DataFrame(rows, columns=COLS).to_csv(csv_path, index=False)

# ---------- 전체 파이프라인 --------------------------------------------

def preprocess_all():
    """Run the full crop-and-extract pipeline for every MP4 in ``VIDEO_DIR``.

    Per video: run OpenPose on the original, derive the dominant person's
    padded union box, crop the video with FFmpeg, run OpenPose again on the
    crop and write the resulting keypoints to ``CROP_KP_DIR/<name>.csv``.
    Videos with no usable detection are reported and skipped.

    The intermediate JSON folder ``TMP_JSON_DIR`` is removed when the function
    exits, including when a step raises, so a failed run does not leave stale
    JSON files behind.

    Raises:
        subprocess.CalledProcessError: If OpenPose or ffmpeg fails for a video.
    """
    try:
        for vid in tqdm(sorted(VIDEO_DIR.glob("*.mp4"))):
            name = vid.stem

            # Pass 1: keypoints on the full frame, used only to locate the person.
            raw_dir = TMP_JSON_DIR / f"raw_{name}"
            run_openpose(vid, raw_dir)

            boxes = main_person_boxes(raw_dir)
            if not boxes:
                print(f"⚠️  No valid person in {vid.name}")
                continue
            bbox = union_box(boxes)

            crop_mp4 = CROP_VIDEO_DIR / f"{name}_crop.mp4"
            crop_video(vid, crop_mp4, bbox)

            # Pass 2: keypoints on the cropped video; these are the ones exported.
            crop_dir = TMP_JSON_DIR / f"crop_{name}"
            run_openpose(crop_mp4, crop_dir)

            csv_path = CROP_KP_DIR / f"{name}.csv"
            json_dir_to_csv(crop_dir, csv_path)
            print(f"✅ {name} saved")
    finally:
        # Remove the intermediate JSON files. ignore_errors keeps a cleanup
        # problem from masking an exception raised inside the loop.
        shutil.rmtree(TMP_JSON_DIR, ignore_errors=True)

# Run the pipeline (executes as soon as this cell is run)
preprocess_all()
