In [1]:
import os
import subprocess
import json
import shutil
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.cluster import DBSCAN

# ✏️ Base working directory and the sub-folders to process
CUR_DIR = Path(r"D:\golfDataset\dataset\test")  # root of the current working set
BALANCED_FOLDERS = ["balanced_true", "false"]   # the two sub-directories that get processed
FPS = 30

# OpenPose settings
OPENPOSE_EXE = Path(r"C:/openpose/openpose/bin/OpenPoseDemo.exe")
OPENPOSE_ROOT = OPENPOSE_EXE.parents[1]  # two levels above the executable (…/openpose)
PAD_RATIO = 0.10  # fraction of the box width/height added as padding on each side

# Torso keypoint indices.
# NOTE(review): the original comment labelled these as
# (Neck, MidHip, RShoulder, LShoulder, RootHip), but in the KP order below
# indices 11, 14 and 17 are RAnkle, LAnkle and REar — confirm the intended indices.
TORSO_IDXS = [1, 8, 11, 14, 17]

# Keypoint names, in the order used to index the keypoint arrays
KP = [
    "Nose", "Neck",
    "RShoulder", "RElbow", "RWrist",
    "LShoulder", "LElbow", "LWrist",
    "MidHip",
    "RHip", "RKnee", "RAnkle",
    "LHip", "LKnee", "LAnkle",
    "REye", "LEye", "REar", "LEar",
    "LBigToe", "LSmallToe", "LHeel",
    "RBigToe", "RSmallToe", "RHeel",
]
# CSV column names: three columns (x, y, confidence) per keypoint, keypoint-major
COLS = [f"{joint}_{axis}" for joint in KP for axis in ("x", "y", "c")]

# ---------- OpenPose 실행 함수 --------------------------------------
def run_openpose(video: Path, out_dir: Path):
    """Run the OpenPose CLI on ``video`` and have it write keypoint JSON to ``out_dir``.

    ``out_dir`` is created if it does not exist. The executable is launched
    with ``OPENPOSE_ROOT`` as its working directory, with display and pose
    rendering switched off and detection limited to one person.

    Raises:
        subprocess.CalledProcessError: if OpenPose exits with a non-zero status.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    model_dir = OPENPOSE_ROOT / "models"
    argv = [
        str(OPENPOSE_EXE),
        "--video", str(video),
        "--write_json", str(out_dir),
        "--display", "0",
        "--render_pose", "0",
        "--number_people_max", "1",
        "--model_folder", str(model_dir),
    ]
    subprocess.run(argv, cwd=OPENPOSE_ROOT, check=True)

# ---------- 주요 인물 박스 추출 함수 ---------------------------------
def main_person_boxes(json_dir: Path):
    """Collect per-frame bounding boxes of the dominant person in ``json_dir``.

    Every ``*.json`` file in ``json_dir`` (sorted by name) is read. For each
    frame the first entry of ``"people"`` is used; frames with no people, with
    a malformed/truncated keypoint list, or with a MidHip confidence below
    0.10 are skipped. The MidHip position of the remaining frames is clustered
    with DBSCAN and only frames belonging to the most populated cluster are
    kept.

    Args:
        json_dir: directory containing the keypoint JSON files.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max]`` boxes (one per kept frame),
        each spanning the keypoints whose confidence exceeds 0.05. The list is
        empty when no frame is usable or when DBSCAN labels every frame as
        noise.
    """
    mid_hip = 8  # row of MidHip in the (n_keypoints, 3) keypoint array
    centers, boxes = [], []
    for jf in sorted(json_dir.glob("*.json")):
        # Context manager so the handle is closed immediately instead of
        # lingering until garbage collection (one file per video frame).
        with open(jf, encoding="utf-8") as fh:
            data = json.load(fh)
        people = data.get("people")
        if not people:
            continue
        flat = np.array(people[0].get("pose_keypoints_2d", []))
        # Skip frames whose keypoint list cannot be reshaped to (n, 3) or is
        # too short to contain MidHip, rather than crashing on the index.
        if flat.size % 3 != 0 or flat.size < (mid_hip + 1) * 3:
            continue
        kps = flat.reshape(-1, 3)
        if kps[mid_hip, 2] < 0.10:  # MidHip confidence check
            continue
        cx, cy = kps[mid_hip, :2]
        valid = kps[:, 2] > 0.05
        xs, ys = kps[valid, 0], kps[valid, 1]
        centers.append([cx, cy])
        boxes.append([xs.min(), ys.min(), xs.max(), ys.max()])

    if not centers:
        return []

    labels = DBSCAN(eps=100, min_samples=5).fit_predict(np.array(centers))
    clustered = labels[labels != -1]
    if clustered.size == 0:
        # Every frame was labelled noise (-1): there is no main cluster.
        return []
    main_label = np.bincount(clustered).argmax()
    return [box for box, lb in zip(boxes, labels) if lb == main_label]

# ---------- Union Box 계산 함수 ------------------------------------
def union_box(box_list, pad_ratio=None):
    """Return the padded union of several boxes as an ``(x, y, w, h)`` tuple.

    Args:
        box_list: non-empty sequence of ``[x_min, y_min, x_max, y_max]`` boxes.
        pad_ratio: fraction of the union's width/height added as padding on
            each side. Defaults to the module-level ``PAD_RATIO``.

    Returns:
        ``(x, y, w, h)`` as Python ints (truncated toward zero). The origin is
        clamped to be non-negative: when padding would push the left/top edge
        below 0, the edge is moved to 0 and the width/height is shortened by
        the same amount so the right/bottom edge stays where it was.
    """
    if pad_ratio is None:
        pad_ratio = PAD_RATIO

    arr = np.array(box_list)
    x1, y1 = arr[:, :2].min(0)
    x2, y2 = arr[:, 2:].max(0)
    w, h = x2 - x1, y2 - y1
    pad_w = w * pad_ratio
    pad_h = h * pad_ratio

    x, y = x1 - pad_w, y1 - pad_h
    width, height = w + 2 * pad_w, h + 2 * pad_h

    # A negative origin is not a valid crop offset; clamp it and give up only
    # the part of the padding that fell outside the frame.
    if x < 0:
        width += x
        x = 0
    if y < 0:
        height += y
        y = 0
    return int(x), int(y), int(width), int(height)

# ---------- FFmpeg Crop 함수 ----------------------------------------
def crop_video(src: Path, dst: Path, bbox):
    """Crop ``src`` to ``bbox`` with ffmpeg and write the result to ``dst``.

    Args:
        src: input video path.
        dst: output video path; overwritten if it exists (``-y``).
        bbox: ``(x, y, w, h)`` crop rectangle.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    left, top, width, height = bbox
    crop_filter = f"crop={width}:{height}:{left}:{top}"
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", str(src),
            "-filter:v", crop_filter,
            "-pix_fmt", "yuv420p",
            str(dst),
        ],
        check=True,
    )

# ---------- JSON → CSV 변환 함수 ------------------------------------
def json_dir_to_csv(json_dir: Path, csv_path: Path):
    """Flatten the keypoint JSON files in ``json_dir`` into one CSV file.

    Each ``*.json`` file (sorted by name) becomes one CSV row holding the
    ``pose_keypoints_2d`` list of the first entry in ``"people"``. A frame
    with no people, or whose keypoint list is missing or does not have exactly
    ``len(COLS)`` values, is written as a row of NaN so that row order still
    follows file order.

    Args:
        json_dir: directory containing the keypoint JSON files.
        csv_path: destination CSV path; written without an index column and
            with ``COLS`` as the header.
    """
    n_cols = len(COLS)
    rows = []
    for jf in sorted(json_dir.glob("*.json")):
        # Context manager so the handle is closed right away (one file per frame).
        with open(jf, encoding="utf-8") as fh:
            data = json.load(fh)
        people = data.get("people")
        keypoints = people[0].get("pose_keypoints_2d") if people else None
        if keypoints is None or len(keypoints) != n_cols:
            # Unusable frame: keep a placeholder instead of letting a
            # wrong-length row break the DataFrame construction below.
            rows.append([np.nan] * n_cols)
        else:
            rows.append(keypoints)
    pd.DataFrame(rows, columns=COLS).to_csv(csv_path, index=False)

# ---------- 전체 파이프라인 함수 -----------------------------------
def preprocess_all(root_dir: Path):
    """Run the crop-and-extract pipeline on every ``*.mp4`` under ``root_dir/video``.

    For each video, in sorted order:
      1. run OpenPose on the original video (JSON under ``_tmp_json/raw_<name>``);
      2. derive the main person's boxes and their padded union;
      3. crop the video to that box (``crop_video/<name>_crop.mp4``);
      4. run OpenPose again on the cropped video (``_tmp_json/crop_<name>``);
      5. write the second-pass keypoints to ``crop_keypoint/<name>.csv``.

    Videos for which no main-person box is found are reported and skipped.
    The output and scratch directories are created if they do not exist.
    """
    videos_in = root_dir / "video"
    cropped_out = root_dir / "crop_video"
    keypoints_out = root_dir / "crop_keypoint"
    scratch = root_dir / "_tmp_json"
    for folder in (cropped_out, keypoints_out, scratch):
        folder.mkdir(parents=True, exist_ok=True)

    clips = sorted(videos_in.glob("*.mp4"))
    for clip in tqdm(clips, desc=f"Processing {root_dir.name}"):
        stem = clip.stem

        # First pass: keypoints on the uncropped video, used only to locate the person.
        first_pass = scratch / f"raw_{stem}"
        run_openpose(clip, first_pass)

        person_boxes = main_person_boxes(first_pass)
        if not person_boxes:
            print(f"⚠️  No valid person in {clip.name}")
            continue

        cropped_clip = cropped_out / f"{stem}_crop.mp4"
        crop_video(clip, cropped_clip, union_box(person_boxes))

        # Second pass: keypoints on the cropped video, which become the CSV.
        second_pass = scratch / f"crop_{stem}"
        run_openpose(cropped_clip, second_pass)

        json_dir_to_csv(second_pass, keypoints_out / f"{stem}.csv")
        print(f"✅ {root_dir.name} / {stem} saved")

    # To delete the intermediate JSON once done, uncomment the line below.
    # shutil.rmtree(scratch)

# ---------- 스크립트 실행부 -----------------------------------------
if __name__ == "__main__":
    # Process each configured sub-folder of the base directory in turn.
    for sub_name in BALANCED_FOLDERS:
        preprocess_all(CUR_DIR / sub_name)


Processing balanced_true:   2%|▎         | 1/40 [00:38<25:16, 38.88s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_001 saved


Processing balanced_true:   5%|▌         | 2/40 [01:06<20:30, 32.37s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_002 saved


Processing balanced_true:   8%|▊         | 3/40 [01:39<20:11, 32.73s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_003 saved


Processing balanced_true:  10%|█         | 4/40 [02:05<17:51, 29.77s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_004 saved


Processing balanced_true:  12%|█▎        | 5/40 [02:31<16:38, 28.52s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_005 saved


Processing balanced_true:  15%|█▌        | 6/40 [02:57<15:46, 27.85s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_006 saved


Processing balanced_true:  18%|█▊        | 7/40 [03:22<14:43, 26.77s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_007 saved


Processing balanced_true:  20%|██        | 8/40 [08:51<1:05:33, 122.91s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_008 saved


Processing balanced_true:  22%|██▎       | 9/40 [09:15<47:36, 92.13s/it]   

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_009 saved


Processing balanced_true:  25%|██▌       | 10/40 [09:48<36:56, 73.87s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_010 saved


Processing balanced_true:  28%|██▊       | 11/40 [10:14<28:32, 59.03s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_011 saved


Processing balanced_true:  30%|███       | 12/40 [10:40<22:54, 49.08s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_012 saved


Processing balanced_true:  32%|███▎      | 13/40 [11:06<18:56, 42.11s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_013 saved


Processing balanced_true:  35%|███▌      | 14/40 [11:30<15:49, 36.52s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_014 saved


Processing balanced_true:  38%|███▊      | 15/40 [11:56<13:53, 33.35s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_015 saved


Processing balanced_true:  40%|████      | 16/40 [12:20<12:12, 30.52s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_016 saved


Processing balanced_true:  42%|████▎     | 17/40 [12:52<11:54, 31.07s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_017 saved


Processing balanced_true:  45%|████▌     | 18/40 [13:16<10:35, 28.87s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_018 saved


Processing balanced_true:  48%|████▊     | 19/40 [13:39<09:32, 27.28s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_019 saved


Processing balanced_true:  50%|█████     | 20/40 [14:09<09:20, 28.02s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_020 saved


Processing balanced_true:  52%|█████▎    | 21/40 [14:33<08:30, 26.89s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_021 saved


Processing balanced_true:  55%|█████▌    | 22/40 [14:59<07:56, 26.48s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_022 saved


Processing balanced_true:  57%|█████▊    | 23/40 [15:29<07:49, 27.64s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_023 saved


Processing balanced_true:  60%|██████    | 24/40 [15:58<07:29, 28.11s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_024 saved


Processing balanced_true:  62%|██████▎   | 25/40 [16:22<06:40, 26.69s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_025 saved


Processing balanced_true:  65%|██████▌   | 26/40 [16:46<06:02, 25.90s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_026 saved


Processing balanced_true:  68%|██████▊   | 27/40 [17:17<05:58, 27.60s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_027 saved


Processing balanced_true:  70%|███████   | 28/40 [17:49<05:45, 28.82s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_028 saved


Processing balanced_true:  72%|███████▎  | 29/40 [18:14<05:06, 27.83s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_029 saved


Processing balanced_true:  75%|███████▌  | 30/40 [18:39<04:27, 26.79s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_030 saved


Processing balanced_true:  78%|███████▊  | 31/40 [19:07<04:06, 27.35s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_031 saved


Processing balanced_true:  80%|████████  | 32/40 [19:41<03:53, 29.19s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_032 saved


Processing balanced_true:  82%|████████▎ | 33/40 [20:05<03:12, 27.49s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_033 saved


Processing balanced_true:  85%|████████▌ | 34/40 [20:28<02:38, 26.39s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_034 saved


Processing balanced_true:  88%|████████▊ | 35/40 [20:56<02:14, 26.83s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_035 saved


Processing balanced_true:  90%|█████████ | 36/40 [21:22<01:46, 26.55s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_036 saved


Processing balanced_true:  92%|█████████▎| 37/40 [21:48<01:18, 26.31s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_037 saved


Processing balanced_true:  95%|█████████▌| 38/40 [22:18<00:54, 27.34s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_038 saved


Processing balanced_true:  98%|█████████▊| 39/40 [22:43<00:26, 26.91s/it]

✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_039 saved


Processing balanced_true: 100%|██████████| 40/40 [23:10<00:00, 34.77s/it]


✅ balanced_true / 20201124_General_037_DOS_A_M40_MS_040 saved


Processing false: 100%|██████████| 1/1 [00:22<00:00, 22.80s/it]

✅ false / 20201123_General_030_DOS_A_M40_MM_070 saved



