1. Imports & 기본 설정

In [141]:
import numpy as np
import pandas as pd
import json
from pathlib import Path
from tqdm import tqdm

In [142]:
# 고정 파라미터 (확정)
FPS = 10
NUM_FRAMES = 100
NUM_KEYPOINTS = 17

T = 40          # sequence length (4초)
STRIDE = 3     # sliding window stride

DATA_ROOT = Path("../data/")
EDA_ROOT = Path("../EDA/")

In [143]:
from pathlib import Path

pose_ids = [p.stem for p in Path("../data/pose").glob("*.npy")]
print("pose count:", len(pose_ids))
pose_ids[:10]

pose count: 200


['01997_H_A_SY_C1',
 '00655_H_D_SY_C5',
 '02026_H_A_N_C2',
 '00734_H_D_N_C2',
 '00597_H_D_SY_C3',
 '00341_H_A_SY_C3',
 '00657_H_D_N_C4',
 '00549_H_D_SY_C1',
 '02551_H_D_SY_C1',
 '02243_H_A_SY_C5']

In [144]:
from pathlib import Path

pose_ids = {p.stem for p in Path("../data/pose").glob("*.npy")}

df_all = pd.read_csv(EDA_ROOT / "train_clean.csv")
df = df_all[df_all["scene_id"].isin(pose_ids)].reset_index(drop=True)

print("usable samples:", len(df))

usable samples: 200


In [145]:
def load_pose(scene_id: str) -> np.ndarray:
    """
    return shape: (100, 17, 3)
    """
    pose_path = DATA_ROOT / "pose" / f"{scene_id}.npy"
    if not pose_path.exists():
        raise FileNotFoundError(pose_path)
    return np.load(pose_path)


낙상 구간 JSON 로더  
	•	비낙상(N/N): JSON 없음 → (None, None)  
	•	낙상(Y/SY): JSON 있음  

In [146]:
def load_fall_range(scene_id: str):
    """
    return: (fall_start_frame, fall_end_frame) or (None, None)
    """
    label_root = DATA_ROOT / "train" / "label"

    for modality in ["sensor", "video"]:
        json_path = (
            label_root
            / modality
            / "Y"
            / "SY"
            / scene_id
            / f"{scene_id}.json"
        )

        if json_path.exists():
            with open(json_path, "r") as f:
                data = json.load(f)
            return (
                data["sensordata"]["fall_start_frame"],
                data["sensordata"]["fall_end_frame"],
            )

    return None, None

Feature 생성 함수 (D = 85)  

구성:  
	•	keypoint (x, y) → 34  
	•	confidence → 17  
	•	velocity (dx, dy) → 34  
총 85  

In [147]:
def make_feature(pose_seq: np.ndarray) -> np.ndarray:
    """
    input : (100, 17, 3)
    output: (100, 85)
    """
    features = []
    prev_xy = None

    for frame in pose_seq:
        xy = frame[:, :2]      # (17, 2)
        conf = frame[:, 2]     # (17,)

        # 중심 정렬 (hip center)
        center = (xy[11] + xy[12]) / 2
        xy = xy - center

        # scale 정규화 (shoulder-hip)
        scale = np.linalg.norm(xy[5] - xy[11]) + 1e-6
        xy = xy / scale

        # velocity
        if prev_xy is None:
            vel = np.zeros_like(xy)
        else:
            vel = xy - prev_xy

        prev_xy = xy.copy()

        feat = np.concatenate([
            xy.flatten(),      # 34
            conf,              # 17
            vel.flatten()      # 34
        ])

        features.append(feat)

    return np.asarray(features, dtype=np.float32)

시퀀스 라벨 판정 함수

In [148]:
def is_fall_sequence(seq_start, seq_end, fall_start, fall_end):
    """
    overlap 있으면 낙상(1)
    """
    if fall_start is None:
        return 0
    return int(not (seq_end < fall_start or seq_start > fall_end))

전체 시퀀스 생성 루프 (핵심)

In [149]:
def fix_frame_length(pose, target_len=100):
    """
    pose: (F, 17, 3)
    return: (target_len, 17, 3)
    """
    F = pose.shape[0]

    if F > target_len:
        # 균등 샘플링
        idx = np.linspace(0, F - 1, target_len).astype(int)
        pose = pose[idx]
    elif F < target_len:
        pad = np.zeros((target_len - F, 17, 3), dtype=np.float32)
        pose = np.concatenate([pose, pad], axis=0)

    return pose

In [150]:
X_all, y_all = [], []

for _, row in tqdm(df.iterrows(), total=len(df)):
    scene_id = row["scene_id"]

    # 1. pose 로드 & 정규화
    pose = load_pose(scene_id)                      # (100, 17, 3)
    pose = fix_frame_length(pose, target_len=NUM_FRAMES)

    # 2. feature 생성
    feat = make_feature(pose)                       # (100, 85)

    # 3. scene-level 라벨
    label = int(row["class"] == "Y/SY")

    # 4. 슬라이딩 윈도우 (sequence 생성)
    for start in range(0, NUM_FRAMES - T + 1, STRIDE):
        X_all.append(feat[start:start + T])         # (T, 85)
        y_all.append(label)

100%|██████████| 200/200 [00:00<00:00, 970.30it/s]


In [151]:
print(len(X_all), len(y_all))

4200 4200


In [152]:
import numpy as np
np.unique(y_all, return_counts=True)

(array([0, 1]), array([2100, 2100]))

In [153]:
print(X_all[0].shape)   

(40, 85)


In [154]:
X_all = np.asarray(X_all, dtype=np.float32)
y_all = np.asarray(y_all, dtype=np.int64)

print("X:", X_all.shape)
print("y:", y_all.shape)

(DATA_ROOT / "sequence").mkdir(parents=True, exist_ok=True)

np.save(DATA_ROOT / "sequence" / "X.npy", X_all)
np.save(DATA_ROOT / "sequence" / "y.npy", y_all)

X: (4200, 40, 85)
y: (4200,)


In [157]:
X_all = np.asarray(X_all, dtype=np.float32)
y_all = np.asarray(y_all, dtype=np.int64)

print(X_all.shape, y_all.shape)
print(np.unique(y_all, return_counts=True))
print(X_all[0].shape)

(4200, 40, 85) (4200,)
(array([0, 1]), array([2100, 2100]))
(40, 85)
