In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')  # Optional: only needed when copying checkpoints/results to Drive

# Create the working directories (raw_txt/ holds the .skeleton files,
# raw_npy/ holds the converted .npy files) and switch into the work root.
!mkdir -p /content/work/read_ntu_rgbd/raw_txt
!mkdir -p /content/work/read_ntu_rgbd/raw_npy
%cd /content/work

In [None]:
!pip -q install gdown aria2

# 공식 README의 skeleton-only 링크들
# https://drive.google.com/open?id=1CUZnBtYwifVXS21yVg62T-vrPVayso5H
# https://drive.google.com/open?id=1tEbuaEqMxAV7dNc4fqu1O4M7mC6CJ50w

!gdown --id 1CUZnBtYwifVXS21yVg62T-vrPVayso5H -O /content/work/ntu120_skeleton_part1.zip
!gdown --id 1tEbuaEqMxAV7dNc4fqu1O4M7mC6CJ50w -O /content/work/ntu120_skeleton_part2.zip


In [None]:
# Extract both archives into separate folders
# (-o: overwrite existing files without prompting, -q: quiet).
!unzip -o -q /content/work/ntu120_skeleton_part1.zip -d /content/work/unpacked1
!unzip -o -q /content/work/ntu120_skeleton_part2.zip -d /content/work/unpacked2

# Move every .skeleton file into raw_txt/ (mv -n: never overwrite a file that is already there)
!find /content/work/unpacked1 -type f -name "*.skeleton" -exec mv -n {} /content/work/read_ntu_rgbd/raw_txt/ \;
!find /content/work/unpacked2 -type f -name "*.skeleton" -exec mv -n {} /content/work/read_ntu_rgbd/raw_txt/ \;

# Check how many files ended up in raw_txt/
!ls /content/work/read_ntu_rgbd/raw_txt | wc -l


In [None]:
# Download the official list of samples with missing skeletons (raw file from GitHub)
!wget -q -O /content/work/read_ntu_rgbd/ntu_rgb120_missings.txt \
  https://raw.githubusercontent.com/shahroudy/NTURGB-D/master/Matlab/NTU_RGBD120_samples_with_missing_skeletons.txt

# Show the first few lines as a sanity check
!head -n 10 /content/work/read_ntu_rgbd/ntu_rgb120_missings.txt


In [None]:
import os, io, numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm

# Input/output locations used by the conversion step.
RAW_TXT = "/content/work/read_ntu_rgbd/raw_txt"
RAW_NPY = "/content/work/read_ntu_rgbd/raw_npy"
MISSINGS = "/content/work/read_ntu_rgbd/ntu_rgb120_missings.txt"

# Load the list of missing/incomplete samples
# (entries use the same naming scheme as the files: SxxxCxxxPxxxRxxxAxxx).
bad = set()
with open(MISSINGS, 'r') as fh:
    for raw_line in fh:
        entry = raw_line.strip()
        # Skip blank lines and lines that start with '#'.
        if not entry or raw_line.startswith('#'):
            continue
        bad.add(entry)

def parse_ntu_skeleton_file(fp):
    """
    Parse one NTU RGB+D ``.skeleton`` text file (simplified parser).

    Bodies are grouped by the body ID found in the first token of each body
    header line. Every per-body array spans all ``nframe`` frames and is
    indexed by frame number; frames in which a body is not listed stay
    zero-filled, so all sequences remain aligned in time.

    Args:
        fp: Path to the ``.skeleton`` file.

    Returns:
        dict with keys:
          file_name          : base name of ``fp`` without its extension
          nbodys             : list with the body count of every frame
          njoints            : 25
          skel_body{idx}     : (nframe, 25, 3) float32, x/y/z
          rgb_body{idx}      : (nframe, 25, 2) float32, colorX/colorY
          depth_body{idx}    : (nframe, 25, 2) float32, depthX/depthY
        ``idx`` enumerates the body IDs in sorted (string) order.

    Raises:
        ValueError: if the file ends before all announced frames/bodies/joints
            were read, if a body announces more than 25 joints, or if a
            numeric field cannot be parsed.
    """
    njoints = 25

    with open(fp, 'r') as f:
        lines = f.read().strip().splitlines()
    it = iter(lines)

    def _next():
        # A truncated file would otherwise leak a message-less StopIteration.
        try:
            return next(it)
        except StopIteration:
            raise ValueError(f"unexpected end of skeleton file: {fp}") from None

    nframe = int(_next())
    # body ID -> frame-indexed array; allocated lazily on first appearance.
    frames_xyz = {}   # body ID -> (nframe, 25, 3)
    frames_rgb = {}   # body ID -> (nframe, 25, 2)
    frames_dep = {}   # body ID -> (nframe, 25, 2)
    nbodys = []

    for fidx in range(nframe):
        nb = int(_next())
        nbodys.append(nb)
        for _ in range(nb):
            # Body header line: only the first token (the body ID) is used,
            # the remaining fields are consumed and ignored.
            body_id = _next().split()[0]
            # The next line holds the joint count (expected: 25).
            nj = int(_next())
            if nj > njoints:
                raise ValueError(
                    f"{fp}: body {body_id} in frame {fidx} has {nj} joints "
                    f"(expected at most {njoints})"
                )

            if body_id not in frames_xyz:
                frames_xyz[body_id] = np.zeros((nframe, njoints, 3), dtype=np.float32)
                frames_rgb[body_id] = np.zeros((nframe, njoints, 2), dtype=np.float32)
                frames_dep[body_id] = np.zeros((nframe, njoints, 2), dtype=np.float32)

            # Views into this frame's slot, so writes land at the right frame index.
            xyz = frames_xyz[body_id][fidx]
            rgb = frames_rgb[body_id][fidx]
            dep = frames_dep[body_id][fidx]

            for j in range(nj):
                vals = _next().split()
                # Layout: x y z depthX depthY colorX colorY oriW oriX oriY oriZ trackingState
                # (orientation = vals[7:11] and tracking state = vals[11] are not stored)
                x, y, z, dx, dy, cx, cy = map(float, vals[0:7])
                xyz[j] = (x, y, z)
                dep[j] = (dx, dy)
                rgb[j] = (cx, cy)

    data = {
        "file_name": os.path.splitext(os.path.basename(fp))[0],
        "nbodys": nbodys,
        "njoints": njoints,
    }
    # Sort the body IDs so that the body index assignment is deterministic.
    for bidx, body_id in enumerate(sorted(frames_xyz.keys())):
        data[f"skel_body{bidx}"]  = frames_xyz[body_id]
        data[f"rgb_body{bidx}"]   = frames_rgb[body_id]
        data[f"depth_body{bidx}"] = frames_dep[body_id]

    return data

def worker(in_name):
    """
    Convert one ``.skeleton`` file from RAW_TXT into a ``.npy`` file in RAW_NPY.

    Args:
        in_name: File name (not a path) inside RAW_TXT, e.g.
            ``SxxxCxxxPxxxRxxxAxxx.skeleton``.

    Returns:
        Tuple ``(in_name, status)`` where status is one of
        ``"skip_missing"`` (sample is listed in ``bad``),
        ``"exists"`` (output file is already present),
        ``"ok"`` (converted and saved), or
        ``"error:<ExceptionType>: <message>"`` (parsing or saving failed).
    """
    stem = os.path.splitext(in_name)[0]  # SxxxCxxxPxxxRxxxAxxx
    if stem in bad:
        return (in_name, "skip_missing")

    in_path  = os.path.join(RAW_TXT, in_name)
    out_path = os.path.join(RAW_NPY, f"{in_name}.npy")
    if os.path.exists(out_path):
        return (in_name, "exists")

    # Write to a temporary file first and rename afterwards: an interrupted run
    # must not leave a truncated .npy behind that a later run reports as "exists".
    tmp_path = out_path + ".tmp"
    try:
        data = parse_ntu_skeleton_file(in_path)
        # Passing an open file object keeps np.save from appending ".npy" to the temp name.
        with open(tmp_path, 'wb') as fh:
            np.save(fh, data, allow_pickle=True)
        os.replace(tmp_path, out_path)
        return (in_name, "ok")
    except Exception as e:
        # Best-effort cleanup of a partially written temp file.
        try:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        except OSError:
            pass
        # Include the exception type: some exceptions carry an empty message.
        return (in_name, f"error:{type(e).__name__}: {e}")

# Collect every raw skeleton file that should be converted.
all_txt = [f for f in os.listdir(RAW_TXT) if f.endswith(".skeleton")]
print("원본 .skeleton 개수:", len(all_txt))

# Make sure the output directory exists even if the setup cell was skipped.
os.makedirs(RAW_NPY, exist_ok=True)

# Per-status counters; every failure is counted under the single "error" key.
results = {"ok":0, "exists":0, "skip_missing":0, "error":0}
failures = []  # (file name, full error status) for later inspection
with ThreadPoolExecutor(max_workers=8) as ex:
    futs = [ex.submit(worker, name) for name in all_txt]
    for fut in tqdm(as_completed(futs), total=len(futs)):
        name, status = fut.result()
        # worker() reports failures as "error:<message>"; without normalising,
        # each distinct message would become its own key and "error" would stay 0.
        if status.startswith("error"):
            failures.append((name, status))
            status = "error"
        results[status] = results.get(status, 0) + 1

print("요약:", results)
# Show a few failures so they can be diagnosed without flooding the output.
for name, detail in failures[:10]:
    print("  ", name, detail)
print("생성된 .npy 개수:", len([f for f in os.listdir(RAW_NPY) if f.endswith(".npy")]))


In [None]:
import os, numpy as np
# Directory holding the converted samples.
root = "/content/work/read_ntu_rgbd/raw_npy"
samples = list(filter(lambda fname: fname.endswith(".npy"), os.listdir(root)))
print("샘플 예시:", samples[0:3])

# Load a single sample; each file stores one pickled dict.
p = os.path.join(root, samples[0])
data = np.load(p, allow_pickle=True).item()
print("keys:", list(data.keys()))
for field in ("file_name", "njoints"):
    print(f"{field}:", data[field])

# Array shapes (XYZ and the two projections) for the first body in the sample.
skel_keys = [name for name in data if name.startswith("skel_body")]
k_skel = skel_keys[0]
k_rgb  = k_skel.replace("skel", "rgb")
k_dep  = k_skel.replace("skel", "depth")
for key in (k_skel, k_rgb, k_dep):
    print(key, data[key].shape)


In [None]:
!cp -r /content/work/read_ntu_rgbd/raw_npy /content/drive/MyDrive/ntu_rgbd_npy_backup