In [None]:
#!/usr/bin/env python3
import os, sys

# (If needed) allow duplicate OpenMP runtimes to be loaded in this process.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "True"})

# 1) Switch to the mmaction2 root and register it on the module search path.
#    The config and checkpoint paths used by this script are relative, so they
#    resolve against this working directory.
BASE_DIR = r"D:\mmaction2"
os.chdir(BASE_DIR)
sys.path[:0] = [BASE_DIR]

import numpy as np
import torch
import torch.nn as nn
from mmengine.config import Config
from mmengine.runner import Runner, load_checkpoint

# 2) Settings
#    CFG_PATH / CKPT_PATH are relative paths; the dataset and output paths are
#    absolute.
CFG_PATH = "configs/skeleton/stgcnpp/my_stgcnpp.py"
CKPT_PATH = "checkpoints/stgcnpp_8xb16-joint-u100-80e_ntu60-xsub-keypoint-2d_20221228-86e1e77a.pth"
INPUT_PKL = r"D:\golfDataset\dataset\train\crop_pkl\skeleton_dataset_90_10.pkl"
OUTPUT_EMB = r"D:\Jabez\golf\fusion\embbeding_data\stgcnpp\train_embeddings.npy"
OUTPUT_LABELS = r"D:\Jabez\golf\fusion\embbeding_data\stgcnpp\train_labels.npy"
DEVICE = "cuda:0"
# ---------------------------------------------

# 3) Load the config and point the test dataset's annotation file at INPUT_PKL.
cfg = Config.fromfile(CFG_PATH)
if not hasattr(cfg, "test_dataloader"):
    # Fallback for configs that keep the test dataset under `data.test`.
    cfg.data.test.ann_file = INPUT_PKL
else:
    cfg.test_dataloader.dataset.ann_file = INPUT_PKL

# 4) Build the Runner, then load only the model weights from the checkpoint.
#    strict=False tolerates keys that do not match (e.g. a different head).
runner = Runner.from_cfg(cfg)
load_checkpoint(
    runner.model,
    CKPT_PATH,
    strict=False,
    map_location="cpu",
)
# Move the model to the target device and switch it to evaluation mode.
runner.model.to(DEVICE)
runner.model.eval()

# 5) Locate the last nn.Linear module inside cls_head (in modules() order).
linear_layers = [
    layer
    for layer in runner.model.cls_head.modules()
    if isinstance(layer, nn.Linear)
]
last_lin = linear_layers[-1] if linear_layers else None
if last_lin is None:
    raise RuntimeError("cls_head에 nn.Linear 레이어가 없습니다.")

# 6) Test loop: collect one embedding and one label per video.
final_embs = []
final_labels = []

# Feature rows captured by the hook for the video currently being processed.
clip_embs = []


def _hook_fn(module, inp, out):
    """Forward hook that records the input fed to the last Linear layer.

    Args:
        module: The hooked layer (unused).
        inp: Tuple of positional inputs passed to the layer; ``inp[0]`` is the
            feature tensor.
        out: The layer's output (unused).
    """
    feats = inp[0].detach().cpu()
    # Collapse every leading dimension so that each row is one feature vector,
    # however many rows a single forward pass produces.
    clip_embs.append(feats.reshape(-1, feats.shape[-1]))


def _to_model_input(value):
    """Prepare one value of a dict input.

    Tensors are moved to DEVICE (4-D tensors first get a leading dimension);
    anything that is not a tensor is returned unchanged.
    """
    if not torch.is_tensor(value):
        return value
    if value.dim() == 4:
        value = value.unsqueeze(0)
    return value.to(DEVICE)


def _embed_video(model_inputs, samples):
    """Forward every item of ``model_inputs`` and average the captured features.

    Args:
        model_inputs: Iterable of inputs; each one is passed to its own
            forward call.
        samples: Data samples handed to the model alongside each input.

    Returns:
        A 1-D numpy array: the mean over all feature rows the hook captured.

    Raises:
        RuntimeError: If the hooked layer was never called.
    """
    clip_embs.clear()
    for model_input in model_inputs:
        runner.model.forward(model_input, samples, mode="predict")
    if not clip_embs:
        raise RuntimeError("The hooked Linear layer was never called during forward.")
    return torch.cat(clip_embs, dim=0).mean(dim=0).numpy()


# Register the hook once for the whole loop and always remove it, so a failed
# forward pass cannot leave a stale hook attached to the layer.
handle = last_lin.register_forward_hook(_hook_fn)
try:
    with torch.no_grad():
        for batch in runner.test_dataloader:
            data_samples = batch["data_samples"]
            raw_inputs = batch["inputs"]

            if isinstance(raw_inputs, list):
                if len(raw_inputs) == len(data_samples):
                    # One input per data sample: keep every video separate so
                    # each gets its own label and embedding.
                    groups = [
                        ([clip], [sample])
                        for clip, sample in zip(raw_inputs, data_samples)
                    ]
                else:
                    # Otherwise treat all items as clips of the first sample.
                    groups = [(raw_inputs, data_samples)]

                for clips, samples in groups:
                    # Each item gets a leading batch dimension before forward.
                    prepared = (clip.unsqueeze(0).to(DEVICE) for clip in clips)
                    video_emb = _embed_video(prepared, samples)
                    final_labels.append(int(samples[0].gt_label))
                    final_embs.append(video_emb)
            else:
                # A single tensor, or a dict of tensors.
                if torch.is_tensor(raw_inputs):
                    inp = raw_inputs.unsqueeze(0).to(DEVICE)
                else:
                    inp = {k: _to_model_input(v) for k, v in raw_inputs.items()}
                video_emb = _embed_video([inp], data_samples)
                final_labels.append(int(data_samples[0].gt_label))
                final_embs.append(video_emb)
finally:
    handle.remove()

# 7) Save the results
if not final_embs:
    # np.stack on an empty list fails with an unhelpful message; be explicit.
    raise RuntimeError("No embeddings were collected; nothing to save.")

emb_array = np.stack(final_embs, axis=0)  # one row per collected video
label_array = np.array(final_labels, dtype=np.int64)  # (N,)

# np.save does not create missing parent directories, so create them first.
for out_path in (OUTPUT_EMB, OUTPUT_LABELS):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

np.save(OUTPUT_EMB, emb_array)
np.save(OUTPUT_LABELS, label_array)

print(f"✅ Saved embeddings: {emb_array.shape} → {OUTPUT_EMB}")
print(f"✅ Saved labels:     {label_array.shape} → {OUTPUT_LABELS}")


07/20 01:11:10 - mmengine - INFO - 
------------------------------------------------------------
System environment:
    sys.platform: win32
    Python: 3.8.20 (default, Oct  3 2024, 15:19:54) [MSC v.1929 64 bit (AMD64)]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 423575249
    GPU 0: NVIDIA GeForce RTX 4060 Laptop GPU
    CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6
    NVCC: Cuda compilation tools, release 12.6, V12.6.20
    MSVC: n/a, reason: fileno
    PyTorch: 2.1.2+cu121
    PyTorch compiling details: PyTorch built with:
  - C++ Version: 199711
  - MSVC 192930151
  - Intel(R) Math Kernel Library Version 2020.0.2 Product Build 20200624 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 2019
  - LAPACK is enabled (usually provided by MKL)
  - CPU capability usage: AVX512
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=c

In [None]:
#!/usr/bin/env python3
import os, sys

# (If needed) allow duplicate OpenMP runtimes to be loaded in this process.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "True"})

# 1) Switch to the mmaction2 root and register it on the module search path.
#    The config and checkpoint paths used by this script are relative, so they
#    resolve against this working directory.
BASE_DIR = r"D:\mmaction2"
os.chdir(BASE_DIR)
sys.path[:0] = [BASE_DIR]

import numpy as np
import torch
import torch.nn as nn
from mmengine.config import Config
from mmengine.runner import Runner, load_checkpoint

# 2) Settings
#    CFG_PATH / CKPT_PATH are relative paths; the dataset and output paths are
#    absolute.
CFG_PATH = "configs/skeleton/stgcnpp/my_stgcnpp.py"
CKPT_PATH = "checkpoints/stgcnpp_8xb16-joint-u100-80e_ntu60-xsub-keypoint-2d_20221228-86e1e77a.pth"
INPUT_PKL = r"D:\golfDataset\dataset\train\crop_pkl\skeleton_dataset_90_10.pkl"
OUTPUT_EMB = r"D:\Jabez\golf\fusion\embbeding_data\stgcnpp\train_embeddings.npy"
OUTPUT_LABELS = r"D:\Jabez\golf\fusion\embbeding_data\stgcnpp\train_labels.npy"
DEVICE = "cuda:0"
# ---------------------------------------------

# 3) Load the config and point the test dataset's annotation file at INPUT_PKL.
cfg = Config.fromfile(CFG_PATH)
if not hasattr(cfg, "test_dataloader"):
    # Fallback for configs that keep the test dataset under `data.test`.
    cfg.data.test.ann_file = INPUT_PKL
else:
    cfg.test_dataloader.dataset.ann_file = INPUT_PKL

# 4) Build the Runner, then load only the model weights from the checkpoint.
#    strict=False tolerates keys that do not match (e.g. a different head).
runner = Runner.from_cfg(cfg)
load_checkpoint(
    runner.model,
    CKPT_PATH,
    strict=False,
    map_location="cpu",
)
# Move the model to the target device and switch it to evaluation mode.
runner.model.to(DEVICE)
runner.model.eval()

# 5) Locate the last nn.Linear module inside cls_head (in modules() order).
linear_layers = [
    layer
    for layer in runner.model.cls_head.modules()
    if isinstance(layer, nn.Linear)
]
last_lin = linear_layers[-1] if linear_layers else None
if last_lin is None:
    raise RuntimeError("cls_head에 nn.Linear 레이어가 없습니다.")

# 6) Test loop: collect one embedding and one label per video.
final_embs = []
final_labels = []

# Rows captured by the hook for the video currently being processed.
clip_embs = []


def _hook_fn(_, __, out):
    """Forward hook that records the output of the last Linear layer.

    NOTE(review): this stores the layer's *output*, so the saved vectors have
    the layer's output width. The checkpoint is loaded with strict=False, so
    confirm this layer's weights were actually loaded before relying on the
    saved values.

    Args:
        _: The hooked layer (unused).
        __: Tuple of positional inputs passed to the layer (unused).
        out: The layer's output tensor.
    """
    rows = out.detach().cpu()
    # Collapse every leading dimension so that each row is one vector,
    # however many rows a single forward pass produces.
    clip_embs.append(rows.reshape(-1, rows.shape[-1]))


def _embed_video(model_inputs, samples):
    """Forward every item of ``model_inputs`` and average the captured rows.

    Args:
        model_inputs: Iterable of inputs; each one is passed to its own
            forward call.
        samples: Data samples handed to the model alongside each input.

    Returns:
        A 1-D numpy array: the mean over all rows the hook captured.

    Raises:
        RuntimeError: If the hooked layer was never called.
    """
    clip_embs.clear()
    for model_input in model_inputs:
        runner.model.forward(model_input, samples, mode="predict")
    if not clip_embs:
        raise RuntimeError("The hooked Linear layer was never called during forward.")
    return torch.cat(clip_embs, dim=0).mean(dim=0).numpy()


# Register the hook once for the whole loop and always remove it, so a failed
# forward pass cannot leave a stale hook attached to the layer.
handle = last_lin.register_forward_hook(_hook_fn)
try:
    with torch.no_grad():
        for batch in runner.test_dataloader:
            data_samples = batch["data_samples"]
            raw_inputs = batch["inputs"]

            if isinstance(raw_inputs, list):
                if len(raw_inputs) == len(data_samples):
                    # One input per data sample: keep every video separate so
                    # each gets its own label and embedding.
                    groups = [
                        ([clip], [sample])
                        for clip, sample in zip(raw_inputs, data_samples)
                    ]
                else:
                    # Otherwise treat all items as clips of the first sample.
                    groups = [(raw_inputs, data_samples)]

                for clips, samples in groups:
                    # Each item gets a leading batch dimension before forward.
                    prepared = (clip.unsqueeze(0).to(DEVICE) for clip in clips)
                    video_emb = _embed_video(prepared, samples)
                    final_labels.append(int(samples[0].gt_label))
                    final_embs.append(video_emb)
            else:
                # A single tensor: add a leading dimension only when it is 4-D.
                if raw_inputs.dim() == 4:
                    inp = raw_inputs.unsqueeze(0).to(DEVICE)
                else:
                    inp = raw_inputs.to(DEVICE)
                video_emb = _embed_video([inp], data_samples)
                final_labels.append(int(data_samples[0].gt_label))
                final_embs.append(video_emb)
finally:
    handle.remove()

# 7) Save the results
if not final_embs:
    # np.stack on an empty list fails with an unhelpful message; be explicit.
    raise RuntimeError("No embeddings were collected; nothing to save.")

emb_array = np.stack(final_embs, axis=0)  # one row per collected video
label_array = np.array(final_labels, dtype=np.int64)  # (N,)

# np.save does not create missing parent directories, so create them first.
for out_path in (OUTPUT_EMB, OUTPUT_LABELS):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

np.save(OUTPUT_EMB, emb_array)
np.save(OUTPUT_LABELS, label_array)

print(f"✅ Saved embeddings: {emb_array.shape} → {OUTPUT_EMB}")
print(f"✅ Saved labels:     {label_array.shape} → {OUTPUT_LABELS}")
