In [4]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from pathlib import Path

# Machine-specific absolute root directory of the exported arrays; change BASE
# when running on another machine.
BASE   = Path(r"D:\Jabez\golf\fusion\embbeding_data\stgcnpp")
SELECT = "train"          # "train" or "valid"

EMB_PATH   = BASE / SELECT / "embeddings.npy"
LABEL_PATH = BASE / SELECT / "labels.npy"

# 1) Load data
embeddings = np.load(EMB_PATH)
# squeeze drops singleton axes (e.g. (N, 1) -> (N,)); atleast_1d keeps a
# single-sample label file as shape (1,) instead of collapsing it to a 0-d scalar.
labels     = np.atleast_1d(np.squeeze(np.load(LABEL_PATH)))

# 2) Basic info
print(f"Embeddings shape: {embeddings.shape}")
print(f"Labels shape    : {labels.shape}")

# Row i of `embeddings` is expected to pair with labels[i]; fail loudly (after
# the shapes above have been printed) when the two files are out of sync.
if embeddings.shape[0] != labels.shape[0]:
    raise ValueError(
        f"Sample count mismatch: {embeddings.shape[0]} embeddings vs "
        f"{labels.shape[0]} labels"
    )

# 3) Class distribution
unique, counts = np.unique(labels, return_counts=True)
dist_df = pd.DataFrame({"class": unique, "count": counts})
print("\nClass distribution:")
print(dist_df)

# 4) Non-finite value check ------------------------------------
#   -> a sample is flagged when any element along its non-batch axes (axis >= 1)
#      is NaN
sample_axes = tuple(range(1, embeddings.ndim))
nan_rows = np.isnan(embeddings).any(axis=sample_axes)
num_nan  = nan_rows.sum()
print(f"\n⚠️  NaN 포함 샘플 수: {num_nan} / {len(nan_rows)}")
if num_nan:
    print("   예시 인덱스:", np.where(nan_rows)[0][:10])

# The replacement below also zeroes +/-Inf, so report Inf-contaminated samples
# as well; otherwise they would be wiped without any trace. Printed only when
# present, so clean data produces the same output as before.
inf_rows = np.isinf(embeddings).any(axis=sample_axes)
num_inf  = inf_rows.sum()
if num_inf:
    print(f"⚠️  Inf 포함 샘플 수: {num_inf} / {len(inf_rows)}")
    print("   예시 인덱스:", np.where(inf_rows)[0][:10])

# Replace NaN and +/-Inf with 0 so the statistics / plots that follow can run.
clean_emb = np.nan_to_num(embeddings, nan=0.0, posinf=0.0, neginf=0.0)

# 5) Embedding norm statistics (computed on the cleaned copy).
#    Each sample is flattened to one vector, so the norm covers all of its
#    non-batch axes.
norms = np.linalg.norm(clean_emb.reshape(clean_emb.shape[0], -1), axis=1)
print(f"Embedding norms → mean: {norms.mean():.4f}, std: {norms.std():.4f}")


Embeddings shape: (392, 10, 256)
Labels shape    : (392,)

Class distribution:
   class  count
0      0    198
1      1    194

⚠️  NaN 포함 샘플 수: 0 / 392
Embedding norms → mean: 20.0341, std: 2.5646


In [5]:
import numpy as np
import torch
from mmengine.config import Config
from mmengine.runner import load_checkpoint
from mmaction.registry import MODELS
from sklearn.metrics import accuracy_score

# 파일 경로
EMB_PATH   = "train_embeddings.npy"
LABEL_PATH = "train_labels.npy"
CFG_PATH   = r"D:\mmaction2\configs\skeleton\stgcnpp\my_stgcnpp.py"
CKPT_PATH  = r"D:\mmaction2\checkpoints\stgcnpp_8xb16-joint-u100-80e_ntu60-xsub-keypoint-2d_20221228-86e1e77a.pth"

# 1) 임베딩·라벨 로드
embeddings = np.load(EMB_PATH)   # (N,10,256) or (N,256)
labels     = np.load(LABEL_PATH) # (N,)

# 비디오 단위 평균
if embeddings.ndim == 3:
    embeddings = embeddings.mean(axis=1)  # (N,256)

# 2) 체크포인트 불러와서 orig_num_classes 추출
ckpt = torch.load(CKPT_PATH, map_location="cpu")
state_dict = ckpt.get("state_dict", ckpt)
orig_num_classes = state_dict["cls_head.fc.weight"].shape[0]
print(f"원본 num_classes = {orig_num_classes}")

# 3) config 로드 후 cls_head.num_classes 덮어쓰기
cfg = Config.fromfile(CFG_PATH)
cfg.model.cls_head.num_classes = orig_num_classes

# 4) 모델 빌드 및 가중치 로드
model = MODELS.build(cfg.model).to("cpu")
load_checkpoint(model, CKPT_PATH, map_location="cpu", strict=False)

# 5) fc 레이어 가져오기
fc = model.cls_head.fc  # nn.Linear(256 -> orig_num_classes)

# 6) 임베딩 → logits 재생성 & 예측
with torch.no_grad():
    emb_tensor = torch.from_numpy(embeddings).float()       # (N,256)
    logits     = emb_tensor @ fc.weight.t() + fc.bias.unsqueeze(0)  # (N,orig_num_classes)
    preds      = logits.argmax(dim=1).numpy()               # (N,)

# 7) 정확도 계산
acc = accuracy_score(labels, preds)
print(f"🔍 Reconstructed head accuracy: {acc*100:.2f}%")

# 8) 샘플별 비교 (최대 5개)
for i in range(min(5, len(labels))):
    print(f"Sample {i}: GT={labels[i]}, Pred={preds[i]}")


원본 num_classes = 60
Loads checkpoint by local backend from path: D:\mmaction2\checkpoints\stgcnpp_8xb16-joint-u100-80e_ntu60-xsub-keypoint-2d_20221228-86e1e77a.pth
🔍 Reconstructed head accuracy: 0.00%
Sample 0: GT=1, Pred=11
Sample 1: GT=1, Pred=43
Sample 2: GT=1, Pred=10
Sample 3: GT=1, Pred=29
Sample 4: GT=1, Pred=11
