In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

In [2]:
# -------------------------------
# Step 1: Read the CSV files and convert them into skeleton (pose) vectors
# -------------------------------
data_dir = "./dance_csv/"  # folder that holds the CSV files
all_poses = []  # collects one array per CSV file; concatenated into a single array afterwards

def parse_point(s):
    """Parse one stringified coordinate tuple into a list of floats.

    Accepts text such as ``"(0.1, 0.2, 0.3)"`` or ``"('0.1', '0.2', '0.3')"``.
    Whitespace around the whole cell (for example a trailing newline or a
    leading space left by the CSV writer) is removed before the parentheses
    are stripped, so such cells no longer fail to parse.

    Args:
        s: Text of a single cell; must be a ``str``.

    Returns:
        list[float]: The comma-separated components, in order.

    Raises:
        ValueError: If a component is not a valid float literal.
    """
    # Whitespace must go first: strip("()") alone leaves the parentheses in
    # place when anything (even a space) surrounds them.
    body = s.strip().strip("()")
    # Each component may carry padding and single quotes around the number.
    return [float(part.strip(" \t\r\n'")) for part in body.split(",")]

# Visit the files in sorted order: os.listdir() returns names in arbitrary
# order, which would make the order of the concatenated frames (and every
# sequence derived from it) differ between runs and machines.
for file in sorted(os.listdir(data_dir)):
    if not file.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(data_dir, file))
    # The "frame" column is skipped; every other column is handed to parse_point.
    pose_cols = [c for c in df.columns if c != "frame"]
    # One flat vector per row: the parsed values of all pose columns, in column order.
    poses = [
        [value for cell in row for value in parse_point(cell)]
        for row in df[pose_cols].itertuples(index=False, name=None)
    ]
    all_poses.append(np.array(poses))

# Fail with an explicit message instead of numpy's generic
# "need at least one array to concatenate".
if not all_poses:
    raise ValueError(f"No .csv files found in {data_dir!r}")

# Stack the per-file arrays along the row axis into one array.
all_poses = np.concatenate(all_poses, axis=0)
print("骨架資料 shape:", all_poses.shape)

骨架資料 shape: (271194, 99)


In [3]:
# -------------------------------
# Step 2: Standardization
# -------------------------------
# Fit the scaler on the loaded poses, then apply it to that same array
# (two explicit steps, equivalent to one fit_transform call).
# NOTE(review): the scaler is fitted on the data loaded in this notebook; if
# the model was trained with a separately fitted scaler, that one should
# probably be reused here — confirm.
scaler = StandardScaler()
scaler.fit(all_poses)
all_poses = scaler.transform(all_poses)

In [4]:
# -------------------------------
# Step 3: Dataset / DataLoader
# -------------------------------
class PoseDataset(Dataset):
    """Dataset that serves the rows of an array-like as float32 tensors."""

    def __init__(self, data):
        """Copy ``data`` (anything ``torch.tensor`` accepts) into one float32 tensor."""
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        """Return the number of entries along the first dimension of the stored tensor."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, idx):
        """Return whatever indexing the stored tensor with ``idx`` yields."""
        sample = self.data[idx]
        return sample

# Wrap the poses in a dataset and read them back in their stored order
# (shuffling disabled) in mini-batches of 64.
dataset = PoseDataset(data=all_poses)
dataloader = DataLoader(dataset, shuffle=False, batch_size=64)

In [5]:
# -------------------------------
# Step 4: 載入模型（確保與訓練一致）
# -------------------------------
import torch.nn as nn

class VectorQuantizer(nn.Module):
    """Nearest-neighbour lookup of input vectors in a learnable codebook."""

    def __init__(self, num_embeddings, embedding_dim):
        """Create a ``num_embeddings`` x ``embedding_dim`` codebook.

        The codebook weights are re-drawn uniformly from
        ``[-1/num_embeddings, 1/num_embeddings]``.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        bound = 1 / num_embeddings
        self.embedding.weight.data.uniform_(-bound, bound)

    def forward(self, x):
        """Replace each row of ``x`` by its closest codebook entry.

        Written for 2-D input of shape ``(N, embedding_dim)``.

        Returns:
            tuple: ``(quantized, encoding_indices)`` — the selected codebook
            rows and the index of the selected row for each input row.
        """
        codebook = self.embedding.weight
        # Squared Euclidean distance via ||x||^2 + ||e||^2 - 2 * x.e
        x_sq_norm = torch.sum(x**2, dim=1, keepdim=True)
        code_sq_norm = torch.sum(codebook**2, dim=1)
        cross_term = torch.matmul(x, codebook.t())
        distances = x_sq_norm + code_sq_norm - 2 * cross_term

        encoding_indices = torch.argmin(distances, dim=1)
        return self.embedding(encoding_indices), encoding_indices

class VQVAE(nn.Module):
    """Encoder -> vector quantizer -> decoder model over flat input vectors."""

    def __init__(self, input_dim=99, hidden_dim=128, latent_dim=32, num_embeddings=64):
        """Build the three sub-modules.

        Attribute names and layer positions are kept as ``encoder``, ``vq``
        and ``decoder`` (each MLP being Linear, ReLU, Linear) so that the
        parameter names of a saved state dict still line up.
        """
        super().__init__()
        self.encoder = self._two_layer_mlp(input_dim, hidden_dim, latent_dim)
        self.vq = VectorQuantizer(num_embeddings, latent_dim)
        self.decoder = self._two_layer_mlp(latent_dim, hidden_dim, input_dim)

    @staticmethod
    def _two_layer_mlp(in_features, hidden_features, out_features):
        """Return Linear -> ReLU -> Linear wrapped in an ``nn.Sequential``."""
        return nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Encode ``x``, quantize the latent, and decode the quantized latent.

        Returns:
            tuple: ``(x_recon, indices, z, z_q)`` — the decoder output, the
            codebook indices, the encoder output and its quantized version.
        """
        latent = self.encoder(x)
        quantized, code_indices = self.vq(latent)
        reconstruction = self.decoder(quantized)
        return reconstruction, code_indices, latent, quantized

In [6]:
# Use the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = VQVAE().to(device)
model_path = "vqvae_model.pth"
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while being loaded.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # switch the module to evaluation mode
print("✅ 模型已載入")

✅ 模型已載入


  model.load_state_dict(torch.load(model_path, map_location=device))


In [7]:
# -------------------------------
# Step 5: Collect the codebook index of every pose
# -------------------------------
all_indices = []

# Forward passes only, so gradient tracking is switched off.
with torch.no_grad():
    for pose_batch in dataloader:
        # Of the model's four outputs only the second (the indices) is kept.
        _recon, code_indices, _latent, _quantized = model(pose_batch.to(device))
        all_indices += code_indices.cpu().numpy().tolist()

In [8]:
# -------------------------------
# Step 6: Codebook usage statistics
# -------------------------------
# np.unique yields the distinct indices together with how often each one occurs.
unique, counts = np.unique(all_indices, return_counts=True)
distribution = {index: count for index, count in zip(unique, counts)}

print("📊 Codebook 使用統計 (index: count)：")
for index, count in distribution.items():
    print(f"Index {index}: {count} 次")

📊 Codebook 使用統計 (index: count)：
Index 30: 191656 次
Index 34: 79538 次


In [9]:
# -------------------------------
# Step 7: 將 index 轉成符號
# -------------------------------
# 64 distinct symbols (A-Z, a-z, 0-9, '+', '/'), built once rather than on
# every call. With only the 62 alphanumerics, indices 62 and 63 of a 64-entry
# codebook would wrap around and become indistinguishable from 0 ('A') and 1 ('B').
_SYMBOLS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"


def index_to_symbol(i):
    """Map a codebook index to a single printable character.

    Indices 0-25 map to 'A'-'Z', 26-51 to 'a'-'z', 52-61 to '0'-'9',
    62 to '+' and 63 to '/'.

    Args:
        i: Integer index (a Python ``int`` or any integer-like value).

    Returns:
        str: One character. Indices outside 0-63 wrap around modulo 64, so
        they share a symbol with a smaller index.
    """
    return _SYMBOLS[i % len(_SYMBOLS)]

# Translate every collected codebook index into its symbol.
symbol_sequence = list(map(index_to_symbol, all_indices))

# Only the first 200 symbols are printed as a preview.
preview = "".join(symbol_sequence[:200])
print("符號序列預覽：", preview)

符號序列預覽： eeeeeeeeeeiieeiieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeiiiiii
