In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

In [2]:
# =========================================================
# step1. Data pre-processing
# =========================================================
folder_path = "./ballet_train/"  # 放骨架 csv 的資料夾
all_poses = []

In [3]:
def parse_point(s):
    """把 ('x','y','z') 字串轉成 float list"""
    s = s.strip("()")
    parts = [float(p.strip(" '")) for p in s.split(",")]
    return parts

def csv_to_pose_array(csv_path):
    """讀取一個 CSV 檔，轉成骨架矩陣 (N_frames, 99)"""
    df = pd.read_csv(csv_path)
    pose_cols = [c for c in df.columns if c != "frame"]
    poses = []
    for i, row in df.iterrows():
        pose = []
        for c in pose_cols:
            pose += parse_point(row[c])
        poses.append(pose)
    return np.array(poses)

In [4]:
print("📂 開始讀取資料夾內所有 csv 檔...")
for file_name in os.listdir(folder_path):
    if file_name.endswith(".csv"):
        file_path = os.path.join(folder_path, file_name)
        poses = csv_to_pose_array(file_path)
        all_poses.append(poses)

📂 開始讀取資料夾內所有 csv 檔...


In [5]:
# 合併所有檔案
all_poses = np.concatenate(all_poses, axis=0)
print("✅ 讀取完成，資料形狀：", all_poses.shape)  # (總幀數, 99)

✅ 讀取完成，資料形狀： (12641, 99)


In [6]:
# 標準化
scaler = StandardScaler()
all_poses = scaler.fit_transform(all_poses)

In [7]:
# =========================================================
# step2. Create Dataset and DataLoader
# =========================================================
class PoseDataset(Dataset):
    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]

dataset = PoseDataset(all_poses)
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

In [8]:
# =========================================================
# step3. Defining the VQ-VAE model
# =========================================================
class VectorQuantizer(nn.Module):
    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.embedding.weight.data.uniform_(-1/num_embeddings, 1/num_embeddings)

    def forward(self, x):
        # x: (batch, latent_dim)
        distances = (
            torch.sum(x**2, dim=1, keepdim=True)
            + torch.sum(self.embedding.weight**2, dim=1)
            - 2 * torch.matmul(x, self.embedding.weight.t())
        )
        encoding_indices = torch.argmin(distances, dim=1)
        quantized = self.embedding(encoding_indices)
        return quantized, encoding_indices

In [9]:
class VQVAE(nn.Module):
    def __init__(self, input_dim=99, hidden_dim=128, latent_dim=32, num_embeddings=64):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim)
        )
        self.vq = VectorQuantizer(num_embeddings, latent_dim)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x):
        z = self.encoder(x)
        z_q, indices = self.vq(z)
        x_recon = self.decoder(z_q)
        return x_recon, indices, z, z_q

In [10]:
# =========================================================
# step4. Training the model
# =========================================================
device = "cuda" if torch.cuda.is_available() else "cpu"
model = VQVAE().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

EPOCHS = 20
print("🚀 開始訓練 VQ-VAE 模型...")

🚀 開始訓練 VQ-VAE 模型...


In [11]:
for epoch in range(EPOCHS):
    total_loss = 0
    for batch in dataloader:
        batch = batch.to(device)
        optimizer.zero_grad()

        x_recon, indices, z, z_q = model(batch)
        recon_loss = loss_fn(x_recon, batch)
        vq_loss = torch.mean((z_q.detach() - z)**2) + 0.25 * torch.mean((z_q - z.detach())**2)
        loss = recon_loss + vq_loss

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {avg_loss:.6f}")

print("✅ 訓練完成！")

Epoch 1/20, Loss: 1.005913
Epoch 2/20, Loss: 0.900204
Epoch 3/20, Loss: 0.764241
Epoch 4/20, Loss: 0.758410
Epoch 5/20, Loss: 0.774953
Epoch 6/20, Loss: 0.789111
Epoch 7/20, Loss: 0.797114
Epoch 8/20, Loss: 0.797204
Epoch 9/20, Loss: 0.798800
Epoch 10/20, Loss: 0.798049
Epoch 11/20, Loss: 0.802491
Epoch 12/20, Loss: 0.810797
Epoch 13/20, Loss: 0.811931
Epoch 14/20, Loss: 0.813884
Epoch 15/20, Loss: 0.814514
Epoch 16/20, Loss: 0.813715
Epoch 17/20, Loss: 0.812585
Epoch 18/20, Loss: 0.814700
Epoch 19/20, Loss: 0.816158
Epoch 20/20, Loss: 0.819582
✅ 訓練完成！


In [12]:
# =========================================================
# step5. Storing model weights
# =========================================================
save_path = "vqvae_ballet.pth"
torch.save(model.state_dict(), save_path)
print(f"💾 模型已儲存至：{save_path}")

💾 模型已儲存至：vqvae_ballet.pth
