In [1]:
## 데이터에서 키포인트를 제외하고 앵글만으로 학습 
## 레이어 깊게 만듦 num_layers=2 => 6

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

In [2]:
# prompt 전체 출력되게
np.set_printoptions(threshold=np.inf, linewidth=np.inf)

In [3]:
# 데이터 로드드
data = np.load('../data/seq1~3000_60fps_1741667844.npy')
data.shape

### label - idx mapping정보 가져오기
import pickle
with open('../data/label_to_idx.pickle', 'rb') as f:
    label_to_idx = pickle.load(f)
print(label_to_idx)

{np.str_('0'): 0, np.str_('1'): 1, np.str_('10'): 2, np.str_('100'): 3, np.str_('1000'): 4, np.str_('10000'): 5, np.str_('11'): 6, np.str_('112'): 7, np.str_('119'): 8, np.str_('12'): 9, np.str_('13'): 10, np.str_('14'): 11, np.str_('15'): 12, np.str_('16'): 13, np.str_('17'): 14, np.str_('18'): 15, np.str_('19'): 16, np.str_('2'): 17, np.str_('20'): 18, np.str_('21'): 19, np.str_('22'): 20, np.str_('23'): 21, np.str_('24'): 22, np.str_('25'): 23, np.str_('26'): 24, np.str_('27'): 25, np.str_('28'): 26, np.str_('29'): 27, np.str_('3'): 28, np.str_('30'): 29, np.str_('31'): 30, np.str_('32'): 31, np.str_('33'): 32, np.str_('34'): 33, np.str_('35'): 34, np.str_('36'): 35, np.str_('37'): 36, np.str_('38'): 37, np.str_('39'): 38, np.str_('4'): 39, np.str_('40'): 40, np.str_('41'): 41, np.str_('42'): 42, np.str_('43'): 43, np.str_('44'): 44, np.str_('45'): 45, np.str_('46'): 46, np.str_('47'): 47, np.str_('48'): 48, np.str_('49'): 49, np.str_('5'): 50, np.str_('50'): 51, np.str_('51'): 52, 

In [4]:
# 라벨 값 분리
x_data = data[:,:,168:-1] # 21*4 + 21*4 = 168개의 키포인트 제외
labels = data[:, 0, -1]

print(x_data.shape)
print(labels.shape)

(159020, 60, 30)
(159020,)


In [5]:
# train test data split
from sklearn.model_selection import train_test_split
x_data = x_data.astype(np.float32)
y_data = labels.astype(np.float32)

x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size = 0.1, random_state=42)

print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)


(143118, 60, 30) (143118,)
(15902, 60, 30) (15902,)


In [6]:
# 네트워크 변수 설정

batch_size = 64
seq_len = 60
num_angles = 30
num_classes = len(label_to_idx)

In [7]:
# 데이터 로더
from torch.utils.data import DataLoader, Dataset


class NumpyToTensorDataset(Dataset):
    def __init__(self, x, y):
        self.x = x  # numpy 배열
        self.y = y  # numpy 배열

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        x_item = torch.tensor(self.x[idx], dtype=torch.float32)  # numpy → tensor 변환
        y_item = torch.tensor(self.y[idx], dtype=torch.long)  # numpy → tensor 변환
        return x_item, y_item

train_dataset = NumpyToTensorDataset(x_train, y_train)
test_dataset = NumpyToTensorDataset(x_val, y_val)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [None]:
# 모델 초기화

class Transformer(nn.Module):
    def __init__(self, num_angles, num_classes, d_model=128, num_heads=4, num_layers=6, dropout=0.1):
        super().__init__()
        self.embedding = nn.Linear(num_angles, d_model)# 각 프레임의 앵글 값을 d_model 차원으로 변환
        self.pos_encoder = nn.Parameter(torch.zeros(1,seq_len, d_model)) # 위치 인코딩

        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dropout=dropout) 
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        
        self.fc = nn.Linear(d_model, num_classes) 

    def forward(self, x):
        x = self.embedding(x) + self.pos_encoder # B, T, d_model
        x = self.transformer_encoder(x) # B,T, d_model
        x = x.mean(dim=1) # 전체 시퀀스에 대한 평균 (B, d_model)
        return self.fc(x)  # (B, num_classes)
    
num_layers=2

In [9]:
import torch.optim.lr_scheduler as lr_scheduler

model = Transformer(num_angles=num_angles, num_classes=num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)
lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)


best =0
# 간단한 학습 과정
epochs = 200

cpu




In [10]:
### setting wandb ###
wandb.init(project="sign-language-transformer3",

           config = {
               "batch_size": batch_size,
               "epochs": epochs,
               "learning_rate" : lr
           })
wandb.watch(model, log="all")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mhjkim1770[0m ([33mhjkim1770-gachon-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [11]:


for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for x, y in train_dataloader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

    test_loss = 0
    val_accuracy =0
    with torch.no_grad():
        model.eval()
        correct = 0
        total = 0
        acc = []
        for x, y in test_dataloader:
            x, y = x.to(device), y.to(device)

            outputs = model(x)
            _,predicted = torch.max(outputs.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            test_loss += criterion(outputs, y).item()
            acc.append(100*correct/total)
            val_accuracy = 100*correct/total
    test_loss = test_loss/total
    scheduler.step(test_loss)


        
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
    print('\t [Test] : Average loss : {:.4f}, Accuracy : {}/{}({:.0f}%)\n'
                      .format(test_loss,correct,total,val_accuracy))
    wandb.log({
        "train_loss": loss.item(),
        "learning rate": scheduler.get_last_lr()[0],
        "val_loss": test_loss,
        "val_accuracy": val_accuracy
    })

    if val_accuracy > best:
        best = val_accuracy
        torch.save(model.state_dict(), "../model/best_model.pth")
        

  y_item = torch.tensor(self.y[idx], dtype=torch.long)  # numpy → tensor 변환


Epoch 1, Loss: 0.3081, LR: 0.001000
	 [Test] : Average loss : 0.7338, Accuracy : 12852/15902(81%)

Epoch 2, Loss: 0.0259, LR: 0.001000
	 [Test] : Average loss : 0.7769, Accuracy : 12299/15902(77%)



KeyboardInterrupt: 

In [None]:
# 확인
from sklearn.metrics import confusion_matrix

y_true = []
y_pred = []

loaded_model = Transformer(num_angles=num_angles, num_classes=num_classes)
loaded_model.load_state_dict(torch.load("../model/transformer_60fps.pth", map_location=torch.device('cpu')))
test_loss = 0
with torch.no_grad():
    loaded_model.eval()
    correct = 0
    total = 0
    acc = []
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)

        outputs = loaded_model(x)
        _,predicted = torch.max(outputs.data, 1)
        y_true.append(y)
        y_pred.append(predicted)
        total += y.size(0)
        correct += (predicted == y).sum().item()
        test_loss += criterion(outputs, y).item()
        acc.append(100*correct/total)
        val_accuracy = 100*correct/total
    
    test_loss = test_loss/total
    print('\t [Test] : Average loss : {:.4f}, Accuracy : {}/{}({:.0f}%)\n'
                    .format(test_loss,correct,total,val_accuracy))
    
    y_true = torch.cat(y_true, dim=0)
    y_pred = torch.cat(y_pred, dim=0)

    cm = confusion_matrix(y_true.numpy(), y_pred.numpy())
    print(cm)


  y_item = torch.tensor(self.y[idx], dtype=torch.long)  # numpy → tensor 변환


	 [Test] : Average loss : 1.0147, Accuracy : 12784/15902(80%)

[[29  0  0 ...  0  0  0]
 [ 0 17  0 ...  0  0  0]
 [ 0  0 46 ...  0  0  0]
 ...
 [ 0  0  0 ...  7  0  0]
 [ 0  0  0 ...  0 11  0]
 [ 0  0  0 ...  0  0 32]]


In [None]:
print(cm) 

[[29  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0