## 필요한 모듈 선언 ##

In [None]:
# 필요한 모듈 선언
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import os
import json
from sklearn.preprocessing import LabelEncoder

In [None]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would select 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device : ', device)

In [None]:
folder_path = 'morpheme/01'

# Every word (attribute name) found in the annotation files, in file order.
word_list = []
# WORD number of the file each word_list entry came from (parallel list).
word_numbers = []

# Collect the JSON annotation files whose name contains "F_morpheme".
file_names = [f for f in os.listdir(folder_path) if f.endswith('.json') and "F_morpheme" in f]

# Sort numerically by the digits following "WORD" in the third '_' field.
file_names.sort(key=lambda x: int(x.split('_')[2][4:]))

for filename in file_names:
    file_path = os.path.join(folder_path, filename)
    word_number = int(filename.split('_')[2][4:])

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Walk every attribute of every element stored under the 'data' key.
    for item in data['data']:
        for attribute in item['attributes']:
            word_list.append(attribute['name'])
            word_numbers.append(word_number)

# Report how many words were collected.
print(len(word_list))
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(word_list)

# Build label_mapping: keypoint folder name -> encoded label.
# The key is derived from the WORD number parsed from the file name rather
# than from the running position in word_list, so a file with zero or several
# attributes (or a gap in the numbering) cannot shift the labels of the files
# that follow it. When a file yields several words, the first one is kept.
label_mapping = {}
for word_number, encoded_label in zip(word_numbers, encoded_labels):
    label_mapping.setdefault(f"NIA_SL_WORD{str(word_number).zfill(4)}_REAL01_F", encoded_label)
print(label_mapping)

In [None]:
# Sanity check: decode encoded label 1941 back to its word.
# NOTE(review): the index is hard-coded; presumably this fails when the fitted
# encoder has fewer than 1942 classes - confirm against the data set in use.
print(label_encoder.inverse_transform([1941]))

## Dataset 클래스 선언 ##

In [None]:
# NOTE: flagged as temporary by the original author.
class SignLanguageDataset(Dataset):
    """One sample per labelled folder of per-frame keypoint JSON files.

    Args:
        data_dir: Directory whose sub-folders each hold the frame files of
            one sample.
        folder_to_label: Mapping from folder name to its integer class label.

    Attributes (filled in by ``load_data``):
        data: list with one entry per sample; each entry is the list of JSON
            file paths of that sample, sorted by file name.
        labels: list of integer labels, parallel to ``data``.

    Note:
        ``load_data`` reads the module-level ``label_encoder`` solely to print
        the decoded label name, so that object must exist before an instance
        is created.
    """

    def __init__(self, data_dir, folder_to_label):
        self.data_dir = data_dir
        self.folder_to_label = folder_to_label
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Scan ``data_dir`` and return ``(file_lists, labels)``.

        Only directories whose name ends with "F" and appears in
        ``folder_to_label`` are used. One summary line is printed per folder.
        """
        file_list = []
        labels = []
        # os.listdir() returns entries in arbitrary order; sort so the sample
        # order is reproducible from run to run.
        for folder_name in sorted(os.listdir(self.data_dir)):
            # Only folders ending in "F" that have a known label are processed.
            if not (folder_name.endswith("F") and folder_name in self.folder_to_label):
                continue
            folder_path = os.path.join(self.data_dir, folder_name)
            if not os.path.isdir(folder_path):
                continue

            label = int(self.folder_to_label[folder_name])
            label_name = label_encoder.inverse_transform([label])
            # Sort the frame files as well: the frames are later stacked into
            # one sequence, and an arbitrary listing order would scramble it.
            # (Assumes file names sort in frame order, e.g. zero-padded
            # indices - TODO confirm.)
            json_files = [
                os.path.join(folder_path, file_name)
                for file_name in sorted(os.listdir(folder_path))
                if file_name.endswith('.json')
            ]
            file_list.append(json_files)
            labels.append(label)
            print(f"Label: {label_name},label_num: {label} Folder: {folder_name}, Frame count: {len(json_files)}")
        return file_list, labels

    def __len__(self):
        """Return the number of samples (labelled folders)."""
        return len(self.data)

    def __getitem__(self, index):
        """Load every frame of sample ``index``.

        Returns:
            ``(features, label)`` where ``features`` is a float32 tensor with
            one row per usable frame and ``label`` is a scalar long tensor.
            Each row is the 2D part (first two values of every 3-value group)
            followed by the 3D part (first three values of every 4-value
            group). Frames whose ``'people'`` entry is empty are skipped; if
            no frame is usable the feature tensor is empty.
        """
        all_keypoints = []

        label = self.labels[index]
        for file_path in self.data[index]:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(file_path, 'r', encoding='utf-8') as f:
                frame_data = json.load(f)['people']
            if not frame_data:
                continue

            # 2D keypoints are stored in groups of three values; keep the
            # first two (presumably x, y followed by a confidence score).
            keypoints_2d = np.array(frame_data['face_keypoints_2d'] +
                                    frame_data['pose_keypoints_2d'] +
                                    frame_data['hand_left_keypoints_2d'] +
                                    frame_data['hand_right_keypoints_2d'])
            keypoints_2d = keypoints_2d.reshape(-1, 3)[:, :2].flatten()  # (num_keypoints * 2,)

            # 3D keypoints are stored in groups of four values; keep the
            # first three (presumably x, y, z followed by a confidence score).
            keypoints_3d = np.array(frame_data['face_keypoints_3d'] +
                                    frame_data['pose_keypoints_3d'] +
                                    frame_data['hand_left_keypoints_3d'] +
                                    frame_data['hand_right_keypoints_3d'])
            keypoints_3d = keypoints_3d.reshape(-1, 4)[:, :3].flatten()

            all_keypoints.append(np.concatenate((keypoints_2d, keypoints_3d)))

        all_keypoints = np.array(all_keypoints)  # (num_frames, num_keypoints * (2 + 3))
        return torch.tensor(all_keypoints, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

In [None]:
# Directory that is later passed to SignLanguageDataset as its data_dir.
root_dir = "keypoints/01"

In [None]:
# Show what is stored directly under the keypoint root directory.
print(os.listdir(root_dir))
# Example parse: keep only the digit characters of a folder name, take the
# first four of them and read them as an integer.
label = int(''.join(ch for ch in 'NIA_SL_WORD0101_REAL01_L' if ch.isdigit())[:4])


In [None]:
# Inspect the folder-name -> encoded-label mapping and the integer parsed
# from the example folder name.
print(label_mapping)
print(label)

In [None]:
# Build the dataset from the keypoint folders and wrap it in a loader that
# yields one sample at a time, in dataset order.
dataset = SignLanguageDataset(data_dir=root_dir, folder_to_label=label_mapping)
dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)


In [None]:
# Preview the first two batches only: zip() stops as soon as range(2) is
# exhausted, before a third batch is requested from the loader.
for batch_idx, (inputs, labels) in zip(range(2), dataloader):
    print(
        f"Batch {batch_idx}:",
        f"Inputs: {inputs}",
        f"Labels: {labels}",
        f"Inputs shape: {inputs.shape}",
        f"Labels shape: {labels.shape}",
        sep="\n",
    )

## 모델 선언 ##

In [None]:
# Model hyperparameters
# NOTE(review): num_keypoints is not used by any code visible in this file, and
# 138 * 5 = 690 does not match input_dim (685 = 137 * 5) - confirm which is right.
num_keypoints = 138  # face, pose, hand_left, hand_right keypoints
input_dim = 685 # per-frame feature size: 2D (2) plus 3D (3) coordinates of each keypoint
model_dim = 512  # model (embedding) dimension
num_heads = 16  # number of heads in multi-head attention
num_layers = 8  # number of Transformer layers
num_classes = 2771  # number of output classes

class TransformerModel(nn.Module):
    """Sequence classifier: linear projection, Transformer encoder, mean pooling, linear head.

    Args:
        input_dim: Size of each per-step feature vector.
        model_dim: Width used inside the encoder.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim, model_dim, num_heads, num_layers, num_classes):
        super().__init__()
        # Project every step of the sequence to the encoder width.
        self.input_fc = nn.Linear(in_features=input_dim, out_features=model_dim)
        # batch_first=True: tensors are laid out as (batch, sequence, feature).
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, batch_first=True),
            num_layers=num_layers,
        )
        # Classification head applied to the pooled sequence representation.
        self.fc = nn.Linear(in_features=model_dim, out_features=num_classes)

    def forward(self, x):
        """Map a (batch, sequence, input_dim) tensor to (batch, num_classes) logits."""
        encoded = self.transformer_encoder(self.input_fc(x))
        # Collapse the sequence axis by averaging over it.
        pooled = encoded.mean(dim=1)
        return self.fc(pooled)
    
    

# Instantiate the classifier with the module-level hyperparameters.
model = TransformerModel(input_dim, model_dim, num_heads, num_layers, num_classes)

In [None]:
# Loss function and optimizer: cross-entropy over the model outputs, and Adam
# over all model parameters with a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

model.train()
# Start from +inf so the first observed loss always becomes the minimum
# (a fixed sentinel such as 100 is wrong whenever every loss exceeds it).
min_loss = float('inf')
num_epochs = 100
num_batches_to_train = 100  # number of batches to train on per epoch

for epoch in range(num_epochs):
    batch_count = 0
    # Loss of the most recent batch; stays NaN if the loader yields nothing,
    # so the epoch summary below never touches an undefined name.
    ll = float('nan')
    for inputs, labels in dataloader:

        # Standardize each batch with its own global mean / std. The std is
        # clamped away from zero so a constant input cannot produce inf/NaN.
        mean = torch.mean(inputs)
        std = torch.std(inputs)
        inputs = (inputs - mean) / std.clamp_min(1e-8)

        inputs, labels = inputs.to(device), labels.to(device)  # move inputs and labels to the device

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # Clip the global gradient norm before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        ll = loss.item()
        if ll < min_loss:
            min_loss = ll

        batch_count += 1
        if batch_count >= num_batches_to_train:
            break

    # Reports the loss of the last batch processed in this epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {ll:.4f}')

print(f'Minimum Loss: {min_loss:.4f}')

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Switch the model to evaluation mode.
model.eval()

# Load the evaluation data (here the same dataset that was used for training).
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

# Accuracy counters.
correct = 0
total = 0

# Gradients are not needed during evaluation.
with torch.no_grad():
    for inputs, labels in dataloader:

        # Same per-batch standardization as in training; the std is clamped
        # away from zero so a constant input cannot produce inf/NaN.
        mean = torch.mean(inputs)
        std = torch.std(inputs)
        inputs = (inputs - mean) / std.clamp_min(1e-8)

        inputs, labels = inputs.to(device), labels.to(device)  # move inputs and labels to the device

        # Forward pass.
        outputs = model(inputs)

        # Convert logits to probabilities.
        probabilities = torch.nn.functional.softmax(outputs, dim=1)

        # Index of the most probable class.
        _, predicted_class = torch.max(probabilities, 1)

        # Update the accuracy counters.
        total += labels.size(0)
        correct += (predicted_class == labels).sum().item()

        # Decode the predicted index with the LabelEncoder's class table.
        # The model predicts *encoded* labels, whose order is the encoder's
        # own; indexing word_list (file order) would print the wrong word.
        predicted_index = predicted_class.item()
        if predicted_index < len(label_encoder.classes_):
            predicted_label = label_encoder.classes_[predicted_index]
        else:
            # The output layer may be wider than the set of known classes.
            predicted_label = '<unknown>'

        # Per-sample report.
        print(f"Actual Label (encoded): {labels.item()}")
        print(f"Predicted Label (encoded): {predicted_index}")
        print(f"Predicted Label: {predicted_label}")
        print('-' * 30)


# Overall accuracy; reported as 0 when no sample was evaluated.
accuracy = 100 * correct / total if total else 0.0
print(f'Accuracy of the model on the test data: {accuracy:.2f}%')
        