## 필요한 모듈 선언 ##

In [1]:
# 필요한 모듈 선언
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import os
import json
from sklearn.preprocessing import LabelEncoder
import math

In [2]:
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
print('device : ', device)

device :  cuda


In [3]:
def get_word_list(start, end):
    folder_path = 'morpheme/01'

    # 단어들을 저장할 리스트
    word_list = []

    # 파일 이름 얻어오기
    file_names = [f for f in os.listdir(folder_path) if f.endswith('.json') and "F_morpheme" in f]

    # 파일 이름을 번호 순서대로 정렬하기
    file_names.sort(key=lambda x: int(x.split('_')[2][4:]))

    for filename in file_names:
        file_path = os.path.join(folder_path, filename)
        
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
            
            # 'data' 키 안의 요소들 순회
            for item in data['data']:
                for attribute in item['attributes']:
                    word_list.append(attribute['name'])

    # 결과 출력 
    #print(len(word_list))
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(word_list)
    # label_mapping 딕셔너리 생성
    folder_to_label = {}
    for i, word in enumerate(word_list):
        for j in range(start, end + 1):  # 01부터 07까지
            folder_name = f"NIA_SL_WORD{str(i+1).zfill(4)}_REAL{str(j).zfill(2)}_F"
            folder_to_label[folder_name] = encoded_labels[i]
    return folder_to_label, label_encoder, word_list

In [4]:
folder_to_label, label_encoder, word_list = get_word_list(1, 1)
print(folder_to_label)

{'NIA_SL_WORD0001_REAL01_F': np.int64(178), 'NIA_SL_WORD0002_REAL01_F': np.int64(1147), 'NIA_SL_WORD0003_REAL01_F': np.int64(1426), 'NIA_SL_WORD0004_REAL01_F': np.int64(486), 'NIA_SL_WORD0005_REAL01_F': np.int64(545), 'NIA_SL_WORD0006_REAL01_F': np.int64(633), 'NIA_SL_WORD0007_REAL01_F': np.int64(1960), 'NIA_SL_WORD0008_REAL01_F': np.int64(882), 'NIA_SL_WORD0009_REAL01_F': np.int64(1458), 'NIA_SL_WORD0010_REAL01_F': np.int64(2101), 'NIA_SL_WORD0011_REAL01_F': np.int64(1581), 'NIA_SL_WORD0012_REAL01_F': np.int64(1640), 'NIA_SL_WORD0013_REAL01_F': np.int64(1680), 'NIA_SL_WORD0014_REAL01_F': np.int64(1833), 'NIA_SL_WORD0015_REAL01_F': np.int64(1739), 'NIA_SL_WORD0016_REAL01_F': np.int64(1485), 'NIA_SL_WORD0017_REAL01_F': np.int64(550), 'NIA_SL_WORD0018_REAL01_F': np.int64(461), 'NIA_SL_WORD0019_REAL01_F': np.int64(1586), 'NIA_SL_WORD0020_REAL01_F': np.int64(2138), 'NIA_SL_WORD0021_REAL01_F': np.int64(1721), 'NIA_SL_WORD0022_REAL01_F': np.int64(2495), 'NIA_SL_WORD0023_REAL01_F': np.int64(1

In [4]:
print(label_encoder.inverse_transform([2632]))

['하는수없다']


## Dataset 클래스 선언 ##

In [5]:
# 임시!!!!!
class SignLanguageDataset(Dataset):
    def __init__(self, data_dir, folder_to_label):
        self.data_dir = data_dir
        self.folder_to_label = folder_to_label
        self.data, self.labels = self.load_data()

    def load_data(self):
        file_list = []
        labels = []
        for folder_name in os.listdir(self.data_dir):
            if folder_name.endswith("F") and folder_name in self.folder_to_label:  # "F"로 끝나는 폴더만 처리
                label = self.folder_to_label[folder_name]
                label = int(label)
                label_name = label_encoder.inverse_transform([label])
                folder_path = os.path.join(self.data_dir, folder_name)
                if os.path.isdir(folder_path):
                    json_files = [os.path.join(folder_path, file_name) for file_name in os.listdir(folder_path) if file_name.endswith('.json')]
                    file_list.append(json_files)
                    labels.append(label)
                    print(f"Label: {label_name},label_num: {label} Folder: {folder_name}, Frame count: {len(json_files)}")
        return file_list, labels
    

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        all_keypoints = []
        
        json_file_list = self.data[index]
        label = self.labels[index]
        for file_path in json_file_list:
            with open(file_path, 'r') as f:
                data = json.load(f)
            frame_data = data['people']
            if frame_data:
                keypoints_2d = np.array(frame_data['face_keypoints_2d'] +
                                        frame_data['pose_keypoints_2d'] +
                                        frame_data['hand_left_keypoints_2d'] +
                                        frame_data['hand_right_keypoints_2d'])
                keypoints_2d = keypoints_2d.reshape(-1, 3)[:, :2].flatten()  # (num_keypoints * 2,)
                
                keypoints_3d = np.array(frame_data['face_keypoints_3d'] +
                                        frame_data['pose_keypoints_3d'] +
                                        frame_data['hand_left_keypoints_3d'] +
                                        frame_data['hand_right_keypoints_3d'])
                keypoints_3d = keypoints_3d.reshape(-1, 4)[:, :3].flatten() 
                
                keypoints = np.concatenate((keypoints_2d, keypoints_3d))
                all_keypoints.append(keypoints)
        
        all_keypoints = np.array(all_keypoints)  # (num_frames, num_keypoints * (2 + 3))
        return torch.tensor(all_keypoints, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

In [6]:
# 리얼리얼 임시
class SignLanguageDataset(Dataset):
    def __init__(self, data_dir, folder_to_label, start, end, label_encoder):
        self.data_dir = data_dir
        self.folder_to_label = folder_to_label
        self.load_num_start = start
        self.load_num_end = end
        self.label_encoder = label_encoder
        self.data, self.labels = self.load_data()

    def load_data(self):
        file_list = []
        labels = []
        for subdir in range(self.load_num_start, self.load_num_end + 1):  # 01~07 디렉토리
            subdir_path = os.path.join(self.data_dir, f'{subdir:02d}')
            for folder_name in os.listdir(subdir_path):
                if folder_name.endswith("F") and folder_name in self.folder_to_label:  # "F"로 끝나는 폴더만 처리
                    label = self.folder_to_label[folder_name]
                    label = int(label)
                    label_name = label_encoder.inverse_transform([label])
                    folder_path = os.path.join(subdir_path, folder_name)
                    if os.path.isdir(folder_path):
                        json_files = [os.path.join(folder_path, file_name) for file_name in os.listdir(folder_path) if file_name.endswith('.json')]
                        file_list.append(json_files)
                        labels.append(label)
                        print(f"Label: {label_name},label_num: {label} Folder: {folder_name}, Frame count: {len(json_files)}")
        return file_list, labels
    

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        all_keypoints = []
        
        json_file_list = self.data[index]
        label = self.labels[index]
        for file_path in json_file_list:
            with open(file_path, 'r') as f:
                data = json.load(f)
            frame_data = data['people']
            if frame_data:
                keypoints_2d = np.array(frame_data['face_keypoints_2d'] +
                                        frame_data['pose_keypoints_2d'] +
                                        frame_data['hand_left_keypoints_2d'] +
                                        frame_data['hand_right_keypoints_2d'])
                keypoints_2d = keypoints_2d.reshape(-1, 3)[:, :2].flatten()  # (num_keypoints * 2,)
                
                keypoints_3d = np.array(frame_data['face_keypoints_3d'] +
                                        frame_data['pose_keypoints_3d'] +
                                        frame_data['hand_left_keypoints_3d'] +
                                        frame_data['hand_right_keypoints_3d'])
                keypoints_3d = keypoints_3d.reshape(-1, 4)[:, :3].flatten() 
                
                keypoints = np.concatenate((keypoints_2d, keypoints_3d))
                all_keypoints.append(keypoints)
        
        all_keypoints = np.array(all_keypoints)  # (num_frames, num_keypoints * (2 + 3))
        return torch.tensor(all_keypoints, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

In [None]:
print(len(dta))

In [7]:
from torch.nn.utils.rnn import pad_sequence
# Collate 함수 정의
def collate_fn(batch):
    keypoints, labels = zip(*batch)
    keypoints = [torch.tensor(k) for k in keypoints]
    labels = torch.tensor(labels)
    keypoints_padded = pad_sequence(keypoints, batch_first=True, padding_value=0)
    lengths = torch.tensor([len(k) for k in keypoints])
    return keypoints_padded, labels, lengths

In [8]:
root_dir = "keypoints"

In [6]:
print(os.listdir(root_dir))
label = int((''.join(filter(str.isdigit, 'NIA_SL_WORD0101_REAL01_L')))[0:4])


['NIA_SL_WORD1731_REAL01_L', 'NIA_SL_WORD2775_REAL01_U', 'NIA_SL_WORD1948_REAL01_R', 'NIA_SL_WORD1083_REAL01_R', 'NIA_SL_WORD2918_REAL01_R', 'NIA_SL_WORD1709_REAL01_D', 'NIA_SL_WORD1075_REAL01_U', 'NIA_SL_WORD0223_REAL01_R', 'NIA_SL_WORD2794_REAL01_R', 'NIA_SL_WORD0687_REAL01_R', 'NIA_SL_WORD0019_REAL01_D', 'NIA_SL_WORD0438_REAL01_L', 'NIA_SL_WORD0293_REAL01_L', 'NIA_SL_WORD2787_REAL01_R', 'NIA_SL_WORD2515_REAL01_U', 'NIA_SL_WORD2600_REAL01_D', 'NIA_SL_WORD1123_REAL01_D', 'NIA_SL_WORD1025_REAL01_U', 'NIA_SL_WORD1923_REAL01_F', 'NIA_SL_WORD1533_REAL01_L', 'NIA_SL_WORD1081_REAL01_L', 'NIA_SL_WORD2814_REAL01_L', 'NIA_SL_WORD2298_REAL01_U', 'NIA_SL_WORD2050_REAL01_D', 'NIA_SL_WORD1890_REAL01_L', 'NIA_SL_WORD0141_REAL01_U', 'NIA_SL_WORD0973_REAL01_F', 'NIA_SL_WORD2832_REAL01_F', 'NIA_SL_WORD0164_REAL01_L', 'NIA_SL_WORD0462_REAL01_U', 'NIA_SL_WORD2981_REAL01_D', 'NIA_SL_WORD2439_REAL01_L', 'NIA_SL_WORD0457_REAL01_D', 'NIA_SL_WORD0958_REAL01_F', 'NIA_SL_WORD1274_REAL01_D', 'NIA_SL_WORD2596_RE

In [9]:
batch_size = 1
dataset = SignLanguageDataset(root_dir, folder_to_label, 1, 1, label_encoder)
dataloader = DataLoader(dataset, batch_size, collate_fn=collate_fn)  
print(dataloader)


Label: ['하는수없다'],label_num: 2632 Folder: NIA_SL_WORD1923_REAL01_F, Frame count: 116
Label: ['못견디다'],label_num: 786 Folder: NIA_SL_WORD0973_REAL01_F, Frame count: 114
Label: ['이백'],label_num: 1994 Folder: NIA_SL_WORD2832_REAL01_F, Frame count: 95
Label: ['칩거'],label_num: 2496 Folder: NIA_SL_WORD0958_REAL01_F, Frame count: 107
Label: ['임기응변'],label_num: 2089 Folder: NIA_SL_WORD2310_REAL01_F, Frame count: 147
Label: ['남원시청'],label_num: 493 Folder: NIA_SL_WORD1857_REAL01_F, Frame count: 135
Label: ['잘못하다'],label_num: 2122 Folder: NIA_SL_WORD1336_REAL01_F, Frame count: 143
Label: ['소불고기'],label_num: 1383 Folder: NIA_SL_WORD0097_REAL01_F, Frame count: 115
Label: ['백억'],label_num: 955 Folder: NIA_SL_WORD2991_REAL01_F, Frame count: 102
Label: ['흑백'],label_num: 2755 Folder: NIA_SL_WORD0349_REAL01_F, Frame count: 101
Label: ['불알'],label_num: 1119 Folder: NIA_SL_WORD0385_REAL01_F, Frame count: 99
Label: ['구'],label_num: 295 Folder: NIA_SL_WORD2640_REAL01_F, Frame count: 89
Label: ['희미'],label_num

In [41]:
print(len(dataloader))

188


In [10]:
for batch_idx, (inputs, labels, lengths) in enumerate(dataloader):
    print(f"Batch {batch_idx}:")
    print(f"Inputs: {inputs}")
    print(f"Labels: {labels}")
    print(f"Lengths: {lengths}")
    print(f"Inputs shape: {inputs.shape}")
    print(f"Labels shape: {labels.shape}")
    
    # 배치 2개만 출력하고 종료
    if batch_idx == 1:
        break

Batch 0:
Inputs: tensor([[[ 9.0718e+02,  2.3556e+02,  9.0718e+02,  ..., -1.8762e-01,
           5.0495e-01,  2.4338e+00],
         [ 9.1722e+02,  2.3588e+02,  9.1722e+02,  ..., -1.2512e-02,
           1.3704e-01,  2.1267e+00],
         [ 9.0717e+02,  2.3335e+02,  9.0717e+02,  ..., -1.9075e-01,
           5.0099e-01,  2.4050e+00],
         ...,
         [ 9.1302e+02,  2.4398e+02,  9.1382e+02,  ...,  5.0804e-02,
          -1.0943e-01,  2.1172e+00],
         [ 9.0551e+02,  2.3500e+02,  9.0551e+02,  ..., -1.9046e-01,
           5.0617e-01,  2.4234e+00],
         [ 9.1141e+02,  2.3443e+02,  9.1054e+02,  ..., -5.4972e-02,
           2.4577e-01,  2.1373e+00]]])
Labels: tensor([2632])
Lengths: tensor([116])
Inputs shape: torch.Size([1, 116, 685])
Labels shape: torch.Size([1])
Batch 1:
Inputs: tensor([[[ 8.9625e+02,  2.1753e+02,  8.9678e+02,  ..., -5.8807e-03,
           4.9475e-02,  2.3519e+00],
         [ 8.9733e+02,  2.1875e+02,  8.9733e+02,  ..., -9.9854e-02,
           1.0286e-01,  2.1635e

  keypoints = [torch.tensor(k) for k in keypoints]


In [43]:
class PositionalEncoding(nn.Module):
    def __init__(self, dim_model, dropout_p, max_len):
        super().__init__()
        
        self.dropout = nn.Dropout(dropout_p)
        
        # Encoding - From formula
        pos_encoding = torch.zeros(max_len, dim_model)
        positions_list = torch.arange(0, max_len, dtype=torch.float).view(-1, 1) # 0, 1, 2, 3, 4, 5
        division_term = torch.exp(torch.arange(0, dim_model, 2).float() * (-math.log(10000.0)) / dim_model) # 1000^(2i/dim_model)

        pos_encoding[:, 0::2] = torch.sin(positions_list * division_term)
        pos_encoding[:, 1::2] = torch.cos(positions_list * division_term)

        # Saving buffer (same as parameter without gradients needed)
        pos_encoding = pos_encoding.unsqueeze(0).transpose(0, 1)
        self.register_buffer("pos_encoding", pos_encoding)
        
    def forward(self, token_embedding: torch.tensor) -> torch.tensor:
        # Residual connection + pos encoding
        return self.dropout(token_embedding + self.pos_encoding[:token_embedding.size(0), :])

## 모델 선언 ##

In [52]:
# 손실 함수 및 옵티마이저 설정
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_layers, num_classes):
        super(TransformerModel, self).__init__()
        self.input_fc = nn.Linear(input_dim, model_dim)
        encoder_layers = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(model_dim, num_classes)
        self.log_softmax = nn.LogSoftmax(dim=1)
        self.relu = nn.ReLU()  # 추가된 ReLU 활성화 함수
    
    def forward(self, x, src_key_padding_mask=None):
        x = self.input_fc(x)
        x = self.relu(x)  # 활성화 함수 적용
        x = self.transformer_encoder(x, src_key_padding_mask=src_key_padding_mask)
        x = x.mean(dim=1)  # 시퀀스 차원 축소
        x = self.fc(x)
        x = self.log_softmax(x)
        return x
    
model = tl.TransformerModel(input_dim, model_dim, num_heads, num_layers, num_classes).to(device)

In [54]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model.to(device)

min_loss = 100
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    batch_count = 0
    for inputs, labels, lengths in dataloader:
        
        mean = torch.mean(inputs)
        std = torch.std(inputs)
        inputs = (inputs - mean) / std
        
        inputs, labels = inputs.to(device), labels.to(device)  # 입력 데이터와 라벨을 GPU로 전송
        
        # 마스킹 생성
        src_key_padding_mask = (inputs.sum(dim=-1) == 0).to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs, src_key_padding_mask=src_key_padding_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        ll = loss.item()
        if ll < min_loss:
            min_loss = ll


    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'Minimum Loss: {min_loss:.4f}')

cuda


  keypoints = [torch.tensor(k) for k in keypoints]


Epoch [1/20], Loss: 7.9369
Epoch [2/20], Loss: 7.1457
Epoch [3/20], Loss: 6.5580
Epoch [4/20], Loss: 9.2144
Epoch [5/20], Loss: 8.8607
Epoch [6/20], Loss: 8.3717
Epoch [7/20], Loss: 8.1646
Epoch [8/20], Loss: 8.1160
Epoch [9/20], Loss: 8.0122
Epoch [10/20], Loss: 7.9000
Epoch [11/20], Loss: 7.8757
Epoch [12/20], Loss: 7.8605
Epoch [13/20], Loss: 7.8351
Epoch [14/20], Loss: 7.8129
Epoch [15/20], Loss: 7.7821
Epoch [16/20], Loss: 7.7583
Epoch [17/20], Loss: 7.7552
Epoch [18/20], Loss: 7.7464
Epoch [19/20], Loss: 7.7331
Epoch [20/20], Loss: 7.7264
Minimum Loss: 6.3876


In [62]:
def create_padding_mask(sequences, pad_token=0):
    return (sequences == pad_token)

# 모델을 평가 모드로 전환
model.eval()

# 평가 데이터를 로드 (학습 데이터와 동일한 데이터로 테스트하는 경우 예제)
#dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

# 평가 지표 초기화
correct = 0
total = 0

# 평가 모드에서 그라디언트 계산 비활성화
with torch.no_grad():
    for inputs, labels, lengths in dataloader:
        # 모델 출력 계산
        inputs, labels = inputs.to(device), labels.to(device)
        
        # 패딩 마스크 생성
        src_key_padding_mask = create_padding_mask(inputs[:,:,0])
        src_key_padding_mask = src_key_padding_mask.to(device)
        
        outputs = model(inputs, src_key_padding_mask=src_key_padding_mask)  

        # 소프트맥스 함수로 확률로 변환
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        
        # 가장 높은 확률을 가진 클래스의 인덱스 구하기
        _, predicted_classes = torch.max(probabilities, 1)
        
        # 정확도 계산
        total += labels.size(0)
        correct += (predicted_classes == labels).sum().item()
        
        # 예측된 클래스 인덱스를 원래 라벨로 변환
        for predicted_class, label in zip(predicted_classes, labels):
            predicted_label = word_list[predicted_class.item()]
            # 출력
            print(f"Actual Label (encoded): {label.item()}")
            print(f"Predicted Label (encoded): {predicted_class.item()}")
            print(f"Predicted Label: {predicted_label}")
            print('-' * 30)
            break
            

# 전체 정확도 출력
accuracy = 100 * correct / total
print(f'Accuracy of the model on the test data: {accuracy:.2f}%')
        

  keypoints = [torch.tensor(k) for k in keypoints]


Actual Label (encoded): 2632
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 721
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 2098
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 1464
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 1553
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 1038
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 2681
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 1643
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Actual Label (encoded): 281
Predicted Label (encoded): 112
Predicted Label: 참외
------------------------------
Act

KeyboardInterrupt: 

In [32]:
print(max(probabilities[12]))

tensor(0.0012, device='cuda:0')


In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# CUDA 런타임 설정
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.train()
model.to(device)

min_loss = 100
num_epochs = 14
num_batches_to_train = 100

for epoch in range(num_epochs):
    
    batch_count = 0
    for inputs, labels in dataloader:
        
        mean = torch.mean(inputs)
        std = torch.std(inputs)
        inputs = (inputs - mean) / std
        
        inputs, labels = inputs.to(device), labels.to(device)  # 입력 데이터와 라벨을 GPU로 전송
        
        # 마스킹 생성
        src_key_padding_mask = (inputs.sum(dim=-1) == 0).to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs, src_key_padding_mask=src_key_padding_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        ll = loss.item()
        if ll < min_loss:
            min_loss = ll

        batch_count += inputs.size(0)
        if batch_count >= num_batches_to_train:
            break

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'Minimum Loss: {min_loss:.4f}')

cuda
Epoch [1/14], Loss: 8.6446
Epoch [2/14], Loss: 5.7826
Epoch [3/14], Loss: 4.2756
Epoch [4/14], Loss: 3.6182
Epoch [5/14], Loss: 3.3103
Epoch [6/14], Loss: 2.9136
Epoch [7/14], Loss: 2.6531
Epoch [8/14], Loss: 2.6265
Epoch [9/14], Loss: 3.1065
Epoch [10/14], Loss: 2.7106
Epoch [11/14], Loss: 3.3941
Epoch [12/14], Loss: 3.4774
Epoch [13/14], Loss: 3.7048
Epoch [14/14], Loss: 3.1528
Minimum Loss: 2.0218


In [36]:
# 모델을 평가 모드로 전환
model.eval()

# 평가 데이터를 로드 (학습 데이터와 동일한 데이터로 테스트하는 경우 예제)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

# 평가 지표 초기화
correct = 0
total = 0

def create_padding_mask(sequences, pad_token=0):
    return (sequences == pad_token)

# 평가 모드에서 그라디언트 계산 비활성화
with torch.no_grad():
    for inputs, labels in dataloader:
        # 모델 출력 계산
        inputs, labels = inputs.to(device), labels.to(device)
        
        # 패딩 마스크 생성
        src_key_padding_mask = create_padding_mask(inputs[:,:,0])
        src_key_padding_mask = src_key_padding_mask.to(device)
        
        outputs = model(inputs, src_key_padding_mask=src_key_padding_mask)
        # 모델 출력 계산
        print(outputs)
        """ 
        # 소프트맥스 함수로 확률로 변환
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        """
        
        # 가장 높은 확률을 가진 클래스의 인덱스 구하기
        _, predicted_class = torch.max(outputs, 1)
        
        # 정확도 계산
        total += labels.size(0)
        correct += (predicted_class == labels).sum().item()
        
        # 예측된 클래스 인덱스를 원래 라벨로 변환
        predicted_label = word_list[predicted_class.item()]
        
        # 출력
        print(f"Actual Label (encoded): {labels.item()}")
        print(f"Predicted Label (encoded): {predicted_class.item()}")
        print(f"Predicted Label: {predicted_label}")
        print('-' * 30)

# 전체 정확도 출력
accuracy = 100 * correct / total
print(f'Accuracy of the model on the test data: {accuracy:.2f}%')
        

tensor([[-12.3956, -12.4071, -12.8809,  ..., -12.7229, -12.9759, -12.5692]],
       device='cuda:0')
Actual Label (encoded): 2632
Predicted Label (encoded): 1012
Predicted Label: 강북구주민지원센터
------------------------------
tensor([[-12.3956, -12.4071, -12.8809,  ..., -12.7229, -12.9759, -12.5692]],
       device='cuda:0')
Actual Label (encoded): 786
Predicted Label (encoded): 1012
Predicted Label: 강북구주민지원센터
------------------------------
tensor([[-12.3956, -12.4071, -12.8809,  ..., -12.7229, -12.9759, -12.5692]],
       device='cuda:0')
Actual Label (encoded): 1994
Predicted Label (encoded): 1012
Predicted Label: 강북구주민지원센터
------------------------------
tensor([[-12.3956, -12.4071, -12.8809,  ..., -12.7229, -12.9759, -12.5692]],
       device='cuda:0')
Actual Label (encoded): 2496
Predicted Label (encoded): 1012
Predicted Label: 강북구주민지원센터
------------------------------
tensor([[-12.3956, -12.4071, -12.8809,  ..., -12.7229, -12.9759, -12.5692]],
       device='cuda:0')
Actual Label (encode

KeyboardInterrupt: 