In [2]:
## 모듈 로드
import pandas as pd
from konlpy.tag import Okt
from collections import Counter
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import optim
from torch import nn

import os
import json
import re
import string

In [3]:
class reviewClassifierModel(nn.Module):
    def __init__(self, n_vocab, hidden_dim, embedding_dim, n_classes,
                 n_layers, dropout=0.5, bidirectional=True) -> None:
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,         # num_embeddings = vocab이 들어감
            embedding_dim=embedding_dim,
            padding_idx=0
        )
        self.model = nn.LSTM(
            input_size = embedding_dim,         # Input의 사이즈에 해당하는 수
            hidden_size=hidden_dim,             # 은닉층의 사이즈에 해당하는 수
            num_layers=n_layers,                # RNN의 은닉층 레이어 개수, default = 1
            bidirectional=bidirectional,        # bidrectional True일시 양방향 RNN, default = False
            dropout=dropout,                    # dropout 비율설정 기본값 0
            batch_first=True,                   # True일 경우 Output 사이즈는 (batch, seq, feature) 기본값 False
        )
        if bidirectional:
            self.classifier1 = nn.Linear(hidden_dim*2,n_classes)
            self.classifier2 = nn.Linear(hidden_dim*2,1)
        else:
            self.classifier1 = nn.Linear(hidden_dim,n_classes)
            self.classifier2 = nn.Linear(hidden_dim,1)
        self.dropout = nn.Dropout(dropout)

    def forward(self,inputs):
        embeddings = self.embedding(inputs)
        output, _ = self.model(embeddings)
        last_output = output[:, -1, :]
        last_output = self.dropout(last_output)
        classesd = self.classifier1(last_output)
        logits = self.classifier2(last_output)

        # LogSoftmax 적용
        classesd = nn.LogSoftmax(dim=1)(classesd)  # 다중 클래스 출력에 LogSoftmax 적용

        return classesd, logits

In [6]:
TRAIN_PATH = './DATA/Training/'
# 여러 폴더 경로를 리스트로 저장
folder_paths = os.listdir(TRAIN_PATH)

# 빈 데이터프레임 리스트 생성
dataframes = []

# 각 폴더 내의 JSON 파일을 읽어와 데이터프레임으로 변환
for folder_path in folder_paths:
    FOLDER_PATH = TRAIN_PATH+folder_path
    print(f"Processing folder: {folder_path}")
    
    # 폴더 내의 모든 JSON 파일 리스트
    json_files = [file for file in os.listdir(FOLDER_PATH) if file.endswith('.json')]

    for file in json_files:
        file_path = os.path.join(FOLDER_PATH, file)
        print(f"Loading file: {file_path}")
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                
                # 파일 내용 확인 및 데이터프레임으로 변환
                if data:
                    # Aspects만 추출
                    for review in data:
                        aspects = pd.json_normalize(review.get('Aspects'))
                        dataframes.append(aspects)
                else:
                    print(f"No data found in {file}")
                    
        except json.JSONDecodeError:
            print(f"Error loading {file}: Invalid JSON")

# 데이터프레임 결합
if dataframes:
    final_dataframe = pd.concat(dataframes, ignore_index=True)
    print(final_dataframe)
else:
    print("No valid dataframes to concatenate.")
final_dataframe.to_csv('./DATA/train.csv')

Processing folder: TL_쇼핑몰_01.패션_1-1.여성의류
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(1).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(10).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(100).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(101).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(102).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(103).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(104).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(105).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(106).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(107).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(108).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(109).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(11).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.

In [7]:
Val_PATH = './DATA/Validation/'
# 여러 폴더 경로를 리스트로 저장
folder_paths = os.listdir(Val_PATH)

# 빈 데이터프레임 리스트 생성
dataframes = []

# 각 폴더 내의 JSON 파일을 읽어와 데이터프레임으로 변환
for folder_path in folder_paths:
    FOLDER_PATH = Val_PATH+folder_path
    print(f"Processing folder: {folder_path}")
    
    # 폴더 내의 모든 JSON 파일 리스트
    json_files = [file for file in os.listdir(FOLDER_PATH) if file.endswith('.json')]

    for file in json_files:
        file_path = os.path.join(FOLDER_PATH, file)
        print(f"Loading file: {file_path}")
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                
                # 파일 내용 확인 및 데이터프레임으로 변환
                if data:
                    # Aspects만 추출
                    for review in data:
                        aspects = pd.json_normalize(review.get('Aspects'))
                        dataframes.append(aspects)
                else:
                    print(f"No data found in {file}")
                    
        except json.JSONDecodeError:
            print(f"Error loading {file}: Invalid JSON")

# 데이터프레임 결합
if dataframes:
    final_dataframe = pd.concat(dataframes, ignore_index=True)
    print(final_dataframe)
else:
    print("No valid dataframes to concatenate.")
final_dataframe.to_csv('./DATA/test.csv')

Processing folder: VL_쇼핑몰_01.패션_1-1.여성의류
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(156).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(157).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(158).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(159).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(160).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(161).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(162).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(163).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(164).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(165).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(166).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(167).json
Loading file: ./DATA/Validation/VL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(168).json
Loading fi

In [12]:
trainDF=pd.read_csv(r'C:\Users\MSI\Desktop\TORCH_NLP\1010\DATA\train.csv', usecols=[1,2,4])
testDF=pd.read_csv(r'C:\Users\MSI\Desktop\TORCH_NLP\1010\DATA\test.csv',usecols=[1,2,4])

In [13]:
# 데이터 인코딩 함수
def data_encoding(DF):
    labelCD = DF.Aspect.unique().tolist()                   # Aspect 컬럼의 유니크 값 리스트 
    DF['Aspect'] = DF['Aspect'].map(lambda x: labelCD.index(x))         # 다중 분류 라벨링 인코딩
    DF.loc[DF['SentimentPolarity'] == -1, 'SentimentPolarity'] = 0      # 2진 분류 인코딩
    return DF, labelCD

In [14]:
# 단어사전 만드는 함수
def build_vocab(corpus, n_vocab, special_tokens):
    counter = Counter()                                     # Counter 인스턴스 생성
    for tokens in corpus:                                   # 입력받은 corpus로 카운터 모델 초기화
        counter.update(tokens)
    vocab = special_tokens.copy()                           
    for token, count in counter.most_common(n_vocab):       # 상위 중복 언어 단어사전에 추가
        vocab.append(token)
    return vocab


In [15]:
# 패딩함수
def pad_sequences(sequences, max_length, pad_value):
    result = list()
    for sequence in sequences:                              
        sequence = sequence[:max_length]                    # max_length 만큼 자르기
        pad_length = max_length - len(sequence)             # max_length보다 단어가 적다면
        padded_sequence = sequence + [pad_value] * pad_length   # 정해진 수 채우기
        result.append(padded_sequence)
    return np.asarray(result)


In [16]:
# 자연어 인코딩 함수
def encoding_ids(token_to_id, tokens, unk_id):
    return [
        [token_to_id.get(token, unk_id) for token in review] for review in tokens
    ]   # 자연어 인코딩


In [17]:
# 학습함수
def model_train(model, datasets, cl_criterion, bn_criterion, optimizer, device, interval):
    model.train()
    losses = []

    for step, (input_ids, labels) in enumerate(datasets):
        input_ids = input_ids.to(device)                    # 인풋데이터
        cl_labels = labels[:, 0].to(device)                 # 라벨 다중분류
        bn_labels = labels[:, 1].to(device).float()         # 라벨 2진분류  (float형)

        # Forward pass
        classesd, logits = model(input_ids)

        # Calculate losses
        loss_cl = cl_criterion(classesd, cl_labels)         # 
        loss_bn = bn_criterion(logits.squeeze(), bn_labels) # 
        loss = loss_cl + loss_bn
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % interval == 0:
            print(f'Train Loss {step} : {np.mean(losses)}')


In [18]:
# test 함수

def model_test(model, datasets, cl_criterion, bn_criterion, device, epoch, results_df):
    model.eval()
    losses = []
    cl_score = []
    bn_score = []

    with torch.no_grad(): 
        for step, (input_ids, labels) in enumerate(datasets):
            input_ids = input_ids.to(device)
            cl_labels = labels[:, 0].to(device).long()
            bn_labels = labels[:, 1].to(device).float() 

            # Forward pass
            classesd, logits = model(input_ids)

            # Calculate losses
            loss_cl = cl_criterion(classesd, cl_labels)
            loss_bn = bn_criterion(logits.squeeze(), bn_labels)
            loss = loss_cl + loss_bn
            losses.append(loss.item())
            
            # Calculate class accuracy
            cl_predictions = torch.argmax(torch.softmax(classesd, dim=1), dim=1)  # 다중 클래스 예측
            cl_score.extend(cl_predictions.eq(cl_labels).cpu().numpy())  # 정확도 계산
            
            # Calculate binary accuracy
            bn_predictions = (torch.sigmoid(logits) > 0.5).int().squeeze()  # 이진 예측
            bn_score.extend(bn_predictions.eq(bn_labels.int()).cpu().numpy())  # 정확도 계산
        
        # 정확도 계산
        cl_accuracy = np.mean(cl_score)
        bn_accuracy = np.mean(bn_score)
        
        print(f'Epoch {epoch} - Val Loss: {np.mean(losses)}, bn_score Val Accuracy: {bn_accuracy}, cl_score Val Accuracy: {cl_accuracy}')


In [83]:
# 실행함수
def main():
    N_VOCAB = 5000
    MAX_LENGTH = 10
    EPOCHS =100
    INTERVAL = 500
    BATCH_SIZE = 32
    LR = 0.001
    special_tokens = ['<pad>', '<unk>']

    trainDF, testDF = load_data('./DATA/Train_Fashion_reivew.csv','./DATA/Val_Fashion_reivew.csv')

    trainDF, aspectCD = data_encoding(trainDF)
    testDF, _ = data_encoding(testDF)

    ## 토큰화 및 불용어 처리 ------------------------------------------------------------------------------------------------------------
    punc=string.punctuation

    for p in punc:
        trainDF['SentimentText'] = trainDF['SentimentText'].str.replace(p, '')
        testDF['SentimentText']=testDF['SentimentText'].str.replace(p,'')

    m=re.compile('[^ ㄱ-ㅣ가-힣]+')

    trainDF['SentimentText']=trainDF['SentimentText'].apply(lambda x: m.sub(' ', x))
    testDF['SentimentText']=testDF['SentimentText'].apply(lambda x: m.sub(' ', x))

    stop_word='./stopwords.txt'

    with open(stop_word, 'r', encoding='utf-8') as f:
        stop_words = [line.strip() for line in f]


    tokenizer = Okt()
    train_tokens = [[token for token in tokenizer.morphs(text) if token not in stop_words] for text in trainDF['SentimentText']]
    test_tokens = [[token for token in tokenizer.morphs(text) if token not in stop_words] for text in testDF['SentimentText']]
    # -------------------------------------------------------------------------------------------------------------------------------


    vocab = build_vocab(train_tokens, N_VOCAB, special_tokens)
    token_to_id = {token: idx for idx, token in enumerate(vocab)}
    id_to_token = {idx: token for idx, token in enumerate(vocab)}

    pad_id = token_to_id['<pad>']
    unk_id = token_to_id['<unk>']
    train_ids = encoding_ids(token_to_id, train_tokens, unk_id)
    test_ids = encoding_ids(token_to_id, test_tokens, unk_id)
    train_ids = pad_sequences(train_ids, MAX_LENGTH, pad_id)
    test_ids = pad_sequences(test_ids, MAX_LENGTH, pad_id)

    # 텐서화
    train_ids = torch.tensor(train_ids, dtype=torch.long)
    test_ids = torch.tensor(test_ids, dtype=torch.long)

    # 레이블 텐서화
    train_labels = torch.tensor(list(zip(trainDF['Aspect'].values, trainDF['SentimentPolarity'].values)), dtype=torch.long)
    test_labels = torch.tensor(list(zip(testDF['Aspect'].values, testDF['SentimentPolarity'].values)), dtype=torch.float32)

    # 데이터셋 생성
    train_dataset = TensorDataset(train_ids, train_labels)
    test_dataset = TensorDataset(test_ids, test_labels)

    # 데이터 로더 생성
    train_loader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, BATCH_SIZE, shuffle=False)

    # 모델 초기화
    n_vocab = len(token_to_id)  # 어휘 크기 계산
    hidden_dim = 64 
    embedding_dim = 128
    n_layers = 2
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    classifier = reviewClassifierModel(
        n_vocab=n_vocab, hidden_dim=hidden_dim, embedding_dim=embedding_dim, n_classes=len(aspectCD), n_layers=n_layers
    ).to(device)

    # 손실 함수 및 최적화기 설정
    cl_criterion = nn.NLLLoss().to(device)
    bn_criterion = nn.BCEWithLogitsLoss().to(device)
    optimizer = optim.RMSprop(classifier.parameters(), lr=LR)


    # 결과를 저장할 DataFrame 생성
    results_df = pd.DataFrame(columns=['Val Loss', 'bn_score Val Accuracy', 'cl_score Val Accuracy'])

    for epoch in range(EPOCHS):
        model_train(classifier, train_loader, cl_criterion, bn_criterion, optimizer, device, INTERVAL)
        model_test(classifier, test_loader, cl_criterion, bn_criterion, device, epoch, results_df)  # DataFrame 전달

        # 모델 저장 (에포크 번호 추가)
        model_save_path = f'./saved_model/review_classifier_epoch_{epoch + 1}.pt'  # 에포크 번호 포함
        torch.save(classifier.state_dict(), model_save_path)
        print(f'Model saved at {model_save_path}')

    # 결과 DataFrame 저장
    results_df.to_csv('./saved_model/evaluation_results.csv', index=True)
    print("평가 결과가 저장되었습니다.")


In [60]:
TRAIN_PATH = './DATA/Training/'
# 여러 폴더 경로를 리스트로 저장
folder_paths = os.listdir(TRAIN_PATH)

# 빈 데이터프레임 리스트 생성
dataframes = []

# 각 폴더 내의 JSON 파일을 읽어와 데이터프레임으로 변환
for folder_path in folder_paths:
    FOLDER_PATH = TRAIN_PATH+folder_path
    print(f"Processing folder: {folder_path}")
    
    # 폴더 내의 모든 JSON 파일 리스트
    json_files = [file for file in os.listdir(FOLDER_PATH) if file.endswith('.json')]

    for file in json_files:
        file_path = os.path.join(FOLDER_PATH, file)
        print(f"Loading file: {file_path}")
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                
                # 파일 내용 확인 및 데이터프레임으로 변환
                if data:
                    # Aspects만 추출
                    for review in data:
                        aspects = pd.json_normalize(review.get('Aspects'))
                        dataframes.append(aspects)
                else:
                    print(f"No data found in {file}")
                    
        except json.JSONDecodeError:
            print(f"Error loading {file}: Invalid JSON")

# 데이터프레임 결합
if dataframes:
    final_dataframe = pd.concat(dataframes, ignore_index=True)
    print(final_dataframe)
else:
    print("No valid dataframes to concatenate.")
final_dataframe.to_csv('./DATA/Train_Fashion_reivew.csv')
Val_PATH = './DATA/Validation/'
# 여러 폴더 경로를 리스트로 저장
folder_paths = os.listdir(Val_PATH)

# 빈 데이터프레임 리스트 생성
dataframes = []

# 각 폴더 내의 JSON 파일을 읽어와 데이터프레임으로 변환
for folder_path in folder_paths:
    FOLDER_PATH = Val_PATH+folder_path
    print(f"Processing folder: {folder_path}")
    
    # 폴더 내의 모든 JSON 파일 리스트
    json_files = [file for file in os.listdir(FOLDER_PATH) if file.endswith('.json')]

    for file in json_files:
        file_path = os.path.join(FOLDER_PATH, file)
        print(f"Loading file: {file_path}")
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                
                # 파일 내용 확인 및 데이터프레임으로 변환
                if data:
                    # Aspects만 추출
                    for review in data:
                        aspects = pd.json_normalize(review.get('Aspects'))
                        dataframes.append(aspects)
                else:
                    print(f"No data found in {file}")
                    
        except json.JSONDecodeError:
            print(f"Error loading {file}: Invalid JSON")

# 데이터프레임 결합
if dataframes:
    final_dataframe = pd.concat(dataframes, ignore_index=True)
    print(final_dataframe)
else:
    print("No valid dataframes to concatenate.")
final_dataframe.to_csv('./DATA/Val_Fashion_reivew.csv')

Processing folder: TL_쇼핑몰_01.패션_1-1.여성의류
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(1).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(10).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(100).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(101).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(102).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(103).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(104).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(105).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(106).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(107).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(108).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(109).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.패션_1-1.여성의류\1-1.여성의류(11).json
Loading file: ./DATA/Training/TL_쇼핑몰_01.

In [84]:
main()

Train Loss 0 : 3.9773545265197754
Train Loss 500 : 2.8104467913062274
Train Loss 1000 : 2.44828752335254
Train Loss 1500 : 2.2350388168891535
Train Loss 2000 : 2.078799213009557
Train Loss 2500 : 1.9664365617049688
Train Loss 3000 : 1.877734921944614
Train Loss 3500 : 1.8073209827030021
Epoch 0 - Val Loss: 6.788484554503559, bn_score Val Accuracy: 0.8706218274111676, cl_score Val Accuracy: 0.0450507614213198
Model saved at ./saved_model/review_classifier_epoch_1.pt
Train Loss 0 : 1.1909698247909546
Train Loss 500 : 1.238012807216949
Train Loss 1000 : 1.221825931098435
Train Loss 1500 : 1.2038205783816673
Train Loss 2000 : 1.194732441254582
Train Loss 2500 : 1.1818323938882813
Train Loss 3000 : 1.1723040420089075
Train Loss 3500 : 1.1620991700327556
Epoch 1 - Val Loss: 7.243276842709245, bn_score Val Accuracy: 0.8893401015228426, cl_score Val Accuracy: 0.058248730964467005
Model saved at ./saved_model/review_classifier_epoch_2.pt
Train Loss 0 : 1.1248247623443604
Train Loss 500 : 1.0044

KeyboardInterrupt: 

In [83]:
def predict(model, text, tokenizer, num_classes):
    model.eval()
    
    # 1. 입력 텍스트를 토큰화 (Okt 사용)
    tokens = tokenizer.morphs(text)
    print(tokens)
    
    vocab = build_vocab(tokens, 5000, ['<pad>', '<unk>'])
    token_to_id = {token: idx for idx, token in enumerate(vocab)}
    id_to_token = {idx: token for idx, token in enumerate(vocab)}

    # 2. 토큰을 ID로 변환 (예: token_to_id)
    input_ids = [token_to_id.get(token, token_to_id["<unk>"]) for token in tokens]
    
    # 3. 입력 길이에 맞게 패딩
    max_length = 32  # 원하는 최대 길이
    pad_id = token_to_id["<pad>"]
    input_ids = input_ids[:max_length] + [pad_id] * (max_length - len(input_ids))
    
    # 4. 입력 텐서 생성
    input_tensor = torch.tensor([input_ids], dtype=torch.float32)  # 배치 차원 추가 및 float로 변환
    
    # 5. 모델에 입력하여 예측
    with torch.no_grad():
        logits = model(input_tensor)
    
    # 6. 소프트맥스 함수로 확률 계산
    probs = torch.softmax(logits, dim=1)
    print(probs)
    
    # 7. 최대 확률을 가진 클래스를 예측
    prediction = torch.argmax(probs, dim=1)

    # 8. 예측 결과 출력
    print(f"Predicted label: {prediction.item()}")

# 입력 예시
num_classes = 9  # 예시: 분류할 클래스 수
predict(reviewClassifierModel, "텍스트", tokenizer, num_classes)


['윈하', '는', '색', '은', '없지만']
tensor([[0.0043, 0.0855, 0.2323, 0.6315, 0.0043, 0.0016, 0.0016, 0.0016, 0.0016,
         0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016,
         0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016,
         0.0016, 0.0016, 0.0016, 0.0016, 0.0016]])
Predicted label: 3
