In [131]:
import os
import json
import pandas as pd
import string
import re
from collections import Counter
from konlpy.tag import Okt
import numpy as np
import torch
from torch import optim
from torch import nn
from torch.utils.data import TensorDataset,DataLoader

---
json 파일 불러오기

In [132]:
TRAIN_PATH='./DATA/Training/'
folder_paths=os.listdir(TRAIN_PATH)

In [133]:
fashion_df=[]

for folder in folder_paths:
    #print(folder)
    folder_path=TRAIN_PATH+folder
    #print(folder_path)
    json_file=[file for file in os.listdir(folder_path) if file.endswith('.json')]
    #print(json_file)
    for file in json_file:
        file_path=os.path.join(folder_path,file)
        #print(file_path)

        try:
            with open(file_path,'r',encoding='utf-8') as f:
                data=json.load(f)

                if data:
                    for review in data:
                        aspects=pd.json_normalize(review.get('Aspects'))
                        fashion_df.append(aspects)
                else:
                    print(f'No data found in {file}')

        except json.JSONDecodeError:
            print(f"Error loading {file}: Invalid JSON")

In [134]:
if fashion_df:
    train_fashionDF=pd.concat(fashion_df,ignore_index=True)
    print(train_fashionDF)
else:
    print('No Valid Dataframe')

         Aspect                       SentimentText SentimentWord  \
0            가격                             가격이 착하고             2   
1           디자인                           디자인이 예쁩니다             2   
2            가격                                  싸고             1   
3           디자인                            디자인이 예뻐요             2   
4            가격                         가성비 가심비 입니다             3   
...         ...                                 ...           ...   
120211       품질                  바퀴도 크고 부드럽게 잘 움직여서             5   
120212  사용성/편의성                         편하게 사용하겠어요.             2   
120213      디자인  캐리어 찾을때도 비슷비슷한 모양들 많은중에 눈에 띌것같네요^^             7   
120214       색상                            색상도 좋으네요             2   
120215  사용성/편의성                4종류 용도에 맞게 사용하기 편리해요             5   

       SentimentPolarity  
0                      1  
1                      1  
2                      1  
3                      1  
4                      1  
...      

In [135]:
val_path='./DATA/Validation/'
folder_paths=os.listdir(val_path)

In [136]:
val_df=[]

for folder in folder_paths:
    path=val_path+folder
    #print(path)

    json_file=[file for file in os.listdir(path) if file.endswith('.json')]

    for file in json_file:
        file_paths=os.path.join(path,file)
        #print(file_paths)

        try:
            with open(file_paths,'r',encoding='utf-8') as f:
                val_data=json.load(f)

                if val_data:
                    for review in val_data:
                        aspects=pd.json_normalize(review.get('Aspects'))
                        val_df.append(aspects)
                else: print(f'no data found in {file}')

        except json.JSONDecodeError: print(f'error loading {file}')

In [137]:
if val_df:
    val_fashionDF=pd.concat(val_df,ignore_index=True)
    print(val_fashionDF)
else: print('no valid dataframe')

      Aspect         SentimentText SentimentWord SentimentPolarity
0         기능          배도 편하게 눌러주고              3                 1
1         길이         미디가 길어 편하다고..             3                 1
2         색상     다섯가지 색상 모두 이쁘네요^^             4                 1
3         기능             입자마자 시원하고             2                 1
4         기능          배 부분도 잘 잡아주고             4                 1
...      ...                   ...           ...               ...
15755     품질                품질 좋아요             2                 1
15756     품질        우선 박음질이 허접합니다.             3                -1
15757     색상              색상은 이뻐요              2                 1
15758     품질  검정백은 지퍼가 뻑뻑해서 잘 안닫혀요             5                -1
15759     가격    50프로 적립 아니였음 반품이였음             4                 1

[15760 rows x 4 columns]


---
결측치 확인

In [138]:
train_fashionDF.isna().sum()

Aspect               0
SentimentText        0
SentimentWord        0
SentimentPolarity    0
dtype: int64

In [139]:
val_fashionDF.isna().sum()

Aspect               0
SentimentText        0
SentimentWord        0
SentimentPolarity    0
dtype: int64

---
'aspect' 글자 -> 숫자

In [140]:
train_fashionDF['Aspect'].value_counts()

Aspect
디자인            13290
가격             11029
사이즈            10633
품질              9162
착화감             8561
소재              8374
기능              8331
색상              7165
착용감             6130
치수/사이즈          4791
무게              4649
핏               4145
길이              3299
두께              3095
신축성             2917
활용성             2896
촉감              2075
기능성             1525
사이즈/폭/길이/두께     1428
제품구성            1378
마감              1306
사용성             1150
내구성              911
사용성/편의성          835
굽                542
냄새               448
수납               151
Name: count, dtype: int64

In [141]:
train_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,가격,가격이 착하고,2,1
1,디자인,디자인이 예쁩니다,2,1
2,가격,싸고,1,1
3,디자인,디자인이 예뻐요,2,1
4,가격,가성비 가심비 입니다,3,1
...,...,...,...,...
120211,품질,바퀴도 크고 부드럽게 잘 움직여서,5,1
120212,사용성/편의성,편하게 사용하겠어요.,2,1
120213,디자인,캐리어 찾을때도 비슷비슷한 모양들 많은중에 눈에 띌것같네요^^,7,1
120214,색상,색상도 좋으네요,2,1


In [142]:
train_labelCD = train_fashionDF.Aspect.unique().tolist()                   # Aspect 컬럼의 유니크 값 리스트 
train_fashionDF['Aspect'] = train_fashionDF['Aspect'].map(lambda x: train_labelCD.index(x))         # 다중 분류 라벨링 인코딩
train_fashionDF['SentimentPolarity']=train_fashionDF['SentimentPolarity'].replace('-1','0')
train_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,0,가격이 착하고,2,1
1,1,디자인이 예쁩니다,2,1
2,0,싸고,1,1
3,1,디자인이 예뻐요,2,1
4,0,가성비 가심비 입니다,3,1
...,...,...,...,...
120211,9,바퀴도 크고 부드럽게 잘 움직여서,5,1
120212,25,편하게 사용하겠어요.,2,1
120213,1,캐리어 찾을때도 비슷비슷한 모양들 많은중에 눈에 띌것같네요^^,7,1
120214,6,색상도 좋으네요,2,1


In [143]:
train_labelCD

['가격',
 '디자인',
 '착용감',
 '기능',
 '소재',
 '활용성',
 '색상',
 '사이즈',
 '길이',
 '품질',
 '두께',
 '촉감',
 '제품구성',
 '핏',
 '무게',
 '신축성',
 '마감',
 '냄새',
 '기능성',
 '착화감',
 '치수/사이즈',
 '사용성',
 '굽',
 '내구성',
 '사이즈/폭/길이/두께',
 '사용성/편의성',
 '수납']

In [144]:
train_fashionDF['SentimentPolarity'].value_counts()

SentimentPolarity
1    80800
0    39416
Name: count, dtype: int64

In [145]:
val_fashionDF['Aspect'].value_counts()

Aspect
사이즈            1828
디자인            1421
가격             1282
품질             1213
착용감            1211
기능             1197
소재              966
착화감             867
핏               845
색상              840
신축성             728
치수/사이즈          525
길이              494
무게              484
두께              418
활용성             223
제품구성            200
촉감              186
사용성/편의성         160
사용성             159
기능성             139
사이즈/폭/길이/두께     125
마감               93
내구성              61
수납               40
굽                28
냄새               27
Name: count, dtype: int64

In [146]:
val_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,기능,배도 편하게 눌러주고,3,1
1,길이,미디가 길어 편하다고..,3,1
2,색상,다섯가지 색상 모두 이쁘네요^^,4,1
3,기능,입자마자 시원하고,2,1
4,기능,배 부분도 잘 잡아주고,4,1
...,...,...,...,...
15755,품질,품질 좋아요,2,1
15756,품질,우선 박음질이 허접합니다.,3,-1
15757,색상,색상은 이뻐요,2,1
15758,품질,검정백은 지퍼가 뻑뻑해서 잘 안닫혀요,5,-1


In [147]:
val_labelCD = val_fashionDF.Aspect.unique().tolist()                   # Aspect 컬럼의 유니크 값 리스트 
val_fashionDF['Aspect'] = val_fashionDF['Aspect'].map(lambda x: val_labelCD.index(x))         # 다중 분류 라벨링 인코딩
val_fashionDF['SentimentPolarity']=val_fashionDF['SentimentPolarity'].replace('-1','0')
val_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,0,배도 편하게 눌러주고,3,1
1,1,미디가 길어 편하다고..,3,1
2,2,다섯가지 색상 모두 이쁘네요^^,4,1
3,0,입자마자 시원하고,2,1
4,0,배 부분도 잘 잡아주고,4,1
...,...,...,...,...
15755,7,품질 좋아요,2,1
15756,7,우선 박음질이 허접합니다.,3,0
15757,2,색상은 이뻐요,2,1
15758,7,검정백은 지퍼가 뻑뻑해서 잘 안닫혀요,5,0


In [148]:
val_labelCD

['기능',
 '길이',
 '색상',
 '착용감',
 '사이즈',
 '두께',
 '신축성',
 '품질',
 '핏',
 '가격',
 '활용성',
 '무게',
 '소재',
 '디자인',
 '제품구성',
 '마감',
 '촉감',
 '냄새',
 '치수/사이즈',
 '착화감',
 '사용성',
 '기능성',
 '내구성',
 '굽',
 '사이즈/폭/길이/두께',
 '사용성/편의성',
 '수납']

In [149]:
val_fashionDF['SentimentPolarity'].value_counts()

SentimentPolarity
1    10146
0     5614
Name: count, dtype: int64

---
구두점 지우고 한글만 남기기

In [150]:
punc=string.punctuation

for p in punc:
    train_fashionDF['SentimentText'] = train_fashionDF['SentimentText'].str.replace(p, '')
    val_fashionDF['SentimentText']=val_fashionDF['SentimentText'].str.replace(p,'')

train_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,0,가격이 착하고,2,1
1,1,디자인이 예쁩니다,2,1
2,0,싸고,1,1
3,1,디자인이 예뻐요,2,1
4,0,가성비 가심비 입니다,3,1
...,...,...,...,...
120211,9,바퀴도 크고 부드럽게 잘 움직여서,5,1
120212,25,편하게 사용하겠어요,2,1
120213,1,캐리어 찾을때도 비슷비슷한 모양들 많은중에 눈에 띌것같네요,7,1
120214,6,색상도 좋으네요,2,1


In [151]:
val_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,0,배도 편하게 눌러주고,3,1
1,1,미디가 길어 편하다고,3,1
2,2,다섯가지 색상 모두 이쁘네요,4,1
3,0,입자마자 시원하고,2,1
4,0,배 부분도 잘 잡아주고,4,1
...,...,...,...,...
15755,7,품질 좋아요,2,1
15756,7,우선 박음질이 허접합니다,3,0
15757,2,색상은 이뻐요,2,1
15758,7,검정백은 지퍼가 뻑뻑해서 잘 안닫혀요,5,0


In [152]:
m=re.compile('[^ ㄱ-ㅣ가-힣]+')

train_fashionDF['SentimentText']=train_fashionDF['SentimentText'].apply(lambda x: m.sub(' ', x))
train_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,0,가격이 착하고,2,1
1,1,디자인이 예쁩니다,2,1
2,0,싸고,1,1
3,1,디자인이 예뻐요,2,1
4,0,가성비 가심비 입니다,3,1
...,...,...,...,...
120211,9,바퀴도 크고 부드럽게 잘 움직여서,5,1
120212,25,편하게 사용하겠어요,2,1
120213,1,캐리어 찾을때도 비슷비슷한 모양들 많은중에 눈에 띌것같네요,7,1
120214,6,색상도 좋으네요,2,1


In [153]:
val_fashionDF['SentimentText']=val_fashionDF['SentimentText'].apply(lambda x: m.sub(' ', x))
val_fashionDF

Unnamed: 0,Aspect,SentimentText,SentimentWord,SentimentPolarity
0,0,배도 편하게 눌러주고,3,1
1,1,미디가 길어 편하다고,3,1
2,2,다섯가지 색상 모두 이쁘네요,4,1
3,0,입자마자 시원하고,2,1
4,0,배 부분도 잘 잡아주고,4,1
...,...,...,...,...
15755,7,품질 좋아요,2,1
15756,7,우선 박음질이 허접합니다,3,0
15757,2,색상은 이뻐요,2,1
15758,7,검정백은 지퍼가 뻑뻑해서 잘 안닫혀요,5,0


---
단어사전 만들고 불용어 처리

In [154]:
def build_vocab(corpus, n_vocab, special_tokens):
    counter = Counter()
    for tokens in corpus:
        counter.update(tokens)
    vocab = special_tokens

    for token, count in counter.most_common(n_vocab):
        vocab.append(token)

    return vocab

In [155]:
stop_word='./stopwords.txt'

with open(stop_word, 'r', encoding='utf-8') as f:
    stop_words = [line.strip() for line in f]

In [156]:
tokenizer = Okt()

train_tokens = [[token for token in tokenizer.morphs(text) if token not in stop_words] for text in train_fashionDF['SentimentText']]
val_tokens = [[token for token in tokenizer.morphs(text) if token not in stop_words] for text in val_fashionDF['SentimentText']]


vocab = build_vocab(corpus=train_tokens, n_vocab=20000, special_tokens=['<pad>','<unk>'])
token_to_id = {token:idx for idx, token in enumerate(vocab)}
id_to_token = {idx:token for idx, token in enumerate(vocab)}

print(vocab[:10])
print(len(vocab))

['<pad>', '<unk>', '가격', '디자인', '사이즈', '좋아요', '잘', '색상', '같아요', '맘']
20002


---
패딩

In [173]:
len(max(train_tokens, key=len))

18

In [158]:
def pad_sequences(sequences, max_length, pad_value):
    result = list()
    for sequence in sequences:
        sequence = sequence[:max_length]
        pad_length = max_length - len(sequence)
        padded_sequence = sequence + [pad_value] * pad_length
        result.append(padded_sequence)
    return np.asarray(result)

In [159]:
unk_id = token_to_id["<unk>"]
train_ids = [[token_to_id.get(token, unk_id) for token in review] for review in train_tokens]
val_ids = [[token_to_id.get(token, unk_id) for token in review] for review in val_tokens]

max_length = 18
pad_id = token_to_id['<pad>']
train_ids = pad_sequences(train_ids, max_length, pad_id)
val_ids = pad_sequences(val_ids, max_length, pad_id)

print(train_ids[0])

[  2 344   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]


In [160]:
print(val_ids[0])

[2129   37    1    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0]


In [161]:
train_tokens[0]

['가격', '착하고']

In [70]:
val_tokens[0]

['배도', '편하게', '눌러주고']

In [162]:
train_fashionDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120216 entries, 0 to 120215
Data columns (total 4 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   Aspect             120216 non-null  int64 
 1   SentimentText      120216 non-null  object
 2   SentimentWord      120216 non-null  object
 3   SentimentPolarity  120216 non-null  object
dtypes: int64(1), object(3)
memory usage: 3.7+ MB


In [163]:
train_fashionDF['SentimentPolarity']=train_fashionDF['SentimentPolarity'].astype(int)
train_fashionDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120216 entries, 0 to 120215
Data columns (total 4 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   Aspect             120216 non-null  int64 
 1   SentimentText      120216 non-null  object
 2   SentimentWord      120216 non-null  object
 3   SentimentPolarity  120216 non-null  int32 
dtypes: int32(1), int64(1), object(2)
memory usage: 3.2+ MB


In [164]:
val_fashionDF['SentimentPolarity']=val_fashionDF['SentimentPolarity'].astype(int)
val_fashionDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15760 entries, 0 to 15759
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Aspect             15760 non-null  int64 
 1   SentimentText      15760 non-null  object
 2   SentimentWord      15760 non-null  object
 3   SentimentPolarity  15760 non-null  int32 
dtypes: int32(1), int64(1), object(2)
memory usage: 431.1+ KB


---
데이터셋, 데이터로더

In [74]:
train_ids=torch.tensor(train_ids)
val_ids=torch.tensor(val_ids)

train_labels=torch.tensor(train_fashionDF['SentimentPolarity'],dtype=torch.float32)
val_labels=torch.tensor(val_fashionDF['SentimentPolarity'],dtype=torch.float32)

train_ds=TensorDataset(train_ids,train_labels)
val_ds=TensorDataset(val_ids,val_labels)

train_dl=DataLoader(train_ds,batch_size=16,shuffle=True)
val_dl=DataLoader(val_ds,batch_size=16,shuffle=False)

---
학습

In [165]:
def train_model(model, datasets, multi_criterion, binary_criterion, optimizer, device, interval):
    model. train()
    total_loss=[]

    for step, (input_ids, labels) in enumerate(datasets):
        input_ids=input_ids.to(device)
        multi_labels=labels[:,0].to(device)
        binary_labels=labels[:,1].to(device).float()

        classesd, logits=model(input_ids)

        multi_loss=multi_criterion(classesd,multi_labels)
        binary_loss=binary_criterion(logits.squeeze(1),binary_labels)
        loss=multi_loss+binary_loss

        total_loss.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step%interval ==0:
            print(f'Train loss {step}: {np.mean(total_loss)}')


In [166]:
def test_model(model, datasets, multi_criterion, binary_criterion, device):
    model.eval()
    total_loss=[]
    multi_score=[]
    binary_score=[]

    with torch.no_grad():
        for step, (input_ids, labels) in enumerate(datasets):
            input_ids=input_ids.to(device)
            multi_labels=labels[:,0].to(device).long()
            binary_labels=labels[:,1].to(device).float()

            classesd,logits=model(input_ids)

            multi_loss=multi_criterion(classesd,multi_labels)
            binary_loss=binary_criterion(logits.squeeze(), binary_labels)
            loss=multi_loss+binary_loss
            total_loss.append(loss.item())

            multi_pred=torch.argmax(torch.softmax(classesd, dim=1),dim=1)
            multi_score.extend(multi_pred.eq(multi_labels).cpu().numpy())

            binary_pred=(torch.sigmoid(logits)>0.5).int().squeeze()
            binary_score.extend(binary_pred.eq(binary_labels.int()).cpu().numpy())
        multi_accuracy=np.mean(multi_score)
        binary_accuracy=np.mean(binary_score)

        print(f'Val Loss : {np.mean(total_loss)}, Val Binary Accuracy : {binary_accuracy}, Val Multi Accuracy : {multi_accuracy}')

In [167]:
class reviewClassifierModel(nn.Module):
    def __init__(self, n_vocab, hidden_dim, embedding_dim, n_classes,
                 n_layers, dropout=0.5, bidirectional=True) -> None:
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,         # num_embeddings = vocab이 들어감
            embedding_dim=embedding_dim,
            padding_idx=0
        )
        self.model = nn.LSTM(
            input_size = embedding_dim,         # Input의 사이즈에 해당하는 수
            hidden_size=hidden_dim,             # 은닉층의 사이즈에 해당하는 수
            num_layers=n_layers,                # RNN의 은닉층 레이어 개수, default = 1
            bidirectional=bidirectional,        # bidrectional True일시 양방향 RNN, default = False
            dropout=dropout,                    # dropout 비율설정 기본값 0
            batch_first=True,                   # True일 경우 Output 사이즈는 (batch, seq, feature) 기본값 False
        )
        if bidirectional:
            self.classifier1 = nn.Linear(hidden_dim*2,n_classes)
            self.classifier2 = nn.Linear(hidden_dim*2,1)
        else:
            self.classifier1 = nn.Linear(hidden_dim,n_classes)
            self.classifier2 = nn.Linear(hidden_dim,1)
        self.dropout = nn.Dropout(dropout)

    def forward(self,inputs):
        embeddings = self.embedding(inputs)
        output, _ = self.model(embeddings)
        last_output = output[:, -1, :]
        last_output = self.dropout(last_output)
        classesd = self.classifier1(last_output)
        logits = self.classifier2(last_output)
        return classesd, logits

In [168]:
N_VOCAB = 5000
MAX_LENGTH = 18
EPOCHS = 100
INTERVAL = 500
BATCH_SIZE = 64
LR = 0.001
special_tokens = ['<pad>', '<unk>']

raw_trainDF=pd.read_csv(r'C:\Users\MSI\Desktop\TORCH_NLP\1010\DATA\train.csv', usecols=[1, 2, 4])
raw_testDF=pd.read_csv(r'C:\Users\MSI\Desktop\TORCH_NLP\1010\DATA\test.csv', usecols=[1, 2, 4])

top10_train=raw_trainDF['Aspect'].value_counts().nlargest(n=9).index.tolist()
trainDF=raw_trainDF.drop(index=raw_trainDF[~raw_trainDF['Aspect'].isin(values=top10_train)].index).reset_index()

top10_test=raw_testDF['Aspect'].value_counts().nlargest(n=9).index.tolist()
testDF=raw_testDF.drop(index=raw_testDF[~raw_testDF['Aspect'].isin(values=top10_test)].index).reset_index()

trainDF, aspectCD = data_encoding(trainDF)
testDF, _ = data_encoding(testDF)

############################################################################
punc=string.punctuation

for p in punc:
    trainDF['SentimentText'] = trainDF['SentimentText'].str.replace(p, '')
    testDF['SentimentText']=testDF['SentimentText'].str.replace(p,'')

m=re.compile('[^ ㄱ-ㅣ가-힣]+')

trainDF['SentimentText']=trainDF['SentimentText'].apply(lambda x: m.sub(' ', x))
testDF['SentimentText']=testDF['SentimentText'].apply(lambda x: m.sub(' ', x))

stop_word='./stopwords.txt'

with open(stop_word, 'r', encoding='utf-8') as f:
    stop_words = [line.strip() for line in f]


tokenizer = Okt()
train_tokens = [[token for token in tokenizer.morphs(text) if token not in stop_words] for text in trainDF['SentimentText']]
val_tokens = [[token for token in tokenizer.morphs(text) if token not in stop_words] for text in testDF['SentimentText']]

###################################################################################

vocab = build_vocab(train_tokens, N_VOCAB, special_tokens)
token_to_id = {token: idx for idx, token in enumerate(vocab)}
id_to_token = {idx: token for idx, token in enumerate(vocab)}

pad_id = token_to_id['<pad>']
unk_id = token_to_id['<unk>']
train_ids = encoding_ids(token_to_id, train_tokens, unk_id)
test_ids = encoding_ids(token_to_id, val_tokens, unk_id)
train_ids = pad_sequences(train_ids, MAX_LENGTH, pad_id)
test_ids = pad_sequences(test_ids, MAX_LENGTH, pad_id)

train_ids = torch.tensor(train_ids, dtype=torch.long)
test_ids = torch.tensor(test_ids, dtype=torch.long)

train_labels = torch.tensor(list(zip(trainDF['Aspect'].values, trainDF['SentimentPolarity'].values)), dtype=torch.long)
test_labels = torch.tensor(list(zip(testDF['Aspect'].values, testDF['SentimentPolarity'].values)), dtype=torch.float32)

train_dataset = TensorDataset(train_ids, train_labels)
test_dataset = TensorDataset(test_ids, test_labels)

train_loader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, BATCH_SIZE, shuffle=False)

n_vocab = len(token_to_id)
hidden_dim = 64
embedding_dim = 128
n_layers = 2
device = 'cuda' if torch.cuda.is_available() else 'cpu'
classifier = reviewClassifierModel(
    n_vocab=n_vocab, hidden_dim=hidden_dim, embedding_dim=embedding_dim, n_classes=len(aspectCD), n_layers=n_layers
).to(device)

cl_criterion = nn.CrossEntropyLoss().to(device)
bn_criterion = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.RMSprop(classifier.parameters(), lr=LR)

for epoch in range(EPOCHS):
    print(f'{epoch+1}/{EPOCHS}')
    model_train(classifier, train_loader, cl_criterion, bn_criterion, optimizer, device, INTERVAL)
    model_test(classifier, test_loader, cl_criterion, bn_criterion, device)

    # 모델 저장 (에포크 번호 추가)
    model_save_path = f'./saved_model/review_classifier_epoch_{epoch + 1}.pt'  # 에포크 번호 포함
    torch.save(classifier.state_dict(), model_save_path)
    print(f'Model saved at {model_save_path}')

NameError: name 'aspectCD' is not defined