In [None]:
# Install the third-party packages this notebook relies on.
# NOTE(review): no versions are pinned here, so a later re-run may resolve
# different releases than the ones that produced the saved outputs.
!pip install mxnet
!pip install gluonnlp pandas tqdm
!pip install sentencepiece
!pip install transformers
!pip install torch



In [None]:
# Install the `kobert_tokenizer` package straight from the `kobert_hf`
# subdirectory of the SKTBrain/KoBERT GitHub repository.
!pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'

Collecting kobert_tokenizer
  Cloning https://github.com/SKTBrain/KoBERT.git to /tmp/pip-install-gi_ockpx/kobert-tokenizer_43725478b5e846a99def286fb32c1794
  Running command git clone --filter=blob:none --quiet https://github.com/SKTBrain/KoBERT.git /tmp/pip-install-gi_ockpx/kobert-tokenizer_43725478b5e846a99def286fb32c1794
  Resolved https://github.com/SKTBrain/KoBERT.git to commit 47a69af87928fc24e20f571fe10c3cc9dd9af9a3
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, tqdm_notebook

In [None]:
# Hugging Face를 통한 모델 및 토크나이저 Import
from kobert_tokenizer import KoBERTTokenizer
from transformers import BertModel

from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook still runs (slowly) on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# 필요한 클래스 정의

class BERTSentenceTransform:
    r"""BERT style data transformation.

    Turns one sentence (or one sentence pair) into the three arrays a BERT
    model consumes: token ids, valid length and segment (token type) ids.

    Parameters
    ----------
    tokenizer : BERTTokenizer.
        Tokenizer for the sentences. Must provide ``tokenize(text)`` and
        ``convert_tokens_to_ids(tokens)``.
    max_seq_length : int.
        Maximum sequence length of the sentences.
    vocab : vocabulary object.
        Must expose ``cls_token``, ``sep_token`` and ``padding_token`` and
        support ``vocab[token]`` lookup (used for the padding id).
    pad : bool, default True
        Whether to pad the sentences to maximum length.
    pair : bool, default True
        Whether to transform sentences or sentence pairs.
    """

    def __init__(self, tokenizer, max_seq_length,vocab, pad=True, pair=True):
        self._tokenizer = tokenizer
        self._max_seq_length = max_seq_length
        self._pad = pad
        self._pair = pair
        self._vocab = vocab

    def __call__(self, line):
        """Perform transformation for sequence pairs or single sequences.

        The transformation is processed in the following steps:
        - tokenize the input sequences
        - insert [CLS], [SEP] as necessary
        - generate type ids to indicate whether a token belongs to the first
        sequence or the second sequence.
        - generate valid length

        For sequence pairs, the input is a tuple of 2 strings:
        text_a, text_b.

        Inputs:
            text_a: 'is this jacksonville ?'
            text_b: 'no it is not'
        Tokenization:
            text_a: 'is this jack ##son ##ville ?'
            text_b: 'no it is not .'
        Processed:
            tokens: '[CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]'
            type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
            valid_length: 14

        For single sequences, the input is a tuple of single string:
        text_a.

        Inputs:
            text_a: 'the dog is hairy .'
        Tokenization:
            text_a: 'the dog is hairy .'
        Processed:
            text_a: '[CLS] the dog is hairy . [SEP]'
            type_ids: 0     0   0   0  0     0 0
            valid_length: 7

        Parameters
        ----------
        line: tuple of str
            Input strings. For sequence pairs, the input is a tuple of 2 strings:
            (text_a, text_b). For single sequences, the input is a tuple of single
            string: (text_a,).

        Returns
        -------
        np.array: input token ids in 'int32', shape (seq_length,)
        np.array: valid length in 'int32', a 0-d scalar array
        np.array: input token type ids in 'int32', shape (seq_length,)

        """
        text_a = line[0]
        tokens_a = self._tokenizer.tokenize(text_a)
        tokens_b = None

        if self._pair:
            assert len(line) == 2
            # Tokenize the second sentence the same way as the first one.
            # Calling the tokenizer object itself would not return a token list.
            tokens_b = self._tokenizer.tokenize(line[1])

        if tokens_b:
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            self._truncate_seq_pair(tokens_a, tokens_b,
                                    self._max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > self._max_seq_length - 2:
                tokens_a = tokens_a[0:(self._max_seq_length - 2)]

        # The embedding vectors for `type=0` and `type=1` were learned during
        # pre-training and are added to the wordpiece embedding vector
        # (and position vector). This is not *strictly* necessary since
        # the [SEP] token unambiguously separates the sequences, but it makes
        # it easier for the model to learn the concept of sequences.

        # For classification tasks, the first vector (corresponding to [CLS]) is
        # used as the "sentence vector". Note that this only makes sense because
        # the entire model is fine-tuned.
        vocab = self._vocab
        tokens = [vocab.cls_token]
        tokens.extend(tokens_a)
        tokens.append(vocab.sep_token)
        segment_ids = [0] * len(tokens)

        if tokens_b:
            tokens.extend(tokens_b)
            tokens.append(vocab.sep_token)
            # Everything appended after the first [SEP] belongs to segment 1.
            segment_ids.extend([1] * (len(tokens) - len(segment_ids)))

        input_ids = self._tokenizer.convert_tokens_to_ids(tokens)

        # The valid length of sentences. Only real tokens are attended to.
        valid_length = len(input_ids)

        if self._pad:
            # Pad up to the sequence length with the padding token id;
            # padded positions get segment id 0.
            padding_length = self._max_seq_length - valid_length
            input_ids.extend([vocab[vocab.padding_token]] * padding_length)
            segment_ids.extend([0] * padding_length)

        return np.array(input_ids, dtype='int32'), np.array(valid_length, dtype='int32'),\
            np.array(segment_ids, dtype='int32')

    def _truncate_seq_pair(self, tokens_a, tokens_b, max_length):
        """Truncate a token pair in place until their total length fits.

        Tokens are removed one at a time from the end of whichever list is
        currently longer (from `tokens_b` on ties), so the longer sentence
        gives up tokens first.
        """
        while len(tokens_a) + len(tokens_b) > max_length:
            if len(tokens_a) > len(tokens_b):
                tokens_a.pop()
            else:
                tokens_b.pop()


================================================================================

In [None]:
import pandas as pd

# Read the Excel workbook and keep only the sentence text and its emotion
# label; every other column is dropped.
emotion_xlsx_path = '/content/한국어_단발성_대화_데이터셋.xlsx'
df = pd.read_excel(emotion_xlsx_path)[['Sentence', 'Emotion']]

In [None]:
# Replace each emotion name in the 'Emotion' column with its integer class id.
emotion_to_id = {
    "공포": 0,
    "놀람": 1,
    "분노": 2,
    "슬픔": 3,
    "중립": 4,
    "행복": 5,
    "혐오": 6,
}
for emotion_name, emotion_id in emotion_to_id.items():
    df.loc[df['Emotion'] == emotion_name, 'Emotion'] = emotion_id

In [None]:
# One [sentence, label-as-string] pair per row of the frame.
data_list = [
    [sentence_text, str(emotion_id)]
    for sentence_text, emotion_id in zip(df['Sentence'], df['Emotion'])
]

In [None]:
#데이터분리
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed seed keeps the split stable.
dataset_train, dataset_test = train_test_split(
    data_list,
    test_size=0.25,
    random_state=0,
)

# Report the size of each split (train first, then test).
for split_rows in (dataset_train, dataset_test):
    print(len(split_rows))

28945
9649


In [None]:
from gluonnlp.data import TSVDataset

# Wrap each split in a two-column frame and write it out as a tab-separated
# file (header row included, no index column).
split_columns = ['Sentence', 'Emotion']
dataset_train = pd.DataFrame(dataset_train, columns=split_columns)
dataset_test = pd.DataFrame(dataset_test, columns=split_columns)

for split_frame, tsv_path in (
    (dataset_train, 'dataset_train.tsv'),
    (dataset_test, 'dataset_test.tsv'),
):
    split_frame.to_csv(tsv_path, sep='\t', index=False)

In [None]:
# Re-load both splits from disk, reading fields 0 and 1 of every line and
# discarding the first line (the header written by `to_csv`).
dataset_train = nlp.data.TSVDataset(
    'dataset_train.tsv',
    field_indices=[0, 1],
    num_discard_samples=1,
)
dataset_test = nlp.data.TSVDataset(
    'dataset_test.tsv',
    field_indices=[0, 1],
    num_discard_samples=1,
)

In [None]:
# Hyperparameters shared by the data pipeline and the training loop.
max_len = 64  # max_seq_length handed to BERTSentenceTransform through BERTDataset
batch_size = 64  # batch size of the train/test DataLoaders
warmup_ratio = 0.1  # fraction of the total optimizer steps used for scheduler warmup
num_epochs = 1  # number of passes the training loop makes over the data
max_grad_norm = 1  # threshold passed to clip_grad_norm_ in the training loop
log_interval = 200  # training progress is printed every `log_interval` batches
learning_rate =  5e-5  # learning rate passed to AdamW

In [None]:
class BERTDataset(Dataset):
    """Dataset of pre-tokenized sentences with their integer labels.

    Every row of `dataset` is converted once, at construction time, by a
    `BERTSentenceTransform`; the resulting arrays are kept in memory.

    Parameters
    ----------
    dataset : iterable of indexable rows
    sent_idx : index of the sentence text inside a row
    label_idx : index of the label inside a row (converted with `np.int32`)
    bert_tokenizer, vocab, max_len, pad, pair :
        forwarded to `BERTSentenceTransform`.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, vocab, max_len,
                 pad, pair):
        to_features = BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, vocab=vocab, pad=pad, pair=pair)

        self.sentences = []
        self.labels = []
        for row in dataset:
            # The transform expects a sequence of strings, hence the 1-item list.
            self.sentences.append(to_features([row[sent_idx]]))
            self.labels.append(np.int32(row[label_idx]))

    def __getitem__(self, i):
        """Return the transform output for item `i` with its label appended."""
        features = self.sentences[i]
        return features + (self.labels[i], )

    def __len__(self):
        """Number of examples in the dataset."""
        return len(self.labels)

# Load the tokenizer and the encoder from the same pretrained checkpoint.
kobert_checkpoint = 'skt/kobert-base-v1'
tokenizer = KoBERTTokenizer.from_pretrained(kobert_checkpoint)
bertmodel = BertModel.from_pretrained(kobert_checkpoint, return_dict=False)

# Build the vocabulary from the tokenizer's sentencepiece model file.
vocab = nlp.vocab.BERTVocab.from_sentencepiece(
    tokenizer.vocab_file, padding_token='[PAD]')

# Column 0 holds the sentence, column 1 the label; single sentences, padded.
data_train = BERTDataset(
    dataset_train, sent_idx=0, label_idx=1, bert_tokenizer=tokenizer,
    vocab=vocab, max_len=max_len, pad=True, pair=False)
data_test = BERTDataset(
    dataset_test, sent_idx=0, label_idx=1, bert_tokenizer=tokenizer,
    vocab=vocab, max_len=max_len, pad=True, pair=False)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'XLNetTokenizer'. 
The class this function is called from is 'KoBERTTokenizer'.


In [None]:
# Shuffle the training data so each epoch sees the batches in a new order;
# the evaluation loader keeps a fixed order.
train_dataloader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, num_workers=5, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=5)



================================================================================


In [None]:
class BERTClassifier(nn.Module):
    """Linear classification head on top of a BERT encoder's pooled output.

    Parameters
    ----------
    bert : module called as ``bert(input_ids=..., token_type_ids=...,
        attention_mask=...)`` and returning a 2-tuple whose second element is
        the pooled sentence representation.
    hidden_size : size of the pooled representation fed to the linear layer.
    num_classes : number of output logits.
    dr_rate : dropout probability applied to the pooled output; no dropout
        is applied when it is None or 0.
    params : unused; kept so existing callers keep working.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=7,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Return a float mask with 1 for the first `valid_length[i]` tokens of row i.

        The mask has the same shape and device as `token_ids`; positions at
        or beyond a row's valid length are 0.
        """
        lengths = torch.as_tensor(valid_length, device=token_ids.device).view(-1, 1)
        positions = torch.arange(token_ids.size(1), device=token_ids.device).unsqueeze(0)
        # Broadcasting compares every position index with each row's length.
        return (positions < lengths).float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Compute class logits of shape (batch, num_classes)."""
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        # Without dropout the pooled output goes to the classifier unchanged
        # (previously `out` was undefined in that case).
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

In [None]:
# Wrap the pretrained encoder in the classification head (dropout p=0.5,
# default 7 output classes) and move the whole model to `device`.
model = BERTClassifier(bertmodel,  dr_rate=0.5).to(device)

In [None]:
# Split the model parameters into two optimizer groups: parameters whose name
# contains one of the `no_decay` markers get no weight decay, all others use 0.01.
no_decay = ['bias', 'LayerNorm.weight']

decayed_params = []
undecayed_params = []
for param_name, param in model.named_parameters():
    if any(marker in param_name for marker in no_decay):
        undecayed_params.append(param)
    else:
        decayed_params.append(param)

optimizer_grouped_parameters = [
    {'params': decayed_params, 'weight_decay': 0.01},
    {'params': undecayed_params, 'weight_decay': 0.0},
]

In [None]:
# One scheduler step per training batch: total steps = batches per epoch * epochs,
# of which the first `warmup_ratio` fraction is warmup.
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)

# Cosine learning-rate schedule with warmup.
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=t_total,
)



In [None]:
def calc_accuracy(X,Y):
    """Fraction of rows of `X` whose highest-scoring column equals the label in `Y`.

    `X` holds one row of scores per example and `Y` the matching class ids.
    The result is a NumPy scalar.
    """
    _, predicted = X.max(dim=1)
    n_correct = (predicted == Y).sum().detach().cpu().numpy()
    return n_correct / predicted.shape[0]

In [None]:
# Diagnostic only: list the devices TensorFlow can see, to check that a GPU is
# attached to the runtime. Nothing else in this notebook uses TensorFlow.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 425969065378319802
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14200668160
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 14852063117212376685
 physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
 xla_global_id: 416903419]

In [None]:
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0

    # ---- training pass ----
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        label = label.long().to(device)
        # `valid_length` is passed through as delivered by the DataLoader;
        # the model builds the attention mask from it.
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.item(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))

    # ---- evaluation pass ----
    model.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building the graph and reduces memory use.
    with torch.no_grad():
        for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            label = label.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):


  0%|          | 0/453 [00:00<?, ?it/s]



epoch 1 batch id 1 loss 1.9650264978408813 train acc 0.109375
epoch 1 batch id 201 loss 1.190937876701355 train acc 0.36963619402985076
epoch 1 batch id 401 loss 1.1100730895996094 train acc 0.440928927680798
epoch 1 train acc 0.451276619919491


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):


  0%|          | 0/151 [00:00<?, ?it/s]

epoch 1 test acc 0.5417286119745912


In [None]:
model1 = model # NOTE: this binds a second name to the same model object; no copy is made

In [None]:
def predict(predict_sentence): # run one sentence through the trained model
  """Print the predicted emotion and its probability for one sentence.

  The sentence is tokenized with the same `BERTDataset` pipeline used for
  training (a dummy label '0' fills the label column), passed through
  `model1` in eval mode, and the class with the highest softmax probability
  is printed. Nothing is returned.
  """
  class_names = ["공포", "놀람", "분노", "슬픔", "중립", "행복", "혐오"]

  dataset_another = [[predict_sentence, '0']]
  another_test = BERTDataset(dataset_another, 0, 1, tokenizer, vocab, max_len, True, False)
  # A single example does not need worker processes or a progress bar.
  single_loader = torch.utils.data.DataLoader(another_test, batch_size=1, num_workers=0)

  model1.eval()

  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    for token_ids, valid_length, segment_ids, _ in single_loader:
      token_ids = token_ids.long().to(device)
      segment_ids = segment_ids.long().to(device)
      out = model1(token_ids, valid_length, segment_ids)

      # Convert the logits to probabilities.
      probs = torch.nn.functional.softmax(out, dim=-1).cpu().numpy()

      # Pick the most probable class and express its probability in percent.
      predicted_class = np.argmax(probs, axis=1)[0]
      probability = probs[0, predicted_class]*100

      result = f">> 입력하신 내용은 {probability:.2f}% 확률로 {class_names[predicted_class]}으로 예측됩니다."
      print(result)

In [None]:
# Keep asking for sentences and classifying them; entering "1" ends the loop.
input_prompt = "하고싶은 말을 입력해주세요 : "
sentence = input(input_prompt)
while sentence != "1":
  print(sentence)
  predict(sentence)
  print("\n")
  sentence = input(input_prompt)


하고싶은 말을 입력해주세요 : 오늘 비가 와요
오늘 비가 와요


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):


  0%|          | 0/1 [00:00<?, ?it/s]

>> 입력하신 내용은 27.73% 확률로 공포으로 예측됩니다.


하고싶은 말을 입력해주세요 : 1


# 병합 데이터셋으로 다른 모델을 시도한 결과


In [None]:
import pandas as pd

# Load the three Excel sources.
df1 = pd.read_excel('/content/감성대화말뭉치(최종데이터)_Training.xlsx')
df2 = pd.read_excel('/content/감성대화말뭉치(최종데이터)_Validation.xlsx')
df3 = pd.read_excel('/content/한국어_단발성_대화_데이터셋.xlsx')

# Keep only the sentence and emotion columns and give them common names.
# `rename` returns a new frame here (no `inplace=True` on a column slice),
# which avoids pandas' SettingWithCopyWarning.
corpus_columns = {'사람문장1': 'Sentence', '감정_대분류': 'Emotion'}
df1 = df1[list(corpus_columns)].rename(columns=corpus_columns)
df2 = df2[list(corpus_columns)].rename(columns=corpus_columns)
df3 = df3[['Sentence', 'Emotion']].copy()

# Stack the training and validation parts of the dialogue corpus.
combined_df = pd.concat([df1, df2], ignore_index=True)

# Dialogue corpus: fold '상처' into '슬픔'.
combined_df['Emotion'] = combined_df['Emotion'].replace('상처', '슬픔')

# Single-turn dataset: rename its labels to the dialogue corpus' label set
# ('혐오' is folded into '분노'). No target label is also a source label, so
# one mapping is equivalent to applying the replacements one after another.
single_turn_label_map = {
    '혐오': '분노',
    '중립': '평온',
    '공포': '불안',
    '놀람': '당황',
    '행복': '기쁨',
}
df3['Emotion'] = df3['Emotion'].replace(single_turn_label_map)

# Merge both sources into the final frame.
final_df = pd.concat([combined_df, df3], ignore_index=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1.rename(columns={'사람문장1': 'Sentence', '감정_대분류': 'Emotion'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2.rename(columns={'사람문장1': 'Sentence', '감정_대분류': 'Emotion'}, inplace=True)


In [None]:
# Features are the raw sentences, targets the emotion labels; hold out 20%
# of the rows for testing with a fixed seed.
X, y = final_df['Sentence'], final_df['Emotion']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

머신러닝 모델은 일반적으로 숫자 데이터를 입력으로 받기 때문에 문자열 데이터를 그대로 다룰 수 없다. 따라서 문자열 데이터를 벡터로 변환한다.

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Split into train and test sets (same seed as the split above, so this
# reproduces the same partition and keeps the cell self-contained).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF vectorizer limited to 100 features; raise `max_features` to give the
# models a larger vocabulary.
tfidf_vectorizer = TfidfVectorizer(max_features=100)

# Fit the vocabulary on the training sentences only, then reuse it for the
# test sentences.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Naive Bayes
nb_model = MultinomialNB()
nb_model.fit(X_train_tfidf, y_train)

# k-nearest neighbours
knn_model = KNeighborsClassifier()
knn_model.fit(X_train_tfidf, y_train)

# Decision tree (seeded so repeated runs give the same tree)
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_tfidf, y_train)

# Random forest ensemble (seeded for reproducibility)
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_tfidf, y_train)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict the test set with every model.
nb_pred = nb_model.predict(X_test_tfidf)
knn_pred = knn_model.predict(X_test_tfidf)
dt_pred = dt_model.predict(X_test_tfidf)
rf_pred = rf_model.predict(X_test_tfidf)

predictions_by_model = {
    "Naive Bayes": nb_pred,
    "K-Nearest Neighbors": knn_pred,
    "Decision Tree": dt_pred,
    "Random Forest": rf_pred,
}

# Accuracy of every model first ...
for model_name, y_pred in predictions_by_model.items():
    print(f"{model_name} Accuracy:", accuracy_score(y_test, y_pred))

# ... then the per-class reports. `zero_division=0` reports 0 for a class a
# model never predicts instead of emitting an undefined-metric warning.
for model_name, y_pred in predictions_by_model.items():
    print(f"{model_name} Report:\n", classification_report(y_test, y_pred, zero_division=0))


Naive Bayes Accuracy: 0.31203220977649304
K-Nearest Neighbors Accuracy: 0.3118773550818149
Decision Tree Accuracy: 0.33902854488205236
Random Forest Accuracy: 0.35425592319207144
Naive Bayes Report:
               precision    recall  f1-score   support

          기쁨       0.43      0.25      0.31      2639
          당황       0.43      0.05      0.09      3096
          분노       0.38      0.18      0.24      4352
          불안       0.51      0.15      0.23      3219
          슬픔       0.28      0.77      0.41      5115
          평온       0.12      0.02      0.03       952

    accuracy                           0.31     19373
   macro avg       0.36      0.24      0.22     19373
weighted avg       0.38      0.31      0.26     19373



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


K-Nearest Neighbors Report:
               precision    recall  f1-score   support

          기쁨       0.34      0.24      0.28      2639
          당황       0.22      0.17      0.19      3096
          분노       0.29      0.60      0.39      4352
          불안       0.31      0.20      0.24      3219
          슬픔       0.40      0.32      0.35      5115
          평온       0.00      0.00      0.00       952

    accuracy                           0.31     19373
   macro avg       0.26      0.25      0.24     19373
weighted avg       0.30      0.31      0.29     19373

Decision Tree Report:
               precision    recall  f1-score   support

          기쁨       0.39      0.26      0.31      2639
          당황       0.26      0.12      0.16      3096
          분노       0.30      0.63      0.40      4352
          불안       0.39      0.18      0.25      3219
          슬픔       0.40      0.43      0.41      5115
          평온       0.25      0.00      0.00       952

    accuracy             