In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from konlpy.tag import Mecab

In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten, Embedding
from tensorflow.keras.utils import to_categorical

In [9]:
mecab = Mecab()

In [4]:
test_data = pd.read_csv('../05_Machine_Learning/data/bank_app_reviews_test.csv')
test_data.head()

Unnamed: 0,리뷰일,평점,사용자리뷰,업체답변,은행명
0,2024-02-08,5,고경민계장님감사해요,"안녕하세요 최순녀 고객님. 칭찬 진심으로 감사드리며, 더욱 편리하고 안정적인 서비스...",우리
1,2023-07-24,5,저축목표피드 새로 생긴거 너무좋은데 분명 카테고리를 저축으로 했는데 왜 인식이 안되...,"신아​ 님, 안녕하세요? 뱅크샐러드 고객감동팀​입니다. 소중한 시간내어 고객센터에 ...",뱅크샐러드
2,2023-09-25,1,아니 이딴걸 편리하게 사용하는앱이라고 쳐만들엇나 이렇게 불편하게만든건 일부러그런거에...,안녕하세요. 우리은행입니다. 먼저 우리WON뱅킹 이용에 불편을 드려 죄송합니다. 보...,우리
3,2024-02-15,3,몇 년째 만족하며 사용중이라 조금식 개선되어거는 모습에 만족하며 사용중입니다. 하지...,안녕하세요? 뱅크샐러드 고객감동팀입니다. 뱅크샐러드에 KB pay를 연결해 모든 자...,뱅크샐러드
4,2023-06-19,5,스타뱅킹을 사용 하고나서부터 편안해서 좋아요,"한송림 고객님, 안녕하세요! KB스타뱅킹을 이용해 주셔서 진심으로 감사드립니다. 앞...",국민


In [6]:
import re

def clean_text(text):
    cleaned = re.sub(r'[^가-힣a-zA-Z0-9\s]', '', text) # 한글, 영문, 숫자 추출
    cleaned = re.sub(r'\s+',' ', cleaned) # 연속된 공백을 하나의 공백으로 줄임
    return cleaned.strip()

In [7]:
test_data['사용자리뷰'] = test_data['사용자리뷰'].apply(clean_text)

In [8]:
test_data['is_good'] = test_data['평점'].apply(lambda x : 1 if x >3 else 0)

In [11]:
tokenized_docs = test_data['사용자리뷰'].apply(mecab.morphs)

### train에서 사용했던 tokenizer를 불러와서 one-hot encoding

In [12]:
import joblib

In [14]:
token = joblib.load('./model/bank_app_tokenizer.joblib')
x = token.texts_to_sequences(tokenized_docs)
print(x[0])

[6248, 327, 111, 71]


### train에서 사용했던 padding 길이 (model에 넣을 컬럼 수)

In [16]:
max_length = joblib.load('./model/bank_app_max_length.joblib')
print(max_length)

302


In [18]:
X_padded = pad_sequences(x, maxlen=max_length, padding='post')
print(X_padded[0])

[6248  327  111   71    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0 

In [19]:
y = test_data['is_good']

# model 불러와서 예측하고 결과 비교하기

In [20]:
birnn_best = load_model('./model/bank_app_review_birnn.keras')
cnn_lstm_best = load_model('./model/bank_app_review_lstm_cnn.keras')
attn_best = load_model('./model/bank_app_review_attn_model.keras')

I0000 00:00:1747887842.853728    1881 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1347 MB memory:  -> device: 0, name: NVIDIA GeForce MX450, pci bus id: 0000:01:00.0, compute capability: 7.5


In [21]:
birnn_pred = birnn_best.predict(X_padded)
cnn_lstm_pred = cnn_lstm_best.predict(X_padded)
attn_pred = attn_best.predict(X_padded)

I0000 00:00:1747887953.967732    2134 service.cc:152] XLA service 0x56240bb7dd20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747887953.967805    2134 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce MX450, Compute Capability 7.5
2025-05-22 13:25:54.019912: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1747887954.095518    2134 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 13/298[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 14ms/step

I0000 00:00:1747887955.097463    2134 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step
[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step
[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 52ms/step


In [22]:
birnn_pred = pd.DataFrame(birnn_pred)
cnn_lstm_pred = pd.DataFrame(cnn_lstm_pred)
attn_pred = pd.DataFrame(attn_pred)

In [25]:
y = pd.DataFrame(y)

In [27]:
birnn_result = y.join(birnn_pred)
cnn_lstm_result = y.join(cnn_lstm_pred)
attn_result = y.join(attn_pred)

In [29]:
birnn_result.loc[:,0] = birnn_result.loc[:,0].apply(lambda x : 1 if x > 0.5 else 0)
cnn_lstm_result = (cnn_lstm_result > 0.5).astype(int)
attn_result = (attn_result > 0.5).astype(int)

In [28]:
from sklearn.metrics import classification_report

In [30]:
print(classification_report(birnn_result[0], birnn_result['is_good']))

              precision    recall  f1-score   support

         0.0       0.86      0.88      0.87      3760
         1.0       0.92      0.90      0.91      5774

    accuracy                           0.89      9534
   macro avg       0.89      0.89      0.89      9534
weighted avg       0.89      0.89      0.89      9534



In [31]:
print(classification_report(cnn_lstm_result[0], cnn_lstm_result['is_good']))

              precision    recall  f1-score   support

           0       0.89      0.85      0.87      4064
           1       0.89      0.93      0.91      5470

    accuracy                           0.89      9534
   macro avg       0.89      0.89      0.89      9534
weighted avg       0.89      0.89      0.89      9534



In [32]:
print(classification_report(attn_result[0], attn_result['is_good']))

              precision    recall  f1-score   support

           0       0.87      0.88      0.88      3805
           1       0.92      0.91      0.92      5729

    accuracy                           0.90      9534
   macro avg       0.90      0.90      0.90      9534
weighted avg       0.90      0.90      0.90      9534



# evaluate

In [33]:
%%time
birnn_best.evaluate(X_padded, test_data['is_good'])

[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.8959 - auc: 0.9458 - loss: 0.2903
CPU times: user 3.92 s, sys: 591 ms, total: 4.51 s
Wall time: 6.65 s


[0.2909605801105499, 0.8936437964439392, 0.9454486966133118]

In [34]:
%%time
cnn_lstm_best.evaluate(X_padded, test_data['is_good'])

[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.8969 - auc: 0.9486 - loss: 0.2877
CPU times: user 5.53 s, sys: 1.35 s, total: 6.89 s
Wall time: 5.12 s


[0.28785431385040283, 0.8932242393493652, 0.9483879804611206]

In [35]:
%%time
attn_best.evaluate(X_padded, test_data['is_good'])

[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 65ms/step - accuracy: 0.9034 - auc: 0.9518 - loss: 0.2687
CPU times: user 7.46 s, sys: 1.79 s, total: 9.25 s
Wall time: 20.2 s


[0.26866504549980164, 0.900041937828064, 0.952237069606781]