# IMDB 영화평데이터 -> 감성분류를 위한 트랜스포머 아키텍쳐 모델 구축

1. 정수토큰 시퀀스(길이 80) 입력
2. 토큰 임베딩 + 위치 임베딩
3. 멀티 헤드 어텐션
4. concat + 정규화
5. FFN (Dense + Dense)
6. concat + 정규화
7. 분류기 (Dense)

# 1. 정수토큰 시퀀스(길이 80) 입력

In [1]:
import tensorflow as tf
from tensorflow.keras import Model, layers, Sequential

2025-09-05 12:43:42.862546: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# 2. 토큰 임베딩 + 위치 임베딩

In [2]:
# 토큰 임베딩
inputs = layers.Input(shape=(80,))
input_embedding = layers.Embedding(input_dim=1000, output_dim=32)(inputs)

I0000 00:00:1757043824.359765   61895 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [3]:
# 위치 임베딩
positions = tf.range(start=0, limit=80)
pos_embedding = layers.Embedding(input_dim=80, output_dim=32)(positions)
pos_enc_output = pos_embedding + input_embedding

# 3. 멀티 헤드 어텐션 3헤드

In [4]:
attention_output = layers.MultiHeadAttention(num_heads=3, key_dim=32)(pos_enc_output, pos_enc_output) # K, V

# 4. concat + 정규화

In [5]:
x = layers.add([pos_enc_output, attention_output])
x = layers.BatchNormalization()(x)

# 5. FFN (Dense + Dense)
# 6. concat + 정규화

In [6]:
ffnn = Sequential([
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu')
])(x)
x = layers.add([ffnn, x])
x = layers.BatchNormalization()(x)

# 7. 분류기 (Dense)

In [7]:
x = layers.GlobalAveragePooling1D()(x)
x= layers.Dropout(0.1)(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(2, activation='softmax')(x)

# 모델 구성

In [8]:
model = Model(inputs=inputs, outputs=outputs)
model.summary()

In [9]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# imdb data load

In [10]:
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=1000)

In [11]:
# 텍스트데이터 전처리
from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='pre', truncating='pre')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='pre', truncating='pre')

In [12]:
model.fit(X_train_pad, y_train, epochs=15, batch_size=200)

Epoch 1/15


2025-09-05 12:43:48.320245: I external/local_xla/xla/service/service.cc:163] XLA service 0x70f78c008fb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-09-05 12:43:48.320268: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-09-05 12:43:48.357122: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-09-05 12:43:48.598196: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91200
2025-09-05 12:43:48.745432: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-09-05 12:43:48.

[1m  7/125[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 20ms/step - accuracy: 0.5016 - loss: 0.6999

I0000 00:00:1757043835.801861   61984 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.7534 - loss: 0.4953
Epoch 2/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8193 - loss: 0.3994
Epoch 3/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8208 - loss: 0.3873
Epoch 4/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8279 - loss: 0.3756
Epoch 5/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8358 - loss: 0.3650
Epoch 6/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8384 - loss: 0.3567
Epoch 7/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8410 - loss: 0.3515
Epoch 8/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8461 - loss: 0.3405
Epoch 9/15
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x70f9501cbbb0>

In [13]:
model.evaluate(X_test_pad, y_test)

2025-09-05 12:44:05.009320: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-09-05 12:44:05.009370: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-09-05 12:44:05.009426: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.








[1m774/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7774 - loss: 0.4734

2025-09-05 12:44:08.285092: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-09-05 12:44:08.285152: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-09-05 12:44:08.285193: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.








[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.7790 - loss: 0.4717


[0.4717130959033966, 0.7789999842643738]

In [14]:
import numpy as np
pred = model.predict(X_test_pad)
pred = np.argmax(pred, axis=1)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step


In [15]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, pred)

array([[10172,  2328],
       [ 3197,  9303]])