# IMDB 영화평데이터 > 감성 분류를 위한 트랜스포커 아키텍처 모델 구축

1. 정수토큰 시퀀스(길이 80) 입력
2. 토큰 임베딩 + 위치 임베딩
3. 멀티헤드어텐션 3헤드
4. concate + 정규화
5. FFN (Dense + Dense)
6. concate + 정규화
7. 분류기 (Dense)

## 1. 정수토큰 시퀀스(길이 80) 입력

In [1]:
import tensorflow as tf
from tensorflow.keras import Model, layers

2025-09-05 12:36:02.200703: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 2. 토큰 임베딩 + 위치 임베딩

In [2]:
# 토큰 임베딩
inputs = layers.Input(shape=(80, ))
input_embedding = layers.Embedding(input_dim=1000, output_dim=32)(inputs)

2025-09-05 12:36:03.464846: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-09-05 12:36:03.504431: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [3]:
# 위치 임베딩
positions = tf.range(start=0, limit=80)
pos_embedding = layers.Embedding(input_dim=80, output_dim=32)(positions)
pos_enc_output = pos_embedding + input_embedding

## 3. 멀티헤드어텐션 3헤드

In [4]:
# K, V를 받아주는 구조
attention_output = layers.MultiHeadAttention(num_heads=3, key_dim=32)(pos_enc_output, pos_enc_output)   # output이 그대로 들어가야하므로 32개로 통일

## 4. concate + 정규화

In [5]:
x = layers.add([pos_enc_output, attention_output])
x = layers.BatchNormalization()(x)   # 정규화

## 5. FFN (Dense + Dense)

In [6]:
from tensorflow.keras.models import Sequential
ffnn = Sequential(
    [
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu')
    ]
)(x)

## 6. concate + 정규화

In [7]:
x = layers.add([ffnn, x])
x = layers.BatchNormalization()(x)

## 7. 분류기 (Dense)

In [8]:
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0, 1)(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0, 1)(x)
outputs = layers.Dense(2, activation='softmax')(x)

# 모델 구성

In [9]:
model = Model(inputs=inputs, outputs=outputs)
model.summary()

In [10]:
# 손실함수와, 옵티마이저 지정
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# imdb data load

In [14]:
from tensorflow.keras.datasets import imdb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=1000)
(X_train, y_train), (X_test, y_test)

((array([list([1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]),
         list([1, 194, 2, 194, 2, 78, 228, 5, 6, 2, 2, 2, 134, 26, 4, 715, 8, 118, 2, 14, 394, 20, 13, 119, 954, 

In [15]:
# 텍스트 데이터의 전처리
from tensorflow.keras.preprocessing.sequence import pad_sequences
X_train_pad = pad_sequences(X_train, maxlen=80, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=80, padding='post', truncating='post')

In [20]:
model.fit(X_train_pad, y_train, epochs=40, batch_size=200)

Epoch 1/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step - accuracy: 0.9645 - loss: 0.0915
Epoch 2/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step - accuracy: 0.9638 - loss: 0.0938
Epoch 3/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 39ms/step - accuracy: 0.9726 - loss: 0.0766
Epoch 4/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - accuracy: 0.9790 - loss: 0.0632
Epoch 5/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step - accuracy: 0.9794 - loss: 0.0586
Epoch 6/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 44ms/step - accuracy: 0.9759 - loss: 0.0656
Epoch 7/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step - accuracy: 0.9778 - loss: 0.0611
Epoch 8/40
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - accuracy: 0.9780 - loss: 0.0615
Epoch 9/40
[1m125/125[0m [32m

<keras.src.callbacks.history.History at 0x77ef0b08a290>

In [21]:
model.evaluate(X_test_pad, y_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.6636 - loss: 4.5756


[4.5756001472473145, 0.6635599732398987]

- 20번 : 782/782 ━━━━━━━━━━━━━━━━━━━━ 4s 2ms/step - accuracy: 0.6694 - loss: 0.9977   
- 30번 : 782/782 ━━━━━━━━━━━━━━━━━━━━ 2s 3ms/step - accuracy: 0.6998 - loss: 1.8136
- 40번 : 782/782 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - accuracy: 0.6636 - loss: 4.5756

In [18]:
import numpy as np
pred = model.predict(X_test_pad)
pred = np.argmax(pred, axis=1)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step


In [19]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, pred)

array([[10224,  2276],
       [ 5796,  6704]])