<a href="https://colab.research.google.com/github/jaeyeon1234/hondl/blob/main/hondeeplearning04_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import keras_nlp

# BERT-base hyperparameters shared by the model-building cells below
vocab_size = 30522  # input dimension of the token embedding table
num_layers = 12  # number of stacked transformer encoder blocks
num_heads = 12  # attention heads per encoder block
hidden_dim = 768  # embedding width; also the output width of each encoder block
dropout = 0.1  # rate used by the Dropout layers and inside the attention layer
activation = 'gelu'  # activation of the first feed-forward Dense layer in each block
max_seq_len = 512  # sequence length passed to the position embedding

In [19]:
# Import Keras and build the three embeddings (token, position, segment)
import keras
from keras import layers

# Symbolic model inputs; shape=(None,) leaves the sequence length unspecified.
token_ids = keras.Input(shape=(None,))
segment_ids = keras.Input(shape=(None,))
padding_mask = keras.Input(shape=(None,))

# Token embedding: one hidden_dim-wide vector per vocabulary id.
token_embedder = layers.Embedding(vocab_size, hidden_dim)
token_embedding = token_embedder(token_ids)

# Position embedding, computed from the token embedding tensor.
position_embedder = keras_nlp.layers.PositionEmbedding(max_seq_len)
pos_embedding = position_embedder(token_embedding)

# Segment embedding: a two-row table indexed by segment_ids.
segment_embedder = layers.Embedding(2, hidden_dim)
seg_embedding = segment_embedder(segment_ids)

In [20]:
# Sum the three embeddings, then apply layer normalization and dropout
embedding_sum = layers.Add()((token_embedding, pos_embedding, seg_embedding))
normalized_sum = layers.LayerNormalization()(embedding_sum)
x = layers.Dropout(dropout)(normalized_sum)

In [21]:
# Build one transformer encoder block
def transformer_encoder(x, padding_mask, dropout, activation='relu'):
  """Apply one transformer encoder block to `x` and return the result.

  The block is self-attention followed by a position-wise feed-forward
  network. Each sub-layer is followed by dropout, a skip connection and
  layer normalization, in that order.

  Reads the module-level `hidden_dim` and `num_heads`.

  Args:
    x: Input sequence tensor. For the first block this is the combined
      embedding output; for later blocks it is the previous block's output.
      Its last dimension must equal `hidden_dim` so the skip connections
      line up with the `Dense(hidden_dim)` output.
    padding_mask: Mask passed to the attention layer, or None. A rank-2
      mask (one value per token) is expanded to rank 3 so it is applied
      along the key axis; masks of any other rank are passed through as-is.
    dropout: Dropout rate used inside attention and after each sub-layer.
    activation: Activation of the first feed-forward Dense layer.

  Returns:
    The transformed tensor, with `hidden_dim` as its last dimension.
  """
  key_dim = hidden_dim // num_heads

  # MultiHeadAttention right-aligns a low-rank attention_mask against the
  # (batch, heads, query, key) score tensor. A rank-2 (batch, seq) mask would
  # therefore end up as (1, 1, batch, seq), putting the batch axis on the
  # query axis. Inserting a singleton query axis makes it broadcast as
  # (batch, 1, 1, seq), i.e. a per-key padding mask.
  attention_mask = padding_mask
  if padding_mask is not None and len(padding_mask.shape) == 2:
    attention_mask = keras.ops.expand_dims(padding_mask, axis=1)

  # Multi-head self-attention sub-layer
  residual = x
  x = layers.MultiHeadAttention(num_heads, key_dim, dropout=dropout)(
      query=x, value=x, attention_mask=attention_mask)
  x = layers.Dropout(dropout)(x)
  # Skip connection
  x = x + residual
  x = layers.LayerNormalization()(x)

  # Position-wise feed-forward sub-layer: expand to 4x width, project back
  residual = x
  x = layers.Dense(hidden_dim*4, activation=activation)(x)
  x = layers.Dense(hidden_dim)(x)
  x = layers.Dropout(dropout)(x)
  # Skip connection
  x = x + residual
  x = layers.LayerNormalization()(x)
  return x

In [22]:
# Stack the encoder block num_layers times, then add a hidden_dim-wide tanh
# layer on the first sequence position to complete the backbone model
for _block_index in range(num_layers):
  x = transformer_encoder(x, padding_mask, dropout, activation)

# Only the hidden state at sequence position 0 feeds the final Dense layer.
first_position_state = x[:, 0, :]
pooling_layer = layers.Dense(hidden_dim, activation='tanh')
outputs = pooling_layer(first_position_state)

model = keras.Model(
    inputs=(token_ids, segment_ids, padding_mask),
    outputs=outputs,
)

In [23]:
# Print the layer-by-layer summary of the hand-built backbone model
model.summary()

이 모델의 파라미터 개수는 1억 개가 넘습니다! 이런 모델을 처음부터 훈련하려면 매우 많은 데이터와 시간이 필요하므로, 사전 훈련된 백본 모델을 제공하는 KerasNLP를 이용합니다.

In [24]:
# Download and unpack the aclImdb_v1.tar.gz archive used in the cells below
!gdown 15ZSv_07b3HCKKn08jSDLl4JO4EFy8t-t
!tar -xzf aclImdb_v1.tar.gz
!rm -r aclImdb/train/unsup  # delete the data meant for unsupervised learning


Downloading...
From (original): https://drive.google.com/uc?id=15ZSv_07b3HCKKn08jSDLl4JO4EFy8t-t
From (redirected): https://drive.google.com/uc?id=15ZSv_07b3HCKKn08jSDLl4JO4EFy8t-t&confirm=t&uuid=93a97ce6-194a-43c2-870e-4b0648256df6
To: /content/aclImdb_v1.tar.gz
100% 84.1M/84.1M [00:01<00:00, 54.0MB/s]


In [25]:
# Read the training directory once and split it 80/20 into train/validation
# datasets (fixed seed), then read the test directory as a separate dataset.
train_val_pair = keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    validation_split=0.2,
    subset='both',
    seed=42,
)
train_ds, val_ds = train_val_pair
test_ds = keras.utils.text_dataset_from_directory('aclImdb/test')

Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Using 5000 files for validation.
Found 25000 files belonging to 2 classes.


In [26]:
# Pull a single (text, label) example out of the batched training dataset
single_example = train_ds.unbatch().take(1)
feature, target = single_example.get_single_element()

# Show the first 100 bytes of the review text, then its label
print(feature.numpy()[:100])
print(target.numpy())

b'"Pandemonium" is a horror movie spoof that comes off more stupid than funny. Believe me when I tell '
0


위 코드는 train_ds에서 샘플 하나를 추출해 출력합니다. 출력된 샘플은 IMDB 영화 리뷰 데이터셋에서 부정적인 감성을 가진 텍스트의 예시입니다.
문자열 앞의 b는 바이트 문자열(bytes)임을 나타내고, 0은 이 리뷰가 부정적인 평가임을 나타냅니다.

In [27]:
# Load the "bert_tiny_en_uncased" preset as a classifier with two output classes
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased",num_classes=2)

In [28]:
# Print the summary of the preset-based classifier
classifier.summary()

BERT 모델 중 가장 작은 모델임에도 불구하고 파라미터 개수가 약 4백만 개나 됩니다!

In [31]:
# Fine-tune the BERT model for 5 epochs, validating on val_ds after each epoch.
# No compile() call precedes this fit() in the notebook, so the classifier's
# existing loss/optimizer/metric settings are used as-is.
classifier.fit(train_ds, validation_data=val_ds, epochs=5)

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2396s[0m 4s/step - loss: 0.5976 - sparse_categorical_accuracy: 0.6435 - val_loss: 0.3409 - val_sparse_categorical_accuracy: 0.8556
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2239s[0m 4s/step - loss: 0.3224 - sparse_categorical_accuracy: 0.8684 - val_loss: 0.2949 - val_sparse_categorical_accuracy: 0.8796
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2170s[0m 3s/step - loss: 0.2569 - sparse_categorical_accuracy: 0.8966 - val_loss: 0.3085 - val_sparse_categorical_accuracy: 0.8816
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2147s[0m 3s/step - loss: 0.1970 - sparse_categorical_accuracy: 0.9275 - val_loss: 0.3298 - val_sparse_categorical_accuracy: 0.8806
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2179s[0m 3s/step - loss: 0.1533 - sparse_categorical_accuracy: 0.9457 - val_loss: 0.3238 - val_sparse_categorical_accuracy: 0.

<keras.src.callbacks.history.History at 0x7b8acee35880>

훈련 세트의 정확도가 거의 95%에 도달했습니다. 반면 검증 세트의 정확도는 두 번째 에포크 이후 약 88%에서 더 오르지 않았습니다. (총 3시간 5분 31초 동안 훈련했습니다.. 휴)

In [32]:
# Re-create the classifier from the same preset, this time with a single
# sigmoid output unit. NOTE: this rebinds `classifier`, so the model
# fine-tuned in the earlier cell is no longer reachable through that name.
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased", num_classes=1, activation='sigmoid'
)

In [33]:
# Train the BERT model
# Binary cross-entropy matches the single sigmoid output unit of the classifier.
rmsprop = keras.optimizers.RMSprop(learning_rate=0.001)
classifier.compile(loss='binary_crossentropy', optimizer=rmsprop,
                   metrics=['accuracy'])

# Stop after 3 epochs without improvement and roll back to the best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3,
                                                  restore_best_weights=True)

# Keep the returned History object: the plotting cell at the end of the
# notebook reads `hist.epoch` and `hist.history`.
hist = classifier.fit(train_ds, validation_data=val_ds, epochs=20,
                      callbacks=[early_stopping_cb])

Epoch 1/20
[1m 21/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33:47[0m 3s/step - accuracy: 0.5100 - loss: 0.6965

KeyboardInterrupt: 

In [None]:
# Check the BERT model's performance: plot loss and accuracy per epoch
# matplotlib is imported here because no earlier cell imports it.
import matplotlib.pyplot as plt

# Keras attaches the History of the most recent fit() call to the model, so
# this cell does not depend on the fit() return value having been saved.
hist = classifier.history

# hist.epoch is 0-based; shift to 1-based epoch numbers for the x axis.
epochs = [epoch + 1 for epoch in hist.epoch]

fig, axs = plt.subplots(1, 2, figsize=(12, 4))
# One panel per metric, each showing the training and validation curves.
for ax, metric in zip(axs, ('loss', 'accuracy')):
  ax.plot(epochs, hist.history[metric], label=metric)
  ax.plot(epochs, hist.history['val_' + metric], label='val_' + metric)
  ax.set_xticks(epochs)
  ax.set_xlabel('epoch')
  ax.set_ylabel(metric)
  ax.legend()
plt.show()