<a href="https://colab.research.google.com/github/i-am-chen/fianlinclass113-1/blob/main/fianal_3_3(SGD_).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Embedding, LSTM, Dense, Input, Bidirectional, MultiHeadAttention, LayerNormalization
)
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import RMSprop, SGD, Adam

In [2]:
# Dataset loading and preprocessing
# Load the IMDB dataset with the vocabulary limited through num_words=10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Bring every sequence to the same length (maxlen); padding is placed
# after the tokens because padding='post'
maxlen = 300
x_train, x_test = (
    pad_sequences(sequences, maxlen=maxlen, padding='post')
    for sequences in (x_train, x_test)
)

# Split off 20% of the training data as a validation set; the fixed
# random_state keeps the split reproducible between runs
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Build the LSTM + Transformer model
def create_lstm_transformer_model(vocab_size, embed_dim, lstm_units, transformer_units, num_heads):
    """Build an uncompiled BiLSTM + self-attention binary classifier.

    The network embeds integer token ids, runs them through a bidirectional
    LSTM that returns the full sequence, applies multi-head self-attention
    with a residual connection and layer normalization, averages over the
    sequence axis, and ends in a single sigmoid unit.

    Args:
        vocab_size: Size of the embedding vocabulary (``input_dim``).
        embed_dim: Dimensionality of the token embeddings (``output_dim``).
        lstm_units: Units of the LSTM wrapped by ``Bidirectional``.
        transformer_units: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.

    Returns:
        A Keras ``Model`` mapping token-id sequences to one sigmoid output.
        The model is not compiled; the caller chooses optimizer and loss.
    """
    # Input layer: variable-length sequences of token ids
    inputs = Input(shape=(None,))

    # Embedding layer
    x = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)

    # Bidirectional LSTM; return_sequences=True keeps one vector per timestep
    # so the attention layer has a sequence to attend over
    lstm_out = Bidirectional(LSTM(lstm_units, return_sequences=True))(x)

    # Transformer-style block: self-attention (query and value are both
    # lstm_out), then a residual connection and layer normalization
    attention_out = MultiHeadAttention(num_heads=num_heads, key_dim=transformer_units)(lstm_out, lstm_out)
    residual = tf.keras.layers.Add()([attention_out, lstm_out])
    norm_out = LayerNormalization()(residual)

    # Average pooling over the sequence axis. The built-in layer replaces a
    # Lambda wrapping tf.reduce_mean(axis=1): it computes the same mean here
    # (no mask is produced upstream) but does not embed a Python lambda in
    # the model, which would get in the way of saving and reloading it.
    pooled_out = tf.keras.layers.GlobalAveragePooling1D()(norm_out)

    # Fully connected output layer
    outputs = Dense(1, activation='sigmoid')(pooled_out)

    # Assemble the model
    return Model(inputs, outputs)

# Model hyperparameters
vocab_size = 10000
embed_dim = 128
lstm_units = 64
transformer_units = 128
num_heads = 8

# Build the model
# NOTE(review): lstm_units and num_heads are passed as the literals 256 and 4
# below, so the lstm_units = 64 / num_heads = 8 values declared above are NOT
# what the model is built with — confirm which values are intended.
model = create_lstm_transformer_model(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    lstm_units=256,
    transformer_units=128,
    num_heads=4,
)

# Use the SGD optimizer (with momentum) and binary cross-entropy loss
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=SGD(learning_rate=0.001, momentum=0.9),
)

# Stop once val_loss has not improved for 3 epochs and roll back to the
# best weights seen
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(x_val, y_val),
    callbacks=[early_stopping],
)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"測試準確率: {test_acc:.4f}")



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 77ms/step - accuracy: 0.5018 - loss: 0.7529 - val_accuracy: 0.4874 - val_loss: 0.8453
Epoch 2/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 79ms/step - accuracy: 0.5104 - loss: 0.7225 - val_accuracy: 0.5298 - val_loss: 0.6878
Epoch 3/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 78ms/step - accuracy: 0.5158 - loss: 0.7018 - val_accuracy: 0.4876 - val_loss: 0.7136
Epoch 4/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 79ms/step - accuracy: 0.5206 - loss: 0.7078 - val_accuracy: 0.5126 - val_loss: 0.6965
Epoch 5/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 79ms/step - accuracy: 0.5256 - loss: 0.6980 - val_accuracy: 0.5708 - val_loss: 0.6765
Epoc