In [1]:
import json

import nltk
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding, Input, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


##加入Attention
class Attention(Layer):
    def __init__(self, units):
        super(Attention, self).__init__()
        self.W = Dense(units)
        self.U = Dense(units)
        self.V = Dense(1)

    def call(self, hidden_states):
        # 計算注意力權重 (score)
        score = self.V(tf.nn.tanh(self.W(hidden_states)))
        attention_weights = tf.nn.softmax(score, axis=1)
        # 加權求和，使模型更關注重要詞
        context_vector = tf.reduce_sum(attention_weights * hidden_states, axis=1)
        return context_vector

2025-03-30 11:57:31.229608: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-30 11:57:31.241907: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743307051.255248   35743 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743307051.258569   35743 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743307051.268729   35743 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
with open("./data/yelp_academic_dataset_review.json", encoding="utf-8") as f:
    data = [json.loads(line) for line in f]
df = pd.DataFrame(data)
df = df[["text", "stars"]]
df["label"] = df["stars"].apply(lambda x: 1 if x >= 4 else 0)
df = df[["text", "label"]]
print(df.head())
MAX_WORDS = 20000
MAX_LEN = 200
embedding_dim = 300
nltk.download("punkt")
texts = df["text"].astype(str).tolist()
labels = df["label"]
tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
X = pad_sequences(sequences, maxlen=MAX_LEN)
y = labels.values
embedding_matrix = np.random.randn(MAX_WORDS, embedding_dim) * 0.01  # 隨機初始化

                                                text  label
0  My wife took me here on my birthday for breakf...      1
1  I have no idea why some people give bad review...      1
2  love the gyro plate. Rice is so good and I als...      1
3  Rosie, Dakota, and I LOVE Chaparral Dog Park!!...      1
4  General Manager Scott Petello is a good egg!!!...      1


[nltk_data] Downloading package punkt to
[nltk_data]     /home/chesterxalan/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
import tensorflow as tf

rnn_units = 128
attention_units = 128
embedding_layer = Embedding(
    input_dim=MAX_WORDS,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=MAX_LEN,
    trainable=False,
)
# 架構 LSTM + Attention 模
inputs = Input(shape=(MAX_LEN,))
embedding = embedding_layer(inputs)
lstm_output = LSTM(rnn_units, return_sequences=True)(embedding)
# 加入 Attention 層
attention_output = Attention(attention_units)(lstm_output)
dropout = Dropout(0.5)(attention_output)
outputs = Dense(1, activation="sigmoid")(dropout)
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

I0000 00:00:1743307104.946569   35743 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [None]:
early_stopping = EarlyStopping(
    monitor="val_loss", patience=3, restore_best_weights=True
)
lr_scheduler = ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=2, min_lr=1e-6
)
model.fit(
    X,
    y,
    validation_split=0.2,
    epochs=10,
    batch_size=64,
    callbacks=[early_stopping, lr_scheduler],
)


def predict_review(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN)
    prediction = model.predict(padded_sequence)
    return "Positive" if prediction[0] > 0.5 else "Negative"


print(predict_review("The food was absolutely delicious!"))
print(predict_review("It was the worst experience ever."))

Epoch 1/10
[1m1155/2874[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m16:09[0m 564ms/step - accuracy: 0.6767 - loss: 0.6256