In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, Dropout, LSTM
import numpy as np
from EDA.DataReader import DataReader
from keras.src.legacy.preprocessing.text import Tokenizer
from keras.src.utils import pad_sequences
from sklearn.metrics import accuracy_score
import torch 
from torcheval.metrics.functional import multiclass_f1_score

In [3]:
# Load the "UIT_VFSC" dataset through the project's DataReader.
# (The original note says passing "" instead selects the foody_raw set.)
dataReader = DataReader("UIT_VFSC")
df_train, df_test, df_total = (
    dataReader.df_train,
    dataReader.df_test,
    dataReader.df_total,
)
# Number of classes is taken as (largest label value + 1),
# i.e. labels are assumed to be 0-based ids -- TODO confirm against DataReader.
n_labels = int(df_total["label"].max().item() + 1)

In [6]:
# Text preprocessing: build the vocabulary from the training corpus only,
# then convert every document into a fixed-length sequence of word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_train["corpus"])
# Tokenizer word ids start at 1; id 0 is the value pad_sequences fills with,
# hence the +1 when sizing the embedding table.
max_words = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(df_train["corpus"])
X_train = pad_sequences(sequences)
y_train = np.array(df_train["label"])
# Pad (or truncate) the test set to the *training* sequence length.
# Without an explicit maxlen the test matrix is padded to the longest test
# document, so the amount of leading zero padding the un-masked LSTM sees at
# inference time would differ from what it saw during training.
X_Test = pad_sequences(
    tokenizer.texts_to_sequences(df_test["corpus"]),
    maxlen=X_train.shape[1],
)
y_test = np.array(df_test["label"])
# Build the LSTM classifier.
# Seed Python, NumPy and the backend first so that weight initialisation,
# dropout masks and the shuffling done by fit() are repeatable when the
# notebook is re-run from a fresh kernel.
tf.keras.utils.set_random_seed(42)
model = tf.keras.models.Sequential([
    Embedding(input_dim=max_words, output_dim=128),  # word id -> 128-d vector
    LSTM(128, dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_labels, activation='softmax'),  # one probability per class
])

# Compile the model. Labels are integer class ids (not one-hot),
# which is why the sparse variant of categorical cross-entropy is used.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model, then score it on the test split.
model.fit(X_train, y_train, epochs=10)
y_pred = model.predict(X_Test)
# y_pred has one column per class (softmax output); keep the index of the
# highest-scoring column as the predicted class id.
y_pred_classes = y_pred.argmax(axis=1)
acc = accuracy_score(y_true=y_test, y_pred=y_pred_classes)

print(f"Accuracy: {acc:.4f}")

# Macro- and weighted-average F1 on the test split, computed with torcheval.
# multiclass_f1_score takes (input, target): predictions first, labels second.
# as_tensor is used so that re-running this cell on an already-converted
# tensor does not trigger torch's copy-construct warning.
y_pred_classes = torch.as_tensor(y_pred_classes, dtype=torch.long)
label = torch.tensor(df_test["label"].tolist(), dtype=torch.long)
mf1 = multiclass_f1_score(y_pred_classes, label, num_classes=n_labels, average='macro')
wf1 = multiclass_f1_score(y_pred_classes, label, num_classes=n_labels, average='weighted')
# Report each average under its own name. Printing max(mf1, wf1) under a
# generic "F1-Score" label hid which metric was shown and always picked the
# more flattering one.
print(f"Macro F1-Score: {mf1.item():.4f}")
print(f"Weighted F1-Score: {wf1.item():.4f}")

Epoch 1/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 108ms/step - accuracy: 0.7200 - loss: 0.6359
Epoch 2/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 125ms/step - accuracy: 0.9183 - loss: 0.2662
Epoch 3/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 115ms/step - accuracy: 0.9287 - loss: 0.2252
Epoch 4/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 182ms/step - accuracy: 0.9456 - loss: 0.1802
Epoch 5/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 231ms/step - accuracy: 0.9475 - loss: 0.1713
Epoch 6/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 205ms/step - accuracy: 0.9510 - loss: 0.1555
Epoch 7/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 220ms/step - accuracy: 0.9587 - loss: 0.1336
Epoch 8/10
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 224ms/step - accuracy: 0.9657 - loss: 0.1072
Epoch 9/10
[1m3