In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
import numpy as np
from EDA.DataReader import DataReader
from keras.src.legacy.preprocessing.text import Tokenizer
from keras.src.utils import pad_sequences
from sklearn.metrics import accuracy_score
import torch 
from torcheval.metrics.functional import multiclass_f1_score

In [2]:
# Load the dataset splits.
# Per the original note, an empty name selects the foody_raw dataset;
# to switch to the UIT dataset, use DataReader("UIT_VFSC") instead.
dataReader = DataReader("")
df_train, df_test, df_total = (
    dataReader.df_train,
    dataReader.df_test,
    dataReader.df_total,
)

# Number of classes = highest label id found in the combined frame, plus one
# (presumably labels are contiguous integers starting at 0 -- TODO confirm).
n_labels = int(df_total["label"].max().item() + 1)

In [3]:
# Train and evaluate a 1D-CNN text classifier on the loaded splits.
# Reads df_train, df_test and n_labels defined in the previous cell.

# Seed Python, NumPy and the TensorFlow backend so repeated runs of this cell
# give comparable numbers.
tf.keras.utils.set_random_seed(42)

# Text preprocessing: the vocabulary is fitted on the training corpus only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_train["corpus"])
# word_index starts at 1 (0 is the padding value), hence the +1 for the
# embedding table size.
max_words = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(df_train["corpus"])
X_train = pad_sequences(sequences)
y_train = np.array(df_train["label"])

# Pad/truncate the test sequences to the SAME length as the training matrix.
# Without maxlen, pad_sequences would use the longest test sequence, giving
# the model inputs of a different width than it was trained on.
X_Test = pad_sequences(
    tokenizer.texts_to_sequences(df_test["corpus"]),
    maxlen=X_train.shape[1],
)
y_test = np.array(df_test["label"])

# Build the CNN model: embedding -> 1D convolution -> global max pooling ->
# dense classifier head with dropout.
model = tf.keras.models.Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_labels, activation='softmax')
])

# Compile the model. Labels are integer class ids, so the sparse variant of
# categorical cross-entropy is used.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model, then predict class probabilities for the test set.
model.fit(X_train, y_train, epochs=10)
y_pred = model.predict(X_Test)
y_pred_classes = np.argmax(y_pred, axis=1)  # most probable class per sample

acc = accuracy_score(y_test, y_pred_classes)

print(f"Accuracy: {acc:.4f}")

# torcheval expects torch tensors: predictions first, ground truth second.
y_pred_classes = torch.tensor(y_pred_classes).type(torch.long)
label = torch.tensor(df_test["label"].tolist()).type(torch.long)
mf1 = multiclass_f1_score(y_pred_classes, label, num_classes=n_labels, average='macro')
wf1 = multiclass_f1_score(y_pred_classes, label, num_classes=n_labels, average='weighted')
# Report both averages explicitly; taking max(macro, weighted) would mix two
# different metrics under one label and always show the more flattering one.
print(f"Macro F1-Score: {mf1.item():.4f}")
print(f"Weighted F1-Score: {wf1.item():.4f}")

Epoch 1/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 219ms/step - accuracy: 0.7947 - loss: 0.4314
Epoch 2/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 226ms/step - accuracy: 0.9255 - loss: 0.2048
Epoch 3/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 239ms/step - accuracy: 0.9613 - loss: 0.1147
Epoch 4/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 282ms/step - accuracy: 0.9817 - loss: 0.0536
Epoch 5/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 272ms/step - accuracy: 0.9907 - loss: 0.0294
Epoch 6/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 268ms/step - accuracy: 0.9910 - loss: 0.0258
Epoch 7/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 271ms/step - accuracy: 0.9933 - loss: 0.0189
Epoch 8/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 238ms/step - accuracy: 0.9946 - loss: 0.0148
Epoch 9/