In [7]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
import numpy as np
from EDA.DataReader import DataReader


In [8]:
from keras.src.legacy.preprocessing.text import Tokenizer
from keras.src.utils import pad_sequences
from sklearn.metrics import accuracy_score, f1_score

# Preprocessing: load the data splits and turn raw text into padded integer
# sequences that the embedding layer can consume.
# dataReader = DataReader("UIT_VFSC") # UIT
dataReader = DataReader("") # dataset foody_raw
df_train = dataReader.df_train
df_test = dataReader.df_test

# Text preprocessing: the vocabulary is fitted on the training corpus only,
# so the test split never influences the word index.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_train["corpus"])
# +1 because the Tokenizer never assigns index 0 to a word (it is the padding value).
max_words = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(df_train["corpus"])
X_train = pad_sequences(sequences)
y_train = np.array(df_train["label"])

# Pad (or truncate) the test sequences to the *training* sequence length.
# Without maxlen, pad_sequences would size X_Test to the longest test sample,
# giving the model inputs of a different width than it was trained on.
max_len = X_train.shape[1]
X_Test = pad_sequences(tokenizer.texts_to_sequences(df_test["corpus"]), maxlen=max_len)
y_test = np.array(df_test["label"])
# Build the CNN model layer by layer: embedding -> 1D convolution ->
# global max pooling -> dense head with dropout -> softmax output.
model = tf.keras.models.Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128))
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# model.add(Dense(3, activation='softmax'))  # Multiclass UIT
model.add(Dense(2, activation='softmax'))  # 2 classes foody

# Compile the model. Sparse categorical cross-entropy is used, so labels are
# passed as integer class ids rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model for 10 epochs, then run inference on the test inputs.
model.fit(x=X_train, y=y_train, epochs=10)
y_pred = model.predict(x=X_Test)
# Each row of y_pred holds the softmax outputs; take the index of the largest one.
y_pred_classes = y_pred.argmax(axis=1)

# Accuracy of the predicted classes against the test labels.
acc = accuracy_score(y_true=y_test, y_pred=y_pred_classes)
print(f"Accuracy: {acc:.4f}")

# Multiclass (UIT) variant, kept for reference:
# mf1 = f1_score(df_test["label"], y_pred_classes, average='macro')
# wf1 = f1_score(df_test["label"], y_pred_classes, average='weighted')
# print(f"F1-Score: {max(mf1, wf1):.4f}")

# Two-class (foody) variant: f1_score with its default averaging settings.
f1 = f1_score(y_true=y_test, y_pred=y_pred_classes)
print(f"F1 Score: {f1:.4f}")

Epoch 1/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 170ms/step - accuracy: 0.7811 - loss: 0.4383
Epoch 2/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 248ms/step - accuracy: 0.9272 - loss: 0.1995
Epoch 3/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 310ms/step - accuracy: 0.9617 - loss: 0.1131
Epoch 4/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 302ms/step - accuracy: 0.9818 - loss: 0.0558
Epoch 5/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 203ms/step - accuracy: 0.9907 - loss: 0.0280
Epoch 6/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 183ms/step - accuracy: 0.9918 - loss: 0.0236
Epoch 7/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 182ms/step - accuracy: 0.9944 - loss: 0.0163
Epoch 8/10
[1m957/957[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 246ms/step - accuracy: 0.9951 - loss: 0.0144
Epoch 9/