In [9]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter
import joblib
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt



# Load the precomputed embeddings (X) and their labels (y).
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open,
# so read the arrays inside a `with` block to release the handle immediately.
with np.load("train_embeddings.npz") as train:
    X_train, y_train = train["X"], train["y"]

with np.load("test_embeddings.npz") as test:
    X_test, y_test = test["X"], test["y"]



In [10]:
# Initialise and train the SVM classifier on the embeddings.
# NOTE(review): probability=True makes fitting slower; presumably it is kept
# so the saved model can expose predict_proba downstream — confirm.
svm = SVC(kernel='linear', probability=True, class_weight='balanced', random_state=42)
svm.fit(X_train, y_train)

# Persist the fitted model.
joblib.dump(svm, "model_svm.joblib")

# Predict the test set once and reuse the result for every metric below
# (the previous version ran the prediction and the accuracy computation twice).
y_pred = svm.predict(X_test)
svm_acc = accuracy_score(y_test, y_pred)

# Evaluate.
print("\nKết quả mô hình SVM:")
print("Accuracy:", svm_acc)
print(classification_report(y_test, y_pred, zero_division=0))

# Error statistics: collect (true, predicted) pairs of the misclassified samples.
wrong = [(true, pred) for true, pred in zip(y_test, y_pred) if true != pred]
print(f"[!] Số mẫu sai: {len(wrong)}")
print("Một số lỗi phổ biến:", Counter(wrong).most_common(5))


Kết quả mô hình SVM:
Accuracy: 1.0
               precision    recall  f1-score   support

     Duy Khôi       1.00      1.00      1.00         2
        Dũng       1.00      1.00      1.00         2
      Dương       1.00      1.00      1.00        16
          Hà       1.00      1.00      1.00         2
       Hiếu       1.00      1.00      1.00        24
        Hưng       1.00      1.00      1.00         2
         Khôi       1.00      1.00      1.00         2
        Lành       1.00      1.00      1.00         6
         Linh       1.00      1.00      1.00        36
        Luân       1.00      1.00      1.00         3
       Nghĩa       1.00      1.00      1.00         3
       Nguyên       1.00      1.00      1.00         9
Nhật Tiến       1.00      1.00      1.00         2
         Phú       1.00      1.00      1.00         5
     Phương       1.00      1.00      1.00        18
        Quang       1.00      1.00      1.00        21
        Quân       1.00    

In [11]:
# Initialise and train the Random Forest classifier on the embeddings.
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1, class_weight='balanced')
rf.fit(X_train, y_train)

# Persist the fitted model.
joblib.dump(rf, "model_randomforest.joblib")

# Predict the test set once and reuse the result for every metric below
# (the previous version ran the prediction and the accuracy computation twice).
y_pred = rf.predict(X_test)
rf_acc = accuracy_score(y_test, y_pred)

# Evaluate.
print("\n Kết quả mô hình Random Forest:")
print("Accuracy:", rf_acc)
print(classification_report(y_test, y_pred, zero_division=0))

# Error statistics: collect (true, predicted) pairs of the misclassified samples.
wrong = [(true, pred) for true, pred in zip(y_test, y_pred) if true != pred]
print(f"[!] Số mẫu sai: {len(wrong)}")
print("Một số lỗi phổ biến:", Counter(wrong).most_common(5))


 Kết quả mô hình Random Forest:
Accuracy: 0.9951690821256038
               precision    recall  f1-score   support

     Duy Khôi       1.00      1.00      1.00         2
        Dũng       1.00      1.00      1.00         2
      Dương       1.00      1.00      1.00        16
          Hà       1.00      1.00      1.00         2
       Hiếu       0.96      1.00      0.98        24
        Hưng       1.00      1.00      1.00         2
         Khôi       1.00      0.50      0.67         2
        Lành       1.00      1.00      1.00         6
         Linh       1.00      1.00      1.00        36
        Luân       1.00      1.00      1.00         3
       Nghĩa       1.00      1.00      1.00         3
       Nguyên       1.00      1.00      1.00         9
Nhật Tiến       1.00      1.00      1.00         2
         Phú       1.00      1.00      1.00         5
     Phương       1.00      1.00      1.00        18
        Quang       1.00      1.00      1.00        21
  

In [6]:
import tensorflow as tf
import os
# Image-pipeline settings shared by the dataset-loading and model cells.
# Directories holding the train / test image splits.
train_dir, test_dir = "data_split/train", "data_split/test"
# Size every image is resized to when loaded, and the number of images per batch.
img_size, batch_size = (160, 160), 32

In [7]:
# Load the images into tf.data datasets; labels are inferred from the
# sub-directory names of each split.
# NOTE(review): the recorded run of this cell failed with a UnicodeDecodeError
# (byte 0xf4). Presumably a folder or file name under data_split/ is not
# UTF-8 encoded on disk — confirm, and rename/re-encode the entries if so.
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical'  # one-hot labels, matching the softmax output
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical'
)

# Each call derives its label indices from its own directory listing, so a
# class folder missing from one split would silently shift the one-hot
# columns between training and validation. Fail loudly instead.
if train_ds.class_names != val_ds.class_names:
    raise ValueError(
        "Class folders differ between train and test splits: "
        f"{sorted(set(train_ds.class_names) ^ set(val_ds.class_names))}"
    )

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf4 in position 85: invalid continuation byte

In [8]:
# Performance tuning: cache the loaded batches, reshuffle them every epoch,
# and overlap input loading with training.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(100).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# MobileNetV2 backbone (ImageNet weights, frozen) + a small classification head.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=img_size + (3,),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False

# The datasets yield raw pixel values in [0, 255], while the pretrained
# MobileNetV2 weights expect inputs scaled to [-1, 1]. Rescale inside the
# model so that the saved file still accepts raw images.
inputs = tf.keras.Input(shape=img_size + (3,))
x = tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0)(inputs)
# training=False keeps the frozen backbone in inference mode.
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# One output unit per class: the last dimension of the one-hot label tensor.
output = tf.keras.layers.Dense(train_ds.element_spec[1].shape[-1], activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=output)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the classification head.
history = model.fit(train_ds, validation_data=val_ds, epochs=10)
# Validation accuracy of the final epoch, used by the comparison chart.
mobilenet_acc = history.history["val_accuracy"][-1]

print(f"Accuracy MobileNetV2: {mobilenet_acc:.4f}")
model.save("mobilenetv2_face_recognition.h5")


NameError: name 'train_ds' is not defined

In [None]:
# Bar chart comparing the accuracy of the three models.
models = ['SVM', 'Random Forest', 'MobileNetV2']
accuracies = [svm_acc, rf_acc, mobilenet_acc]

plt.figure(figsize=(8, 5))
bars = plt.bar(models, accuracies, color=["skyblue", "lightgreen", "salmon"])
# Leave headroom above 1.0: the percentage labels sit 0.01 above each bar, so
# with an upper limit of exactly 1 the label of a 100 % bar would fall outside
# the axes and collide with the title.
plt.ylim(0, 1.1)
plt.title("So sánh độ chính xác giữa các mô hình")
plt.ylabel("Accuracy")

# Annotate every bar with its accuracy as a percentage, centred above the bar.
for bar, acc in zip(bars, accuracies):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f"{acc:.2%}", ha='center')

plt.tight_layout()
plt.show()