In [1]:
import os
import numpy as np
from PIL import Image
import torch
from torchvision import transforms
import tensorflow as tf

In [2]:


# Cấu hình thiết bị
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Biến đổi ảnh cho MobileNetV2 input
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

data_dir = "C:/Users/Loc/Desktop/Do_An_Co_So/dataset"
save_dir = "C:/Users/Loc/Desktop/Do_An_Co_So/code/saved_mobilenetv2_embeddings"
os.makedirs(save_dir, exist_ok=True)

# Tạo model MobileNetV2 feature extractor (tensorflow)
base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(224,224,3))

for person in os.listdir(data_dir):
    person_path = os.path.join(data_dir, person)
    if not os.path.isdir(person_path):
        continue

    embeddings, labels = [], []
    for img_name in os.listdir(person_path):
        if img_name.lower().endswith(('.jpg', '.png')):
            try:
                img_path = os.path.join(person_path, img_name)
                img = Image.open(img_path).convert("RGB")
                img = img.resize((224, 224))
                img_array = np.array(img).astype(np.float32)

                # Chuẩn hóa theo MobileNetV2
                img_preprocessed = tf.keras.applications.mobilenet_v2.preprocess_input(img_array)
                img_input = np.expand_dims(img_preprocessed, axis=0)  # shape (1,224,224,3)

                # Trích đặc trưng
                features = base_model.predict(img_input)  # (1,1280)
                embeddings.append(features[0])
                labels.append(person)
            except Exception as e:
                print(f"Lỗi {img_name}: {e}")

    if embeddings:
        np.savez(os.path.join(save_dir, f"{person}.npz"),
                 features=np.array(embeddings),
                 labels=np.array(labels))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [6]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import numpy as np
import os
# Mô hình


# Load dữ liệu feature đã lưu
X, y = [], []
save_dir = "C:/Users/Loc/Desktop/Do_An_Co_So/code/saved_mobilenetv2_embeddings"
for file in os.listdir(save_dir):
    if file.endswith(".npz"):
        data = np.load(os.path.join(save_dir, file))
        X.extend(data['features'])
        y.extend(data['labels'])

X = np.array(X)
y = np.array(y)

# Encode nhãn
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)

# Chia train/test
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.3, random_state=42)

# MLP phân loại
model = Sequential([
    Dense(256, activation='relu', input_shape=(X.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(y_cat.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=30, batch_size=32)

# Lưu model và encoder
model.save("C:/Users/Loc/Desktop/Do_An_Co_So/code/code_final/mobilenet/mlp_mobilenetv2.h5")
import pickle
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)


Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - accuracy: 0.2732 - loss: 2.8143 - val_accuracy: 0.9549 - val_loss: 0.5909
Epoch 2/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.8979 - loss: 0.5145 - val_accuracy: 1.0000 - val_loss: 0.0471
Epoch 3/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9697 - loss: 0.1322 - val_accuracy: 1.0000 - val_loss: 0.0135
Epoch 4/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9887 - loss: 0.0543 - val_accuracy: 1.0000 - val_loss: 0.0036
Epoch 5/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9980 - loss: 0.0298 - val_accuracy: 1.0000 - val_loss: 0.0015
Epoch 6/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 1.0000 - loss: 0.0178 - val_accuracy: 1.0000 - val_loss: 8.6278e-04
Epoch 7/30
[1m21/21[0m [32m━━━━━━━━━━━



In [7]:
# Nhận diện 

import cv2
import numpy as np
import tensorflow as tf
import time
import pickle
from collections import Counter
from PIL import Image
from facenet_pytorch import MTCNN

# Thiết bị và model trích đặc trưng
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(image_size=224, margin=20, device=device)

# Load feature extractor (MobileNetV2)
base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(224,224,3))

# Load model phân loại MLP
mlp_model = tf.keras.models.load_model("C:/Users/Loc/Desktop/Do_An_Co_So/code/code_final/mobilenet/mobilenetv2.h5")

# Load encoder nhãn
with open("C:/Users/Loc/Desktop/Do_An_Co_So/code/code_final/mobilenet/label_encoder.pkl", "rb") as f:
    le = pickle.load(f)
class_names = le.classes_

confidence_threshold = 0.6
prediction_results = []

cap = cv2.VideoCapture(0)
print("[INFO] Webcam đang mở. Nhận diện trong 20 giây...")
start_time = time.time()
duration = 20

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.GaussianBlur(frame, (5, 5), 0)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_pil = Image.fromarray(rgb_frame)

    boxes, _ = mtcnn.detect(img_pil)

    if boxes is not None:
        for box in boxes:
            x1, y1, x2, y2 = [int(max(0, b)) for b in box]
            x2 = min(x2, frame.shape[1])
            y2 = min(y2, frame.shape[0])
            face = frame[y1:y2, x1:x2]
            if face.size == 0:
                continue

            try:
                face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face_resized = cv2.resize(face_rgb, (224, 224))
                face_array = face_resized.astype(np.float32)

                # Chuẩn hóa input giống lúc train
                face_preprocessed = tf.keras.applications.mobilenet_v2.preprocess_input(face_array)
                face_input = np.expand_dims(face_preprocessed, axis=0)

                # Trích đặc trưng
                feature_vector = base_model.predict(face_input)

                # Dự đoán
                pred = mlp_model.predict(feature_vector)
                pred_index = np.argmax(pred)
                confidence = pred[0][pred_index]

                if confidence >= confidence_threshold:
                    label = class_names[pred_index]
                else:
                    label = "Unknown"

                prediction_results.append(label)

                color = (0, 255, 0) if label != "Unknown" else (0, 0, 255)
                text = f"{label} ({confidence*100:.2f}%)" if label != "Unknown" else "Unknown"
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            except Exception as e:
                print("Lỗi xử lý khuôn mặt:", e)
                continue

    cv2.imshow("Face Recognition (MobileNetV2 + MLP)", frame)

    if time.time() - start_time > duration:
        break
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

# In kết quả
if prediction_results:
    most_common_label, count = Counter(prediction_results).most_common(1)[0]
    print(f"\n--- Kết quả nhận diện ---\n{most_common_label} xuất hiện {count} lần trong {len(prediction_results)} lần nhận dạng")
else:
    print("\nKhông nhận dạng được khuôn mặt nào.")




[INFO] Webcam đang mở. Nhận diện trong 20 giây...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [3