In [1]:
import cv2
import torch
import numpy as np
import os
from facenet_pytorch import InceptionResnetV1, MTCNN
from PIL import Image
import tensorflow as tf
import time
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# NHẬN DIỆN MOBILENETV2

# Thiết bị
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model mobilenetv2
model = tf.keras.models.load_model("mobilenetv2.h5")

# Load 
mtcnn = MTCNN(image_size=160, margin=20, device=device)
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Load danh sách nhãn
embedding_data = np.load("data_split/train_embeddings.npz")
class_names = sorted(np.unique(embedding_data["labels"]))

# Biến đếm kết quả nhận diện
prediction_results = []

# Mở webcam
cap = cv2.VideoCapture(0)
print("[INFO] Webcam đang mở. Nhận diện trong 10 giây...")

start_time = time.time()
duration = 10  # giây

while True:
    ret, frame = cap.read()
    if not ret:
        break

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_pil = Image.fromarray(rgb_frame)

    # Dò khuôn mặt
    boxes, _ = mtcnn.detect(img_pil)

    if boxes is not None:
        for box in boxes:
            x1, y1, x2, y2 = [int(b) for b in box]
            face = frame[y1:y2, x1:x2]
            if face.size == 0:
                continue
            try:
                face_pil = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                face_tensor = mtcnn(face_pil)
                if face_tensor is None:
                    continue
                face_tensor = face_tensor.unsqueeze(0).to(device)

                with torch.no_grad():
                    embedding = facenet(face_tensor).cpu().numpy()

                pred = model.predict(embedding)
                pred_index = np.argmax(pred)
                confidence = pred[0][pred_index]

                label = class_names[pred_index]
                prediction_results.append(label)

                # Hiển thị (tuỳ chọn, có thể bỏ nếu không cần xem)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{label} ({confidence*100:.2f}%)", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)
            except Exception as e:
                print("Lỗi xử lý khuôn mặt:", e)
                continue

    cv2.imshow("Face Recognition (MobileNetV2)", frame)

    # Thoát sau 10 giây
    if time.time() - start_time > duration:
        break

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

# kết quả nhận diện
if prediction_results:
    most_common_label, count = Counter(prediction_results).most_common(1)[0]
    print(f"\n Kết quả nhận diện: {most_common_label} (xuất hiện {count} lần trong {len(prediction_results)} lần nhận dạng)")
else:
    print("\n Không nhận diện được khuôn mặt nào.")




[INFO] Webcam đang mở. Nhận diện trong 10 giây...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m 