In [None]:
# Crop the faces annotated in LabelImg (Pascal VOC XML) out of the source images.
import os
import cv2
import xml.etree.ElementTree as ET

def _clamped_box(bbox, width, height):
    """Return (xmin, ymin, xmax, ymax) clamped to the image, or None if unusable."""
    try:
        # int(float(...)) also accepts coordinates written as e.g. "12.0".
        xmin, ymin, xmax, ymax = (
            int(float(bbox.findtext(tag)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
    except (AttributeError, TypeError, ValueError):
        # Missing <bndbox>, missing coordinate element, or non-numeric text.
        return None

    # Negative indices would wrap around under numpy slicing, so clamp first.
    xmin, xmax = max(0, xmin), min(width, xmax)
    ymin, ymax = max(0, ymin), min(height, ymax)
    if xmax <= xmin or ymax <= ymin:
        return None
    return xmin, ymin, xmax, ymax


def crop_faces(images_dir, labels_dir, output_dir):
    """Crop every annotated object out of its image and save it per class name.

    For each split ('train', 'val') this reads the Pascal-VOC-style XML files
    in ``labels_dir/<split>``, loads the image named by each file's
    ``<filename>`` element from ``images_dir/<split>`` and writes one JPEG per
    ``<object>`` into ``output_dir/<split>/<object name>/``.

    Args:
        images_dir: Directory containing the ``train`` / ``val`` image folders.
        labels_dir: Directory containing the ``train`` / ``val`` XML folders.
        output_dir: Directory that receives the crops (created if missing).

    Notes:
        * A split without a label directory, an annotation whose image cannot
          be read, and objects with a missing name or an unusable box are
          skipped with a printed message instead of aborting the run.
        * Boxes are clamped to the image bounds before cropping.
        * Crops are named ``<image stem>_<xmin>_<ymin>_<xmax>_<ymax>.jpg`` so
          that different boxes of the same image never overwrite each other.
    """
    os.makedirs(output_dir, exist_ok=True)

    for split in ['train', 'val']:
        img_path = os.path.join(images_dir, split)
        lbl_path = os.path.join(labels_dir, split)
        out_path = os.path.join(output_dir, split)
        os.makedirs(out_path, exist_ok=True)

        if not os.path.isdir(lbl_path):
            print(f"Skipping split '{split}': no label directory at {lbl_path}")
            continue

        for label_file in sorted(os.listdir(lbl_path)):
            if not label_file.endswith('.xml'):
                continue
            root = ET.parse(os.path.join(lbl_path, label_file)).getroot()
            filename = root.findtext('filename')
            if not filename:
                print(f"Skipping {label_file}: no <filename> element")
                continue

            label_img_path = os.path.join(img_path, filename)
            # cv2.imread signals failure by returning None rather than raising.
            img = cv2.imread(label_img_path)
            if img is None:
                print(f"Skipping {label_file}: could not read image {label_img_path}")
                continue

            height, width = img.shape[:2]
            # Drop the source extension so crops are not named "photo.jpg....jpg".
            stem = os.path.splitext(os.path.basename(filename))[0]

            for obj in root.iter('object'):
                name = (obj.findtext('name') or '').strip()
                box = _clamped_box(obj.find('bndbox'), width, height)
                if not name or box is None:
                    print(f"Skipping object with missing name or unusable box in {label_file}")
                    continue

                xmin, ymin, xmax, ymax = box
                face_crop = img[ymin:ymax, xmin:xmax]

                person_dir = os.path.join(out_path, name)
                os.makedirs(person_dir, exist_ok=True)
                output_file = os.path.join(
                    person_dir, f"{stem}_{xmin}_{ymin}_{xmax}_{ymax}.jpg"
                )
                # cv2.imwrite returns False (no exception) when the write fails.
                if not cv2.imwrite(output_file, face_crop):
                    print(f"Failed to write {output_file}")
    print(f"Face crops saved to {output_dir}")

# Source images, LabelImg XML annotations, and the destination for the crops.
images_dir, labels_dir, output_dir = (
    r'C:\object_detection\py_T0R\datasets\face_worker\images',
    r'C:\object_detection\py_T0R\datasets\face_worker\labels',
    r'C:\object_detection\py_T0R\datasets\face_worker\cropped_faces',
)
# Run the cropping pass over the 'train' and 'val' splits.
crop_faces(images_dir=images_dir, labels_dir=labels_dir, output_dir=output_dir)

In [None]:
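# Alternative approach: embed the cropped faces with InceptionResnetV1 (FaceNet); MTCNN is only used later, in the realtime detection cell.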
from facenet_pytorch import InceptionResnetV1
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import numpy as np
import os
import pickle

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# Pretrained embedding network; .eval() because it is used for inference only.
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Input: one sub-folder per person (ImageFolder layout). Output: pickle file.
dataset_path = r'C:\object_detection\py_T0R\datasets\face_worker\cropped_faces\train'
embedding_file = r'C:\object_detection\py_T0R\datasets\face_worker\face_embeddings.pkl'

# Resize every crop to 160x160 and map pixel values from [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

dataset = datasets.ImageFolder(dataset_path, transform=transform)
# shuffle=False keeps embeddings in dataset order; batching only speeds up
# inference and does not change which embedding belongs to which label.
loader = DataLoader(dataset, batch_size=32, shuffle=False)

embeddings = []
labels = []

with torch.no_grad():
    for img_batch, label_batch in loader:
        batch_embeddings = model(img_batch.to(device)).cpu().numpy()
        # One embedding row per image, paired with its class (folder) name.
        embeddings.extend(batch_embeddings)
        labels.extend(dataset.classes[idx] for idx in label_batch.tolist())

# Stored as {'embeddings': (N, D) array, 'labels': (N,) array of names}.
with open(embedding_file, 'wb') as f:
    pickle.dump({'embeddings': np.array(embeddings), 'labels': np.array(labels)}, f)

print(f"✅ Saved {len(embeddings)} face embeddings to:\n{embedding_file}")

In [None]:
#realtime detection main code
import os
import cv2
import torch
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from facenet_pytorch import MTCNN, InceptionResnetV1
import warnings

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Running on:", device)

warnings.filterwarnings("ignore")

# Cosine similarity above which a detected face is reported as a known person.
SIMILARITY_THRESHOLD = 0.75

# Load known embeddings
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load an embeddings file that you generated yourself.
with open(r'C:\object_detection\py_T0R\datasets\face_worker\face_embeddings.pkl', 'rb') as f:
    data = pickle.load(f)
known_embeddings = data['embeddings']
known_labels = data['labels']
if len(known_embeddings) == 0:
    raise RuntimeError("The embeddings file contains no known faces.")

# Initialize face detector and recognizer
mtcnn = MTCNN(keep_all=True, device=device)
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Raise instead of exit(): exit() would shut down the notebook kernel.
        raise RuntimeError("Could not open webcam.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        # OpenCV delivers BGR frames; the detector is fed RGB.
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, _ = mtcnn.detect(img_rgb)

        # Reuse the boxes from detect() to crop the faces. Calling
        # mtcnn(img_rgb) here would run the whole detector a second time.
        faces = mtcnn.extract(img_rgb, boxes, None) if boxes is not None else None

        if faces is not None:
            # Embed all faces of the frame in a single forward pass.
            with torch.no_grad():
                face_embeddings = resnet(faces.to(device)).cpu().numpy()

            # Row i holds the similarity of face i to every known embedding.
            similarity_matrix = cosine_similarity(face_embeddings, known_embeddings)

            for box, similarities in zip(boxes, similarity_matrix):
                best_match_idx = np.argmax(similarities)
                best_similarity = similarities[best_match_idx]
                name = (known_labels[best_match_idx]
                        if best_similarity > SIMILARITY_THRESHOLD else "Unknown")

                # Draw bounding box and label (plain ints for the OpenCV API).
                x1, y1, x2, y2 = (int(v) for v in box)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Keep the caption inside the frame for faces near the top edge.
                cv2.putText(frame, f"{name} ({best_similarity:.2f})", (x1, max(y1 - 10, 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        cv2.imshow("Face Recognition", frame)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or a
    # kernel interrupt; otherwise the webcam stays locked.
    cap.release()
    cv2.destroyAllWindows()