In [2]:
import os

import cv2
import cvzone
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
#from mtcnn.mtcnn import MTCNN

In [None]:
from torch.utils.data import Dataset, DataLoader
class FaceRecognitionDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``root_dir`` per class.

    The sorted sub-directory names become ``self.classes``; the label of a
    sample is the index of its directory in that list.

    Args:
        root_dir: Directory containing one folder per class.
        transform: Optional callable applied to the loaded RGB ``PIL.Image``.

    Each item is ``(image, label)`` where ``image`` is the transformed image,
    or the untransformed ``PIL.Image`` when no transform was given.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only directories are classes; stray files in the root would otherwise
        # make the inner os.listdir() raise NotADirectoryError.
        self.classes = sorted(
            name for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )

        self.file_paths = []
        self.labels = []
        for label, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            # Sorted so that sample indices are reproducible across runs/machines.
            for file_name in sorted(os.listdir(class_dir)):
                self.file_paths.append(os.path.join(class_dir, file_name))
                self.labels.append(label)

    def __len__(self):
        """Number of image files found under ``root_dir``."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Raises:
            OSError: if OpenCV cannot read the file at that index.
        """
        img_path = self.file_paths[idx]
        label = self.labels[idx]

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {img_path}")
        # OpenCV loads BGR; convert to RGB before handing the pixels to PIL.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)

        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:

# Preprocessing applied to every dataset image: resize to 160x160, then
# convert the PIL image to a tensor.
preprocessing_steps = [
    torchvision.transforms.Resize((160, 160)),
    torchvision.transforms.ToTensor(),
    #torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]
transform = torchvision.transforms.Compose(preprocessing_steps)

# One class per sub-folder of ./FACES; the cell displays the number of samples.
dataset = FaceRecognitionDataset(root_dir='./FACES', transform=transform)
len(dataset)

In [4]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [5]:
# Face-detection pipeline (MTCNN), placed on the device selected above.
detector = MTCNN(
    device=device,
    margin=10,
    keep_all=True,
)

In [None]:
# Detect faces in one still image and show every cropped face in its own figure.
# NOTE(review): `img_p` (path of the image to analyse) is not defined anywhere in
# the visible notebook; it has to be set before this cell is run.
img = cv2.imread(img_p)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {img_p}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(img)

# `boxes` is None when nothing is detected; each row holds the two corners
# (x1, y1, x2, y2) of a face.
boxes, _ = detector.detect(pil_img)

faces = []
if boxes is not None:
    for x1, y1, x2, y2 in boxes:
        # Clamp to the image: a negative index would wrap around in numpy slicing.
        x1, y1 = max(int(x1), 0), max(int(y1), 0)
        x2, y2 = max(int(x2), 0), max(int(y2), 0)
        crop = img[y1:y2, x1:x2]
        if crop.size:  # skip degenerate (empty) crops
            faces.append(crop)

# SHOW DETECTED FACES
for fig_no, face in enumerate(faces):
    plt.figure(fig_no)
    plt.imshow(Image.fromarray(face))


In [None]:
# Display the `img` array (RGB) on its own, without any annotation.
plt.imshow(img)

In [None]:
# Draw every detected bounding box, numbered from 1, on top of the image.
fig, ax = plt.subplots()
ax.imshow(img)
if boxes is None:
    print("No faces")
else:
    # Each box row is (left, top, right, bottom); Rectangle wants width/height.
    for face_no, (left, top, right, bottom) in enumerate(boxes, start=1):
        ax.text(left, top, str(face_no), fontsize=12, color='cyan')
        outline = plt.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            fill=False,
            color='magenta',
            linewidth=2,
        )
        ax.add_patch(outline)
    print(f"Number of faces: {len(boxes)}")

In [None]:
# Live webcam preview: detect faces in each frame, draw a numbered box around
# every face and show the crop of the face in a second window. Press 'q' to quit.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print( "Error: Could not open camera" )

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame")
            break

        # `frame` is BGR (OpenCV); `img` is a separate RGB copy for the detector,
        # so annotations drawn on `frame` never end up inside a face crop.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(img)

        boxes, _ = detector.detect(pil_img)

        if boxes is not None:
            for i, box in enumerate(boxes):
                # Box rows are corner coordinates (x1, y1, x2, y2); shrink by one
                # pixel and clamp so negative values cannot wrap around when slicing.
                bx1, by1, bx2, by2 = box
                x1, y1 = max(int(bx1 + 1), 0), max(int(by1 + 1), 0)
                x2, y2 = max(int(bx2 - 1), 0), max(int(by2 - 1), 0)

                face_rgb = img[y1:y2, x1:x2]
                if face_rgb.size > 0 and len(face_rgb) > 2:
                    # cv2.imshow expects BGR, so convert the RGB crop back first.
                    cv2.imshow('face', cv2.cvtColor(face_rgb, cv2.COLOR_RGB2BGR))

                print(x1, y1, x2, y2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,0,255), 2)
                cvzone.putTextRect(frame, str(i+1), [x1, y1 + 10], scale=1, thickness =1)
            # Report the count once per frame rather than once per face.
            print(f"Number of faces: {len(boxes)}")
        else:
            print("No faces")

        cv2.imshow('Webcam', frame)

        if( cv2.waitKey(1) & 0xFF == ord('q')):
            break
finally:
    # Always free the camera and close the windows, even after an exception or
    # a kernel interrupt; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()

In [13]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from PIL import Image
import cv2
import cvzone
import torchvision
import os
import numpy as np

# ----------- MODELS ----------- #
# Use the GPU when PyTorch can see one, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# InceptionResnetV1 with the 'vggface2' pretrained weights, switched to eval mode.
# NOTE(review): with classify=True facenet-pytorch is documented to return class
# logits rather than normalised embeddings; the distance threshold used elsewhere
# in this notebook is presumably tuned for that output. Confirm before changing.
resnet = InceptionResnetV1(pretrained='vggface2', classify=True)
resnet = resnet.eval()

# MTCNN face detector created on the selected device.
detector = MTCNN(keep_all = True, device = device, margin = 10)
#---------------------------------#

def get_face_embedding(image):
    """Run ``resnet`` on the first face that ``detector`` finds in a BGR image.

    Args:
        image: BGR ``numpy`` array (OpenCV convention). ``None``, which is what
            ``cv2.imread`` returns for an unreadable file, and empty arrays are
            accepted and yield ``None``.

    Returns:
        The tensor produced by ``resnet`` for the cropped face (leading batch
        dimension of 1, on ``device``), or ``None`` when the image is unusable,
        no face is detected, or the detected box is degenerate.
    """
    # Callers already test the result for None, so an unreadable or empty image
    # is reported the same way instead of crashing inside cv2.cvtColor.
    if image is None or getattr(image, "size", 0) == 0:
        return None

    # Convert image from BGR to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(image_rgb)

    # Detect faces; `boxes` is None when nothing is found
    boxes, _ = detector.detect(pil_image)
    if boxes is None or len(boxes) == 0:
        return None

    # The box holds the two corners (x1, y1, x2, y2), which is exactly the
    # (left, upper, right, lower) tuple PIL's crop() expects.
    x1, y1, x2, y2 = boxes[0].astype(int)
    if x2 <= x1 or y2 <= y1:
        # A zero-area crop cannot be resized by the transform.
        return None
    face_image = pil_image.crop((x1, y1, x2, y2))

    # NOTE(review): `transform` is a notebook-level global that is assigned in
    # more than one cell with different sizes, so the result depends on which
    # cell ran last. Make sure reference and live embeddings use the same one.
    face_tensor = transform(face_image).unsqueeze(0).to(device)
    # Make sure the model and the input live on the same device.
    resnet.to(device)
    with torch.no_grad():  # inference only, no gradients needed
        embedding = resnet(face_tensor)

    return embedding


# Reference image for every known person (extend this mapping with your own faces).
known_face_images = {
    "felps": "DATA_TRAIN/Felipe/img_10.jpg",
    "INe": "DATA_TRAIN/Jaca/img_5.jpg",
#    "mateus": "FACES/Mateus/img_3.jpg",
    # Add more known faces as needed
}

# name -> output of get_face_embedding() for that person's reference image
known_faces = {
    person_name: get_face_embedding(cv2.imread(image_path))
    for person_name, image_path in known_face_images.items()
}

In [7]:
def save_checkpoint(state, filename = "my_checkpoint_xx.pth.tar"):
    """Announce the save on stdout, then serialise ``state`` to ``filename``.

    Args:
        state: Object to store; it is handed unchanged to ``torch.save``.
        filename: Destination path of the checkpoint file.
    """
    print("=> Saving Checkpoint")
    torch.save(obj=state, f=filename)

def load_checkpoint(checkpoint_path, model, map_location="cpu"):
    """Restore ``model``'s weights in place from a checkpoint file.

    Args:
        checkpoint_path: Path of a file written with ``torch.save`` that holds a
            mapping with a ``'state_dict'`` entry.
        model: Module whose ``load_state_dict`` receives that entry.
        map_location: Forwarded to ``torch.load``. The default ``"cpu"`` lets a
            checkpoint saved on a GPU be opened on a CPU-only machine;
            ``load_state_dict`` then copies the values into the model's own
            parameters on whatever device they already live on.

    Raises:
        KeyError: if the checkpoint has no ``'state_dict'`` entry.
    """
    # NOTE: torch.load relies on pickle; only open checkpoints from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['state_dict'])

# All-in-one version: the next cell holds the complete pipeline in a single block

In [16]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from PIL import Image
import cv2
import cvzone
import torchvision
import os
import numpy as np


# Preprocessing for face crops: resize to 224x224, then convert to a tensor.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
    ]
)

# ------ COLLECT DATA FROM CAMERA ------ #
outFolder = './DATA_TRAIN/Jaca'   # folder that receives the saved face crops
save = 1                          # truthy -> sharp face crops are written to outFolder
count = 0                         # running index used in the saved file names
# Minimum Laplacian variance a crop needs before it is saved or matched
# (the original note also lists 300 and 400, presumably alternative values).
blurThreshold = 200
#---------------------------------------#


# ----------- MODELS ----------- #
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Recognition network and face detector.
resnet = InceptionResnetV1(pretrained='vggface2', classify=True).eval()
detector = MTCNN(keep_all = True, device = device, margin = 10)

# Liveness classifier: MobileNetV2 whose last classifier layer is replaced by a
# linear layer with `num_classes` outputs.
num_classes = 2
live = torchvision.models.mobilenet_v2()
live.classifier[1] = torch.nn.Linear(live.last_channel, num_classes)
# NOTE(review): no weights argument is passed above and the checkpoint load below
# is commented out, so the liveness output presumably comes from untrained
# weights. Confirm before relying on the REAL/fake label.
#load_checkpoint('./liveliness_UP.pth.tar', live)
live.eval()

# Loss and optimiser for the liveness model (not used by the loop in this cell).
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(live.parameters(), lr=0.001, momentum=0.9)
#---------------------------------#

# Put every model on the selected device.
for model_on_device in (resnet, detector, live):
    model_on_device.to(device)


# ------ Capture From Camera ----- #
# Default local camera; the commented line is an alternative RTSP source.
cap = cv2.VideoCapture(0)
#cap = cv2.VideoCapture('rtsp://192.168.3.22/Streaming/Channels/101')

camera_ready = cap.isOpened()
if not camera_ready:
    print( "Error: Could not open camera" )
#----------------------------------#

# ------- Parameters for face Recon ------- #
threshold = 190      # a known face only matches below this distance
minDist = 99999      # smallest distance seen so far (starts "infinitely" far)
#-------------------------------------------#

# ---------- VARIABLES OF FRAME ----------#
# Per-face state lives in fixed lists/arrays of 10 slots, indexed by detection order.
who = text = person = 'unknown'
list_person = ['unknown'] * 10
alive = ['fake'] * 10
update_flag = np.zeros(10)
identified = []
index = 0
size = 0
last_size = 0
#-----------------------------------------#
# ------------------------------ MAIN LOOP ------------------------------ #
# For every frame: detect faces, and for each confident detection run the
# liveness classifier, match the face against `known_faces`, optionally save
# the crop as training data, and draw an annotated bounding box.
# Press 'q' in the video window to quit.

# Per-face state (`list_person`, `update_flag`, `alive`) has a fixed number of
# slots; detections beyond that capacity are ignored instead of raising IndexError.
max_faces = min(len(list_person), len(update_flag), len(alive))

if save:
    # cv2.imwrite only returns False when the target folder is missing, so
    # create it up front to avoid silently losing the captures.
    os.makedirs(outFolder, exist_ok=True)

try:
    while True:
        # GET FRAMES
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame")
            break

        # `frame` is BGR (OpenCV). `img` is a separate RGB copy used for detection
        # and cropping, so annotations drawn on `frame` never leak into a crop.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(img)

        # DETECT FACES IN FRAME
        boxes, probs = detector.detect(pil_img)

        if boxes is not None:
            if len(boxes) == 1:
                # Only one face in view: forget the names held by the other slots.
                list_person[1:] = ['unknown'] * (len(list_person) - 1)

            for i, box in enumerate(boxes[:max_faces]):
                if not (probs[i] > 0.94):                   # ACCEPTANCE LEVEL
                    continue

                # Box rows are corner coordinates (x1, y1, x2, y2). Shrink by one
                # pixel and clamp at 0: a negative index would wrap around when
                # slicing the numpy image.
                bx1, by1, bx2, by2 = box
                x1, y1 = max(int(bx1 + 1), 0), max(int(by1 + 1), 0)
                x2, y2 = max(int(bx2 - 1), 0), max(int(by2 - 1), 0)
                face_rgb = img[y1:y2, x1:x2]

                # Per-face results; they keep these defaults when the face is too
                # small to analyse, so nothing stale or undefined is ever read.
                is_live = False
                matched_name = None

                # ACCEPT ONLY FACES WITH A CONSIDERABLE SIZE
                if face_rgb.size > 50000 and face_rgb.shape[1] > 2:
                    # OpenCV functions and get_face_embedding() expect BGR.
                    face_bgr = cv2.cvtColor(face_rgb, cv2.COLOR_RGB2BGR)

                    # Liveness: PIL/torchvision expect RGB, so use the RGB crop.
                    img_tensor = transform(Image.fromarray(face_rgb)).unsqueeze(0)
                    with torch.no_grad():  # inference only
                        liveliness = live(img_tensor.to(device))
                    _, predicted = torch.max(liveliness, 1)  # index of the highest score
                    is_live = predicted.item() == 1

                    cv2.imshow('face', face_bgr)

                    # Variance of the Laplacian, used as a sharpness measure
                    blurValue = cv2.Laplacian(face_bgr, cv2.CV_64F).var()
                    sharp_enough = blurValue > blurThreshold

                    detected_embedding = get_face_embedding(face_bgr)
                    if detected_embedding is not None:
                        minDist = 99999
                        # Keep the closest known face that is below `threshold`
                        for name, known_emb in known_faces.items():
                            if known_emb is None:
                                continue
                            distance = np.linalg.norm(detected_embedding.cpu() - known_emb.cpu())
                            if sharp_enough and (distance < threshold) and (distance < minDist):
                                minDist = distance
                                matched_name = name
                                print(distance)

                    # SAVE FLAG FOR COLLECTING TRAINING DATA
                    if save and sharp_enough:
                        cv2.imwrite(f"{outFolder}/img_{count}.jpg" , face_bgr)
                        count+=1

                # Remember the name in the slot that belongs to this face index
                if matched_name is not None:
                    person = matched_name
                    print(person, " Detected!")
                    list_person[i] = person

                # BBOX face
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,0,255), 2)

                # A face is only labelled REAL when it was classified as live in
                # this very frame; anything not evaluated is shown as fake.
                alive[i] = ' REAL' if is_live else ' fake'

                text = str(list_person[i]) + alive[i] + ' ' + str(i)
                cvzone.putTextRect(frame, text, [x1, y1 + 10], scale=1, thickness =1)
        else:
            # No face at all: reset every slot
            list_person = ['unknown'] * len(list_person)
            identified = []
            text = 'unknown'

        cv2.imshow('Webcam', frame)

        if( cv2.waitKey(1) & 0xFF == ord('q')):
            break
finally:
    # Always free the camera and close the windows, even after an exception or
    # a kernel interrupt; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()


149.51619
felps  Detected!
152.98122
felps  Detected!
180.8203
170.30765
INe  Detected!
180.8203
170.30765
INe  Detected!
175.68024
159.12088
INe  Detected!
109.06496
felps  Detected!
101.07203
felps  Detected!
106.03563
felps  Detected!
90.15307
felps  Detected!
93.797844
felps  Detected!
92.842354
felps  Detected!
97.17439
felps  Detected!
79.11109
felps  Detected!
Error: Failed to capture frame


In [105]:
# Reset the per-face update flags (10 slots, all zero) and display the array.
n_slots = 10
update_flag = np.zeros(n_slots)
update_flag

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])