In [1]:
import cv2
import time
import os
import os.path
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import pygame
from tqdm import tqdm

pygame 2.5.2 (SDL 2.28.3, Python 3.9.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Build the two MTCNN face detectors and the InceptionResnetV1 embedding network.
# Both detectors share every setting except keep_all.

_detector_settings = dict(image_size=240, margin=0, min_face_size=40)

# keep_all=False: used on the enrollment photos, where one face crop per image is consumed
mtcnn0 = MTCNN(keep_all=False, **_detector_settings)
# keep_all=True: used on webcam frames, where the result is iterated as a list of faces
mtcnn = MTCNN(keep_all=True, **_detector_settings)

# Embedding network with the 'vggface2' pretrained weights, switched to inference mode
resnet = InceptionResnetV1(pretrained='vggface2')
resnet.eval()

In [3]:
# Locate the enrollment photos: a 'photos' folder inside the current working directory
currentDir = os.getcwd()
trainPath = os.path.join(currentDir, 'photos')

# Load the photos; each sub-folder name becomes a class with an integer label
dataset = datasets.ImageFolder(root=trainPath)

# Invert the folder-name -> label mapping so a label can be turned back into a person's name
idx_to_class = {}
for class_name, class_idx in dataset.class_to_idx.items():
    idx_to_class[class_idx] = class_name

def collate_fn(x):
    """Return the first sample of the batch as-is instead of collating it.

    The DataLoader below is created without a batch_size argument, so the batch
    handed to this function is indexed for its first element only; that element
    is passed through untouched rather than being stacked into tensors.
    """
    first_sample = x[0]
    return first_sample

# Enrollment: turn every photo into a face embedding and store it with the person's name.
loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = [] # names corresponding to the stored embeddings (same order)
embedding_list = [] # embeddings produced by resnet from the cropped faces

skipped_count = 0 # photos without a confidently detected face

# Inference only: disable autograd so no graph is built for each forward pass
with torch.no_grad():
    for img, idx in tqdm(loader, desc='Processing images'):
        face, prob = mtcnn0(img, return_prob=True)
        if face is not None and prob > 0.92:
            # unsqueeze adds the batch dimension expected by resnet
            emb = resnet(face.unsqueeze(0))
            embedding_list.append(emb)
            name_list.append(idx_to_class[idx])
        else:
            skipped_count += 1

if skipped_count:
    print("Skipped {} image(s) with no confidently detected face.".format(skipped_count))

# save data
data = [embedding_list, name_list]
torch.save(data, 'data.pt') # saving data.pt file

print("Model built! Saving to file.")

Processing images: 100%|██████████| 103/103 [00:56<00:00,  1.83it/s]

Model built! Saving to file.





In [4]:
# Real-time recognition: detect faces in webcam frames, match each one against the
# saved embeddings, label it, and play a song while TARGET_NAME is on screen.
# Keys: ESC quits, SPACE saves the current (undecorated) frame under photos/<name>/.

DETECTION_MIN_PROB = 0.90  # minimum detector confidence for a face to be processed
MATCH_MAX_DIST = 0.90      # maximum embedding distance for a face to be labelled
TARGET_NAME = 'coleRizz'   # person whose presence triggers the song

# Initialize pygame mixer for playing audio
pygame.mixer.init()

# Load the embeddings and names written by the enrollment cell.
# NOTE: torch.load unpickles the file; only load a data.pt you created yourself.
load_data = torch.load('data.pt')
embedding_list = load_data[0]
name_list = load_data[1]

# Stack the stored embeddings once so each face needs a single vectorised
# distance computation; None means there is nobody to match against.
known_embeddings = torch.cat(embedding_list) if len(embedding_list) > 0 else None

# Load the audio file before opening the camera so a missing file cannot leak the device
rizz_song_path = "rizz_song.mp3"
pygame.mixer.music.load(rizz_song_path)

# Variable to track whether the song is playing
song_playing = False

cam = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            print("fail to grab frame, try again")
            break

        # Keep a clean copy before anything is drawn; this is what SPACE saves
        original_frame = frame.copy()

        # OpenCV delivers BGR frames, while the enrollment photos were loaded as RGB,
        # so convert before detection/embedding to keep both sides consistent
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        is_coleRizz_detected = False  # set to True if TARGET_NAME is seen in this frame

        # Inference only: no autograd bookkeeping needed
        with torch.no_grad():
            img_cropped_list, prob_list = mtcnn(img, return_prob=True)

            if img_cropped_list is not None:
                # Second detector pass on the same image to obtain the bounding boxes
                boxes, _ = mtcnn.detect(img)
                if boxes is None:
                    boxes = []

                for face, prob, box in zip(img_cropped_list, prob_list, boxes):
                    if prob > DETECTION_MIN_PROB:
                        top_left = (int(box[0]), int(box[1]))
                        bottom_right = (int(box[2]), int(box[3]))

                        if known_embeddings is not None:
                            emb = resnet(face.unsqueeze(0))

                            # Euclidean distance to every stored embedding; the
                            # closest one identifies the person
                            dists = (known_embeddings - emb).norm(dim=1)
                            min_dist_idx = int(torch.argmin(dists))
                            min_dist = dists[min_dist_idx].item()
                            name = name_list[min_dist_idx]

                            if min_dist < MATCH_MAX_DIST:
                                frame = cv2.putText(frame, name+' '+str(min_dist), top_left, cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,255),1, cv2.LINE_AA)

                                # Check if detected face belongs to the target person
                                if name == TARGET_NAME:
                                    is_coleRizz_detected = True

                        frame = cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)

        # Play or stop audio depending on whether the target person is visible
        if is_coleRizz_detected:
            if not song_playing:
                pygame.mixer.music.play()
                song_playing = True
        else:
            if song_playing:
                pygame.mixer.music.stop()
                song_playing = False

        cv2.imshow("IMG", frame)

        k = cv2.waitKey(1)
        if k%256==27: # ESC
            print('Esc pressed, closing...')
            break

        # SPACE: save the current frame as a new enrollment photo for the entered name
        elif k%256==32:
            print('Enter your name :')
            person = input().strip()

            # The name becomes a folder name: reject empty names and anything that
            # would escape the photos directory
            if not person or person in ('.', '..') or os.path.basename(person) != person:
                print("invalid name, image not saved")
            else:
                # create directory (and the photos folder itself) if not exists
                os.makedirs(os.path.join('photos', person), exist_ok=True)

                img_name = "photos/{}/{}.jpg".format(person, int(time.time()))
                if cv2.imwrite(img_name, original_frame):
                    print(" saved: {}".format(img_name))
                else:
                    print("failed to save: {}".format(img_name))
finally:
    # Stop the music and release resources even if the loop raised
    pygame.mixer.music.stop()
    cam.release()
    cv2.destroyAllWindows()



Esc pressed, closing...
