In [105]:
import face_recognition
import cv2
from uuid import  uuid4
import re
import numpy as np
from tqdm import  tqdm
import os

In [106]:
def faces(path):
    """Detect the faces in an image file and crop each one out.

    Args:
        path: Image location handed to ``face_recognition.load_image_file``.

    Returns:
        tuple: ``(crops, locations)``. ``locations`` is the list returned by
        ``face_recognition.face_locations`` (each entry unpacks as
        ``(top, right, bottom, left)``) and ``crops[i]`` is the slice of the
        loaded image for ``locations[i]``. ``(None, None)`` when no face is
        detected.
    """
    image = face_recognition.load_image_file(path)
    boxes = face_recognition.face_locations(image)
    if not len(boxes):
        return None, None

    # Rows span top..bottom and columns span left..right for every box.
    crops = [image[top:bottom, left:right] for top, right, bottom, left in boxes]
    return crops, boxes


In [107]:
def get_counter(dir="output"):
    """Return the next unused person index for the entries under ``dir``.

    Ensures ``dir`` exists, then scans its entries for names that start with
    ``person<digits>`` and returns the largest number found plus one.

    Args:
        dir: Directory holding the ``personN`` entries. Created (including
            missing parents) when it does not exist yet.

    Returns:
        int: ``1`` when no entry matches, otherwise ``max(N) + 1``.
    """
    # Create the directory inline instead of going through
    # createDir_if_not_exists: that helper is defined in a later cell, so
    # calling it here raises NameError when the notebook is run top-to-bottom
    # on a fresh kernel.
    if not os.path.exists(dir):
        os.makedirs(dir, exist_ok=True)

    # `match` anchors only at the start of the name, so an entry such as
    # "person12_old" still counts as 12.
    pattern = re.compile(r"person(\d+)")
    hits = (pattern.match(entry) for entry in os.listdir(dir))
    numbers = [int(hit.group(1)) for hit in hits if hit]

    # default=0 covers both an empty directory and one with no matching names.
    return max(numbers, default=0) + 1
# Next free person index, derived from the entries already under "output".
# handle_new_face reads and increments this global for every new person.
person_count = get_counter()

In [108]:
def save(face,filepath,resize=None):
    """Write an image to disk with OpenCV, optionally resizing it first.

    Args:
        face: Image array handed to ``cv2.resize`` / ``cv2.imwrite`` as-is.
        filepath: Destination path passed to ``cv2.imwrite``.
        resize: ``None`` to keep the original size, a tuple forwarded to
            ``cv2.resize`` as ``dsize``, or an int ``n`` meaning ``(n, n)``.
            Any other type is ignored and the image is written unresized.

    Returns:
        The value returned by ``cv2.imwrite`` (truthy on success). Earlier
        versions returned ``None`` and dropped this flag, so a failed write
        went unnoticed.
    """
    # None is neither a tuple nor an int, so no separate None guard is needed.
    if isinstance(resize, tuple):
        face = cv2.resize(face, dsize=resize)
    elif isinstance(resize, int):
        face = cv2.resize(face, dsize=(resize, resize))

    # Hand the success flag back to the caller instead of discarding it.
    return cv2.imwrite(filepath, face)

In [109]:
def generate_filepath(extension='jpg'):
    """Build a random file name from a UUID4 and the given extension.

    Args:
        extension: File extension, with or without the leading dot
            (``'jpg'`` and ``'.jpg'`` produce the same shape of name).

    Returns:
        str: ``"<uuid4>.<extension>"``. Despite the function's name this is a
        bare file name, not a path.
    """
    stem = str(uuid4())
    # Only a *leading* dot means the caller already supplied the separator.
    # Looking for a dot anywhere glued multi-part extensions such as "tar.gz"
    # straight onto the UUID with no separator.
    if extension.startswith('.'):
        return stem + extension
    return f"{stem}.{extension}"


In [110]:
def createDir_if_not_exists(dir):
    """Create directory ``dir`` when nothing exists at that path yet.

    Args:
        dir: Path of the directory to create. Missing parent directories are
            created as well.

    Returns:
        None. The call is a no-op when the path already exists.
    """
    if not os.path.exists(dir):
        # makedirs (unlike mkdir) also builds missing parents, and exist_ok
        # tolerates the directory appearing between the check and the call.
        os.makedirs(dir, exist_ok=True)
    return

In [111]:
import numpy as np

def getEmbedding(face):
    """Compute a face embedding for one image.

    The image is channel-swapped with ``cv2.COLOR_BGR2RGB`` and passed to
    ``face_recognition.face_encodings``; the first encoding is returned.

    NOTE(review): the crops produced by ``faces()`` in this notebook come from
    ``face_recognition.load_image_file`` and ``save_face`` converts those same
    crops RGB->BGR before writing, so they are presumably already RGB. If so,
    this swap hands channel-reversed pixels to the encoder. Confirm before
    changing it: embeddings already stored would need to be re-indexed.

    Args:
        face: Image array, treated here as BGR.

    Returns:
        The first encoding returned by ``face_encodings``, or ``None`` when it
        returns no encodings.
    """
    rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    encodings = face_recognition.face_encodings(rgb_face)
    return encodings[0] if len(encodings) != 0 else None

In [112]:
def save_face(face=None,dir='default',base='output'):
    """Save one face crop as a 64x64 image at ``<base>/<dir>/<uuid>.jpg``.

    Args:
        face: Face image in RGB channel order (it is converted to BGR before
            being written). Nothing is created or written when ``None``.
        dir: Sub-folder name inside ``base``.
        base: Root output directory.

    Returns:
        None.
    """
    # Guard on the `face` argument. The previous check tested `faces`, which
    # resolves to the module-level detection function and is never None, so a
    # missing face fell through to cv2.cvtColor.
    if face is None:
        return

    # Honour the caller's `base`; it used to be overwritten with 'output' and
    # the file path hard-coded "output" as well.
    createDir_if_not_exists(base)
    target_dir = f"{base}/{dir}"
    createDir_if_not_exists(target_dir)
    filename = f"{target_dir}/{generate_filepath()}"

    # Convert RGB -> BGR before handing the image to the OpenCV-based writer.
    face = cv2.cvtColor(face,cv2.COLOR_RGB2BGR)
    save(face,filename,resize=(64,64))
    print("Face saved")
    return

In [113]:
from qdrant_client import QdrantClient,models
from qdrant_client.models import NamedVector,VectorParams,Distance,PointStruct
# Minimum Qdrant search score for a face to count as an already-known person:
# process_faces files the crop under the matched name when score > threshold
# and registers a new person otherwise.
threshold = 0.93
def connect_qdrant(host='localhost', port=6333):
    """Create a Qdrant client for the given server.

    Args:
        host: Host name of the Qdrant server. Defaults to the previously
            hard-coded ``'localhost'``.
        port: Port of the Qdrant server. Defaults to the previously
            hard-coded ``6333``.

    Returns:
        QdrantClient: A client constructed with ``host`` and ``port``.
    """
    return QdrantClient(host=host, port=port)
# Module-level client shared by create_collection_if_not_exists,
# compare_embedding and add_data.
client = connect_qdrant()


In [114]:
def create_collection_if_not_exists(collection_name="faces", vector_size=128,):
    """Create a Qdrant collection unless one with that name already exists.

    Uses the module-level ``client``. A new collection is configured with
    vectors of length ``vector_size`` compared by cosine distance.

    Args:
        collection_name: Name of the collection to look for or create.
        vector_size: Vector dimensionality used when creating the collection.

    Returns:
        None. Progress is reported through ``print``.
    """
    existing_names = {col.name for col in client.get_collections().collections}
    if collection_name in existing_names:
        print(f"Collection '{collection_name}' already exists.")
        return

    print(f"Collection '{collection_name}' does not exist. Creating a new one.")
    vector_params = models.VectorParams(size=vector_size, distance=models.Distance.COSINE)
    client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_params,
    )
    print(f"Collection '{collection_name}' created successfully.")

In [115]:
# Ensure the default "faces" collection exists before any search or upsert.
create_collection_if_not_exists()

Collection 'faces' already exists.


In [116]:
def compare_embedding(embedding,collection_name="faces",top_k=1):
    """Search a Qdrant collection for the nearest stored vectors.

    Args:
        embedding: Query vector to compare against the collection.
        collection_name: Collection to search in.
        top_k: Maximum number of hits to request.

    Returns:
        Whatever ``client.search`` returns for the query.
    """
    search_args = {
        "collection_name": collection_name,
        "query_vector": embedding,
        "limit": top_k,
    }
    return client.search(**search_args)

# add the new embedding 
def add_data(embedding,collection_name="faces",name="default"):
    """Store one embedding in a Qdrant collection under a fresh UUID4 id.

    Args:
        embedding: Vector to store. Objects exposing ``tolist()`` (for
            example numpy arrays) are converted to a plain list first.
        collection_name: Collection that receives the point.
        name: Label saved in the point payload as ``{"name": name}``.

    Returns:
        None. Prints ``Added`` once the upsert call has returned.
    """
    # Send plain Python floats rather than a numpy array.
    # NOTE(review): PointStruct is presumably a validated model, and whether
    # it accepts an ndarray (or non-float64 numpy scalars) depends on the
    # installed qdrant-client / pydantic versions; a list is always safe.
    vector = embedding.tolist() if hasattr(embedding, "tolist") else embedding

    point = PointStruct(id=str(uuid4()), vector=vector, payload={"name": name})
    client.upsert(collection_name=collection_name, points=[point])
    print("Added")

In [117]:
def handle_new_face(face,dir_name,embedding):
    """Register a previously unseen face as a new person.

    Builds the label ``f"{dir_name}{person_count}"``, saves the crop under
    that folder name, stores the embedding with the same name, then advances
    the global ``person_count``.

    Args:
        face: Face crop forwarded to ``save_face``.
        dir_name: Prefix of the generated person label.
        embedding: Vector forwarded to ``add_data``.

    Returns:
        The updated value of the global ``person_count``.
    """
    global person_count
    person_label = f"{dir_name}{person_count}"
    save_face(face=face, dir=person_label)
    add_data(embedding=embedding, name=person_label)
    # The counter advances only after both the save and the upsert returned.
    person_count = person_count + 1
    return person_count


In [118]:
# Prefix for generated person labels: handle_new_face appends person_count to
# it (e.g. "person1") and uses the result as folder name and payload name.
dir_name = "person"

def process_faces(face_locations,faces=None,):
    """Match every face crop against the stored embeddings and file it.

    For each crop an embedding is computed; crops without one are skipped.
    The best search hit decides the outcome: a score above the module-level
    ``threshold`` saves the crop under the matched name, while no hit or a
    lower score registers a new person through ``handle_new_face``.

    Args:
        face_locations: Accepted for the caller's convenience; not used here.
        faces: Iterable of face crops, or ``None``.

    Returns:
        None. Prints ``No faces`` when ``faces`` is ``None`` and the score of
        every compared crop.
    """
    if faces is None:
        print("No faces")
        return

    for crop in tqdm(faces,desc="Processing face"):
        vector = getEmbedding(crop)
        if vector is None:
            continue

        matches = compare_embedding(vector)
        if len(matches) == 0:
            # Nothing comparable stored yet: this crop starts a new person.
            handle_new_face(crop, dir_name, embedding=vector)
            continue

        best = matches[0]
        score = best.score
        print(f"Score:{score}")
        name = best.payload["name"]
        if score > threshold:
            # Known person: store the crop in that person's folder.
            save_face(crop, dir=name)
        else:
            handle_new_face(crop, dir_name, embedding=vector)


In [119]:
def read_all_files_in_directory(dir="input"):
    """List every file below ``dir``, including files in sub-directories.

    Args:
        dir: Root directory walked with ``os.walk``.

    Returns:
        list[str]: One path per file, each built by joining the containing
        folder (as reported by ``os.walk``) with the file name.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(dir)
        for filename in filenames
    ]

In [120]:
def process(paths):
    """Run face detection and matching over a collection of image paths.

    Args:
        paths: Iterable of image file paths. Each one is passed to
            ``faces``; files that yield no crops are skipped and the rest
            are handed to ``process_faces``.

    Returns:
        None.
    """
    for image_path in tqdm(paths,"Processing file"):
        crops, boxes = faces(image_path)
        if crops is not None and len(crops) != 0:
            process_faces(face_locations=boxes, faces=crops)
        

In [121]:
# Collect every file under the default "input" directory, sub-folders included.
paths = read_all_files_in_directory()

In [None]:

# Run the pipeline over every collected file: detect faces, then match each
# crop against the stored embeddings or register it as a new person.
process(paths)

Processing file:   0%|          | 0/1312 [00:00<?, ?it/s]

Face saved
Added




Score:0.83515614
Face saved
Added


Processing face: 100%|██████████| 4/4 [00:01<00:00,  3.83it/s]
Processing file:   0%|          | 1/1312 [00:14<5:23:00, 14.78s/it]

Score:0.8986392
Face saved
Added




Score:0.9165648
Face saved
Added


Processing face: 100%|██████████| 3/3 [00:00<00:00,  4.67it/s]
Processing file:   0%|          | 2/1312 [00:29<5:15:59, 14.47s/it]

Score:0.9731412
Face saved




Score:0.96456146
Face saved




Score:0.9802031
Face saved


Processing face: 100%|██████████| 3/3 [00:00<00:00,  3.30it/s]
Processing file:   0%|          | 3/1312 [00:43<5:15:02, 14.44s/it]

Score:0.9789382
Face saved


Processing face: 100%|██████████| 1/1 [00:00<00:00,  8.34it/s]
Processing face: 100%|██████████| 3/3 [00:00<00:00,  6.11it/s]s/it]
Processing file:   1%|          | 7/1312 [01:37<4:59:14, 13.76s/it]

Score:0.91123617
Face saved
Added


Processing face: 100%|██████████| 2/2 [00:00<00:00,  3.15it/s]
Processing file:   1%|          | 8/1312 [01:51<5:00:01, 13.80s/it]

Score:0.9658257
Face saved




Score:0.894279
Face saved
Added


Processing face: 100%|██████████| 4/4 [00:00<00:00,  4.39it/s]
Processing file:   1%|          | 9/1312 [02:05<4:59:49, 13.81s/it]

Score:0.94538856
Face saved


Processing face: 100%|██████████| 1/1 [00:00<00:00,  3.31it/s]
Processing file:   1%|          | 10/1312 [02:18<4:54:15, 13.56s/it]

Score:0.9164847
Face saved
Added


Processing face: 100%|██████████| 2/2 [00:00<00:00, 12.08it/s]
Processing face: 100%|██████████| 1/1 [00:00<00:00, 12.61it/s]7s/it]
Processing file:   1%|          | 13/1312 [02:56<4:41:50, 13.02s/it]

Score:0.88732004
Face saved
Added




Score:0.91986144
Face saved
Added




Score:0.90654695
Face saved
Added




Score:0.9320493
Face saved




Score:0.9224374
Face saved
Added




Score:0.9221407
Face saved
Added




Score:0.9131995
Face saved
Added




Score:0.92900676
Face saved
Added




Score:0.924914
Face saved
Added




Score:0.9263368
Face saved
Added




Score:0.9250974
Face saved
Added


Processing face: 100%|██████████| 19/19 [00:03<00:00,  5.56it/s]
Processing file:   1%|          | 14/1312 [03:12<5:00:24, 13.89s/it]

Score:0.9096268
Face saved
Added




Score:0.92341703
Face saved
Added


Processing face: 100%|██████████| 3/3 [00:00<00:00,  4.23it/s]
Processing face: 100%|██████████| 1/1 [00:00<00:00, 13.46it/s]4s/it]
Processing face: 100%|██████████| 1/1 [00:00<00:00, 18.57it/s]1s/it]
Processing file:   1%|▏         | 18/1312 [04:05<4:48:54, 13.40s/it]

Score:0.9016654
Face saved
Added




Score:0.93050015
Face saved




Score:0.91540444
Face saved
Added




Score:0.88939476
Face saved
Added




Score:0.93302596
Face saved




Score:0.9378251
Face saved




Score:0.94794494
Face saved




Score:0.9339007
Face saved




Score:0.92035973
Face saved
Added




Score:0.91769373
Face saved
Added


Processing face: 100%|██████████| 13/13 [00:03<00:00,  4.31it/s]
Processing file:   1%|▏         | 19/1312 [04:21<5:06:07, 14.21s/it]

Score:0.93607247
Face saved




Score:0.9558487
Face saved




Score:0.9253144
Face saved
Added




Score:0.9309719
Face saved




Score:0.922946
Face saved
Added




Score:0.9186634
Face saved
Added




Score:0.9538295
Face saved




Score:0.90956753
Face saved
Added




Score:0.9100017
Face saved
Added




Score:0.91196114
Face saved
Added


Processing face: 100%|██████████| 16/16 [00:02<00:00,  5.59it/s]
Processing file:   2%|▏         | 20/1312 [04:38<5:18:56, 14.81s/it]

Score:0.91170096
Face saved
Added




Score:0.91256696
Face saved
Added




Score:0.93489075
Face saved




Score:0.9101511
Face saved
Added




Score:0.92779243
Face saved
Added




Score:0.9222258
Face saved
Added


Processing face: 100%|██████████| 9/9 [00:02<00:00,  4.32it/s]
Processing file:   2%|▏         | 21/1312 [04:53<5:22:12, 14.98s/it]

Score:0.9138706
Face saved
Added




Score:0.94090617
Face saved




Score:0.9197992
Face saved
Added




Score:0.9451355
Face saved




Score:0.9395193
Face saved




Score:0.9335929
Face saved




Score:0.95129067
Face saved




Score:0.92062426
Face saved
Added


Processing face: 100%|██████████| 15/15 [00:02<00:00,  6.79it/s]
Processing file:   2%|▏         | 22/1312 [05:08<5:23:49, 15.06s/it]

Score:0.93701196
Face saved




Score:0.92230296
Face saved
Added




Score:0.9182162
Face saved
Added




Score:0.9346794
Face saved




Score:0.93377084
Face saved




Score:0.91331255
Face saved
Added




Score:0.92269266
Face saved
Added




Score:0.9197874
Face saved
Added




Score:0.9414265
Face saved




Score:0.95608044
Face saved




Score:0.9131234
Face saved
Added




Score:0.9146699
Face saved
Added


Processing face: 100%|██████████| 17/17 [00:03<00:00,  4.97it/s]
Processing file:   2%|▏         | 23/1312 [05:25<5:34:53, 15.59s/it]

Score:0.9032585
Face saved
Added


Processing face: 100%|██████████| 1/1 [00:00<00:00,  3.08it/s]
Processing file:   2%|▏         | 24/1312 [05:39<5:21:11, 14.96s/it]

Score:0.94307685
Face saved




Score:0.9130162
Face saved
Added




Score:0.9174044
Face saved
Added




Score:0.94227135
Face saved


Processing face: 100%|██████████| 5/5 [00:01<00:00,  3.76it/s]
Processing file:   2%|▏         | 25/1312 [05:54<5:21:30, 14.99s/it]

Score:0.92266685
Face saved
Added


Processing face: 100%|██████████| 2/2 [00:00<00:00,  4.17it/s]
Processing file:   2%|▏         | 26/1312 [06:08<5:16:55, 14.79s/it]

Score:0.94123316
Face saved





Score:0.9438491
Face saved


Processing face: 100%|██████████| 3/3 [00:00<00:00,  3.26it/s][A
Processing file:   2%|▏         | 27/1312 [06:22<5:13:14, 14.63s/it]

Score:0.923865
Face saved
Added


Processing face: 100%|██████████| 2/2 [00:00<00:00,  3.40it/s]
Processing file:   2%|▏         | 28/1312 [06:36<5:09:50, 14.48s/it]

Score:0.9334862
Face saved




Score:0.9363141
Face saved




Score:0.9295115
Face saved
Added




Score:0.9338888
Face saved




Score:0.9357229
Face saved




Score:0.91838527
Face saved
Added


Processing face: 100%|██████████| 7/7 [00:01<00:00,  4.07it/s]
Processing file:   2%|▏         | 29/1312 [06:52<5:15:25, 14.75s/it]

Score:0.9289815
Face saved
Added


Processing face: 100%|██████████| 3/3 [00:00<00:00, 22.44it/s]
Processing face: 100%|██████████| 3/3 [00:00<00:00,  6.90it/s]5s/it]
Processing file:   2%|▏         | 31/1312 [07:20<5:08:19, 14.44s/it]

Score:0.926659
Face saved
Added


Processing face: 100%|██████████| 1/1 [00:00<00:00,  2.24it/s]
Processing file:   2%|▏         | 32/1312 [07:34<5:03:36, 14.23s/it]

Score:0.9068241
Face saved
Added




Score:0.91189486
Face saved
Added


Processing face: 100%|██████████| 2/2 [00:00<00:00,  2.95it/s]
Processing file:   3%|▎         | 33/1312 [07:47<5:00:36, 14.10s/it]

Score:0.9812707
Face saved


Processing face: 100%|██████████| 2/2 [00:00<00:00,  2.20it/s]
Processing file:   3%|▎         | 34/1312 [08:02<5:01:20, 14.15s/it]

Score:0.9737136
Face saved




Score:0.9459672
Face saved




Score:0.9323122
Face saved




Score:0.94738215
Face saved




Score:0.91231596
Face saved
Added




Score:0.9199224
Face saved
Added




Score:0.9741631
Face saved




Score:0.91739595
Face saved
Added




Score:0.9049807
Face saved
Added




Score:0.89136845
Face saved
Added




Score:0.9454721
Face saved




Score:0.9221777
Face saved
Added


Processing face: 100%|██████████| 14/14 [00:03<00:00,  4.34it/s]
Processing file:   3%|▎         | 35/1312 [08:18<5:15:34, 14.83s/it]

Score:0.90255165
Face saved
Added





Score:0.9202908
Face saved
Added


Processing face: 100%|██████████| 3/3 [00:01<00:00,  2.90it/s][A
Processing file:   3%|▎         | 36/1312 [08:32<5:11:31, 14.65s/it]

Score:0.94541407
Face saved


Processing face: 100%|██████████| 3/3 [00:00<00:00,  6.15it/s]
Processing file:   3%|▎         | 37/1312 [08:46<5:05:34, 14.38s/it]

Score:0.93215233
Face saved


Processing face: 100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
Processing file:   3%|▎         | 38/1312 [09:00<5:00:31, 14.15s/it]

Score:0.92391145
Face saved
Added


Processing face: 100%|██████████| 1/1 [00:00<00:00, 24.98it/s]1s/it]
Processing file:   3%|▎         | 40/1312 [09:26<4:50:45, 13.71s/it]

Score:0.9093364
Face saved
Added




Score:0.9195914
Face saved
Added


Processing face: 100%|██████████| 4/4 [00:01<00:00,  2.87it/s]
Processing file:   3%|▎         | 41/1312 [09:41<4:58:09, 14.08s/it]

Score:0.9135891
Face saved
Added




Score:0.99428844
Face saved




Score:0.98786837
Face saved




Score:0.9749165
Face saved


Processing face: 100%|██████████| 5/5 [00:01<00:00,  3.05it/s]
Processing file:   3%|▎         | 42/1312 [09:56<5:03:40, 14.35s/it]

Score:0.91005504
Face saved
Added




Score:0.9689983
Face saved




Score:0.9281713
Face saved
Added




Score:0.9635101
Face saved




Score:0.9244468
Face saved
Added


Processing face: 100%|██████████| 6/6 [00:01<00:00,  3.14it/s]
Processing file:   3%|▎         | 43/1312 [10:11<5:08:32, 14.59s/it]

Score:0.92700785
Face saved
Added




Score:0.96763414
Face saved




Score:0.9990955
Face saved




Score:0.9713471
Face saved




Score:0.99440044
Face saved


Processing face: 100%|██████████| 6/6 [00:01<00:00,  3.26it/s]
Processing file:   3%|▎         | 44/1312 [10:27<5:13:14, 14.82s/it]

Score:0.9934
Face saved




Score:0.9262748
Face saved
Added




Score:0.9905312
Face saved




Score:0.95527005
Face saved




Score:0.9816799
Face saved




Score:0.9794691
Face saved




Score:0.93399346
Face saved




Score:0.95453435
Face saved




Score:0.9547584
Face saved




Score:0.97003317
Face saved


Processing face: 100%|██████████| 10/10 [00:02<00:00,  3.60it/s]
Processing file:   3%|▎         | 45/1312 [10:43<5:20:49, 15.19s/it]

Score:0.94916517
Face saved




Score:0.99335945
Face saved




Score:0.9877043
Face saved




Score:0.95693517
Face saved




Score:0.9344619
Face saved




Score:0.9707469
Face saved




Score:0.9718563
Face saved




Score:0.96384823
Face saved




Score:0.9740807
Face saved


Processing face: 100%|██████████| 11/11 [00:02<00:00,  4.24it/s]
Processing file:   4%|▎         | 46/1312 [10:59<5:24:38, 15.39s/it]

Score:0.9735122
Face saved




Score:0.989747
Face saved




Score:0.9695982
Face saved




Score:0.98811054
Face saved




Score:0.9352889
Face saved




Score:0.98362947
Face saved




Score:0.9663968
Face saved




Score:0.9643512
Face saved




Score:0.96197414
Face saved


Processing face: 100%|██████████| 12/12 [00:02<00:00,  4.72it/s]
Processing file:   4%|▎         | 47/1312 [11:14<5:27:41, 15.54s/it]

Score:0.96932924
Face saved




Score:0.91918063
Face saved
Added


Processing face: 100%|██████████| 2/2 [00:00<00:00,  3.11it/s]
Processing file:   4%|▎         | 48/1312 [11:28<5:16:37, 15.03s/it]

Score:0.92118746
Face saved
Added


Processing face: 100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
Processing file:   4%|▎         | 49/1312 [11:42<5:07:24, 14.60s/it]

Score:0.9289272
Face saved
Added


Processing face: 100%|██████████| 1/1 [00:00<00:00,  1.70it/s]
Processing file:   4%|▍         | 50/1312 [11:56<5:01:36, 14.34s/it]

Score:0.945534
Face saved


Processing face: 100%|██████████| 2/2 [00:00<00:00,  3.48it/s]
Processing file:   4%|▍         | 51/1312 [12:10<5:00:37, 14.30s/it]

Score:0.91315484
Face saved
Added




Score:0.90727365
Face saved
Added




Score:0.9084298
Face saved
Added




Score:0.9234067
Face saved
Added




Score:0.91478384
Face saved
Added


Processing face: 100%|██████████| 6/6 [00:01<00:00,  4.11it/s]
Processing file:   4%|▍         | 52/1312 [12:26<5:13:35, 14.93s/it]

Score:0.92329925
Face saved
Added




Score:0.9687071
Face saved




Score:0.946138
Face saved


Processing face: 100%|██████████| 6/6 [00:01<00:00,  5.72it/s]
Processing file:   4%|▍         | 53/1312 [12:41<5:09:42, 14.76s/it]

Score:0.92237234
Face saved
Added




Score:0.9132881
Face saved
Added




Score:0.9751638
Face saved


Processing face: 100%|██████████| 5/5 [00:01<00:00,  4.82it/s]
Processing file:   4%|▍         | 54/1312 [12:55<5:07:46, 14.68s/it]

Score:0.971226
Face saved




Score:0.94651747
Face saved




Score:0.9262195
Face saved
Added




Score:0.96867377
Face saved


Processing face: 100%|██████████| 6/6 [00:01<00:00,  5.03it/s]
Processing file:   4%|▍         | 55/1312 [13:10<5:07:50, 14.69s/it]

Score:0.9624469
Face saved


Processing face: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]
Processing file:   4%|▍         | 56/1312 [13:24<5:03:15, 14.49s/it]

Score:0.92577887
Face saved
Added


Processing file:   4%|▍         | 57/1312 [13:38<5:00:12, 14.35s/it]

Score:0.9261168
Face saved
Added




Score:0.97882897
Face saved




Score:0.9200292
Face saved
Added


Processing face: 100%|██████████| 5/5 [00:01<00:00,  3.06it/s]
Processing file:   4%|▍         | 58/1312 [13:53<5:02:24, 14.47s/it]

Score:0.9205809
Face saved
Added




Score:0.984262
Face saved




Score:0.98187196
Face saved




Score:0.9798441
Face saved


Processing face: 100%|██████████| 6/6 [00:01<00:00,  3.26it/s]
Processing file:   4%|▍         | 59/1312 [14:08<5:08:46, 14.79s/it]

Score:0.97293276
Face saved


Processing face: 100%|██████████| 2/2 [00:00<00:00,  4.01it/s]
Processing file:   5%|▍         | 60/1312 [14:23<5:07:08, 14.72s/it]

Score:0.9087075
Face saved
Added
